All modules for which code is available
- coax._core.policy
- coax._core.q
- coax._core.random_policy
- coax._core.reward_function
- coax._core.stochastic_q
- coax._core.stochastic_reward_function
- coax._core.stochastic_transition_model
- coax._core.stochastic_v
- coax._core.successor_state_q
- coax._core.transition_model
- coax._core.v
- coax._core.value_based_policy
- coax._core.worker
- coax.envs._connect_four
- coax.experience_replay._prioritized
- coax.experience_replay._simple
- coax.model_updaters._model_updater
- coax.policy_objectives._deterministic_pg
- coax.policy_objectives._ppo_clip
- coax.policy_objectives._soft_pg
- coax.policy_objectives._vanilla_pg
- coax.proba_dists._categorical
- coax.proba_dists._composite
- coax.proba_dists._discretized_interval
- coax.proba_dists._empirical_quantile
- coax.proba_dists._normal
- coax.proba_dists._squashed_normal
- coax.regularizers._entropy
- coax.regularizers._kl_div
- coax.reward_tracing._montecarlo
- coax.reward_tracing._nstep
- coax.reward_tracing._transition
- coax.td_learning._clippeddoubleqlearning
- coax.td_learning._doubleqlearning
- coax.td_learning._expectedsarsa
- coax.td_learning._qlearning
- coax.td_learning._sarsa
- coax.td_learning._simple_td
- coax.td_learning._softclippeddoubleqlearning
- coax.td_learning._softqlearning
- coax.utils._action_noise
- coax.utils._array
- coax.utils._dmc_gym
- coax.utils._jit
- coax.utils._misc
- coax.utils._quantile_funcs
- coax.utils._segment_tree
- coax.value_losses._losses
- coax.value_transforms._base
- coax.value_transforms._log_transform
- coax.wrappers._box_spaces
- coax.wrappers._frame_stacking
- coax.wrappers._meta_policy
- coax.wrappers._train_monitor