|
bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
|
The ExpectedSARSA class. Simple implementation of the expected SARSA algorithm. More...
#include <expected_sarsa.h>


Public Types | |
| typedef TDAlgoBase< EnvTp >::env_type | env_type |
| env_t | |
| typedef TDAlgoBase< EnvTp >::action_type | action_type |
| action_t | |
| typedef TDAlgoBase< EnvTp >::state_type | state_type |
| state_t | |
| typedef ActionSelector | action_selector_type |
| action_selector_t | |
Public Types inherited from cuberl::rl::algos::td::TDAlgoBase< EnvTp > | |
| typedef EnvTp | env_type |
| env_t | |
| typedef env_type::action_type | action_type |
| action_t | |
| typedef env_type::state_type | state_type |
| state_t | |
Public Types inherited from cuberl::rl::algos::RLSolverBase< EnvType > | |
| typedef EnvType | env_type |
Public Member Functions | |
| ExpectedSARSA (uint_t n_episodes, real_t tolerance, real_t gamma, real_t eta, uint_t plot_f, env_type &env, uint_t max_num_iterations_per_episode, const ActionSelector &selector) | |
| Constructor. | |
| ExpectedSARSA (TDAlgoConfig config, env_type &env, const ActionSelector &selector) | |
| Constructor. | |
| virtual void | on_episode () override final |
| on_episode. Performs the iterations for one training episode | |
Public Member Functions inherited from cuberl::rl::algos::td::TDAlgoBase< EnvTp > | |
| virtual | ~TDAlgoBase ()=default |
| Destructor. | |
Public Member Functions inherited from cuberl::rl::algos::RLSolverBase< EnvType > | |
| virtual | ~RLSolverBase ()=default |
| Destructor. | |
| virtual void | actions_before_training_begins (env_type &)=0 |
| actions_before_training_begins. Execute any actions the algorithm needs before starting the iterations | |
| virtual void | actions_after_training_ends (env_type &)=0 |
| actions_after_training_ends. Actions to execute after the training iterations have finished | |
| virtual void | actions_before_episode_begins (env_type &, uint_t) |
| actions_before_episode_begins. Actions to execute before a training episode begins | |
| virtual void | actions_after_episode_ends (env_type &, uint_t, const EpisodeInfo &) |
| actions_after_episode_ends. Actions to execute after a training episode ends | |
| virtual EpisodeInfo | on_training_episode (env_type &, uint_t)=0 |
| on_training_episode. Performs one training episode of the algorithm | |
Additional Inherited Members | |
Protected Member Functions inherited from cuberl::rl::algos::td::TDAlgoBase< EnvTp > | |
| TDAlgoBase ()=default | |
| Constructor. | |
Protected Member Functions inherited from cuberl::rl::algos::RLSolverBase< EnvType > | |
| RLSolverBase ()=default | |
| Constructor. | |
The ExpectedSARSA class. Simple implementation of the expected SARSA algorithm.
| typedef ActionSelector cuberl::rl::algos::td::ExpectedSARSA< EnvTp, ActionSelector >::action_selector_type |
action_selector_t
| typedef TDAlgoBase<EnvTp>::action_type cuberl::rl::algos::td::ExpectedSARSA< EnvTp, ActionSelector >::action_type |
action_t
| typedef TDAlgoBase<EnvTp>::env_type cuberl::rl::algos::td::ExpectedSARSA< EnvTp, ActionSelector >::env_type |
env_t
| typedef TDAlgoBase<EnvTp>::state_type cuberl::rl::algos::td::ExpectedSARSA< EnvTp, ActionSelector >::state_type |
state_t
| cuberl::rl::algos::td::ExpectedSARSA< EnvTp, ActionSelector >::ExpectedSARSA | ( | uint_t | n_episodes, |
| real_t | tolerance, | ||
| real_t | gamma, | ||
| real_t | eta, | ||
| uint_t | plot_f, | ||
| env_type & | env, | ||
| uint_t | max_num_iterations_per_episode, | ||
| const ActionSelector & | selector | ||
| ) |
Constructor.
| cuberl::rl::algos::td::ExpectedSARSA< EnvTp, ActionSelector >::ExpectedSARSA | ( | TDAlgoConfig | config, |
| env_type & | env, | ||
| const ActionSelector & | selector | ||
| ) |
Constructor.
| virtual void cuberl::rl::algos::td::ExpectedSARSA< EnvTp, ActionSelector >::on_episode | ( | ) |
final override virtual |
on_episode. Performs the iterations for one training episode