1#ifndef POLICY_IMPROVEMENT_H
2#define POLICY_IMPROVEMENT_H
13namespace rl::algos::dp
21 template<
typename EnvType,
typename PolicyType>
113 template<
typename EnvType,
typename PolicyType>
122 policy_adaptor_(val_func.size(), action_space_size, policy)
125 template<
typename EnvType,
typename PolicyType>
129 auto start = std::chrono::steady_clock::now();
131 std::map<std::string, std::any> options;
133 for(
uint_t s=0; s<env.n_states(); ++s){
137 options.insert_or_assign(
"state", s);
138 options.insert_or_assign(
"state_actions", std::any(state_actions));
139 policy_ = policy_adaptor_(options);
142 auto end = std::chrono::steady_clock::now();
143 std::chrono::duration<real_t> elapsed_seconds = end-start;
146 info.episode_index = episode_idx;
147 info.episode_iterations = env.n_states();
148 info.total_time = elapsed_seconds;
The DPSolverBase class.
Definition dp_algo_base.h:21
RLSolverBase< EnvType >::env_type env_type
The environment type the solver is using.
Definition dp_algo_base.h:27
The PolicyImprovement class. PolicyImprovement is not a real algorithm in the sense that it looks for...
Definition policy_improvement.h:23
const policy_type & policy() const
policy
Definition policy_improvement.h:76
cuberl::rl::policies::StochasticAdaptorPolicy< policy_type > policy_adaptor_
How to adapt the policy.
Definition policy_improvement.h:110
policy_type & policy_
policy_
Definition policy_improvement.h:105
PolicyType policy_type
policy_type
Definition policy_improvement.h:34
virtual void actions_after_episode_ends(env_type &, uint_t, const EpisodeInfo &) override
actions_after_training_episode
Definition policy_improvement.h:64
PolicyImprovement(uint_t action_space_size, real_t gamma, const DynVec< real_t > &val_func, policy_type &policy)
PolicyImprovement constructor.
Definition policy_improvement.h:114
real_t gamma_
gamma_
Definition policy_improvement.h:95
virtual void actions_after_training_ends(env_type &) override
actions_after_training_ends. Actions to execute after the training iterations have finished
Definition policy_improvement.h:54
DPSolverBase< EnvType >::env_type env_type
env_t
Definition policy_improvement.h:29
virtual void actions_before_training_begins(env_type &) override
actions_before_training_begins. Execute any actions the algorithm needs before starting the iteration...
Definition policy_improvement.h:48
void set_value_function(const DynVec< real_t > &v)
set_value_function
Definition policy_improvement.h:88
DynVec< real_t > v_
v_
Definition policy_improvement.h:100
virtual void actions_before_episode_begins(env_type &, uint_t) override
actions_before_training_episode
Definition policy_improvement.h:59
policy_type & policy()
policy
Definition policy_improvement.h:82
virtual EpisodeInfo on_training_episode(env_type &env, uint_t episode_idx) override
on_episode Do one on_episode of the algorithm
Definition policy_improvement.h:127
The StochasticAdaptorPolicy class.
Definition policy_stochastic_adaptor.h:27
double real_t
real_t
Definition bitrl_types.h:23
Eigen::RowVectorX< T > DynVec
Dynamically sized row vector.
Definition bitrl_types.h:74
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
auto state_actions_from_v(const WorldTp &env, const DynVec< real_t > &v, real_t gamma, uint_t state) -> DynVec< real_t >
Given the state index returns the list of actions under the provided value functions.
Definition utils.h:23
Various utilities used when working with RL problems.
Definition cuberl_types.h:16
The EpisodeInfo struct.
Definition episode_info.h:19