bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
policy_improvement.h
Go to the documentation of this file.
1#ifndef POLICY_IMPROVEMENT_H
2#define POLICY_IMPROVEMENT_H
3
7
#include <any>
#include <chrono>
#include <map>
#include <string>

12namespace cuberl{
13namespace rl::algos::dp
14{
15
21 template<typename EnvType, typename PolicyType>
22 class PolicyImprovement: public DPSolverBase<EnvType>
23 {
24 public:
25
30
34 typedef PolicyType policy_type;
35
39 PolicyImprovement(uint_t action_space_size,
40 real_t gamma,
41 const DynVec<real_t>& val_func,
43
48 virtual void actions_before_training_begins(env_type& /*env*/)override{}
49
54 virtual void actions_after_training_ends(env_type& /*env*/)override{}
55
59 virtual void actions_before_episode_begins(env_type&, uint_t /*episode_idx*/)override{}
60
64 virtual void actions_after_episode_ends(env_type&, uint_t /*episode_idx*/,
65 const EpisodeInfo& /*einfo*/)override{}
66
70 virtual EpisodeInfo on_training_episode(env_type& env, uint_t episode_idx) override;
71
76 const policy_type& policy()const{return policy_;}
77
83
89
90 protected:
91
96
101
106
111 };
112
113 template<typename EnvType, typename PolicyType>
115 real_t gamma, const DynVec<real_t>& val_func,
116 policy_type& policy)
117 :
118 DPSolverBase<EnvType>(),
119 gamma_(gamma),
120 v_(val_func),
121 policy_(policy),
122 policy_adaptor_(val_func.size(), action_space_size, policy)
123 {}
124
125 template<typename EnvType, typename PolicyType>
128
129 auto start = std::chrono::steady_clock::now();
130
131 std::map<std::string, std::any> options;
132
133 for(uint_t s=0; s<env.n_states(); ++s){
134
135 auto state_actions = state_actions_from_v(env, v_, gamma_, s);
136
137 options.insert_or_assign("state", s);
138 options.insert_or_assign("state_actions", std::any(state_actions));
139 policy_ = policy_adaptor_(options);
140 }
141
142 auto end = std::chrono::steady_clock::now();
143 std::chrono::duration<real_t> elapsed_seconds = end-start;
144
145 EpisodeInfo info;
146 info.episode_index = episode_idx;
147 info.episode_iterations = env.n_states();
148 info.total_time = elapsed_seconds;
149 return info;
150 }
151
152
153}
154}
155
156#endif // POLICY_IMPROVEMENT_H
The DPSolverBase class.
Definition dp_algo_base.h:21
RLSolverBase< EnvType >::env_type env_type
The environment type the solver is using.
Definition dp_algo_base.h:27
The PolicyImprovement class. PolicyImprovement is not a real algorithm in the sense that it looks for...
Definition policy_improvement.h:23
const policy_type & policy() const
policy
Definition policy_improvement.h:76
cuberl::rl::policies::StochasticAdaptorPolicy< policy_type > policy_adaptor_
How to adapt the policy.
Definition policy_improvement.h:110
policy_type & policy_
policy_
Definition policy_improvement.h:105
PolicyType policy_type
policy_type
Definition policy_improvement.h:34
virtual void actions_after_episode_ends(env_type &, uint_t, const EpisodeInfo &) override
actions_after_training_episode
Definition policy_improvement.h:64
PolicyImprovement(uint_t action_space_size, real_t gamma, const DynVec< real_t > &val_func, policy_type &policy)
PolicyImprovement constructor.
Definition policy_improvement.h:114
real_t gamma_
gamma_
Definition policy_improvement.h:95
virtual void actions_after_training_ends(env_type &) override
actions_after_training_ends. Actions to execute after the training iterations have finished
Definition policy_improvement.h:54
DPSolverBase< EnvType >::env_type env_type
env_t
Definition policy_improvement.h:29
virtual void actions_before_training_begins(env_type &) override
actions_before_training_begins. Execute any actions the algorithm needs before starting the iteration...
Definition policy_improvement.h:48
void set_value_function(const DynVec< real_t > &v)
set_value_function
Definition policy_improvement.h:88
DynVec< real_t > v_
v_
Definition policy_improvement.h:100
virtual void actions_before_episode_begins(env_type &, uint_t) override
actions_before_training_episode
Definition policy_improvement.h:59
policy_type & policy()
policy
Definition policy_improvement.h:82
virtual EpisodeInfo on_training_episode(env_type &env, uint_t episode_idx) override
on_training_episode. Do one episode of the algorithm, i.e. one greedy sweep over the state space
Definition policy_improvement.h:127
The StochasticAdaptorPolicy class.
Definition policy_stochastic_adaptor.h:27
double real_t
real_t
Definition bitrl_types.h:23
Eigen::RowVectorX< T > DynVec
Dynamically sized row vector.
Definition bitrl_types.h:74
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
auto state_actions_from_v(const WorldTp &env, const DynVec< real_t > &v, real_t gamma, uint_t state) -> DynVec< real_t >
Given the state index returns the list of actions under the provided value functions.
Definition utils.h:23
Various utilities used when working with RL problems.
Definition cuberl_types.h:16
The EpisodeInfo struct.
Definition episode_info.h:19