4#ifndef FIRST_VISIT_MC_H
5#define FIRST_VISIT_MC_H
7#include "cuberl/base/cubeai_config.h"
14#ifdef CUBEAI_PRINT_DBG_MSGS
15 #include <boost/log/trivial.hpp>
24namespace rl::algos::mc
42 template<
typename EnvType,
typename TrajectoryGenerator,
typename DecayLRSchedule,
typename DiscountGenerator>
81 TrajectoryGenerator& trajectory_gen,
82 DecayLRSchedule& decay_lr_schedule,
116 void save(
const std::string& filename)
const;
138 TrajectoryGenerator trajectory_gen_;
144 DecayLRSchedule decay_lr_schedule_;
154 template<
typename EnvType,
155 typename TrajectoryGenerator,
typename DecayLRSchedule,
typename DiscountGenerator>
158 TrajectoryGenerator& trajectory_gen,
159 DecayLRSchedule& decay_lr_schedule,
163 config_(solver_config),
164 trajectory_gen_(trajectory_gen),
165 decay_lr_schedule_(decay_lr_schedule),
166 discount_generator_(discount_generator)
170 template<
typename EnvType,
171 typename TrajectoryGenerator,
typename DecayLRSchedule,
typename DiscountGenerator>
174 TrajectoryGenerator, DecayLRSchedule, DiscountGenerator>::actions_before_training_begins(
env_type& env){
176 v_.resize(env.n_states());
177 std::for_each(v_.begin(), v_.end(),
178 [](
auto& item){item = 0.0;});
181 template<
typename EnvType,
182 typename TrajectoryGenerator,
typename DecayLRSchedule,
typename DiscountGenerator>
185 TrajectoryGenerator, DecayLRSchedule, DiscountGenerator>::on_training_episode(
env_type& env,
189 auto start = std::chrono::steady_clock::now();
193 auto trajectory = trajectory_gen_(env, config_.max_steps);
195 const auto trajectory_size = std::distance(trajectory.begin(), trajectory.end());
197#ifdef CUBEAI_PRINT_DBG_MSGS
198 if(trajectory_size == 0){
199 BOOST_LOG_TRIVIAL(warning)<<
"Trajectory size="<<trajectory_size<<std::endl;
206 std::vector<real_t> rewards;
207 rewards.reserve(trajectory_size);
209 auto time_step_itr = trajectory.begin();
210 for(; time_step_itr != trajectory.end(); ++time_step_itr){
211 auto time_step = *time_step_itr;
212 rewards.push_back(time_step.reward());
216 auto discounts = discount_generator_(trajectory, config_.max_steps);
219 auto alpha = decay_lr_schedule_(config_.init_alpha, episode_idx);
221 std::vector<bool> visited(env.n_states(),
false);
222 time_step_itr = trajectory.begin();
223 for(
uint_t count=0; time_step_itr != trajectory.end(); ++time_step_itr, ++count){
225 auto time_step = *time_step_itr;
227 if(visited[time_step.observation()])
230 visited[time_step.observation()] =
true;
234 auto n_steps = std::distance(time_step_itr, trajectory.end());
241 auto mc_error = G - v_[time_step.observation()];
244 v_[time_step.observation()] += alpha * mc_error;
247 auto end = std::chrono::steady_clock::now();
248 std::chrono::duration<real_t> elapsed_seconds = end-start;
250 episode_info.episode_index = episode_idx;
251 episode_info.total_time = elapsed_seconds;
252 episode_info.episode_iterations = std::distance(trajectory.begin(), trajectory.end());
Definition first_visit_mc.h:44
void actions_before_training_begins(env_type &env)
actions_before_training_begins. Execute any actions the algorithm needs before starting the iteration...
Definition first_visit_mc.h:174
void actions_after_training_ends(env_type &)
actions_after_training_ends. Actions to execute after the training iterations have finished.
Definition first_visit_mc.h:95
DecayLRSchedule decay_lr_schedule_type
Definition first_visit_mc.h:61
EnvType env_type
The environment type.
Definition first_visit_mc.h:51
DiscountGenerator discount_generator_type
Definition first_visit_mc.h:66
void actions_after_episode_ends(env_type &, uint_t, const EpisodeInfo &)
actions_after_episode_ends. Actions to execute after a training episode ends.
Definition first_visit_mc.h:105
TrajectoryGenerator trajectory_generator_type
Definition first_visit_mc.h:56
void save(const std::string &filename) const
save the results
void actions_before_episode_begins(env_type &, uint_t)
actions_before_episode_begins. Actions to execute before a training episode begins.
Definition first_visit_mc.h:100
EpisodeInfo on_training_episode(env_type &env, uint_t episode_idx)
on_training_episode. Runs one training episode of the algorithm.
Definition first_visit_mc.h:185
env_type::time_step_type time_step_type
The time step type used by the environment.
Definition first_visit_mc.h:73
const std::string INVALID_STR
Invalid string.
Definition bitrl_consts.h:26
double real_t
real_t
Definition bitrl_types.h:23
Eigen::RowVectorX< T > DynVec
Dynamically sized row vector.
Definition bitrl_types.h:74
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
std::iterator_traits< IteratorType >::value_type dot_product(IteratorType bv1, IteratorType ev1, IteratorType bv2, IteratorType ev2)
Definition vector_math.h:610
std::vector< T > extract_subvector(const std::vector< T > &vec, uint_t end, bool up_to=true)
Definition vector_math.h:477
Various utilities used when working with RL problems.
Definition cuberl_types.h:16
The EpisodeInfo struct.
Definition episode_info.h:19
Definition first_visit_mc.h:28
real_t gamma
Definition first_visit_mc.h:29
uint_t max_steps
Definition first_visit_mc.h:34
uint_t n_episodes
Definition first_visit_mc.h:35
real_t tolerance
Definition first_visit_mc.h:30
real_t alpha_decay_ratio
Definition first_visit_mc.h:33
std::string save_path
Definition first_visit_mc.h:36
real_t init_alpha
Definition first_visit_mc.h:31
real_t min_alpha
Definition first_visit_mc.h:32