1#ifndef VALUE_ITERATION_H
2#define VALUE_ITERATION_H
20namespace rl::algos::dp
36 template<
typename EnvType>
82 void save(
const std::string& filename)
const;
104 template<
typename EnvType>
112 template<
typename EnvType>
118 template<
typename EnvType>
124 auto start = std::chrono::steady_clock::now();
128 for(
uint_t s=0; s< env.n_states(); ++s){
134 delta = std::max(delta, std::fabs(v_[s] - v));
139 if(delta < config_.tolerance){
140 info.stop_training =
true;
143 auto end = std::chrono::steady_clock::now();
144 std::chrono::duration<real_t> elapsed_seconds = end-start;
146 info.episode_index = episode_idx;
147 info.episode_iterations = env.n_states();
148 info.total_time = elapsed_seconds;
152 info.episode_reward = delta;
157 template<
typename EnvType>
161 save(config_.save_path);
166 template<
typename EnvType>
175 for(
uint_t s=0; s < static_cast<uint_t>(v_.size()); ++s){
176 auto row = std::make_tuple(s, v_[s]);
181 template<
typename EnvType>
188 config_.gamma,policy);
The CSVWriter class. Handles writing into CSV file format.
Definition csv_file_writer.h:22
void write_column_names(const std::vector< std::string > &col_names, bool write_header=true)
Write the column names.
Definition csv_file_writer.cpp:16
void write_row(const std::vector< T > &vals)
Write a row of the file.
Definition csv_file_writer.h:89
virtual void open() override
Open the file for writing.
Definition file_writer_base.cpp:21
The DPSolverBase class.
Definition dp_algo_base.h:21
RLSolverBase< EnvType >::env_type env_type
The environment type the solver is using.
Definition dp_algo_base.h:27
ValueIteration class.
Definition value_iteration.h:38
ValueIteration(const ValueIterationConfig config)
ValueIteration.
Definition value_iteration.h:105
void save(const std::string &filename) const
Definition value_iteration.h:168
virtual void actions_before_training_begins(env_type &env) override
actions_before_training_begins. Execute any actions the algorithm needs before starting the iteration...
Definition value_iteration.h:114
DPSolverBase< EnvType >::env_type env_type
env_t
Definition value_iteration.h:44
virtual void actions_after_episode_ends(env_type &, uint_t, const EpisodeInfo &) override
actions_after_training_episode
Definition value_iteration.h:71
virtual void actions_before_episode_begins(env_type &, uint_t) override
actions_before_training_episode
Definition value_iteration.h:66
virtual void actions_after_training_ends(env_type &) override
actions_after_training_ends. Actions to execute after the training iterations have finished
Definition value_iteration.h:159
virtual EpisodeInfo on_training_episode(env_type &env, uint_t episode_idx) override
on_training_episode. Do one training episode of the algorithm
Definition value_iteration.h:120
cuberl::rl::policies::MaxTabularPolicy build_policy(const env_type &env) const
Definition value_iteration.h:183
class MaxTabularPolicy
Definition max_tabular_policy.h:30
const real_t TOLERANCE
Tolerance used around the library.
Definition bitrl_consts.h:31
const std::string INVALID_STR
Invalid string.
Definition bitrl_consts.h:26
double real_t
real_t
Definition bitrl_types.h:23
Eigen::RowVectorX< T > DynVec
Dynamically sized row vector.
Definition bitrl_types.h:74
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
auto state_actions_from_v(const WorldTp &env, const DynVec< real_t > &v, real_t gamma, uint_t state) -> DynVec< real_t >
Given the state index, returns the list of actions under the provided value functions.
Definition utils.h:23
Various utilities used when working with RL problems.
Definition cuberl_types.h:16
The EpisodeInfo struct.
Definition episode_info.h:19
The ValueIterationConfig struct.
Definition value_iteration.h:27
std::string save_path
Definition value_iteration.h:30
real_t tolerance
Definition value_iteration.h:29
real_t gamma
Definition value_iteration.h:28
Definition max_tabular_policy.h:125
void build_from_state_function(const EnvType &env, const DynVec< real_t > &v, real_t gamma, MaxTabularPolicy &policy)
Definition max_tabular_policy.h:139