1#ifndef VALUE_ITERATION_H
2#define VALUE_ITERATION_H
4#include "cuberl/base/cubeai_config.h"
21namespace rl::algos::dp
37 template<
typename EnvType>
83 void save(
const std::string& filename)
const;
105 template<
typename EnvType>
113 template<
typename EnvType>
119 template<
typename EnvType>
125 auto start = std::chrono::steady_clock::now();
129 for(
uint_t s=0; s< env.n_states(); ++s){
135 delta = std::max(delta, std::fabs(v_[s] - v));
140 if(delta < config_.tolerance){
141 info.stop_training =
true;
144 auto end = std::chrono::steady_clock::now();
145 std::chrono::duration<real_t> elapsed_seconds = end-start;
147 info.episode_index = episode_idx;
148 info.episode_iterations = env.n_states();
149 info.total_time = elapsed_seconds;
153 info.episode_reward = delta;
158 template<
typename EnvType>
162 save(config_.save_path);
167 template<
typename EnvType>
176 for(
uint_t s=0; s < static_cast<uint_t>(v_.size()); ++s){
177 auto row = std::make_tuple(s, v_[s]);
182 template<
typename EnvType>
189 config_.gamma,policy);
The CSVWriter class. Handles writing into CSV file format.
Definition csv_file_writer.h:22
void write_column_names(const std::vector< std::string > &col_names, bool write_header=true)
Write the column names.
Definition csv_file_writer.cpp:16
void write_row(const std::vector< T > &vals)
Write a row of the file.
Definition csv_file_writer.h:89
virtual void open() override
Open the file for writing.
Definition file_writer_base.cpp:21
The DPSolverBase class.
Definition dp_algo_base.h:21
RLSolverBase< EnvType >::env_type env_type
The environment type the solver is using.
Definition dp_algo_base.h:27
ValueIteration class.
Definition value_iteration.h:39
ValueIteration(const ValueIterationConfig config)
ValueIteration.
Definition value_iteration.h:106
void save(const std::string &filename) const
Definition value_iteration.h:169
virtual void actions_before_training_begins(env_type &env) override
actions_before_training_begins. Execute any actions the algorithm needs before starting the iteration...
Definition value_iteration.h:115
DPSolverBase< EnvType >::env_type env_type
env_t
Definition value_iteration.h:45
virtual void actions_after_episode_ends(env_type &, uint_t, const EpisodeInfo &) override
actions_after_training_episode
Definition value_iteration.h:72
virtual void actions_before_episode_begins(env_type &, uint_t) override
actions_before_training_episode
Definition value_iteration.h:67
virtual void actions_after_training_ends(env_type &) override
actions_after_training_ends. Actions to execute after the training iterations have finished.
Definition value_iteration.h:160
virtual EpisodeInfo on_training_episode(env_type &env, uint_t episode_idx) override
on_episode. Do one episode of the algorithm.
Definition value_iteration.h:121
cuberl::rl::policies::MaxTabularPolicy build_policy(const env_type &env) const
Definition value_iteration.h:184
class MaxTabularPolicy
Definition max_tabular_policy.h:30
const real_t TOLERANCE
Tolerance used around the library.
Definition bitrl_consts.h:31
const std::string INVALID_STR
Invalid string.
Definition bitrl_consts.h:26
double real_t
real_t
Definition bitrl_types.h:23
Eigen::RowVectorX< T > DynVec
Dynamically sized row vector.
Definition bitrl_types.h:74
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
auto state_actions_from_v(const WorldTp &env, const DynVec< real_t > &v, real_t gamma, uint_t state) -> DynVec< real_t >
Given the state index, returns the list of actions under the provided value function.
Definition utils.h:23
Various utilities used when working with RL problems.
Definition cuberl_types.h:16
The EpisodeInfo struct.
Definition episode_info.h:19
The ValueIterationConfig struct.
Definition value_iteration.h:28
std::string save_path
Definition value_iteration.h:31
real_t tolerance
Definition value_iteration.h:30
real_t gamma
Definition value_iteration.h:29
Definition max_tabular_policy.h:125
void build_from_state_function(const EnvType &env, const DynVec< real_t > &v, real_t gamma, MaxTabularPolicy &policy)
Definition max_tabular_policy.h:139