7#include "cuberl/base/cubeai_config.h"
25template<
typename StateTp>
27get_table_values_(
const std::map<StateTp,DynVec<real_t>>& table,
const StateTp& state ){
29 auto itr = table.find(state);
31 if(itr == table.end()){
32 assert(
false &&
"Invalid state given");
40template<
typename StateTp>
42get_table_values_(std::map<StateTp,DynVec<real_t>>& table,
const StateTp& state ){
44 auto itr = table.find(state);
46 if(itr == table.end()){
47 assert(
false &&
"Invalid state given");
91 template<
typename VectorType>
95template<
typename VectorType>
102 std::uniform_real_distribution<> real_dist_(0.0, 1.0);
106 return arg_max(values);
110 return distrib_(gen);
137template<
typename TableTp>
182 return q_table_1(state, action);
189 return q_table_2(state, action);
197 q_table_1(state, action) = value;
205 q_table_2(state, action) = value;
211template<
typename KeyTp>
236 void initialize(
const std::vector<index_type>& indices,
action_type n_actions,
real_t init_value);
252template<
typename KeyTp>
261 for(
uint_t i=0; i< indices.size(); ++i){
263 q_table_1[indices[i]] = init_vals;
264 q_table_2[indices[i]] = init_vals;
268template<
typename KeyTp>
273 static_assert (index == 1 || index == 2,
"Invalid index for template parameter");
275 return get_table_values_(q_table_1, state)[action];
278 return get_table_values_(q_table_2, state)[action];
282template<
typename KeyTp>
289 static_assert (index == 1 || index == 2,
"Invalid index for template parameter");
292 auto& vals1 = get_table_values_(q_table_1, state);
293 vals1[action] = value;
296 auto& vals2 = get_table_values_(q_table_2, state);
297 vals2[action] = value;
307 template<
typename TableTp,
typename StateTp>
309 const StateTp& state,
uint_t n_actions);
314 template<
typename TableTp,
typename StateTp>
320template<
typename TableTp,
typename StateTp>
323 const StateTp& state,
uint_t ){
325 const auto& vals1 = get_table_values_(q1_table, state);
326 const auto& vals2 = get_table_values_(q2_table, state);
327 auto sum = vals1 + vals2;
332template<
typename TableTp,
typename StateTp>
336 const auto& vals = get_table_values_(q_table, state);
double real_t
real_t
Definition bitrl_types.h:23
Eigen::RowVectorX< T > DynVec
Dynamically sized row vector.
Definition bitrl_types.h:74
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
Eigen::MatrixX< T > DynMat
Dynamically sized matrix to use around the library.
Definition bitrl_types.h:49
Definition dummy_agent.h:8
uint_t max_action(const DynMat< real_t > &qtable, uint_t state, uint_t n_actions)
max_action
The with_decay_epsilon_option_mixin struct.
Definition rl_mixins.h:71
uint_t n_actions
Definition rl_mixins.h:77
EpsilonDecayOptionType decay_op
Definition rl_mixins.h:79
real_t decay_eps(uint_t episode_index)
decay_eps
uint_t seed
Definition rl_mixins.h:78
real_t max_eps
Definition rl_mixins.h:75
real_t min_eps
Definition rl_mixins.h:74
real_t eps_init
Definition rl_mixins.h:72
real_t eps
Definition rl_mixins.h:73
uint_t choose_action_index(const VectorType &values) const
Definition rl_mixins.h:97
real_t epsilon_decay
Definition rl_mixins.h:76
Definition rl_mixins.h:302
static uint_t max_action(const TableTp &q1_table, const TableTp &q2_table, const StateTp &state, uint_t n_actions)
Returns the max action by averaging the state values from the two tables.
Definition rl_mixins.h:322
DynMat< value_type > q_table_2
q_table_2
Definition rl_mixins.h:159
DynMat< value_type > q_table_1
q_table_1
Definition rl_mixins.h:154
void initialize(const std::vector< index_type > &indices, action_type n_actions, real_t init_value)
initialize
uint_t index_type
Definition rl_mixins.h:146
real_t value_type
Definition rl_mixins.h:149
void set(const state_type &state, const action_type action, const value_type value)
value_type get(const state_type &state, const action_type action) const
uint_t state_type
Definition rl_mixins.h:147
uint_t action_type
Definition rl_mixins.h:148
uint_t action_type
Definition rl_mixins.h:217
value_type get(const state_type &state, const action_type action) const
KeyTp state_type
Definition rl_mixins.h:216
std::map< KeyTp, DynVec< real_t > > q_table_1
q_table_1
Definition rl_mixins.h:223
KeyTp index_type
Definition rl_mixins.h:215
std::map< KeyTp, DynVec< real_t > > q_table_2
q_table_2
Definition rl_mixins.h:228
real_t value_type
Definition rl_mixins.h:218
Definition rl_mixins.h:138
The WithQTableMixin struct.
Definition rl_mixins.h:118
void initialize(state_type n_states, action_type n_actions, real_t init_value)
initialize
real_t value_type
Definition rl_mixins.h:121
DynMat< value_type > q_table
q_table
Definition rl_mixins.h:126
uint_t action_type
Definition rl_mixins.h:120
uint_t state_type
Definition rl_mixins.h:119