bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
epsilon_double_qtable_greedy_policy.h
Go to the documentation of this file.
1#ifndef EPSILON_DOUBLE_QTABLE_GREEDY_POLICY_H
2#define EPSILON_DOUBLE_QTABLE_GREEDY_POLICY_H
3
4#include "cubeai/base/cubeai_types.h"
5#include "cubeai/rl/epsilon_decay_options.h"
6#include "cubeai/rl/rl_mixins.h"
7
8namespace cubeai {
9namespace rl {
10namespace policies {
11
15template<typename TableType>
16class EpsilonDoubleQTableGreedyPolicy: protected with_decay_epsilon_option_mixin, protected with_double_q_table_max_action_mixin
17{
18public:
19
23 typedef TableType table_type;
24
28 using with_decay_epsilon_option_mixin::choose_action_index;
29
33 explicit EpsilonDoubleQTableGreedyPolicy(real_t eps, uint_t n_actions,
34 EpsilonDecayOptionType decay_op,
35 real_t min_eps = 0.01, real_t max_eps=1.0,
36 real_t eps_decay=0.2, uint_t seed=0);
37
41 template<typename StateTp>
42 uint_t operator()(const TableType& q1, const TableType& q2, const StateTp& state)const;
43
47 //template<typename VectorType>
48 //uint_t choose_action_index(const VectorType& values)const;
49
54 void adjust_on_episode(uint_t episode)noexcept;
55
59 void reset()noexcept{this->with_decay_epsilon_option_mixin::eps = this->with_decay_epsilon_option_mixin::eps_init;}
60
65 void set_epsilon_decay_factor(real_t eps_decay)noexcept{this->with_decay_epsilon_option_mixin::epsilon_decay = eps_decay;}
66
70 real_t eps_value()const noexcept{return this->with_decay_epsilon_option_mixin::eps;}
71
76 void set_seed(const uint_t seed)noexcept{this->with_decay_epsilon_option_mixin::seed = seed;}
77
78};
79
80template<typename TableType>
82 EpsilonDecayOptionType decay_op,
83 real_t min_eps, real_t max_eps, real_t eps_decay, uint_t seed)
84 :
85 with_decay_epsilon_option_mixin({eps, eps, min_eps, max_eps, eps_decay, n_actions, seed, decay_op})
86{}
87
88template<typename TableType>
89template<typename StateTp>
90uint_t
91EpsilonDoubleQTableGreedyPolicy<TableType>::operator()(const TableType& q1, const TableType& q2, const StateTp& state)const{
92
93
94 std::mt19937 gen(this->with_decay_epsilon_option_mixin::seed);
95
96 // generate a number in [0, 1]
97 std::uniform_real_distribution<> real_dist_(0.0, 1.0);
98
99 if(real_dist_(gen) > this->with_decay_epsilon_option_mixin::eps){
100 // select greedy action with probability 1 - epsilon
101 return this->with_double_q_table_max_action_mixin::max_action(q1, q2, state,
102 this->with_decay_epsilon_option_mixin::n_actions);
103 }
104
105 std::uniform_int_distribution<> distrib_(0, this->with_decay_epsilon_option_mixin::n_actions - 1);
106 return distrib_(gen);
107}
108
109template<typename TableType>
110void
112 this->with_decay_epsilon_option_mixin::eps = this->with_decay_epsilon_option_mixin::decay_eps(episode);
113}
114
115/*template<typename TableType>
116template<typename VectorType>
117uint_t
118EpsilonDoubleQTableGreedyPolicy<TableType>::choose_action_index(const VectorType& values)const{
119
120 std::mt19937 gen(this->with_decay_epsilon_option_mixin::seed);
121
122 // generate a number in [0, 1]
123 std::uniform_real_distribution<> real_dist_(0.0, 1.0);
124
125 if(real_dist_(gen) > this->with_decay_epsilon_option_mixin::eps){
126 // select greedy action with probability 1 - epsilon
127 return arg_max(values);
128 }
129
130 std::uniform_int_distribution<> distrib_(0, this->with_decay_epsilon_option_mixin::n_actions - 1);
131 return distrib_(gen);
132
133}*/
134
135}
136
137}
138
139}
140
141#endif // EPSILON_DOUBLE_QTABLE_GREEDY_POLICY_H
Definition epsilon_double_qtable_greedy_policy.h:17
void set_epsilon_decay_factor(real_t eps_decay) noexcept
set_epsilon_decay_factor
Definition epsilon_double_qtable_greedy_policy.h:65
TableType table_type
table_type
Definition epsilon_double_qtable_greedy_policy.h:23
void adjust_on_episode(uint_t episode) noexcept
adjust_on_episode
Definition epsilon_double_qtable_greedy_policy.h:111
uint_t operator()(const TableType &q1, const TableType &q2, const StateTp &state) const
operator()
Definition epsilon_double_qtable_greedy_policy.h:91
real_t eps_value() const noexcept
eps_value
Definition epsilon_double_qtable_greedy_policy.h:70
void set_seed(const uint_t seed) noexcept
set_seed
Definition epsilon_double_qtable_greedy_policy.h:76
EpsilonDoubleQTableGreedyPolicy(real_t eps, uint_t n_actions, EpsilonDecayOptionType decay_op, real_t min_eps=0.01, real_t max_eps=1.0, real_t eps_decay=0.2, uint_t seed=0)
EpsilonDoubleQTableGreedyPolicy.
Definition epsilon_double_qtable_greedy_policy.h:81
void reset() noexcept
reset
Definition epsilon_double_qtable_greedy_policy.h:59
Definition mc_tree_search_solver.h:22