1#ifndef EPSILON_DOUBLE_QTABLE_GREEDY_POLICY_H
2#define EPSILON_DOUBLE_QTABLE_GREEDY_POLICY_H
4#include "cubeai/base/cubeai_types.h"
5#include "cubeai/rl/epsilon_decay_options.h"
6#include "cubeai/rl/rl_mixins.h"
15template<
typename TableType>
28 using with_decay_epsilon_option_mixin::choose_action_index;
34 EpsilonDecayOptionType decay_op,
35 real_t min_eps = 0.01, real_t max_eps=1.0,
36 real_t eps_decay=0.2, uint_t seed=0);
/// \brief Choose an action index for the given state using two Q-tables.
///
/// Going by the out-of-class definition visible further down in this file:
/// a uniform real number in [0, 1) is drawn; if it exceeds the current
/// epsilon held by with_decay_epsilon_option_mixin, the result of
/// with_double_q_table_max_action_mixin::max_action(q1, q2, state, n_actions)
/// is returned, otherwise an integer drawn uniformly from
/// [0, n_actions - 1] is returned.
///
/// \tparam StateTp type of the state passed through to max_action
/// \param q1 first table, forwarded to max_action
/// \param q2 second table, forwarded to max_action
/// \param state state for which an action is chosen
/// \return the chosen action index
///
/// NOTE(review): the visible definition constructs a std::mt19937 from the
/// stored seed on every call, so repeated calls with an unchanged seed would
/// appear to produce the same random draws -- confirm whether this is intended.
41 template<
typename StateTp>
42 uint_t
operator()(
const TableType& q1,
const TableType& q2,
const StateTp& state)
const;
59 void reset()noexcept{this->with_decay_epsilon_option_mixin::eps = this->with_decay_epsilon_option_mixin::eps_init;}
70 real_t
eps_value()const noexcept{
return this->with_decay_epsilon_option_mixin::eps;}
76 void set_seed(
const uint_t seed)
noexcept{this->with_decay_epsilon_option_mixin::seed = seed;}
80template<
typename TableType>
82 EpsilonDecayOptionType decay_op,
83 real_t min_eps, real_t max_eps, real_t eps_decay, uint_t seed)
85 with_decay_epsilon_option_mixin({eps, eps, min_eps, max_eps, eps_decay, n_actions, seed, decay_op})
88template<
typename TableType>
89template<
typename StateTp>
94 std::mt19937 gen(this->with_decay_epsilon_option_mixin::seed);
97 std::uniform_real_distribution<> real_dist_(0.0, 1.0);
99 if(real_dist_(gen) > this->with_decay_epsilon_option_mixin::eps){
101 return this->with_double_q_table_max_action_mixin::max_action(q1, q2, state,
102 this->with_decay_epsilon_option_mixin::n_actions);
105 std::uniform_int_distribution<> distrib_(0, this->with_decay_epsilon_option_mixin::n_actions - 1);
106 return distrib_(gen);
109template<
typename TableType>
112 this->with_decay_epsilon_option_mixin::eps = this->with_decay_epsilon_option_mixin::decay_eps(episode);
Definition epsilon_double_qtable_greedy_policy.h:17
void set_epsilon_decay_factor(real_t eps_decay) noexcept
set_epsilon_decay_factor
Definition epsilon_double_qtable_greedy_policy.h:65
TableType table_type
table_type
Definition epsilon_double_qtable_greedy_policy.h:23
void adjust_on_episode(uint_t episode) noexcept
choose_action_index
Definition epsilon_double_qtable_greedy_policy.h:111
uint_t operator()(const TableType &q1, const TableType &q2, const StateTp &state) const
operator()
Definition epsilon_double_qtable_greedy_policy.h:91
real_t eps_value() const noexcept
eps_value
Definition epsilon_double_qtable_greedy_policy.h:70
void set_seed(const uint_t seed) noexcept
set_seed
Definition epsilon_double_qtable_greedy_policy.h:76
EpsilonDoubleQTableGreedyPolicy(real_t eps, uint_t n_actions, EpsilonDecayOptionType decay_op, real_t min_eps=0.01, real_t max_eps=1.0, real_t eps_decay=0.2, uint_t seed=0)
EpsilonDoubleQTableGreedyPolicy.
Definition epsilon_double_qtable_greedy_policy.h:81
void reset() noexcept
reset
Definition epsilon_double_qtable_greedy_policy.h:59
Definition mc_tree_search_solver.h:22