1#ifndef EPSILON_GREEDY_POLICY_H
2#define EPSILON_GREEDY_POLICY_H
4#include "cuberl/base/cubeai_config.h"
66 template<
typename MapType>
73 template<
typename VecType>
79 template<
typename MatType>
87 template<
typename VecTp>
/// Reset the policy: restores eps_ to the stored initial value eps_init_.
106 void reset()noexcept{eps_ = eps_init_;}
137 mutable std::mt19937 generator_;
152epsilon_decay_(epsilon_decay),
180template<
typename VecType>
185 std::uniform_real_distribution<> real_dist_(0.0, 1.0);
187 if(real_dist_(generator_) > eps_){
193 return random_policy_(vec);
197template<
typename VecTp>
201 std::uniform_real_distribution<> real_dist_(0.0, 1.0);
203 if(real_dist_(generator_) > eps_){
209 return random_policy_(vec);
The EpsilonGreedyPolicy class.
Definition epsilon_greedy_policy.h:30
EpsilonGreedyPolicy(real_t eps)
Constructor. Creates an epsilon-greedy tabular policy.
Definition epsilon_greedy_policy.h:160
void reset() noexcept
Reset the policy.
Definition epsilon_greedy_policy.h:106
static constexpr real_t MIN_EPS
Definition epsilon_greedy_policy.h:39
EpsilonDecayOption decay_option() const noexcept
Returns the decay option.
Definition epsilon_greedy_policy.h:122
real_t eps_value() const noexcept
Returns the value of the epsilon.
Definition epsilon_greedy_policy.h:111
static constexpr real_t MAX_EPS
Definition epsilon_greedy_policy.h:40
void on_episode(uint_t episode_idx) noexcept
Any actions the policy should perform on the given episode index.
static constexpr real_t EPSILON_DECAY_FACTOR
Definition epsilon_greedy_policy.h:41
output_type get_action(const MatType &q_map, uint_t state_idx)
get_action. Given a table q_map and a state index state_idx, returns an action (output_type).
uint_t output_type
The type returned when calling this->operator()
Definition epsilon_greedy_policy.h:37
output_type operator()(const MapType &q_map, uint_t state) const
operator(). Selects an action for the given state.
void set_eps_value(real_t eps)
Set the epsilon value.
class MaxTabularPolicy
Definition max_tabular_policy.h:30
static output_type get_action(const MatType &q_map, uint_t state_idx)
get_action. Given a table q_map and a state index state_idx, returns an action (output_type).
class RandomTabularPolicy
Definition random_tabular_policy.h:23
double real_t
real_t
Definition bitrl_types.h:23
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
EpsilonDecayOption
The EpsilonDecayOption enum. Enumerate various decaying options.
Definition epsilon_greedy_policy.h:24
Various utilities used when working with RL problems.
Definition cuberl_types.h:16