bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
multi_armed_bandits.h
Go to the documentation of this file.
1#ifndef MULTI_ARMED_BANDITS_H
2#define MULTI_ARMED_BANDITS_H
3
4#include "bitrl/bitrl_types.h"
8
9#include <any>
10#include <string>
11#include <unordered_map>
12#include <vector>
13#include <atomic>
14
15namespace bitrl
16{
17namespace envs::bandits
18{
19
25{
26
31
36
41
46};
47
// Multi-armed bandits environment. The class is final and derives from
// EnvBase<TimeStep<bool>, MultiArmedBanditsSpace>.
// NOTE(review): this listing omits several declarations (type aliases,
// constructor) that the rendered page lists in its member summary.
53class MultiArmedBandits final : public EnvBase<TimeStep<bool>, MultiArmedBanditsSpace>
54{
55
56public:
// Name of the environment; the value is defined outside this header.
60 static const std::string name;
61
66
72
77
82
87
92
97
// Builds the environment for the given version, using the supplied
// options and reset options.
104 virtual void
105 make(const std::string &version, const std::unordered_map<std::string, std::any> &options,
106 const std::unordered_map<std::string, std::any> &reset_options) override final;
107
// Closes the environment.
111 virtual void close() override final;
112
// Resets the environment and returns a time step.
118 virtual time_step_type reset() override final;
119
// Steps in the environment by performing the given action and returns
// the resulting time step.
124 virtual time_step_type step(const action_type &action) override final;
125
// Number of actions, i.e. the number of entries currently held in bandits_.
129 uint_t n_actions() const noexcept { return bandits_.size(); }
130
// Returns the stored success reward.
134 real_t success_reward() const noexcept { return success_reward_; }
135
// Returns the stored fail reward.
139 real_t fail_reward() const noexcept { return fail_reward_; }
140
// Returns the current value of the static atomic counter n_copies_.
// NOTE(review): presumably counts live instances of this class; where it
// is incremented/decremented is not visible in this header -- confirm.
145 static uint_t n_copies() {
146 return n_copies_.load();
147 }
148
149private:
// Seed value. NOTE(review): presumably used for random number
// generation; its use is not visible in this header -- confirm.
153 uint_t seed_;
154
// Static (class-wide) atomic counter read by n_copies().
159 static std::atomic<uint_t> n_copies_;
160
// Value returned by success_reward().
164 real_t success_reward_;
165
// Value returned by fail_reward().
170 real_t fail_reward_;
171
// Bernoulli distributions; the size of this vector is what n_actions() reports.
175 std::vector<utils::maths::stats::BernoulliDist> bandits_;
176};
177
178}
179} // namespace bitrl
180
181#endif
Base class interface for Reinforcement Learning environments.
Definition env_base.h:30
SpaceType::state_space state_space_type
Type describing the environment state space.
Definition env_base.h:44
SpaceType::state_type state_type
Type describing an individual state.
Definition env_base.h:47
const std::unordered_map< std::string, std::any > & reset_options() const noexcept
Access the reset options provided to make().
Definition env_base.h:104
SpaceType::action_space action_space_type
Type describing the environment action space.
Definition env_base.h:50
TimeStepType time_step_type
Alias for the type returned when stepping the environment.
Definition env_base.h:41
SpaceType::action_type action_type
Type representing an individual action.
Definition env_base.h:53
std::string version() const noexcept
Get the environment version set during make().
Definition env_base.h:142
Definition multi_armed_bandits.h:54
static uint_t n_copies()
Definition multi_armed_bandits.h:145
EnvBase< TimeStep< bool >, MultiArmedBanditsSpace > base_type
The base type.
Definition multi_armed_bandits.h:65
base_type::time_step_type time_step_type
The time step type we return every time a step in the environment is performed.
Definition multi_armed_bandits.h:71
uint_t n_actions() const noexcept
Return the number of actions.
Definition multi_armed_bandits.h:129
real_t fail_reward() const noexcept
Returns the fail reward.
Definition multi_armed_bandits.h:139
base_type::state_space_type state_space_type
The type describing the state space for the environment.
Definition multi_armed_bandits.h:76
virtual time_step_type step(const action_type &action) override final
Step in the environment by performing the given action.
Definition multi_armed_banditis.cpp:82
real_t success_reward() const noexcept
Returns the success reward.
Definition multi_armed_bandits.h:134
base_type::action_type action_type
The type of the action to be undertaken in the environment.
Definition multi_armed_bandits.h:86
virtual void close() override final
Close the environment.
Definition multi_armed_banditis.cpp:110
base_type::action_space_type action_space_type
The type of the action space for the environment.
Definition multi_armed_bandits.h:81
virtual time_step_type reset() override final
Reset the environment.
Definition multi_armed_banditis.cpp:71
base_type::state_type state_type
The type of the state.
Definition multi_armed_bandits.h:91
static const std::string name
The name of the environment.
Definition multi_armed_bandits.h:60
virtual void make(const std::string &version, const std::unordered_map< std::string, std::any > &options, const std::unordered_map< std::string, std::any > &reset_options) override final
Builds the environment.
Definition multi_armed_banditis.cpp:25
MultiArmedBandits()
MultiArmedBandits Constructor.
Definition multi_armed_banditis.cpp:19
Definition bitrl_consts.h:14
double real_t
real_t
Definition bitrl_types.h:23
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
Null placeholder.
Definition bitrl_types.h:152
struct MultiArmedBanditsSpace specifies the MultiArmedBandits state-action space
Definition multi_armed_bandits.h:25
Null state_type
The type of the state.
Definition multi_armed_bandits.h:35
Null action_space
The type of the action space for the environment.
Definition multi_armed_bandits.h:40
uint_t action_type
The type of the action to be undertaken in the environment.
Definition multi_armed_bandits.h:45
Null state_space
The type describing the state space for the environment.
Definition multi_armed_bandits.h:30