bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
connect2_env.h
Go to the documentation of this file.
1/*
2 * Connect2 environment from
3 * <a href="https://github.com/JoshVarty/AlphaZeroSimple">AlphaZeroSimple</a>
4 *
5 *
6 *
7 *
8 */
9#ifndef CONNECT2_ENV_H
10#define CONNECT2_ENV_H
11
12#include "bitrl/bitrl_types.h"
13#include "bitrl/envs/env_base.h"
16
17#include <boost/noncopyable.hpp>
18#include <memory>
19#include <string>
20#include <unordered_map>
21#include <vector>
22#include <atomic>
23
24namespace bitrl
25{
26namespace envs
27{
28namespace connect2
29{
30
/// Implementation of the Connect2 environment from
/// https://github.com/JoshVarty/AlphaZeroSimple
///
/// The class derives from EnvBase, parameterised with a
/// TimeStep<std::vector<uint_t>> time-step type and a
/// DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t> space type.
/// NOTE(review): the meaning of the template arguments 53, 0 and 4 is not
/// visible in this header -- confirm against DiscreteVectorStateDiscreteActionEnv.
/// NOTE(review): the aliases used below (base_type, time_step_type,
/// state_space_type, action_space_type, action_type, state_type) are not
/// visible in this listing; the symbol index places them at lines 51-76.
36class Connect2 final : public EnvBase<TimeStep<std::vector<uint_t>>,
37 DiscreteVectorStateDiscreteActionEnv<53, 0, 4, uint_t>>
38{
39
40 public:
    /// Static name string for this environment class. Only declared here;
    /// the definition is not part of this header listing.
44 static const std::string name;
45
52
57
62
67
72
77
    /// Keep the base-class reset overloads visible alongside the reset()
    /// override declared below (a using-declaration prevents name hiding).
81 using base_type::reset;
82
    /// Constructor. Defined in connect2_env.cpp.
86 Connect2();
87
    /// Builds the environment.
    /// @param version Environment version string.
    /// @param options Key/value options for building the environment.
    ///        NOTE(review): the recognised keys are not visible here -- confirm
    ///        in connect2_env.cpp.
    /// @param reset_options Key/value options used when resetting.
92 virtual void
93 make(const std::string &version, const std::unordered_map<std::string, std::any> &options,
94 const std::unordered_map<std::string, std::any> &reset_options) override final;
95
    /// Move in the environment with the given action.
    /// This function always moves player_1.
    /// @param action The action to apply.
    /// @return The resulting time step.
102 virtual time_step_type step(const action_type &action) override final;
103
    /// Close the environment. Defined inline after the class body.
107 virtual void close() override final;
108
    /// Reset the environment.
    /// @return The time step produced by the reset.
112 virtual time_step_type reset() override final;
113
    /// Returns the number of states, i.e. state_space_type::size.
117 uint_t n_states() const noexcept { return state_space_type::size; }
118
    /// Returns the number of actions, i.e. action_space_type::size.
122 uint_t n_actions() const noexcept { return action_space_type::size; }
123
    /// Make a move for the player with the given id.
    /// @param pid Id of the player making the move.
    /// @param action The action the player takes.
    /// @return The resulting time step.
127 time_step_type move(const uint_t pid, const action_type &action);
128
    /// Returns true if the given player wins.
132 bool is_win(uint_t player) const noexcept;
133
    /// Returns true if there are still legal moves available.
137 bool has_legal_moves() const noexcept;
138
    /// Get the valid moves.
142 std::vector<uint_t> get_valid_moves() const;
143
    /// Returns the current value of the static atomic counter n_copies_.
148 static uint_t n_copies() {
149 return n_copies_.load();
150 }
151
152 private:
    /// Discount value. It has no in-class initializer, so it must be assigned
    /// elsewhere. NOTE(review): presumably set in the constructor or make() --
    /// confirm in connect2_env.cpp.
156 real_t discount_;
157
    /// Id of the first player; constant, fixed to 1.
161 const uint_t player_id_1_{1};
162
    /// Id of the second player; constant, fixed to 2.
166 const uint_t player_id_2_{2};
167
    /// Constant fixed to 2. NOTE(review): presumably the value is_win() checks
    /// against -- confirm in connect2_env.cpp.
171 const uint_t win_val_{2};
172
    /// Static atomic counter read by n_copies(). Where it is modified is not
    /// visible in this header.
177 static std::atomic<uint_t> n_copies_;
178
    /// The board, stored as a vector of uint_t. close() replaces it with an
    /// empty vector.
182 std::vector<uint_t> board_;
183
    /// Flag that defaults to false. NOTE(review): presumably marks the end of
    /// a game -- confirm where it is set in connect2_env.cpp.
187 bool is_finished_{false};
188};
189
/// Close the environment: replaces board_ with a freshly constructed empty
/// vector, discarding the current board contents.
/// NOTE(review): header line 193 is absent from this listing, while the symbol
/// index references invalidate_is_created_flag_(); a statement may be missing
/// from this view -- confirm against the original connect2_env.h.
190inline void Connect2::close()
191{
192 board_ = std::vector<uint_t>();
194}
195
196} // namespace connect2
197} // namespace envs
198} // namespace bitrl
199
200#endif
Base class interface for Reinforcement Learning environments.
Definition env_base.h:30
SpaceType::state_space state_space_type
Type describing the environment state space.
Definition env_base.h:44
void invalidate_is_created_flag_() noexcept
Mark environment as not created.
Definition env_base.h:181
SpaceType::state_type state_type
Type describing an individual state.
Definition env_base.h:47
const std::unordered_map< std::string, std::any > & reset_options() const noexcept
Access the reset options provided to make().
Definition env_base.h:104
SpaceType::action_space action_space_type
Type describing the environment action space.
Definition env_base.h:50
virtual time_step_type reset()=0
Reset the environment to an initial state using the reset options specified during make.
TimeStepType time_step_type
Alias for the type returned when stepping the environment.
Definition env_base.h:41
SpaceType::action_type action_type
Type representing an individual action.
Definition env_base.h:53
std::string version() const noexcept
Get the environment version set during make().
Definition env_base.h:142
Implementation of Connect2 environment from https://github.com/JoshVarty/AlphaZeroSimple Initially th...
Definition connect2_env.h:38
EnvBase< TimeStep< std::vector< uint_t > >, DiscreteVectorStateDiscreteActionEnv< 53, 0, 4, uint_t > > base_type
The base type.
Definition connect2_env.h:51
uint_t n_actions() const noexcept
n_actions. Returns the number of actions
Definition connect2_env.h:122
bool has_legal_moves() const noexcept
Returns true if there are still legal moves available.
Definition connect2_env.cpp:89
time_step_type move(const uint_t pid, const action_type &action)
Make a move for the player with the given id.
Definition connect2_env.cpp:103
base_type::state_space_type state_space_type
The type describing the state space for the environment.
Definition connect2_env.h:61
static const std::string name
name
Definition connect2_env.h:44
base_type::action_type action_type
The type of the action to be undertaken in the environment.
Definition connect2_env.h:71
virtual time_step_type step(const action_type &action) override final
step. Move in the environment with the given action. This function always moves player_1.
Definition connect2_env.cpp:44
base_type::time_step_type time_step_type
The type of the time step.
Definition connect2_env.h:56
Connect2()
Constructor.
Definition connect2_env.cpp:19
virtual void make(const std::string &version, const std::unordered_map< std::string, std::any > &options, const std::unordered_map< std::string, std::any > &reset_options) override final
make. Builds the environment. Optionally we can choose if the environment will be slippery
Definition connect2_env.cpp:29
std::vector< uint_t > get_valid_moves() const
Get the valid moves.
Definition connect2_env.cpp:72
virtual time_step_type reset() override final
Reset the environment.
Definition connect2_env.cpp:49
base_type::state_type state_type
The state type.
Definition connect2_env.h:76
base_type::action_space_type action_space_type
The type of the action space for the environment.
Definition connect2_env.h:66
static uint_t n_copies()
Definition connect2_env.h:148
uint_t n_states() const noexcept
n_states. Returns the number of states
Definition connect2_env.h:117
bool is_win(uint_t player) const noexcept
Returns true if the player wins.
Definition connect2_env.cpp:58
virtual void close() override final
close
Definition connect2_env.h:190
Definition bitrl_consts.h:14
double real_t
real_t
Definition bitrl_types.h:23
std::size_t uint_t
uint_t
Definition bitrl_types.h:43