bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
grid_world_env.h
Go to the documentation of this file.
1#ifndef GRID_WORLD_ENV_H
2#define GRID_WORLD_ENV_H
3
11#include "bitrl/bitrl_config.h"
12#include "bitrl/bitrl_consts.h"
13#include "bitrl/bitrl_types.h"
14#include "bitrl/envs/env_base.h"
18
19#ifdef BITRL_DEBUG
20#include <cassert>
21#endif
22
23#include <any>
24#include <map>
25#include <memory>
26#include <string>
27#include <unordered_map>
28#include <utility>
29#include <vector>
30#include <atomic>
31
32namespace bitrl
33{
34namespace envs::grid_world
35{
36
// Selects how the board is laid out. Gridworld::make() reads this value from the
// "mode" option (falling back to STATIC when the option is absent) and forwards it
// to board::init_board().
// NOTE(review): the Gridworld constructor initialises init_mode_ with
// GridWorldInitType::INVALID_TYPE, which is not among the enumerators visible in
// this listing (gutter line 42 is absent) -- confirm against the original header.
37enum class GridWorldInitType : int
38{
39 STATIC = 0,
40 RANDOM = 1,
41 PLAYER = 2,
43};
45{
46 UP = 0,
47 DOWN = 1,
48 LEFT = 2,
49 RIGHT = 3,
51};
52
53namespace detail
54{
55
// Models a position on the board as a pair of ints.
// NOTE(review): which element is the row and which the column is not shown here.
58typedef std::pair<int, int> board_position;
59
// Array specifying the state of the board: a three-level nested vector of real_t.
// NOTE(review): the meaning of each nesting level is not visible in this listing.
63typedef std::vector<std::vector<std::vector<real_t>>> board_state_type;
64
69{
70 PLAYER = 0,
71 GOAL = 1,
72 PIT = 2,
73 WALL = 3
74};
75
80{
81 VALID = 0,
83 LOST_GAME = 2
84};
85
// Test if two positions are equal. Declared here; the definition lives in
// grid_world_env.cpp.
89bool operator==(const board_position &p1, const board_position &p2);
90
// Presumably tests whether two positions differ (the generated summary repeats the
// operator== text); defined in grid_world_env.cpp -- confirm there.
94bool operator!=(const board_position &p1, const board_position &p2);
95
100
// Documented as returning the max component of a position.
104int_t max(const board_position &p);
105
// Documented as returning the min component of a position.
109int_t min(const board_position &p);
110
115{
119 std::string name;
120
125
131 board_piece(std::string name_, board_position pos_)
132 : name(std::move(name_)), pos(std::move(pos_))
133 {
134 }
135
139 board_piece() = default;
140};
141
143
147struct board
148{
151 std::map<board_component_type, board_piece> components;
152 std::map<std::string, board_mask> masks;
153 bool is_board_init = {false};
154
159
164
171
177
182 real_t get_reward() const;
183
187 void close();
188
192 void build_static_mode();
193
197 void build_random_mode();
198
203
210 void check_and_move(int_t row, int_t col);
211
218 board_position pos) const;
219};
220
// Space descriptor passed as the second template argument of EnvBase by Gridworld.
// NOTE(review): the template parameter is spelled `size_size`; Gridworld uses
// `side_size_` for the same quantity -- possibly a typo, confirm.
// NOTE(review): the members at gutter lines 224-229 (state_space, state_type,
// action_space, action_type, ACTION_SPACE_SIZE) are missing from this listing.
221template <uint_t size_size> struct GridWorldEnv
222{
223
// Number of cells on a square board whose side is `size_size`.
226 static constexpr uint_t STATE_SPACE_SIZE = size_size * size_size;
230};
231} // namespace detail
232
// Grid-world environment on a square board of side `side_size_`.
// Derives from EnvBase, using TimeStep<detail::board_state_type> as the time-step
// type and detail::GridWorldEnv<side_size_> as the space descriptor.
// NOTE(review): the type aliases at gutter lines 269-295 (base_type, time_step_type,
// state_space_type, action_space_type, action_type, state_type) are missing from
// this listing but are used by the declarations below.
244template <uint_t side_size_>
245class Gridworld final
246 : public EnvBase<TimeStep<detail::board_state_type>, detail::GridWorldEnv<side_size_>>
247{
248 public:
 // Boards with a side smaller than 4 are rejected at compile time.
249 static_assert(side_size_ >= 4, "The side size should be greater than or equal to 4");
250
 // Name of the environment; defined after the class as "Gridworld".
254 static const std::string name;
255
 // Defined after the class as 4.
 // NOTE(review): presumably the number of board component kinds
 // (PLAYER, GOAL, PIT, WALL) -- confirm.
259 static const uint_t n_components;
260
 // Run-time copy of the `side_size_` template argument.
264 static const uint_t side_size;
265
270
276
281
286
291
296
 // Keep the base-class reset overloads visible next to the override below.
300 using base_type::reset;
301
 // Constructor.
305 Gridworld();
306
 // Builds the environment from `options`; does nothing if it is already created.
311 void make(const std::string &version, const std::unordered_map<std::string, std::any> &options,
312 const std::unordered_map<std::string, std::any> &reset_options) override final;
313
 // Reset the environment: re-initialises the board and returns the first time step.
317 time_step_type reset() override final;
318
 // Executes `action` on the board and returns the resulting time step.
324 time_step_type step(const action_type &action) override final;
325
 // Closes the environment by closing the underlying board.
329 void close() override final;
330
 // Value of the "randomize_state" option (false unless make() received it).
335 [[nodiscard]] bool has_random_state() const noexcept { return randomize_state_; }
336
 // Number of states: side_size_ squared.
340 [[nodiscard]] uint_t n_states() const noexcept { return side_size_ * side_size_; }
341
 // Number of actions, taken from the size of the action space type.
345 [[nodiscard]] uint_t n_actions() const noexcept { return action_space_type::size; }
346
 // Value of the "seed" option (0 unless make() received it).
351 [[nodiscard]] uint_t seed() const noexcept { return seed_; }
352
 // Value of the "noise_factor" option (0.0 unless make() received it).
357 [[nodiscard]] real_t noise_factor() const noexcept { return noise_factor_; }
358
 // Returns true if the PLAYER position is the same as the PIT position.
363 [[nodiscard]] bool is_game_lost() const;
364
 // Initialisation mode currently stored in the environment.
369 [[nodiscard]] GridWorldInitType init_type() const noexcept { return init_mode_; }
370
 // Current value of the per-instantiation counter n_copies_.
375 static uint_t n_copies() {
376 return n_copies_.load();
377 }
378
379 private:
 // Board initialisation mode; set by make().
383 GridWorldInitType init_mode_;
384
 // Backing field for has_random_state().
388 bool randomize_state_;
389
 // Backing field for seed().
393 uint_t seed_;
394
 // Counter incremented in the constructor body.
 // NOTE(review): no decrement is visible in this listing, so this looks like
 // "instances ever constructed" rather than "instances alive" -- confirm.
399 static std::atomic<uint_t> n_copies_;
400
 // Backing field for noise_factor().
404 real_t noise_factor_;
405
 // The board that holds the pieces and implements the game rules.
409 detail::board board_;
410};
411
// Out-of-class definitions of Gridworld's static data members.
// Each Gridworld<N> instantiation gets its own copy of every member below.

// Environment name.
412template <uint_t side_size> const std::string Gridworld<side_size>::name = "Gridworld";
413
// Mirrors the template argument so it can be read as an ordinary static member.
414template <uint_t side_size_> const uint_t Gridworld<side_size_>::side_size = side_size_;
415
416template <uint_t side_size_> const uint_t Gridworld<side_size_>::n_components = 4;
417
// Counter starts at zero and is incremented by the constructor.
418template <uint_t side_size> std::atomic<uint_t> Gridworld<side_size>::n_copies_ = 0;
419
420template <uint_t side_size_>
422 : EnvBase<TimeStep<detail::board_state_type>, detail::GridWorldEnv<side_size_>>(
423 Gridworld<side_size>::name),
424 init_mode_(GridWorldInitType::INVALID_TYPE), randomize_state_(false), seed_(0),
425 noise_factor_(0.0), board_()
426{
427 ++n_copies_;
428}
429
// Builds the environment.
//
// version        stored via set_version_() and forwarded to base_type::make().
// options        optional keys read here: "mode" (GridWorldInitType), "seed" (uint_t),
//                "noise_factor" (real_t) and "randomize_state" (bool). The values are
//                extracted with std::any_cast by value, which throws
//                std::bad_any_cast when the stored type differs.
// reset_options  only forwarded to base_type::make().
//
// Calling make() on an environment that is already created is a no-op.
430template <uint_t side_size_>
431void Gridworld<side_size_>::make(const std::string &version,
432 const std::unordered_map<std::string, std::any> &options,
433 const std::unordered_map<std::string, std::any> &reset_options)
434{
435
436 if (this->is_created())
437 {
438 return;
439 }
440
 // "mode" is the only option with an explicit fallback: STATIC.
441 if (const auto mode = options.find("mode"); mode != options.end())
442 {
443 init_mode_ = std::any_cast<GridWorldInitType>(mode->second);
444 }
445 else
446 {
447 init_mode_ = GridWorldInitType::STATIC;
448 }
449
 // The remaining options keep their constructor-initialised values when absent.
450 if (const auto seed = options.find("seed"); seed != options.end())
451 {
452 seed_ = std::any_cast<uint_t>(seed->second);
453 }
454
455 if (const auto noise_factor = options.find("noise_factor"); noise_factor != options.end())
456 {
457 noise_factor_ = std::any_cast<real_t>(noise_factor->second);
458 }
459
460 if (const auto randomize_state = options.find("randomize_state");
461 randomize_state != options.end())
462 {
463 randomize_state_ = std::any_cast<bool>(randomize_state->second);
464 }
465
466 // initialize the board
 // NOTE(review): only the side size and the init mode are passed on; seed_,
 // noise_factor_ and randomize_state_ are not handed to the board in this function.
467 board_.init_board(side_size_, init_mode_);
468
469 // set the version and set the board
470 // to created
471 this->set_version_(version);
 // Give this environment instance a fresh UUID as its index.
472 auto idx = bitrl::utils::uuid4();
473 this->set_idx_(idx);
474 this->base_type::make(version, options, reset_options);
475 this->make_created_();
476}
477
478template <uint_t side_size_>
481{
482
483 auto obs = board_.step(static_cast<GridWorldActionType>(action));
484 auto reward = board_.get_reward();
485
486 // if the reward is not -1.0 then either
487 // we reached the goal or we hit the PIT
488 // in any case the game is over
489 auto step_type = reward != -1.0 ? TimeStepTp::LAST : TimeStepTp::MID;
490
491 this->get_current_time_step_() = time_step_type(step_type, reward, obs);
492 return this->get_current_time_step_();
493}
494
495template <uint_t side_size_>
497{
498
499 // reinitialize the board
500 auto obs = board_.init_board(side_size_, init_mode_);
501 auto reward = board_.get_reward();
502 this->get_current_time_step_() = time_step_type(TimeStepTp::FIRST, reward, obs);
503 return this->get_current_time_step_();
504}
505
// Returns true if the PLAYER position is the same as the PIT position,
// false otherwise.
// NOTE(review): both find() results are dereferenced without being compared to
// components.end(); this presumably relies on the board having been initialised
// (PLAYER and PIT entries present) before the call -- confirm against callers.
506template <uint_t side_size_> bool Gridworld<side_size_>::is_game_lost() const
507{
508
 // Each lookup copies the piece position out of the board's component map.
509 auto player = board_.components.find(detail::board_component_type::PLAYER)->second.pos;
510 auto pit_pos = board_.components.find(detail::board_component_type::PIT)->second.pos;
511
512 if (player == pit_pos)
513 {
514 return true;
515 }
516
517 return false;
518}
519
// Closes the environment by delegating to board::close(); nothing else is done here.
520template <uint_t side_size_> void Gridworld<side_size_>::close() { board_.close(); }
521
522} // namespace envs::grid_world
523} // namespace bitrl
524
525#endif // GRID_WORLD_ENV_H
Base class interface for Reinforcement Learning environments.
Definition env_base.h:30
SpaceType::state_space state_space_type
Type describing the environment state space.
Definition env_base.h:44
SpaceType::state_type state_type
Type describing an individual state.
Definition env_base.h:47
const std::unordered_map< std::string, std::any > & reset_options() const noexcept
Access the configuration options provided to make().
Definition env_base.h:104
SpaceType::action_space action_space_type
Type describing the environment action space.
Definition env_base.h:50
virtual time_step_type reset()=0
Reset the environment to an initial state using the reset options specified during make.
SpaceType::action_type action_type
Type representing an individual action.
Definition env_base.h:53
std::string version() const noexcept
Get the environment version set during make().
Definition env_base.h:142
Definition grid_world_env.h:247
base_type::state_type state_type
The type describing an individual state of the environment.
Definition grid_world_env.h:295
static const std::string name
name
Definition grid_world_env.h:254
bool has_random_state() const noexcept
has_random_state
Definition grid_world_env.h:335
Gridworld()
Constructor.
Definition grid_world_env.h:421
static const uint_t n_components
n_components
Definition grid_world_env.h:259
void make(const std::string &version, const std::unordered_map< std::string, std::any > &options, const std::unordered_map< std::string, std::any > &reset_options) override final
make. Builds the environment. The options map may carry "mode", "seed", "noise_factor" and "randomize_state"; when "mode" is absent the board is built in STATIC mode.
Definition grid_world_env.h:431
static uint_t n_copies()
Definition grid_world_env.h:375
base_type::action_type action_type
The type of the action to be undertaken in the environment.
Definition grid_world_env.h:290
uint_t n_actions() const noexcept
n_actions. Returns the number of actions
Definition grid_world_env.h:345
GridWorldInitType init_type() const noexcept
init_type
Definition grid_world_env.h:369
static const uint_t side_size
side_size
Definition grid_world_env.h:264
base_type::time_step_type time_step_type
The time step type we return every time a step in the environment is performed.
Definition grid_world_env.h:275
base_type::state_space_type state_space_type
The type describing the state space for the environment.
Definition grid_world_env.h:280
uint_t seed() const noexcept
seed
Definition grid_world_env.h:351
EnvBase< TimeStep< detail::board_state_type >, detail::GridWorldEnv< side_size_ > > base_type
The base_type.
Definition grid_world_env.h:269
base_type::action_space_type action_space_type
The type of the action space for the environment.
Definition grid_world_env.h:285
time_step_type reset() override final
Reset the environment.
Definition grid_world_env.h:496
time_step_type step(const action_type &action) override final
step
Definition grid_world_env.h:480
uint_t n_states() const noexcept
n_states. Returns the number of states
Definition grid_world_env.h:340
bool is_game_lost() const
Returns true if the PLAYER position is the same as the PIT position.
Definition grid_world_env.h:506
void close() override final
close
Definition grid_world_env.h:520
real_t noise_factor() const noexcept
noise_factor
Definition grid_world_env.h:357
int_t min(const board_position &p)
Returns the min component of a position.
Definition grid_world_env.cpp:147
board_component_type
The BoardComponentType enum.
Definition grid_world_env.h:69
@ PLAYER
Definition grid_world_env.h:70
@ PIT
Definition grid_world_env.h:72
@ WALL
Definition grid_world_env.h:73
@ GOAL
Definition grid_world_env.h:71
bool operator!=(const board_position &p1, const board_position &p2)
Test if two positions are not equal.
Definition grid_world_env.cpp:138
std::pair< int, int > board_position
Models a position on the board.
Definition grid_world_env.h:58
int_t max(const board_position &p)
Returns the max component of a position.
Definition grid_world_env.cpp:145
std::vector< std::vector< std::vector< real_t > > > board_state_type
Array specifying the state of the board.
Definition grid_world_env.h:63
bool operator==(const board_position &p1, const board_position &p2)
Test if two positions are equal.
Definition grid_world_env.cpp:129
board_position operator+(const board_position &p1, const board_position &p2)
Add two positions and return their result.
Definition grid_world_env.cpp:140
board_move_type
The MoveType enum.
Definition grid_world_env.h:80
@ VALID
Definition grid_world_env.h:81
@ LOST_GAME
Definition grid_world_env.h:83
@ INVALID
Definition grid_world_env.h:82
GridWorldActionType
Definition grid_world_env.h:45
GridWorldInitType
Definition grid_world_env.h:38
std::string uuid4()
Definition bitrl_utils.h:22
Definition bitrl_consts.h:14
int int_t
integer type
Definition bitrl_types.h:33
double real_t
real_t
Definition bitrl_types.h:23
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
A scalar discrete space can be used to denote a space that only has a single value at each time....
Definition space_type.h:22
uint_t space_item_type
item_t
Definition space_type.h:27
static constexpr uint_t size
The overall size of the space, i.e. how many elements the space can potentially have.
Definition space_type.h:38
Definition grid_world_env.h:222
detail::board state_space
Definition grid_world_env.h:224
detail::board_state_type state_type
Definition grid_world_env.h:225
action_space::space_item_type action_type
Definition grid_world_env.h:228
static constexpr uint_t STATE_SPACE_SIZE
Definition grid_world_env.h:226
static constexpr uint_t ACTION_SPACE_SIZE
Definition grid_world_env.h:229
ScalarDiscreteSpace< 0, 4 > action_space
Definition grid_world_env.h:227
The BoardPiece struct.
Definition grid_world_env.h:115
board_piece()=default
Default constructor.
board_position pos
pos 2-tuple e.g. (1,4)
Definition grid_world_env.h:124
std::string name
Name of the piece.
Definition grid_world_env.h:119
board_piece(std::string name_, board_position pos_)
BoardPiece.
Definition grid_world_env.h:131
Definition grid_world_env.h:148
uint_t seed
Definition grid_world_env.h:150
void build_random_mode()
build_random_mode
Definition grid_world_env.cpp:396
void move_piece(board_component_type piece, board_position pos)
move_piece Move the piece to the given position
Definition grid_world_env.cpp:307
void build_player_mode(uint_t seed)
build_player_mode
Definition grid_world_env.cpp:425
void build_static_mode()
build_static_mode
Definition grid_world_env.cpp:386
board_move_type validate_move(board_component_type piece, board_position pos) const
validate_move_
Definition grid_world_env.cpp:329
uint_t board_size
Definition grid_world_env.h:149
board_state_type init_board(uint_t board_s, GridWorldInitType init_type)
initialize the board
Definition grid_world_env.cpp:156
void check_and_move(int_t row, int_t col)
check if the given move is valid and change the position of the player if the move either causes the ...
Definition grid_world_env.cpp:367
board_state_type get_state() const
get_state. Returns the state of the board
Definition grid_world_env.cpp:267
board_state_type step(GridWorldActionType action)
Execute the action on the board.
Definition grid_world_env.cpp:203
bool is_board_init
Definition grid_world_env.h:153
real_t get_reward() const
Get the reward the board currently returns depending on the position of the player.
Definition grid_world_env.cpp:244
void close()
close
Definition grid_world_env.cpp:149
std::map< std::string, board_mask > masks
Definition grid_world_env.h:152
std::map< board_component_type, board_piece > components
Definition grid_world_env.h:151