1#ifndef GRID_WORLD_ENV_H
2#define GRID_WORLD_ENV_H
11#include "bitrl/bitrl_config.h"
27#include <unordered_map>
34namespace envs::grid_world
132 :
name(std::move(name_)),
pos(std::move(pos_))
152 std::map<std::string, board_mask>
masks;
244template <u
int_t s
ide_size_>
246 :
public EnvBase<TimeStep<detail::board_state_type>, detail::GridWorldEnv<side_size_>>
249 static_assert(side_size_ >= 4,
"The side size should be greater than or equal to 4");
311 void make(
const std::string &
version,
const std::unordered_map<std::string, std::any> &options,
312 const std::unordered_map<std::string, std::any> &
reset_options)
override final;
329 void close() override final;
340 [[nodiscard]]
uint_t n_states() const noexcept {
return side_size_ * side_size_; }
376 return n_copies_.load();
388 bool randomize_state_;
399 static std::atomic<uint_t> n_copies_;
418template <u
int_t s
ide_size> std::atomic<uint_t> Gridworld<side_size>::n_copies_ = 0;
420template <u
int_t s
ide_size_>
422 :
EnvBase<
TimeStep<detail::board_state_type>, detail::GridWorldEnv<side_size_>>(
425 noise_factor_(0.0), board_()
430template <u
int_t s
ide_size_>
432 const std::unordered_map<std::string, std::any> &options,
433 const std::unordered_map<std::string, std::any> &reset_options)
436 if (this->is_created())
441 if (
const auto mode = options.find(
"mode"); mode != options.end())
443 init_mode_ = std::any_cast<GridWorldInitType>(mode->second);
450 if (
const auto seed = options.find(
"seed"); seed != options.end())
452 seed_ = std::any_cast<uint_t>(seed->second);
455 if (
const auto noise_factor = options.find(
"noise_factor"); noise_factor != options.end())
457 noise_factor_ = std::any_cast<real_t>(noise_factor->second);
460 if (
const auto randomize_state = options.find(
"randomize_state");
461 randomize_state != options.end())
463 randomize_state_ = std::any_cast<bool>(randomize_state->second);
467 board_.init_board(side_size_, init_mode_);
471 this->set_version_(version);
474 this->base_type::make(version, options, reset_options);
475 this->make_created_();
478template <u
int_t s
ide_size_>
484 auto reward = board_.get_reward();
491 this->get_current_time_step_() =
time_step_type(step_type, reward, obs);
492 return this->get_current_time_step_();
495template <u
int_t s
ide_size_>
500 auto obs = board_.init_board(side_size_, init_mode_);
501 auto reward = board_.get_reward();
503 return this->get_current_time_step_();
512 if (player == pit_pos)
Base class interface for Reinforcement Learning environments.
Definition env_base.h:30
SpaceType::state_space state_space_type
Type describing the environment state space.
Definition env_base.h:44
SpaceType::state_type state_type
Type describing an individual state.
Definition env_base.h:47
const std::unordered_map< std::string, std::any > & reset_options() const noexcept
Access the reset options provided to make().
Definition env_base.h:104
SpaceType::action_space action_space_type
Type describing the environment action space.
Definition env_base.h:50
virtual time_step_type reset()=0
Reset the environment to an initial state using the reset options specified during make.
SpaceType::action_type action_type
Type representing an individual action.
Definition env_base.h:53
std::string version() const noexcept
Get the environment version set during make().
Definition env_base.h:142
Definition grid_world_env.h:247
base_type::state_type state_type
The type describing an individual state of the environment.
Definition grid_world_env.h:295
static const std::string name
name
Definition grid_world_env.h:254
bool has_random_state() const noexcept
has_random_state
Definition grid_world_env.h:335
Gridworld()
Constructor.
Definition grid_world_env.h:421
static const uint_t n_components
n_components
Definition grid_world_env.h:259
void make(const std::string &version, const std::unordered_map< std::string, std::any > &options, const std::unordered_map< std::string, std::any > &reset_options) override final
make. Builds the environment. Optionally we can choose if the environment will be slippery
Definition grid_world_env.h:431
static uint_t n_copies()
Definition grid_world_env.h:375
base_type::action_type action_type
The type of the action to be undertaken in the environment.
Definition grid_world_env.h:290
uint_t n_actions() const noexcept
n_actions. Returns the number of actions
Definition grid_world_env.h:345
GridWorldInitType init_type() const noexcept
init_type
Definition grid_world_env.h:369
static const uint_t side_size
side_size
Definition grid_world_env.h:264
base_type::time_step_type time_step_type
The time step type we return every time a step in the environment is performed.
Definition grid_world_env.h:275
base_type::state_space_type state_space_type
The type describing the state space for the environment.
Definition grid_world_env.h:280
uint_t seed() const noexcept
seed
Definition grid_world_env.h:351
EnvBase< TimeStep< detail::board_state_type >, detail::GridWorldEnv< side_size_ > > base_type
The base_type.
Definition grid_world_env.h:269
base_type::action_space_type action_space_type
The type of the action space for the environment.
Definition grid_world_env.h:285
time_step_type reset() override final
Reset the environment.
Definition grid_world_env.h:496
time_step_type step(const action_type &action) override final
step
Definition grid_world_env.h:480
uint_t n_states() const noexcept
n_states. Returns the number of states
Definition grid_world_env.h:340
bool is_game_lost() const
Returns true if the PLAYER position is the same as the PIT position.
Definition grid_world_env.h:506
void close() override final
close
Definition grid_world_env.h:520
real_t noise_factor() const noexcept
noise_factor
Definition grid_world_env.h:357
int_t min(const board_position &p)
Returns the min component of a position.
Definition grid_world_env.cpp:147
board_component_type
The BoardComponentType enum.
Definition grid_world_env.h:69
@ PLAYER
Definition grid_world_env.h:70
@ PIT
Definition grid_world_env.h:72
@ WALL
Definition grid_world_env.h:73
@ GOAL
Definition grid_world_env.h:71
bool operator!=(const board_position &p1, const board_position &p2)
Test if two positions are not equal.
Definition grid_world_env.cpp:138
std::pair< int, int > board_position
Models a position on the board.
Definition grid_world_env.h:58
int_t max(const board_position &p)
Returns the max component of a position.
Definition grid_world_env.cpp:145
std::vector< std::vector< std::vector< real_t > > > board_state_type
Array specifying the state of the board.
Definition grid_world_env.h:63
bool operator==(const board_position &p1, const board_position &p2)
Test if two positions are equal.
Definition grid_world_env.cpp:129
board_position operator+(const board_position &p1, const board_position &p2)
Add two positions and return their result.
Definition grid_world_env.cpp:140
board_move_type
The MoveType enum.
Definition grid_world_env.h:80
@ VALID
Definition grid_world_env.h:81
@ LOST_GAME
Definition grid_world_env.h:83
@ INVALID
Definition grid_world_env.h:82
GridWorldActionType
Definition grid_world_env.h:45
GridWorldInitType
Definition grid_world_env.h:38
std::string uuid4()
Definition bitrl_utils.h:22
Definition bitrl_consts.h:14
int int_t
integer type
Definition bitrl_types.h:33
double real_t
real_t
Definition bitrl_types.h:23
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
A scalar discrete space can be used to denote a space that only has a single value at each time....
Definition space_type.h:22
uint_t space_item_type
item_t
Definition space_type.h:27
static constexpr uint_t size
The overall size of the space, i.e. how many elements the space can potentially have.
Definition space_type.h:38
Definition grid_world_env.h:222
detail::board state_space
Definition grid_world_env.h:224
detail::board_state_type state_type
Definition grid_world_env.h:225
action_space::space_item_type action_type
Definition grid_world_env.h:228
static constexpr uint_t STATE_SPACE_SIZE
Definition grid_world_env.h:226
static constexpr uint_t ACTION_SPACE_SIZE
Definition grid_world_env.h:229
ScalarDiscreteSpace< 0, 4 > action_space
Definition grid_world_env.h:227
The BoardPiece struct.
Definition grid_world_env.h:115
board_piece()=default
Default constructor.
board_position pos
pos 2-tuple e.g. (1,4)
Definition grid_world_env.h:124
std::string name
Name of the piece.
Definition grid_world_env.h:119
board_piece(std::string name_, board_position pos_)
BoardPiece.
Definition grid_world_env.h:131
Definition grid_world_env.h:148
uint_t seed
Definition grid_world_env.h:150
void build_random_mode()
build_random_mode
Definition grid_world_env.cpp:396
void move_piece(board_component_type piece, board_position pos)
move_piece. Move the piece to the given position.
Definition grid_world_env.cpp:307
void build_player_mode(uint_t seed)
build_player_mode
Definition grid_world_env.cpp:425
void build_static_mode()
build_static_mode
Definition grid_world_env.cpp:386
board_move_type validate_move(board_component_type piece, board_position pos) const
validate_move_
Definition grid_world_env.cpp:329
uint_t board_size
Definition grid_world_env.h:149
board_state_type init_board(uint_t board_s, GridWorldInitType init_type)
initialize the board
Definition grid_world_env.cpp:156
void check_and_move(int_t row, int_t col)
check if the given move is valid and change the position of the player if the move either causes the ...
Definition grid_world_env.cpp:367
board_state_type get_state() const
get_state. Returns the state of the board
Definition grid_world_env.cpp:267
board_state_type step(GridWorldActionType action)
Execute the action on the board.
Definition grid_world_env.cpp:203
bool is_board_init
Definition grid_world_env.h:153
real_t get_reward() const
Get the reward the board currently returns depending on the position of the player.
Definition grid_world_env.cpp:244
void close()
close
Definition grid_world_env.cpp:149
std::map< std::string, board_mask > masks
Definition grid_world_env.h:152
std::map< board_component_type, board_piece > components
Definition grid_world_env.h:151