bitrl_cuberl_docs/grid__world__env_8h_source.html

#ifndef GRID_WORLD_ENV_H

#define GRID_WORLD_ENV_H


#include "bitrl/bitrl_config.h"

#include "bitrl/bitrl_consts.h"

#include "bitrl/bitrl_types.h"

#include "bitrl/envs/env_base.h"

#include "bitrl/envs/space_type.h"

#include "bitrl/envs/time_step.h"

#include "bitrl/utils/bitrl_utils.h"


#ifdef BITRL_DEBUG

#include <cassert>

#endif


#include <any>

#include <map>

#include <memory>

#include <string>

#include <unordered_map>

#include <utility>

#include <vector>

#include <atomic>


namespace bitrl

{

namespace envs::grid_world

{


/// Selects how the board is set up; this is the value handed to
/// detail::board::init_board. The numeric values are spelled out explicitly
/// because callers may store them in option maps.
enum class GridWorldInitType : int
{
    STATIC = 0,      ///< board::build_static_mode is the matching builder (presumably a fixed layout)
    RANDOM = 1,      ///< board::build_random_mode is the matching builder
    PLAYER = 2,      ///< board::build_player_mode is the matching builder
    INVALID_TYPE = 3 ///< sentinel; a Gridworld holds this value until make() is called
};


/// Moves the player can request. Gridworld::step converts the raw action
/// index into this enum with a static_cast, so the numeric values must match
/// the indices of the action space.
enum class GridWorldActionType : uint_t
{
    UP = 0,
    DOWN = 1,
    LEFT = 2,
    RIGHT = 3,
    INVALID_TYPE = 4 ///< sentinel: first value past the valid actions
};


namespace detail

{


/// Models a position on the board as a pair of integer coordinates.
using board_position = std::pair<int, int>;


/// Array specifying the state of the board: a three-level nested vector of reals.
using board_state_type = std::vector<std::vector<std::vector<real_t>>>;


/// Identifies a component of the board; used as the key of board::components.
/// Deliberately left as an unscoped enum so existing unqualified uses keep
/// compiling.
enum board_component_type
{
    PLAYER = 0, ///< the piece that is moved by the actions
    GOAL = 1,
    PIT = 2,    ///< Gridworld::is_game_lost checks the player position against this piece
    WALL = 3
};


/// Outcome reported by board::validate_move for a prospective move.
enum board_move_type
{
    VALID = 0,
    INVALID = 1,
    LOST_GAME = 2 ///< presumably a move that ends on the pit - confirm against validate_move
};


/// Test if two positions are equal.
bool operator==(const board_position &p1, const board_position &p2);


/// Test if two positions are not equal.
bool operator!=(const board_position &p1, const board_position &p2);


/// Add two positions and return their result.
board_position operator+(const board_position &p1, const board_position &p2);


/// Returns the max component of a position.
int_t max(const board_position &p);


/// Returns the min component of a position.
int_t min(const board_position &p);


/// A named piece together with its position on the board.
struct board_piece
{
    /// Name of the piece.
    std::string name;

    /// Position of the piece, a 2-tuple such as (1, 4).
    board_position pos;

    /// Default constructor.
    board_piece() = default;

    /// Build a piece from a name and a position. Both arguments are taken by
    /// value and moved into the members.
    board_piece(std::string name_, board_position pos_)
        : name(std::move(name_)),
          pos(std::move(pos_))
    {}
};


/// A mask is represented exactly like a piece (name plus position); it is the
/// value type of board::masks.
using board_mask = board_piece;


/// State of the grid-world board together with the operations that
/// initialize it, move the player and report observations and rewards.
/// All members are public; Gridworld owns one instance and drives it.
struct board
{
    /// Side length of the board. Zero until init_board has been called, so a
    /// default-constructed board never exposes an indeterminate size.
    uint_t board_size = 0;

    /// Seed kept on the board; defaults to 42.
    uint_t seed = 42;

    /// The pieces on the board keyed by their component type.
    std::map<board_component_type, board_piece> components;

    /// Masks keyed by name.
    std::map<std::string, board_mask> masks;

    /// Flag recording whether the board has been initialized.
    bool is_board_init = {false};

    /// Initialize the board.
    /// @param board_s   side length to use
    /// @param init_type how the board should be set up
    /// @return the resulting board state
    board_state_type init_board(uint_t board_s, GridWorldInitType init_type);

    /// Execute the action on the board.
    /// @return the board state after the action
    board_state_type step(GridWorldActionType action);

    /// Move the piece to the given position.
    void move_piece(board_component_type piece, board_position pos);

    /// Returns the state of the board.
    board_state_type get_state() const;

    /// Get the reward the board currently returns depending on the position
    /// of the player.
    real_t get_reward() const;

    /// Close the board.
    void close();

    /// Set up the board for GridWorldInitType::STATIC (presumably; the
    /// dispatch lives in init_board).
    void build_static_mode();

    /// Set up the board for GridWorldInitType::RANDOM (presumably; the
    /// dispatch lives in init_board).
    void build_random_mode();

    /// Set up the board for GridWorldInitType::PLAYER (presumably; the
    /// dispatch lives in init_board).
    /// NOTE: the parameter shadows the `seed` data member inside the definition.
    void build_player_mode(uint_t seed);

    /// Check whether the move described by (row, col) is valid and, depending
    /// on the outcome, change the position of the player.
    void check_and_move(int_t row, int_t col);

    /// Classify a prospective move of `piece` by `pos` without modifying the board.
    [[nodiscard]] board_move_type validate_move(board_component_type piece,
                                                board_position pos) const;
};


/// Bundle of space types handed to EnvBase for a board with `side_len` cells
/// per side.
template <uint_t side_len> struct GridWorldEnv
{
    /// The object that models the state space.
    using state_space = detail::board;

    /// The type of a single observed state.
    using state_type = detail::board_state_type;

    /// The action space type.
    using action_space = ScalarDiscreteSpace<0, 4>;

    /// The type of a single action.
    using action_type = action_space::space_item_type;

    /// Number of cells on the board.
    static constexpr uint_t STATE_SPACE_SIZE = side_len * side_len;

    /// Number of actions, as reported by the action space.
    static constexpr uint_t ACTION_SPACE_SIZE = action_space::size;
};


} // namespace detail


/// Grid-world environment on a square board with `side_size_` cells per side.
/// The environment owns a detail::board and exposes it through the EnvBase
/// interface (make / reset / step / close).
template <uint_t side_size_>
class Gridworld final
    : public EnvBase<TimeStep<detail::board_state_type>, detail::GridWorldEnv<side_size_>>
{
  public:
    static_assert(side_size_ >= 4, "The side size should be greater than or equal to 4");

    /// The base_type.
    using base_type = EnvBase<TimeStep<detail::board_state_type>, detail::GridWorldEnv<side_size_>>;

    /// The time step type we return every time a step in the environment is performed.
    using time_step_type = typename base_type::time_step_type;

    /// The type describing the state space for the environment.
    using state_space_type = typename base_type::state_space_type;

    /// The type of the action space for the environment.
    using action_space_type = typename base_type::action_space_type;

    /// The type of the action to be undertaken in the environment.
    using action_type = typename base_type::action_type;

    /// The type of an individual state of the environment.
    using state_type = typename base_type::state_type;

    /// Name of the environment.
    static const std::string name;

    /// Number of components on the board.
    static const uint_t n_components;

    /// Side length of the board; mirrors the template parameter.
    static const uint_t side_size;

    // Keep the reset overloads of the base class visible next to our override.
    using base_type::reset;

    /// Constructor. The environment is not usable until make() has been called.
    Gridworld();

    /// Build the environment from the given version string and option maps.
    void make(const std::string &version, const std::unordered_map<std::string, std::any> &options,
              const std::unordered_map<std::string, std::any> &reset_options) override final;

    /// Reset the environment.
    time_step_type reset() override final;

    /// Execute the given action and return the resulting time step.
    time_step_type step(const action_type &action) override final;

    /// Close the environment.
    void close() override final;

    /// Value of the "randomize_state" option (false unless set in make()).
    [[nodiscard]] bool has_random_state() const noexcept { return randomize_state_; }

    /// Returns the number of states, i.e. the number of cells on the board.
    [[nodiscard]] uint_t n_states() const noexcept { return side_size_ * side_size_; }

    /// Returns the number of actions.
    [[nodiscard]] uint_t n_actions() const noexcept { return action_space_type::size; }

    /// Value of the "seed" option (0 unless set in make()).
    [[nodiscard]] uint_t seed() const noexcept { return seed_; }

    /// Value of the "noise_factor" option (0.0 unless set in make()).
    [[nodiscard]] real_t noise_factor() const noexcept { return noise_factor_; }

    /// Returns true if the PLAYER position is the same as the PIT position.
    [[nodiscard]] bool is_game_lost() const;

    /// The initialization mode the environment was made with.
    [[nodiscard]] GridWorldInitType init_type() const noexcept { return init_mode_; }

    /// Current value of the per-instantiation counter that the constructor increments.
    static uint_t n_copies()
    {
        return n_copies_.load();
    }

  private:
    /// Counter shared by all instances of this instantiation.
    static std::atomic<uint_t> n_copies_;

    // NOTE: the non-static members below are initialized in declaration
    // order; keep that order in sync with the constructor's initializer list.

    /// How the board is set up on make() and reset().
    GridWorldInitType init_mode_;

    /// Value of the "randomize_state" option.
    bool randomize_state_;

    /// Value of the "seed" option.
    uint_t seed_;

    /// Value of the "noise_factor" option.
    real_t noise_factor_;

    /// The board the environment plays on.
    detail::board board_;
};


/// Name of the environment; the constructor passes it to EnvBase.
template <uint_t side_size_>
const std::string Gridworld<side_size_>::name = "Gridworld";

/// Side length of the board, taken from the template parameter.
template <uint_t side_size_>
const uint_t Gridworld<side_size_>::side_size = side_size_;

/// Number of components on the board.
template <uint_t side_size_>
const uint_t Gridworld<side_size_>::n_components = 4;

/// Instance counter, one per instantiation of the template; starts at zero.
template <uint_t side_size_>
std::atomic<uint_t> Gridworld<side_size_>::n_copies_{0};


/// Constructor. Registers the environment name with the base class and puts
/// every option into its "not configured yet" state: the init mode is
/// INVALID_TYPE until make() replaces it, and the remaining options are zeroed.
/// Each construction also bumps the per-instantiation instance counter.
template <uint_t side_size_>
Gridworld<side_size_>::Gridworld()
    : EnvBase<TimeStep<detail::board_state_type>, detail::GridWorldEnv<side_size_>>(
          // Use the template parameter here, not the static member `side_size`,
          // so the template argument never depends on that member being usable
          // in a constant expression.
          Gridworld<side_size_>::name),
      init_mode_(GridWorldInitType::INVALID_TYPE), randomize_state_(false), seed_(0),
      noise_factor_(0.0), board_()
{
    ++n_copies_;
}


/// Build the environment. Calling make() on an environment that is already
/// created is a no-op.
///
/// Recognized keys in `options` (all optional):
///  - "mode"            (GridWorldInitType) board initialization mode; STATIC when absent
///  - "seed"            (uint_t)            seed, also forwarded to the board
///  - "noise_factor"    (real_t)
///  - "randomize_state" (bool)
///
/// @param version       version string recorded on the environment
/// @param options       configuration options, see above
/// @param reset_options forwarded unchanged to the base class make()
/// @throws std::bad_any_cast if an option holds a value of a different type
///         than the one listed above
template <uint_t side_size_>
void Gridworld<side_size_>::make(const std::string &version,
                                 const std::unordered_map<std::string, std::any> &options,
                                 const std::unordered_map<std::string, std::any> &reset_options)
{
    if (this->is_created())
    {
        return;
    }

    if (const auto mode = options.find("mode"); mode != options.end())
    {
        init_mode_ = std::any_cast<GridWorldInitType>(mode->second);
    }
    else
    {
        init_mode_ = GridWorldInitType::STATIC;
    }

    if (const auto seed = options.find("seed"); seed != options.end())
    {
        seed_ = std::any_cast<uint_t>(seed->second);

        // The board keeps its own seed (42 by default). Forward the
        // user-supplied value so that seed() and the board agree; when no
        // seed is given the board default is left untouched.
        board_.seed = seed_;
    }

    if (const auto noise_factor = options.find("noise_factor"); noise_factor != options.end())
    {
        noise_factor_ = std::any_cast<real_t>(noise_factor->second);
    }

    if (const auto randomize_state = options.find("randomize_state");
        randomize_state != options.end())
    {
        randomize_state_ = std::any_cast<bool>(randomize_state->second);
    }

    // initialize the board
    board_.init_board(side_size_, init_mode_);

    // record the version and a fresh identifier, let the base class store the
    // options and finally flag the environment as created
    this->set_version_(version);
    auto idx = bitrl::utils::uuid4();
    this->set_idx_(idx);
    this->base_type::make(version, options, reset_options);
    this->make_created_();
}


/// Execute `action` on the board and return the resulting time step.
/// The action index is converted to GridWorldActionType with a static_cast.
template <uint_t side_size_>
typename Gridworld<side_size_>::time_step_type
Gridworld<side_size_>::step(const action_type &action)
{
    const auto move = static_cast<GridWorldActionType>(action);
    auto observation = board_.step(move);
    auto reward = board_.get_reward();

    // A reward of exactly -1.0 means the game continues. Any other reward
    // means we either reached the goal or hit the PIT, so the game is over.
    TimeStepTp step_kind = TimeStepTp::MID;
    if (reward != -1.0)
    {
        step_kind = TimeStepTp::LAST;
    }

    auto &current = this->get_current_time_step_();
    current = time_step_type(step_kind, reward, observation);
    return this->get_current_time_step_();
}


/// Reset the environment: the board is initialized again with the mode chosen
/// in make() and the first time step of the new episode is returned.
template <uint_t side_size_>
typename Gridworld<side_size_>::time_step_type
Gridworld<side_size_>::reset()
{
    // init_board hands back the fresh board state, which is our first observation
    auto observation = board_.init_board(side_size_, init_mode_);
    auto reward = board_.get_reward();

    auto &current = this->get_current_time_step_();
    current = time_step_type(TimeStepTp::FIRST, reward, observation);
    return this->get_current_time_step_();
}


/// Returns true if the PLAYER position is the same as the PIT position.
///
/// If either piece is not present on the board (for instance because the
/// component map is still empty) the game cannot have been lost and false is
/// returned; the lookups are checked so that an end iterator is never
/// dereferenced.
template <uint_t side_size_> bool Gridworld<side_size_>::is_game_lost() const
{
    const auto player = board_.components.find(detail::board_component_type::PLAYER);
    const auto pit = board_.components.find(detail::board_component_type::PIT);
    const auto missing = board_.components.end();

    if (player == missing || pit == missing)
    {
        return false;
    }

    return player->second.pos == pit->second.pos;
}


/// Close the environment by closing the underlying board.
template <uint_t side_size_>
void Gridworld<side_size_>::close()
{
    board_.close();
}


} // namespace envs::grid_world

} // namespace bitrl


#endif // GRID_WORLD_ENV_H

bitrl_consts.h

bitrl_types.h

bitrl_utils.h

bitrl::TimeStep< detail::board_state_type >

bitrl::envs::EnvBase
Base class interface for Reinforcement Learning environments.
Definition env_base.h:30

bitrl::envs::EnvBase< TimeStep< detail::board_state_type >, detail::GridWorldEnv< side_size_ > >::state_space_type
SpaceType::state_space state_space_type
Type describing the environment state space.
Definition env_base.h:44

bitrl::envs::EnvBase< TimeStep< detail::board_state_type >, detail::GridWorldEnv< side_size_ > >::state_type
SpaceType::state_type state_type
Type describing an individual state.
Definition env_base.h:47

bitrl::envs::EnvBase< TimeStep< detail::board_state_type >, detail::GridWorldEnv< side_size_ > >::reset_options
const std::unordered_map< std::string, std::any > & reset_options() const noexcept
Access the configuration options provided to make().
Definition env_base.h:104

bitrl::envs::EnvBase< TimeStep< detail::board_state_type >, detail::GridWorldEnv< side_size_ > >::action_space_type
SpaceType::action_space action_space_type
Type describing the environment action space.
Definition env_base.h:50

bitrl::envs::EnvBase< TimeStep< detail::board_state_type >, detail::GridWorldEnv< side_size_ > >::reset
virtual time_step_type reset()=0
Reset the environment to an initial state using the reset options specified during make.

bitrl::envs::EnvBase< TimeStep< detail::board_state_type >, detail::GridWorldEnv< side_size_ > >::action_type
SpaceType::action_type action_type
Type representing an individual action.
Definition env_base.h:53

bitrl::envs::EnvBase< TimeStep< detail::board_state_type >, detail::GridWorldEnv< side_size_ > >::version
std::string version() const noexcept
Get the environment version set during make().
Definition env_base.h:142

bitrl::envs::grid_world::Gridworld
Definition grid_world_env.h:247

bitrl::envs::grid_world::Gridworld::state_type
base_type::state_type state_type
The type describing an individual state of the environment.
Definition grid_world_env.h:295

bitrl::envs::grid_world::Gridworld::name
static const std::string name
name
Definition grid_world_env.h:254

bitrl::envs::grid_world::Gridworld::has_random_state
bool has_random_state() const noexcept
has_random_state
Definition grid_world_env.h:335

bitrl::envs::grid_world::Gridworld::Gridworld
Gridworld()
Constructor.
Definition grid_world_env.h:421

bitrl::envs::grid_world::Gridworld::n_components
static const uint_t n_components
n_components
Definition grid_world_env.h:259

bitrl::envs::grid_world::Gridworld::make
void make(const std::string &version, const std::unordered_map< std::string, std::any > &options, const std::unordered_map< std::string, std::any > &reset_options) override final
make. Builds the environment. The options map may carry the keys "mode", "seed", "noise_factor" and "randomize_state".
Definition grid_world_env.h:431

bitrl::envs::grid_world::Gridworld::n_copies
static uint_t n_copies()
Definition grid_world_env.h:375

bitrl::envs::grid_world::Gridworld::action_type
base_type::action_type action_type
The type of the action to be undertaken in the environment.
Definition grid_world_env.h:290

bitrl::envs::grid_world::Gridworld::n_actions
uint_t n_actions() const noexcept
n_actions. Returns the number of actions
Definition grid_world_env.h:345

bitrl::envs::grid_world::Gridworld::init_type
GridWorldInitType init_type() const noexcept
init_type
Definition grid_world_env.h:369

bitrl::envs::grid_world::Gridworld::side_size
static const uint_t side_size
side_size
Definition grid_world_env.h:264

bitrl::envs::grid_world::Gridworld::time_step_type
base_type::time_step_type time_step_type
The time step type we return every time a step in the environment is performed.
Definition grid_world_env.h:275

bitrl::envs::grid_world::Gridworld::state_space_type
base_type::state_space_type state_space_type
The type describing the state space for the environment.
Definition grid_world_env.h:280

bitrl::envs::grid_world::Gridworld::seed
uint_t seed() const noexcept
seed
Definition grid_world_env.h:351

bitrl::envs::grid_world::Gridworld::base_type
EnvBase< TimeStep< detail::board_state_type >, detail::GridWorldEnv< side_size_ > > base_type
The base_type.
Definition grid_world_env.h:269

bitrl::envs::grid_world::Gridworld::action_space_type
base_type::action_space_type action_space_type
The type of the action space for the environment.
Definition grid_world_env.h:285

bitrl::envs::grid_world::Gridworld::reset
time_step_type reset() override final
Reset the environment.
Definition grid_world_env.h:496

bitrl::envs::grid_world::Gridworld::step
time_step_type step(const action_type &action) override final
step
Definition grid_world_env.h:480

bitrl::envs::grid_world::Gridworld::n_states
uint_t n_states() const noexcept
n_states. Returns the number of states
Definition grid_world_env.h:340

bitrl::envs::grid_world::Gridworld::is_game_lost
bool is_game_lost() const
Returns true if the PLAYER position is the same as the PIT position.
Definition grid_world_env.h:506

bitrl::envs::grid_world::Gridworld::close
void close() override final
close
Definition grid_world_env.h:520

bitrl::envs::grid_world::Gridworld::noise_factor
real_t noise_factor() const noexcept
noise_factor
Definition grid_world_env.h:357

env_base.h

bitrl::envs::grid_world::detail::min
int_t min(const board_position &p)
Returns the min component of a position.
Definition grid_world_env.cpp:147

bitrl::envs::grid_world::detail::board_component_type
board_component_type
The BoardComponentType enum.
Definition grid_world_env.h:69

bitrl::envs::grid_world::detail::PLAYER
@ PLAYER
Definition grid_world_env.h:70

bitrl::envs::grid_world::detail::PIT
@ PIT
Definition grid_world_env.h:72

bitrl::envs::grid_world::detail::WALL
@ WALL
Definition grid_world_env.h:73

bitrl::envs::grid_world::detail::GOAL
@ GOAL
Definition grid_world_env.h:71

bitrl::envs::grid_world::detail::operator!=
bool operator!=(const board_position &p1, const board_position &p2)
Test if two positions are not equal.
Definition grid_world_env.cpp:138

bitrl::envs::grid_world::detail::board_position
std::pair< int, int > board_position
Models a position on the board.
Definition grid_world_env.h:58

bitrl::envs::grid_world::detail::max
int_t max(const board_position &p)
Returns the max component of a position.
Definition grid_world_env.cpp:145

bitrl::envs::grid_world::detail::board_state_type
std::vector< std::vector< std::vector< real_t > > > board_state_type
Array specifying the state of the board.
Definition grid_world_env.h:63

bitrl::envs::grid_world::detail::operator==
bool operator==(const board_position &p1, const board_position &p2)
Test if two positions are equal.
Definition grid_world_env.cpp:129

bitrl::envs::grid_world::detail::operator+
board_position operator+(const board_position &p1, const board_position &p2)
Add two positions and return their result.
Definition grid_world_env.cpp:140

bitrl::envs::grid_world::detail::board_move_type
board_move_type
The MoveType enum.
Definition grid_world_env.h:80

bitrl::envs::grid_world::detail::VALID
@ VALID
Definition grid_world_env.h:81

bitrl::envs::grid_world::detail::LOST_GAME
@ LOST_GAME
Definition grid_world_env.h:83

bitrl::envs::grid_world::detail::INVALID
@ INVALID
Definition grid_world_env.h:82

bitrl::envs::grid_world::GridWorldActionType
GridWorldActionType
Definition grid_world_env.h:45

bitrl::envs::grid_world::GridWorldActionType::RIGHT
@ RIGHT

bitrl::envs::grid_world::GridWorldActionType::LEFT
@ LEFT

bitrl::envs::grid_world::GridWorldActionType::DOWN
@ DOWN

bitrl::envs::grid_world::GridWorldActionType::UP
@ UP

bitrl::envs::grid_world::GridWorldInitType
GridWorldInitType
Definition grid_world_env.h:38

bitrl::envs::grid_world::GridWorldInitType::PLAYER
@ PLAYER

bitrl::envs::grid_world::GridWorldInitType::INVALID_TYPE
@ INVALID_TYPE

bitrl::envs::grid_world::GridWorldInitType::RANDOM
@ RANDOM

bitrl::envs::grid_world::GridWorldInitType::STATIC
@ STATIC

bitrl::utils::uuid4
std::string uuid4()
Definition bitrl_utils.h:22

bitrl
Definition bitrl_consts.h:14

bitrl::int_t
int int_t
integer type
Definition bitrl_types.h:33

bitrl::real_t
double real_t
real_t
Definition bitrl_types.h:23

bitrl::uint_t
std::size_t uint_t
uint_t
Definition bitrl_types.h:43

bitrl::TimeStepTp::MID
@ MID

bitrl::TimeStepTp::FIRST
@ FIRST

bitrl::TimeStepTp::LAST
@ LAST

space_type.h

bitrl::envs::ScalarDiscreteSpace
A scalar discrete space can be used to denote a space that only has a single value at each time....
Definition space_type.h:22

bitrl::envs::ScalarDiscreteSpace::space_item_type
uint_t space_item_type
item_t
Definition space_type.h:27

bitrl::envs::ScalarDiscreteSpace::size
static constexpr uint_t size
The overall size of the space, i.e. how many elements the space can potentially have.
Definition space_type.h:38

bitrl::envs::grid_world::detail::GridWorldEnv
Definition grid_world_env.h:222

bitrl::envs::grid_world::detail::GridWorldEnv::state_space
detail::board state_space
Definition grid_world_env.h:224

bitrl::envs::grid_world::detail::GridWorldEnv::state_type
detail::board_state_type state_type
Definition grid_world_env.h:225

bitrl::envs::grid_world::detail::GridWorldEnv::action_type
action_space::space_item_type action_type
Definition grid_world_env.h:228

bitrl::envs::grid_world::detail::GridWorldEnv::STATE_SPACE_SIZE
static constexpr uint_t STATE_SPACE_SIZE
Definition grid_world_env.h:226

bitrl::envs::grid_world::detail::GridWorldEnv::ACTION_SPACE_SIZE
static constexpr uint_t ACTION_SPACE_SIZE
Definition grid_world_env.h:229

bitrl::envs::grid_world::detail::GridWorldEnv::action_space
ScalarDiscreteSpace< 0, 4 > action_space
Definition grid_world_env.h:227

bitrl::envs::grid_world::detail::board_piece
The BoardPiece struct.
Definition grid_world_env.h:115

bitrl::envs::grid_world::detail::board_piece::board_piece
board_piece()=default
Default constructor.

bitrl::envs::grid_world::detail::board_piece::pos
board_position pos
pos 2-tuple e.g. (1,4)
Definition grid_world_env.h:124

bitrl::envs::grid_world::detail::board_piece::name
std::string name
Name of the piece.
Definition grid_world_env.h:119

bitrl::envs::grid_world::detail::board_piece::board_piece
board_piece(std::string name_, board_position pos_)
BoardPiece.
Definition grid_world_env.h:131

bitrl::envs::grid_world::detail::board
Definition grid_world_env.h:148

bitrl::envs::grid_world::detail::board::seed
uint_t seed
Definition grid_world_env.h:150

bitrl::envs::grid_world::detail::board::build_random_mode
void build_random_mode()
build_random_mode
Definition grid_world_env.cpp:396

bitrl::envs::grid_world::detail::board::move_piece
void move_piece(board_component_type piece, board_position pos)
move_piece. Move the piece to the given position.
Definition grid_world_env.cpp:307

bitrl::envs::grid_world::detail::board::build_player_mode
void build_player_mode(uint_t seed)
build_player_mode
Definition grid_world_env.cpp:425

bitrl::envs::grid_world::detail::board::build_static_mode
void build_static_mode()
build_static_mode
Definition grid_world_env.cpp:386

bitrl::envs::grid_world::detail::board::validate_move
board_move_type validate_move(board_component_type piece, board_position pos) const
validate_move_
Definition grid_world_env.cpp:329

bitrl::envs::grid_world::detail::board::board_size
uint_t board_size
Definition grid_world_env.h:149

bitrl::envs::grid_world::detail::board::init_board
board_state_type init_board(uint_t board_s, GridWorldInitType init_type)
initialize the board
Definition grid_world_env.cpp:156

bitrl::envs::grid_world::detail::board::check_and_move
void check_and_move(int_t row, int_t col)
check if the given move is valid and change the position of the player if the move either causes the ...
Definition grid_world_env.cpp:367

bitrl::envs::grid_world::detail::board::get_state
board_state_type get_state() const
get_state. Returns the state of the board
Definition grid_world_env.cpp:267

bitrl::envs::grid_world::detail::board::step
board_state_type step(GridWorldActionType action)
Execute the action on the board.
Definition grid_world_env.cpp:203

bitrl::envs::grid_world::detail::board::is_board_init
bool is_board_init
Definition grid_world_env.h:153

bitrl::envs::grid_world::detail::board::get_reward
real_t get_reward() const
Get the reward the board currently returns depending on the position of the player.
Definition grid_world_env.cpp:244

bitrl::envs::grid_world::detail::board::close
void close()
close
Definition grid_world_env.cpp:149

bitrl::envs::grid_world::detail::board::masks
std::map< std::string, board_mask > masks
Definition grid_world_env.h:152

bitrl::envs::grid_world::detail::board::components
std::map< board_component_type, board_piece > components
Definition grid_world_env.h:151

time_step.h