bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
bitrl::envs::gymnasium::BlackJack Class Reference [final]

#include <black_jack_env.h>

Inheritance diagram for bitrl::envs::gymnasium::BlackJack:
Collaboration diagram for bitrl::envs::gymnasium::BlackJack:

Public Types

typedef ToyTextEnvBase< TimeStep< std::vector< uint_t > >, 48, 2 >::base_type base_type
 The base type.
 
typedef base_type::time_step_type time_step_type
 The time step type we return every time a step in the environment is performed.
 
typedef base_type::state_space_type state_space_type
 The type describing the state space for the environment.
 
typedef base_type::action_space_type action_space_type
 The type of the action space for the environment.
 
typedef base_type::action_type action_type
 The type of the action to be undertaken in the environment.
 
typedef base_type::state_type state_type
 The type of the state.
 
- Public Types inherited from bitrl::envs::gymnasium::ToyTextEnvBase< TimeStep< std::vector< uint_t > >, 48, 2 >
typedef GymnasiumEnvBase< TimeStep< std::vector< uint_t > >, ScalarDiscreteEnv< state_end, action_end, 0, 0 > >::base_type base_type
 The base_type.
 
typedef base_type::time_step_type time_step_type
 The time step type we return every time a step in the environment is performed.
 
typedef base_type::state_space_type state_space_type
 The type describing the state space for the environment.
 
typedef base_type::action_space_type action_space_type
 The type of the action space for the environment.
 
typedef base_type::action_type action_type
 The type of the action to be undertaken in the environment.
 
typedef base_type::state_type state_type
 The type of the state.
 
typedef std::vector< std::tuple< real_t, uint_t, real_t, bool > > dynamics_t
 dynamics_t
 
- Public Types inherited from bitrl::envs::gymnasium::GymnasiumEnvBase< TimeStepType, SpaceType >
typedef EnvBase< TimeStepType, SpaceType > base_type
 Base environment type alias.
 
typedef base_type::time_step_type time_step_type
 Time step returned at each environment step.
 
typedef base_type::state_space_type state_space_type
 Type describing the observation/state space of the environment.
 
typedef base_type::action_space_type action_space_type
 Type describing the action space of the environment.
 
typedef base_type::action_type action_type
 Type representing a valid action to execute.
 
typedef base_type::state_type state_type
 Type representing a state/observation returned by the environment.
 
- Public Types inherited from bitrl::envs::EnvBase< TimeStepType, SpaceType >
typedef TimeStepType time_step_type
 Alias for the type returned when stepping the environment.
 
typedef SpaceType::state_space state_space_type
 Type describing the environment state space.
 
typedef SpaceType::state_type state_type
 Type describing an individual state.
 
typedef SpaceType::action_space action_space_type
 Type describing the environment action space.
 
typedef SpaceType::action_type action_type
 Type representing an individual action.
 

Public Member Functions

 BlackJack (network::RESTRLEnvClient &api_server)
 
 BlackJack (const BlackJack &other)
 
 ~BlackJack () override=default
 
virtual void make (const std::string &version, const std::unordered_map< std::string, std::any > &options, const std::unordered_map< std::string, std::any > &reset_options) override final
 make. Builds the environment. Optionally we can choose the natural and sab game rules via the options (see is_natural() and is_sab()).
 
virtual time_step_type step (const action_type &action) override final
 step
 
bool is_natural () const noexcept
 
bool is_sab () const noexcept
 
- Public Member Functions inherited from bitrl::envs::gymnasium::ToyTextEnvBase< TimeStep< std::vector< uint_t > >, 48, 2 >
virtual ~ToyTextEnvBase ()=default
 ~ToyTextEnvBase. Destructor.
 
dynamics_t p (uint_t sidx, uint_t aidx) const
 P.
 
uint_t n_actions () const noexcept
 n_actions. Returns the number of actions
 
uint_t n_states () const noexcept
 Number of states.
 
- Public Member Functions inherited from bitrl::envs::gymnasium::GymnasiumEnvBase< TimeStepType, SpaceType >
virtual ~GymnasiumEnvBase ()
 Virtual destructor.
 
virtual bool is_alive () const
 Check whether the environment is still alive/connected.
 
virtual void close () override
 Close the environment on the server and release any resources.
 
virtual time_step_type reset () override
 Reset the environment to an initial state using the reset options specified during make.
 
uint_t n_copies () const
 
network::RESTRLEnvClient & get_api_server () const
 Retrieve the REST API wrapper instance used for communication.
 
std::string get_url () const
 Get the full URL for this environment endpoint on the server.
 
virtual time_step_type reset ()=0
 Import the reset() overloads from the base class.
 
- Public Member Functions inherited from bitrl::envs::EnvBase< TimeStepType, SpaceType >
virtual ~EnvBase ()=default
 Virtual destructor.
 
virtual time_step_type step (const action_type &action)=0
 Perform one step in the environment using an action.
 
const std::unordered_map< std::string, std::any > & make_options () const noexcept
 Access the configuration options provided to make().
 
const std::unordered_map< std::string, std::any > & reset_options () const noexcept
 Access the reset options provided to make().
 
template<typename T >
T read_option (const std::string &op_name) const
 Read a specific make() option and cast it to the requested type.
 
std::string idx () const noexcept
 Get the id identifying this environment within a simulation batch. The id is valid only if make has been called.
 
bool is_created () const noexcept
 Check if make() has successfully initialized the environment.
 
std::string env_name () const noexcept
 Get the name of this environment instance.
 
std::string version () const noexcept
 Get the environment version set during make().
 

Static Public Attributes

static const std::string name = "BlackJack"
 name
 
static const std::string URI = "/gymnasium/black-jack-env"
 The URI for accessing the environment.
 
- Static Public Attributes inherited from bitrl::envs::EnvBase< TimeStepType, SpaceType >
static const uint_t DEFAULT_ENV_SEED = 42
 Default seed used in reset() if none provided.
 

Protected Member Functions

virtual dynamics_t build_dynamics_from_response_ (const nlohmann::json &) const override final
 build the dynamics from response
 
virtual time_step_type create_time_step_from_response_ (const nlohmann::json &) const override final
 Handle the reset response from the environment server.
 
- Protected Member Functions inherited from bitrl::envs::gymnasium::ToyTextEnvBase< TimeStep< std::vector< uint_t > >, 48, 2 >
 ToyTextEnvBase (network::RESTRLEnvClient &api_server, const std::string &name)
 Constructor.
 
 ToyTextEnvBase (const ToyTextEnvBase &other)
 Copy constructor.
 
- Protected Member Functions inherited from bitrl::envs::gymnasium::GymnasiumEnvBase< TimeStepType, SpaceType >
 GymnasiumEnvBase (network::RESTRLEnvClient &api_server, const std::string &name)
 Constructor.
 
 GymnasiumEnvBase (const GymnasiumEnvBase &)
 Copy constructor.
 
- Protected Member Functions inherited from bitrl::envs::EnvBase< TimeStepType, SpaceType >
 EnvBase (const std::string &idx=bitrl::consts::INVALID_STR, const std::string &name=bitrl::consts::INVALID_STR)
 Constructor (protected — for subclassing only).
 
 EnvBase (const EnvBase &)
 Copy constructor.
 
void set_version_ (const std::string &version) noexcept
 Set internal version string.
 
void set_idx_ (const std::string &idx) noexcept
 Set the id of the environment.
 
void set_make_options_ (const std::unordered_map< std::string, std::any > &options) noexcept
 Store options for future access.
 
void set_reset_options_ (const std::unordered_map< std::string, std::any > &options) noexcept
 Store reset options for future access.
 
void invalidate_is_created_flag_ () noexcept
 Mark environment as not created.
 
void make_created_ () noexcept
 Mark environment creation as successful.
 
time_step_type & get_current_time_step_ () noexcept
 Mutable access to the current time step.
 
const time_step_type & get_current_time_step_ () const noexcept
 Read-only access to the current time step.
 

Additional Inherited Members

- Protected Attributes inherited from bitrl::envs::gymnasium::GymnasiumEnvBase< TimeStepType, SpaceType >
network::RESTRLEnvClient & api_server_
 Server wrapper handling communication with remote Gymnasium environment.
 

Detailed Description

BlackJack class. Wrapper to the Blackjack OpenAI-Gym environment.

This environment is part of the Toy Text environments, which contain general information about the environment. Action Space: Discrete(2). Observation Space: Tuple(Discrete(32), Discrete(11), Discrete(2)). Blackjack is a card game where the goal is to beat the dealer by obtaining cards that sum to closer to 21 (without going over 21) than the dealer's cards. The game starts with the dealer having one face up and one face down card, while the player has two face up cards. All cards are drawn from an infinite deck (i.e. with replacement). The card values are:

  • Face cards (Jack, Queen, King) have a point value of 10.
  • Aces can either count as 11 (called a ‘usable ace’) or 1.
  • Numerical cards (2-10) have a value equal to their number.

The player has the sum of cards held. The player can request additional cards (hit) until they decide to stop (stick) or exceed 21 (bust, immediate loss). After the player sticks, the dealer reveals their facedown card, and draws cards until their sum is 17 or greater. If the dealer goes bust, the player wins. If neither the player nor the dealer busts, the outcome (win, lose, draw) is decided by whose sum is closer to 21. This environment corresponds to the version of the blackjack problem described in Example 5.1 in Reinforcement Learning: An Introduction by Sutton and Barto [1].

Action Space: The action shape is (1,) in the range {0, 1} indicating whether to stick or hit (0: Stick, 1: Hit). Observation Space: The observation consists of a 3-tuple containing: the player’s current sum, the value of the dealer’s one showing card (1-10 where 1 is ace), and whether the player holds a usable ace (0 or 1). The observation is returned as (int(), int(), int()). Rewards: win game: +1; lose game: -1; draw game: 0; win game with natural blackjack: +1.5 (if natural is True), +1 (if natural is False). Episode End: The episode ends if one of the following happens (termination): the player hits and the sum of the hand exceeds 21, or the player sticks. An ace will always be counted as usable (11) unless it busts the player.

Member Typedef Documentation

◆ action_space_type

typedef base_type::action_space_type bitrl::envs::gymnasium::BlackJack::action_space_type

The type of the action space for the environment.

◆ action_type

typedef base_type::action_type bitrl::envs::gymnasium::BlackJack::action_type

The type of the action to be undertaken in the environment.

◆ base_type

The base type.

◆ state_space_type

typedef base_type::state_space_type bitrl::envs::gymnasium::BlackJack::state_space_type

The type describing the state space for the environment.

◆ state_type

typedef base_type::state_type bitrl::envs::gymnasium::BlackJack::state_type

The type of the state.

◆ time_step_type

The time step type we return every time a step in the environment is performed.

Constructor & Destructor Documentation

◆ BlackJack() [1/2]

bitrl::envs::gymnasium::BlackJack::BlackJack ( network::RESTRLEnvClient & api_server)

◆ BlackJack() [2/2]

bitrl::envs::gymnasium::BlackJack::BlackJack ( const BlackJack & other)

◆ ~BlackJack()

bitrl::envs::gymnasium::BlackJack::~BlackJack ( )
override, default

Member Function Documentation

◆ build_dynamics_from_response_()

BlackJack::dynamics_t bitrl::envs::gymnasium::BlackJack::build_dynamics_from_response_ ( const nlohmann::json &  ) const
final, override, protected, virtual

build the dynamics from response

Reimplemented from bitrl::envs::gymnasium::ToyTextEnvBase< TimeStep< std::vector< uint_t > >, 48, 2 >.

◆ create_time_step_from_response_()

BlackJack::time_step_type bitrl::envs::gymnasium::BlackJack::create_time_step_from_response_ ( const nlohmann::json &  response) const
final, override, protected, virtual

Handle the reset response from the environment server.

Implements bitrl::envs::gymnasium::GymnasiumEnvBase< TimeStepType, SpaceType >.

◆ is_natural()

bool bitrl::envs::gymnasium::BlackJack::is_natural ( ) const
inline, noexcept

◆ is_sab()

bool bitrl::envs::gymnasium::BlackJack::is_sab ( ) const
inline, noexcept

◆ make()

void bitrl::envs::gymnasium::BlackJack::make ( const std::string &  version,
const std::unordered_map< std::string, std::any > &  options,
const std::unordered_map< std::string, std::any > &  reset_options 
)
final, override, virtual

make. Builds the environment. Optionally we can choose the natural and sab game rules via the options (see is_natural() and is_sab()).

Implements bitrl::envs::EnvBase< TimeStepType, SpaceType >.

◆ step()

BlackJack::time_step_type bitrl::envs::gymnasium::BlackJack::step ( const action_type & action)
final, override, virtual

step

Parameters
action
Returns

Member Data Documentation

◆ name

const std::string bitrl::envs::gymnasium::BlackJack::name = "BlackJack"
static

name

◆ URI

const std::string bitrl::envs::gymnasium::BlackJack::URI = "/gymnasium/black-jack-env"
static

The URI for accessing the environment.


The documentation for this class was generated from the following files: