#include <black_jack_env.h>

Inheritance diagram for bitrl::envs::gymnasium::BlackJack:

[legend]

Collaboration diagram for bitrl::envs::gymnasium::BlackJack:

[legend]

Public Types
typedef ToyTextEnvBase< TimeStep< std::vector< uint_t > >, 48, 2 >::base_type	base_type
	The base type.

typedef base_type::time_step_type	time_step_type
	The time step type we return every time a step in the environment is performed.

typedef base_type::state_space_type	state_space_type
	The type describing the state space for the environment.

typedef base_type::action_space_type	action_space_type
	The type of the action space for the environment.

typedef base_type::action_type	action_type
	The type of the action to be undertaken in the environment.

typedef base_type::state_type	state_type
	The type of the state.

Public Types inherited from bitrl::envs::gymnasium::ToyTextEnvBase< TimeStep< std::vector< uint_t > >, 48, 2 >
typedef GymnasiumEnvBase< TimeStep< std::vector< uint_t > >, ScalarDiscreteEnv< state_end, action_end, 0, 0 > >::base_type	base_type
	The base_type.

typedef base_type::time_step_type	time_step_type
	The time step type we return every time a step in the environment is performed.

typedef base_type::state_space_type	state_space_type
	The type describing the state space for the environment.

typedef base_type::action_space_type	action_space_type
	The type of the action space for the environment.

typedef base_type::action_type	action_type
	The type of the action to be undertaken in the environment.

typedef base_type::state_type	state_type
	The type of the state.

typedef std::vector< std::tuple< real_t, uint_t, real_t, bool > >	dynamics_t
	dynamics_t

Public Types inherited from bitrl::envs::gymnasium::GymnasiumEnvBase< TimeStepType, SpaceType >
typedef EnvBase< TimeStepType, SpaceType >	base_type
	Base environment type alias.

typedef base_type::time_step_type	time_step_type
	Time step returned at each environment step.

typedef base_type::state_space_type	state_space_type
	Type describing the observation/state space of the environment.

typedef base_type::action_space_type	action_space_type
	Type describing the action space of the environment.

typedef base_type::action_type	action_type
	Type representing a valid action to execute.

typedef base_type::state_type	state_type
	Type representing a state/observation returned by the environment.

Public Types inherited from bitrl::envs::EnvBase< TimeStepType, SpaceType >
typedef TimeStepType	time_step_type
	Alias for the type returned when stepping the environment.

typedef SpaceType::state_space	state_space_type
	Type describing the environment state space.

typedef SpaceType::state_type	state_type
	Type describing an individual state.

typedef SpaceType::action_space	action_space_type
	Type describing the environment action space.

typedef SpaceType::action_type	action_type
	Type representing an individual action.

Public Member Functions
	BlackJack (network::RESTRLEnvClient &api_server)

	BlackJack (const BlackJack &other)

	~BlackJack () override=default

virtual void	make (const std::string &version, const std::unordered_map< std::string, std::any > &options, const std::unordered_map< std::string, std::any > &reset_options) override final
	make. Builds the environment. Optionally we can choose the game variant through the options (presumably the natural and sab flags reported by is_natural() and is_sab(); TODO confirm)

virtual time_step_type	step (const action_type &action) override final
	step

bool	is_natural () const noexcept

bool	is_sab () const noexcept

Public Member Functions inherited from bitrl::envs::gymnasium::ToyTextEnvBase< TimeStep< std::vector< uint_t > >, 48, 2 >
virtual	~ToyTextEnvBase ()=default
	~ToyTextEnvBase. Destructor.

dynamics_t	p (uint_t sidx, uint_t aidx) const
	P.

uint_t	n_actions () const noexcept
	n_actions. Returns the number of actions

uint_t	n_states () const noexcept
	Number of states.

Public Member Functions inherited from bitrl::envs::gymnasium::GymnasiumEnvBase< TimeStepType, SpaceType >
virtual	~GymnasiumEnvBase ()
	Virtual destructor.

virtual bool	is_alive () const
	Check whether the environment is still alive/connected.

virtual void	close () override
	Close the environment on the server and release any resources.

virtual time_step_type	reset () override
	Reset the environment to an initial state using the reset options specified during make.

uint_t	n_copies () const

network::RESTRLEnvClient &	get_api_server () const
	Retrieve the REST API wrapper instance used for communication.

std::string	get_url () const
	Get the full URL for this environment endpoint on the server.

virtual time_step_type	reset ()=0
	Import the reset() overloads from the base class.

Public Member Functions inherited from bitrl::envs::EnvBase< TimeStepType, SpaceType >
virtual	~EnvBase ()=default
	Virtual destructor.

virtual time_step_type	step (const action_type &action)=0
	Perform one step in the environment using an action.

const std::unordered_map< std::string, std::any > &	make_options () const noexcept
	Access the configuration options provided to make().

const std::unordered_map< std::string, std::any > &	reset_options () const noexcept
	Access the reset options provided to make().

template<typename T >
T	read_option (const std::string &op_name) const
	Read a specific make() option and cast it to the requested type.

std::string	idx () const noexcept
	Get the id identifying this environment within a simulation batch. The id is valid only if make has been called.

bool	is_created () const noexcept
	Check if make() has successfully initialized the environment.

std::string	env_name () const noexcept
	Get the name of this environment instance.

std::string	version () const noexcept
	Get the environment version set during make().

Static Public Attributes
static const std::string	name = "BlackJack"
	name

static const std::string	URI = "/gymnasium/black-jack-env"
	The URI for accessing the environment.

Static Public Attributes inherited from bitrl::envs::EnvBase< TimeStepType, SpaceType >
static const uint_t	DEFAULT_ENV_SEED = 42
	Default seed used in reset() if none provided.

Protected Member Functions
virtual dynamics_t	build_dynamics_from_response_ (const nlohmann::json &) const override final
	build the dynamics from response

virtual time_step_type	create_time_step_from_response_ (const nlohmann::json &) const override final
	Handle the reset response from the environment server.

Protected Member Functions inherited from bitrl::envs::gymnasium::ToyTextEnvBase< TimeStep< std::vector< uint_t > >, 48, 2 >
	ToyTextEnvBase (network::RESTRLEnvClient &api_server, const std::string &name)
	Constructor.

	ToyTextEnvBase (const ToyTextEnvBase &other)
	Copy constructor.

Protected Member Functions inherited from bitrl::envs::gymnasium::GymnasiumEnvBase< TimeStepType, SpaceType >
	GymnasiumEnvBase (network::RESTRLEnvClient &api_server, const std::string &name)
	Constructor.

	GymnasiumEnvBase (const GymnasiumEnvBase &)
	Copy constructor.

Protected Member Functions inherited from bitrl::envs::EnvBase< TimeStepType, SpaceType >
	EnvBase (const std::string &idx=bitrl::consts::INVALID_STR, const std::string &name=bitrl::consts::INVALID_STR)
	Constructor (protected — for subclassing only).

	EnvBase (const EnvBase &)
	Copy constructor.

void	set_version_ (const std::string &version) noexcept
	Set internal version string.

void	set_idx_ (const std::string &idx) noexcept
	Set the id of the environment.

void	set_make_options_ (const std::unordered_map< std::string, std::any > &options) noexcept
	Store options for future access.

void	set_reset_options_ (const std::unordered_map< std::string, std::any > &options) noexcept
	Store reset options for future access.

void	invalidate_is_created_flag_ () noexcept
	Mark environment as not created.

void	make_created_ () noexcept
	Mark environment creation as successful.

time_step_type &	get_current_time_step_ () noexcept
	Mutable access to the current time step.

const time_step_type &	get_current_time_step_ () const noexcept
	Read-only access to the current time step.

Additional Inherited Members
Protected Attributes inherited from bitrl::envs::gymnasium::GymnasiumEnvBase< TimeStepType, SpaceType >
network::RESTRLEnvClient *	api_server_
	Server wrapper handling communication with remote Gymnasium environment.

Detailed Description

BlackJack class. Wrapper to the Blackjack OpenAI-Gym environment.

This environment is part of the Toy Text environments which contains general information about the environment. Action Space: Discrete(2). Observation Space: Tuple(Discrete(32), Discrete(11), Discrete(2)). Blackjack is a card game where the goal is to beat the dealer by obtaining cards that sum to closer to 21 (without going over 21) than the dealer's cards. The game starts with the dealer having one face up and one face down card, while the player has two face up cards. All cards are drawn from an infinite deck (i.e. with replacement). The card values are:

Face cards (Jack, Queen, King) have a point value of 10.
Aces can either count as 11 (called a ‘usable ace’) or 1.
Numerical cards (2-10) have a value equal to their number. The player has the sum of cards held. The player can request additional cards (hit) until they decide to stop (stick) or exceed 21 (bust, immediate loss). After the player sticks, the dealer reveals their facedown card, and draws cards until their sum is 17 or greater. If the dealer goes bust, the player wins. If neither the player nor the dealer busts, the outcome (win, lose, draw) is decided by whose sum is closer to 21. This environment corresponds to the version of the blackjack problem described in Example 5.1 in Reinforcement Learning: An Introduction by Sutton and Barto [1].

Action Space: The action shape is (1,) in the range {0, 1} indicating whether to stick or hit. 0: Stick; 1: Hit. Observation Space: The observation consists of a 3-tuple containing: the player’s current sum, the value of the dealer’s one showing card (1-10 where 1 is ace), and whether the player holds a usable ace (0 or 1). The observation is returned as (int(), int(), int()). Rewards: win game: +1; lose game: -1; draw game: 0; win game with natural blackjack: +1.5 (if natural is True), +1 (if natural is False). Episode End: The episode ends if one of the following happens (Termination): the player hits and the sum of hand exceeds 21; the player sticks. An ace will always be counted as usable (11) unless it busts the player.

Member Typedef Documentation

◆ action_space_type

typedef base_type::action_space_type bitrl::envs::gymnasium::BlackJack::action_space_type

The type of the action space for the environment.

◆ action_type

typedef base_type::action_type bitrl::envs::gymnasium::BlackJack::action_type

The type of the action to be undertaken in the environment.

◆ base_type

typedef ToyTextEnvBase<TimeStep<std::vector<uint_t>>,48,2>::base_type bitrl::envs::gymnasium::BlackJack::base_type

The base type.

◆ state_space_type

typedef base_type::state_space_type bitrl::envs::gymnasium::BlackJack::state_space_type

The type describing the state space for the environment.

◆ state_type

typedef base_type::state_type bitrl::envs::gymnasium::BlackJack::state_type

The type of the state.

◆ time_step_type

typedef base_type::time_step_type bitrl::envs::gymnasium::BlackJack::time_step_type

The time step type we return every time a step in the environment is performed.

Constructor & Destructor Documentation

◆ BlackJack() [1/2]

bitrl::envs::gymnasium::BlackJack::BlackJack ( network::RESTRLEnvClient & api_server )

◆ BlackJack() [2/2]

bitrl::envs::gymnasium::BlackJack::BlackJack ( const BlackJack & other )

◆ ~BlackJack()

bitrl::envs::gymnasium::BlackJack::~BlackJack ( )

override default

Member Function Documentation

◆ build_dynamics_from_response_()

BlackJack::dynamics_t bitrl::envs::gymnasium::BlackJack::build_dynamics_from_response_ ( const nlohmann::json & ) const

final override protected virtual

build the dynamics from response

Reimplemented from bitrl::envs::gymnasium::ToyTextEnvBase< TimeStep< std::vector< uint_t > >, 48, 2 >.

◆ create_time_step_from_response_()

BlackJack::time_step_type bitrl::envs::gymnasium::BlackJack::create_time_step_from_response_ ( const nlohmann::json & response ) const

final override protected virtual

Handle the reset response from the environment server.

Implements bitrl::envs::gymnasium::GymnasiumEnvBase< TimeStepType, SpaceType >.

◆ is_natural()

bool bitrl::envs::gymnasium::BlackJack::is_natural ( ) const

inline noexcept

◆ is_sab()

bool bitrl::envs::gymnasium::BlackJack::is_sab ( ) const

inline noexcept

◆ make()

void bitrl::envs::gymnasium::BlackJack::make	(	const std::string &	version,
		const std::unordered_map< std::string, std::any > &	options,
		const std::unordered_map< std::string, std::any > &	reset_options
	)

final override virtual

make. Builds the environment. Optionally we can choose the game variant through the options (presumably the natural and sab flags reported by is_natural() and is_sab(); TODO confirm)

Implements bitrl::envs::EnvBase< TimeStepType, SpaceType >.

◆ step()

BlackJack::time_step_type bitrl::envs::gymnasium::BlackJack::step ( const action_type & action )

final override virtual

step

Parameters

action

Returns

Member Data Documentation

◆ name

const std::string bitrl::envs::gymnasium::BlackJack::name = "BlackJack"

static

name

◆ URI

const std::string bitrl::envs::gymnasium::BlackJack::URI = "/gymnasium/black-jack-env"