|
| typedef ToyTextEnvBase< TimeStep< std::vector< uint_t > >, 48, 2 >::base_type | base_type |
| | The base type.
|
| |
| typedef base_type::time_step_type | time_step_type |
| | The time step type we return every time a step in the environment is performed.
|
| |
| typedef base_type::state_space_type | state_space_type |
| | The type describing the state space for the environment.
|
| |
| typedef base_type::action_space_type | action_space_type |
| | The type of the action space for the environment.
|
| |
| typedef base_type::action_type | action_type |
| | The type of the action to be undertaken in the environment.
|
| |
| typedef base_type::state_type | state_type |
| | The type of the state.
|
| |
| typedef GymnasiumEnvBase< TimeStep< std::vector< uint_t > >, ScalarDiscreteEnv< state_end, action_end, 0, 0 > >::base_type | base_type |
| | The base_type.
|
| |
| typedef base_type::time_step_type | time_step_type |
| | The time step type we return every time a step in the environment is performed.
|
| |
| typedef base_type::state_space_type | state_space_type |
| | The type describing the state space for the environment.
|
| |
| typedef base_type::action_space_type | action_space_type |
| | The type of the action space for the environment.
|
| |
| typedef base_type::action_type | action_type |
| | The type of the action to be undertaken in the environment.
|
| |
| typedef base_type::state_type | state_type |
| | The type of the state.
|
| |
| typedef std::vector< std::tuple< real_t, uint_t, real_t, bool > > | dynamics_t |
| | dynamics_t. The type returned by p(): a vector of (real_t, uint_t, real_t, bool) tuples.
|
| |
| typedef EnvBase< TimeStepType, SpaceType > | base_type |
| | Base environment type alias.
|
| |
| typedef base_type::time_step_type | time_step_type |
| | Time step returned at each environment step.
|
| |
| typedef base_type::state_space_type | state_space_type |
| | Type describing the observation/state space of the environment.
|
| |
| typedef base_type::action_space_type | action_space_type |
| | Type describing the action space of the environment.
|
| |
| typedef base_type::action_type | action_type |
| | Type representing a valid action to execute.
|
| |
| typedef base_type::state_type | state_type |
| | Type representing a state/observation returned by the environment.
|
| |
| typedef TimeStepType | time_step_type |
| | Alias for the type returned when stepping the environment.
|
| |
| typedef SpaceType::state_space | state_space_type |
| | Type describing the environment state space.
|
| |
| typedef SpaceType::state_type | state_type |
| | Type describing an individual state.
|
| |
| typedef SpaceType::action_space | action_space_type |
| | Type describing the environment action space.
|
| |
| typedef SpaceType::action_type | action_type |
| | Type representing an individual action.
|
| |
|
| | BlackJack (network::RESTRLEnvClient &api_server) |
| |
| | BlackJack (const BlackJack &other) |
| |
| | ~BlackJack () override=default |
| |
| virtual void | make (const std::string &version, const std::unordered_map< std::string, std::any > &options, const std::unordered_map< std::string, std::any > &reset_options) override final |
| | make. Builds the environment for the given version using the supplied make options and reset options.
|
| |
| virtual time_step_type | step (const action_type &action) override final |
| | step. Perform one step in the environment using the given action.
|
| |
| bool | is_natural () const noexcept |
| |
| bool | is_sab () const noexcept |
| |
| virtual | ~ToyTextEnvBase ()=default |
| | ~ToyTextEnvBase. Destructor.
|
| |
| dynamics_t | p (uint_t sidx, uint_t aidx) const |
| | p. Returns the dynamics for the state index sidx and the action index aidx.
|
| |
| uint_t | n_actions () const noexcept |
| | n_actions. Returns the number of actions
|
| |
| uint_t | n_states () const noexcept |
| | Number of states.
|
| |
| virtual | ~GymnasiumEnvBase () |
| | Virtual destructor.
|
| |
| virtual bool | is_alive () const |
| | Check whether the environment is still alive/connected.
|
| |
| virtual void | close () override |
| | Close the environment on the server and release any resources.
|
| |
| virtual time_step_type | reset () override |
| | Reset the environment to an initial state using the reset options specified during make.
|
| |
| uint_t | n_copies () const |
| |
| network::RESTRLEnvClient & | get_api_server () const |
| | Retrieve the REST API wrapper instance used for communication.
|
| |
| std::string | get_url () const |
| | Get the full URL for this environment endpoint on the server.
|
| |
| virtual time_step_type | reset ()=0 |
| | Reset the environment and return the resulting time step.
|
| |
| virtual | ~EnvBase ()=default |
| | Virtual destructor.
|
| |
| virtual time_step_type | step (const action_type &action)=0 |
| | Perform one step in the environment using an action.
|
| |
| const std::unordered_map< std::string, std::any > & | make_options () const noexcept |
| | Access the configuration options provided to make().
|
| |
| const std::unordered_map< std::string, std::any > & | reset_options () const noexcept |
| | Access the reset options provided to make().
|
| |
| template<typename T > |
| T | read_option (const std::string &op_name) const |
| | Read a specific make() option and cast it to the requested type.
|
| |
| std::string | idx () const noexcept |
| | Get the id identifying this environment within a simulation batch. The id is valid only if make has been called.
|
| |
| bool | is_created () const noexcept |
| | Check if make() has successfully initialized the environment.
|
| |
| std::string | env_name () const noexcept |
| | Get the name of this environment instance.
|
| |
| std::string | version () const noexcept |
| | Get the environment version set during make().
|
| |
BlackJack class. Wrapper to the Blackjack OpenAI-Gym environment.
This environment is part of the Toy Text environments, which contain general information about the environment.

- Action Space: Discrete(2)
- Observation Space: Tuple(Discrete(32), Discrete(11), Discrete(2))

Blackjack is a card game where the goal is to beat the dealer by obtaining cards that sum closer to 21 (without going over 21) than the dealer's cards. The game starts with the dealer having one face-up and one face-down card, while the player has two face-up cards. All cards are drawn from an infinite deck (i.e. with replacement). The card values are:
- Face cards (Jack, Queen, King) have a point value of 10.
- Aces can either count as 11 (called a ‘usable ace’) or 1.
- Numerical cards (2-10) have a value equal to their number.

The player has the sum of cards held. The player can request additional cards (hit) until they decide to stop (stick) or exceed 21 (bust, immediate loss). After the player sticks, the dealer reveals their face-down card, and draws cards until their sum is 17 or greater. If the dealer goes bust, the player wins. If neither the player nor the dealer busts, the outcome (win, lose, draw) is decided by whose sum is closer to 21. This environment corresponds to the version of the blackjack problem described in Example 5.1 in Reinforcement Learning: An Introduction by Sutton and Barto [1].
Action Space: The action shape is (1,) in the range {0, 1} indicating whether to stick or hit.
- 0: Stick
- 1: Hit

Observation Space: The observation consists of a 3-tuple containing: the player’s current sum, the value of the dealer’s one showing card (1-10 where 1 is ace), and whether the player holds a usable ace (0 or 1). The observation is returned as (int(), int(), int()).

Rewards:
- win game: +1
- lose game: -1
- draw game: 0
- win game with natural blackjack: +1.5 (if natural is True), +1 (if natural is False)

Episode End: The episode ends if the following happens:
- Termination:
  - The player hits and the sum of hand exceeds 21.
  - The player sticks.

An ace will always be counted as usable (11) unless it busts the player.