bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
lunar_lander_env.h
Go to the documentation of this file.
1//
2// Created by alex on 6/29/25.
3//
4
5#ifndef LUNAR_LANDER_ENV_H
6#define LUNAR_LANDER_ENV_H
7
8#include "bitrl/bitrl_types.h"
12#include "bitrl/extern/nlohmann/json/json.hpp"
14
15namespace bitrl
16{
17namespace envs::gymnasium
18{
19namespace lunar_lander_detail
20{
21template <typename TimeStepType, typename SpaceType>
22class _LunarLanderEnv : public GymnasiumEnvBase<TimeStepType, SpaceType>
23{
24 public:
29
35
40
45
50
55
61 _LunarLanderEnv(network::RESTRLEnvClient &api_server, const std::string &name,
62 const std::string &uri);
63
68
72 ~_LunarLanderEnv() = default;
73
77 virtual void
78 make(const std::string &version, const std::unordered_map<std::string, std::any> &options,
79 const std::unordered_map<std::string, std::any> &reset_options) override final;
80
84 virtual time_step_type step(const action_type &action) override final;
85
89 uint_t n_actions() const noexcept { return action_space_type::size; }
90
91 protected:
95 virtual time_step_type
96 create_time_step_from_response_(const nlohmann::json &response) const override;
97};
98
99template <typename TimeStepType, typename SpaceType>
101 const std::string &name,
102 const std::string &uri)
103 : GymnasiumEnvBase<TimeStepType, SpaceType>(api_server, name)
104{
105 this->get_api_server().register_if_not(name, uri);
106}
107
108template <typename TimeStepType, typename SpaceType>
114
115template <typename TimeStepType, typename SpaceType>
117 const std::string &version, const std::unordered_map<std::string, std::any> &options,
118 const std::unordered_map<std::string, std::any> &reset_options)
119{
120 if (this->is_created())
121 {
122 return;
123 }
124
125 nlohmann::json ops;
126
127 if (auto gravity_itr = options.find("gravity"); gravity_itr != options.end())
128 {
129 auto gravity = std::any_cast<real_t>(gravity_itr->second);
130 ops["gravity"] = gravity;
131 }
132
133 if (auto enable_wind_itr = options.find("enable_wind"); enable_wind_itr != options.end())
134 {
135 auto wind = std::any_cast<bool>(enable_wind_itr->second);
136 ops["enable_wind"] = wind;
137 }
138
139 if (auto wind_power_itr = options.find("wind_power"); wind_power_itr != options.end())
140 {
141 auto wind_power = std::any_cast<real_t>(wind_power_itr->second);
142 ops["wind_power"] = wind_power;
143 }
144
145 if (auto turbulence_power_itr = options.find("turbulence_power");
146 turbulence_power_itr != options.end())
147 {
148 auto turbulence_power = std::any_cast<real_t>(turbulence_power_itr->second);
149 ops["turbulence_power"] = turbulence_power;
150 }
151
152 auto response = this->get_api_server().make(this->env_name(), version, ops);
153
154 auto idx = response["idx"];
155 this->set_idx_(idx);
156 this->base_type::make(version, options, reset_options);
157 this->make_created_();
158}
159
160template <typename TimeStepType, typename SpaceType>
163{
164
165#ifdef BITRL_DEBUG
166 assert(this->is_created() && "Environment has not been created");
167#endif
168
169 if (this->get_current_time_step_().last())
170 {
171 return this->reset();
172 }
173
174 const auto response = this->get_api_server().step(this->env_name(), this->idx(), action);
175
176 this->get_current_time_step_() = this->create_time_step_from_response_(response);
177 return this->get_current_time_step_();
178}
179
180template <typename TimeStepType, typename SpaceType>
183 const nlohmann::json &response) const
184{
185 auto step_type = response["time_step"]["step_type"].template get<uint_t>();
186 auto reward = response["time_step"]["reward"];
187 auto discount = response["time_step"]["discount"];
188 auto obs = response["time_step"]["observation"];
189 auto info = response["time_step"]["info"];
190 return time_step_type(TimeStepEnumUtils::time_step_type_from_int(step_type), reward, obs,
191 discount, std::unordered_map<std::string, std::any>());
192}
193
194} // namespace lunar_lander_detail
195
201 TimeStep<std::vector<real_t>>, ContinuousVectorStateDiscreteActionEnv<8, 4, 0, real_t>>
202{
203 public:
207 static const std::string name;
208
212 static const std::string URI;
213
220
226
231
236
241
246
251
256
260 ~LunarLanderDiscreteEnv() override = default;
261};
262
269 TimeStep<std::vector<real_t>>,
270 ContinuousVectorStateContinuousVectorActionEnv<8, 2, real_t, real_t>>
271{
272 public:
276 static const std::string name;
277
281 static const std::string URI;
282
290
296
301
306
311
316
321
326
330 ~LunarLanderContinuousEnv() override = default;
331};
332} // namespace envs::gymnasium
333} // namespace bitrl
334
335#endif // LUNAR_LANDER_ENV_H
Forward declaration.
Definition time_step.h:22
Base class interface for Reinforcement Learning environments.
Definition env_base.h:30
SpaceType::state_space state_space_type
Type describing the environment state space.
Definition env_base.h:44
SpaceType::state_type state_type
Type describing an individual state.
Definition env_base.h:47
const std::unordered_map< std::string, std::any > & reset_options() const noexcept
Access the configuration options provided to make().
Definition env_base.h:104
SpaceType::action_space action_space_type
Type describing the environment action space.
Definition env_base.h:50
TimeStepType time_step_type
Alias for the type returned when stepping the environment.
Definition env_base.h:41
SpaceType::action_type action_type
Type representing an individual action.
Definition env_base.h:53
std::string version() const noexcept
Get the environment version set during make().
Definition env_base.h:142
Base class for all Gymnasium environment wrappers.
Definition gymnasium_env_base.h:41
network::RESTRLEnvClient & get_api_server() const
Retrieve the REST API wrapper instance used for communication.
Definition gymnasium_env_base.h:98
Definition lunar_lander_env.h:271
base_type::action_space_type action_space_type
The type of the action space for the environment.
Definition lunar_lander_env.h:305
base_type::state_type state_type
The type of the state.
Definition lunar_lander_env.h:315
base_type::state_space_type state_space_type
The type describing the state space for the environment.
Definition lunar_lander_env.h:300
lunar_lander_detail::_LunarLanderEnv< TimeStep< std::vector< real_t > >, ContinuousVectorStateContinuousVectorActionEnv< 8, 2, real_t, real_t > > base_type
Base class type.
Definition lunar_lander_env.h:289
base_type::action_type action_type
The type of the action to be undertaken in the environment.
Definition lunar_lander_env.h:310
~LunarLanderContinuousEnv() override=default
~LunarLanderContinuousEnv. Destructor
static const std::string name
name
Definition lunar_lander_env.h:276
base_type::time_step_type time_step_type
The time step type we return every time a step in the environment is performed.
Definition lunar_lander_env.h:295
static const std::string URI
The URI for accessing the environment.
Definition lunar_lander_env.h:281
LunarLanderDiscreteEnv environment with discrete action space.
Definition lunar_lander_env.h:202
base_type::state_type state_type
The type of the state.
Definition lunar_lander_env.h:245
static const std::string name
name
Definition lunar_lander_env.h:207
base_type::state_space_type state_space_type
The type describing the state space for the environment.
Definition lunar_lander_env.h:230
lunar_lander_detail::_LunarLanderEnv< TimeStep< std::vector< real_t > >, ContinuousVectorStateDiscreteActionEnv< 8, 4, 0, real_t > > base_type
Base class type.
Definition lunar_lander_env.h:219
base_type::time_step_type time_step_type
The time step type we return every time a step in the environment is performed.
Definition lunar_lander_env.h:225
~LunarLanderDiscreteEnv() override=default
~LunarLanderDiscreteEnv. Destructor
base_type::action_space_type action_space_type
The type of the action space for the environment.
Definition lunar_lander_env.h:235
static const std::string URI
The URI for accessing the environment.
Definition lunar_lander_env.h:212
base_type::action_type action_type
The type of the action to be undertaken in the environment.
Definition lunar_lander_env.h:240
base_type::state_type state_type
The type of the state.
Definition lunar_lander_env.h:54
_LunarLanderEnv(const _LunarLanderEnv &other)
Definition lunar_lander_env.h:109
base_type::time_step_type time_step_type
The time step type we return every time a step in the environment is performed.
Definition lunar_lander_env.h:34
virtual time_step_type step(const action_type &action) override final
step. Step in the environment following the given action
Definition lunar_lander_env.h:162
base_type::action_space_type action_space_type
The type of the action space for the environment.
Definition lunar_lander_env.h:44
uint_t n_actions() const noexcept
n_actions. Returns the number of actions
Definition lunar_lander_env.h:89
base_type::action_type action_type
The type of the action to be undertaken in the environment.
Definition lunar_lander_env.h:49
virtual time_step_type create_time_step_from_response_(const nlohmann::json &response) const override
build the time step from the server response
Definition lunar_lander_env.h:182
_LunarLanderEnv(network::RESTRLEnvClient &api_server, const std::string &name, const std::string &uri)
Definition lunar_lander_env.h:100
base_type::state_space_type state_space_type
The type describing the state space for the environment.
Definition lunar_lander_env.h:39
GymnasiumEnvBase< TimeStepType, SpaceType >::base_type base_type
Base class type.
Definition lunar_lander_env.h:28
virtual void make(const std::string &version, const std::unordered_map< std::string, std::any > &options, const std::unordered_map< std::string, std::any > &reset_options) override final
make. Build the environment
Definition lunar_lander_env.h:116
Utility class to facilitate HTTP requests between the environments REST API and C++ drivers.
Definition rest_rl_env_client.h:29
void register_if_not(const std::string &name, const std::string &uri)
Same as register_new but swallows the thrown exception.
Definition rest_rl_env_client.cpp:62
Definition bitrl_consts.h:14
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
static TimeStepTp time_step_type_from_int(int aidx)
Definition time_step_type.cpp:31