12#include "bitrl/bitrl_config.h"
20#include "bitrl/extern/nlohmann/json/json.hpp"
26#include <unordered_map>
41template <uint_t state_size>
42class GymWalk final :
public EnvBase<TimeStep<uint_t>, ScalarDiscreteEnv<state_size, 1, 0, 0>>
46 static const std::string
name;
47 static const std::string
URI;
55 typedef std::vector<std::tuple<real_t, uint_t, real_t, bool>>
dynamics_t;
63 make(
const std::string &
version,
const std::unordered_map<std::string, std::any> &options,
64 const std::unordered_map<std::string, std::any> &
reset_options)
override final;
66 virtual void close() override final;
86 dynamics_t build_dynamics_from_response_(
const nlohmann::json &response)
const;
87 time_step_type create_time_step_from_response_(
const nlohmann::json &response)
const;
95template <uint_t state_size>
98 api_server_(&api_server)
100 api_server_ -> register_if_not(
name,
URI);
103template <uint_t state_size>
107 auto dynamics = response[
"dynamics"];
111template <uint_t state_size>
113GymWalk<state_size>::create_time_step_from_response_(
const nlohmann::json &response)
const
116 auto step_type = response[
"time_step"][
"step_type"];
117 auto reward = response[
"time_step"][
"reward"];
118 auto discount = response[
"time_step"][
"discount"];
119 auto observation = response[
"time_step"][
"observation"];
120 auto info = response[
"time_step"][
"info"];
122 discount, std::unordered_map<std::string, std::any>());
125template <uint_t state_size>
127 const std::unordered_map<std::string, std::any> &options,
128 const std::unordered_map<std::string, std::any> &reset_options)
131 if (this->is_created())
137 auto response = api_server_ -> make(this->env_name(), version, ops);
139 auto idx = response[
"idx"];
141 this->base_type::make(version, options, reset_options);
142 this->make_created_();
145template <uint_t state_size>
150 assert(this->is_created() &&
"Environment has not been created");
153 if (this->get_current_time_step_().last())
155 return this->reset();
158 const auto response = api_server_ -> step(this->env_name(), this->idx(), action);
160 this->get_current_time_step_() = this->create_time_step_from_response_(response);
161 return this->get_current_time_step_();
166 auto response = this->api_server_ -> is_alive(this->env_name(), this->idx());
167 return response[
"result"];
170template <uint_t state_size>
173 auto response = this->api_server_->n_copies(this->env_name());
174 return response[
"copies"];
181 if (!this->is_created())
186 auto response = this->api_server_ -> close(this->env_name(), this->idx());
187 this->invalidate_is_created_flag_();
190template <uint_t state_size>
194 if (!this->is_created())
197 assert(this->is_created() &&
"Environment has not been created");
203 auto &reset_ops = this->reset_options();
205 auto response = this->api_server_ -> reset(this->env_name(), this->idx(), seed, nlohmann::json());
207 this->create_time_step_from_response_(response);
208 return this->get_current_time_step_();
211template <uint_t state_size>
217 assert(this->is_created() &&
"Environment has not been created");
220 auto response = this->api_server_ -> dynamics(this->env_name(), this->idx(), sidx, aidx);
221 return build_dynamics_from_response_(response);
Base class interface for Reinforcement Learning environments.
Definition env_base.h:30
SpaceType::state_space state_space_type
Type describing the environment state space.
Definition env_base.h:44
SpaceType::state_type state_type
Type describing an individual state.
Definition env_base.h:47
const std::unordered_map< std::string, std::any > & reset_options() const noexcept
Access the reset options provided to make().
Definition env_base.h:104
SpaceType::action_space action_space_type
Type describing the environment action space.
Definition env_base.h:50
SpaceType::action_type action_type
Type representing an individual action.
Definition env_base.h:53
std::string version() const noexcept
Get the environment version set during make().
Definition env_base.h:142
std::string env_name() const noexcept
Get the name of this environment instance.
Definition env_base.h:136
virtual time_step_type step(const action_type &action) override final
Definition gym_walk.h:146
uint_t n_actions() const noexcept
Definition gym_walk.h:70
base_type::state_type state_type
Definition gym_walk.h:54
bool is_alive() const
Definition gym_walk.h:164
base_type::action_space_type action_space_type
Definition gym_walk.h:52
virtual void make(const std::string &version, const std::unordered_map< std::string, std::any > &options, const std::unordered_map< std::string, std::any > &reset_options) override final
Construct the environment instance.
Definition gym_walk.h:126
uint_t n_states() const noexcept
Definition gym_walk.h:69
std::string get_url() const
Get the full URL for this environment endpoint on the server.
Definition gym_walk.h:77
static const std::string URI
Definition gym_walk.h:47
static const std::string name
Definition gym_walk.h:46
EnvBase< TimeStep< uint_t >, ScalarDiscreteEnv< state_size, 1, 0, 0 > > base_type
Definition gym_walk.h:49
virtual time_step_type reset() override final
Reset the environment to an initial state using the reset options specified during make.
Definition gym_walk.h:191
base_type::time_step_type time_step_type
Definition gym_walk.h:50
GymWalk(network::RESTRLEnvClient &api_server)
Constructor.
Definition gym_walk.h:96
base_type::action_type action_type
Definition gym_walk.h:53
uint_t n_copies() const
Definition gym_walk.h:171
virtual void close() override final
Close and release any acquired environment resources.
Definition gym_walk.h:178
dynamics_t p(uint_t sidx, uint_t aidx) const
Definition gym_walk.h:213
base_type::state_space_type state_space_type
Definition gym_walk.h:51
std::vector< std::tuple< real_t, uint_t, real_t, bool > > dynamics_t
Definition gym_walk.h:55
Utility class to facilitate HTTP requests between the environments' REST API and C++ drivers.
Definition rest_rl_env_client.h:29
std::string get_env_url(const std::string &name) const noexcept
Return the url for the environment with the given name.
Definition rest_rl_env_client.cpp:86
OutT resolve(const std::string &name, const std::map< std::string, std::any > &input)
Definition std_map_utils.h:25
Definition bitrl_consts.h:14
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
static TimeStepTp time_step_type_from_int(int aidx)
Definition time_step_type.cpp:31
Definition env_types.h:13