bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
gym_walk.h
Go to the documentation of this file.
1/*
2 * GymWalk environment from
3 * <a href="https://github.com/mimoralea/gym-walk">gym_walk</a>
4 *
5 *
6 *
7 */
8
9#ifndef GYM_WALK_H
10#define GYM_WALK_H
11
12#include "bitrl/bitrl_config.h"
13#include "bitrl/bitrl_consts.h"
14#include "bitrl/bitrl_types.h"
15#include "bitrl/envs/env_base.h"
20#include "bitrl/extern/nlohmann/json/json.hpp"
21
22#include <any>
23#include <memory>
24#include <string>
25#include <tuple>
26#include <unordered_map>
27#include <vector>
28
29#ifdef BITRL_DEBUG
30#include <cassert>
31#endif
32
33namespace bitrl
34{
35namespace envs::gdrl
36{
37
41template <uint_t state_size>
42class GymWalk final : public EnvBase<TimeStep<uint_t>, ScalarDiscreteEnv<state_size, 1, 0, 0>>
43{
44 public:
45
46 static const std::string name;
47 static const std::string URI;
48
55 typedef std::vector<std::tuple<real_t, uint_t, real_t, bool>> dynamics_t;
56
61
62 virtual void
63 make(const std::string &version, const std::unordered_map<std::string, std::any> &options,
64 const std::unordered_map<std::string, std::any> &reset_options) override final;
65 bool is_alive() const;
66 virtual void close() override final;
67 virtual time_step_type step(const action_type &action) override final;
68 virtual time_step_type reset() override final;
69 uint_t n_states() const noexcept { return state_space_type::size; }
70 uint_t n_actions() const noexcept { return action_space_type::size + 1; }
71 dynamics_t p(uint_t sidx, uint_t aidx) const;
72
77 std::string get_url() const{return api_server_->get_env_url(this->env_name());}
78
83 uint_t n_copies() const;
84
85 private:
86 dynamics_t build_dynamics_from_response_(const nlohmann::json &response) const;
87 time_step_type create_time_step_from_response_(const nlohmann::json &response) const;
88
89 network::RESTRLEnvClient *api_server_;
90};
91
92template <uint_t state_size> const std::string GymWalk<state_size>::name = "GymWalk";
93template <uint_t state_size> const std::string GymWalk<state_size>::URI = "/gdrl/gym-walk-env";
94
95template <uint_t state_size>
97 : base_type(bitrl::consts::INVALID_STR, name),
98 api_server_(&api_server)
99{
100 api_server_ -> register_if_not(name, URI);
101}
102
103template <uint_t state_size>
105GymWalk<state_size>::build_dynamics_from_response_(const nlohmann::json &response) const
106{
107 auto dynamics = response["dynamics"];
108 return dynamics;
109}
110
111template <uint_t state_size>
113GymWalk<state_size>::create_time_step_from_response_(const nlohmann::json &response) const
114{
115
116 auto step_type = response["time_step"]["step_type"];
117 auto reward = response["time_step"]["reward"];
118 auto discount = response["time_step"]["discount"];
119 auto observation = response["time_step"]["observation"];
120 auto info = response["time_step"]["info"];
121 return GymWalk::time_step_type(TimeStepEnumUtils::time_step_type_from_int(step_type.get<uint_t>()), reward, observation,
122 discount, std::unordered_map<std::string, std::any>());
123}
124
125template <uint_t state_size>
126void GymWalk<state_size>::make(const std::string &version,
127 const std::unordered_map<std::string, std::any> &options,
128 const std::unordered_map<std::string, std::any> &reset_options)
129{
130
131 if (this->is_created())
132 {
133 return;
134 }
135
136 nlohmann::json ops;
137 auto response = api_server_ -> make(this->env_name(), version, ops);
138
139 auto idx = response["idx"];
140 this->set_idx_(idx);
141 this->base_type::make(version, options, reset_options);
142 this->make_created_();
143}
144
145template <uint_t state_size>
147{
148
149#ifdef BITRL_DEBUG
150 assert(this->is_created() && "Environment has not been created");
151#endif
152
153 if (this->get_current_time_step_().last())
154 {
155 return this->reset();
156 }
157
158 const auto response = api_server_ -> step(this->env_name(), this->idx(), action);
159
160 this->get_current_time_step_() = this->create_time_step_from_response_(response);
161 return this->get_current_time_step_();
162}
163
164template <uint_t state_size> bool GymWalk<state_size>::is_alive() const
165{
166 auto response = this->api_server_ -> is_alive(this->env_name(), this->idx());
167 return response["result"];
168}
169
170template <uint_t state_size>
172{
173 auto response = this->api_server_->n_copies(this->env_name());
174 return response["copies"];
175}
176
177
178template <uint_t state_size> void GymWalk<state_size>::close()
179{
180
181 if (!this->is_created())
182 {
183 return;
184 }
185
186 auto response = this->api_server_ -> close(this->env_name(), this->idx());
187 this->invalidate_is_created_flag_();
188}
189
190template <uint_t state_size>
192{
193
194 if (!this->is_created())
195 {
196#ifdef BITRL_DEBUG
197 assert(this->is_created() && "Environment has not been created");
198#endif
199 return time_step_type();
200 }
201
202
203 auto &reset_ops = this->reset_options();
204 auto seed = utils::resolve<uint_t>("seed", reset_ops);
205 auto response = this->api_server_ -> reset(this->env_name(), this->idx(), seed, nlohmann::json());
206
207 this->create_time_step_from_response_(response);
208 return this->get_current_time_step_();
209}
210
211template <uint_t state_size>
214{
215
216#ifdef BITRL_DEBUG
217 assert(this->is_created() && "Environment has not been created");
218#endif
219
220 auto response = this->api_server_ -> dynamics(this->env_name(), this->idx(), sidx, aidx);
221 return build_dynamics_from_response_(response);
222}
223
224} // namespace envs::gdrl
225} // namespace bitrl
226
227#endif // GYM_WALK_H
Base class interface for Reinforcement Learning environments.
Definition env_base.h:30
SpaceType::state_space state_space_type
Type describing the environment state space.
Definition env_base.h:44
SpaceType::state_type state_type
Type describing an individual state.
Definition env_base.h:47
const std::unordered_map< std::string, std::any > & reset_options() const noexcept
Access the configuration options provided to make().
Definition env_base.h:104
SpaceType::action_space action_space_type
Type describing the environment action space.
Definition env_base.h:50
SpaceType::action_type action_type
Type representing an individual action.
Definition env_base.h:53
std::string version() const noexcept
Get the environment version set during make().
Definition env_base.h:142
std::string env_name() const noexcept
Get the name of this environment instance.
Definition env_base.h:136
Definition gym_walk.h:43
virtual time_step_type step(const action_type &action) override final
Definition gym_walk.h:146
uint_t n_actions() const noexcept
Definition gym_walk.h:70
base_type::state_type state_type
Definition gym_walk.h:54
bool is_alive() const
Definition gym_walk.h:164
base_type::action_space_type action_space_type
Definition gym_walk.h:52
virtual void make(const std::string &version, const std::unordered_map< std::string, std::any > &options, const std::unordered_map< std::string, std::any > &reset_options) override final
Construct the environment instance.
Definition gym_walk.h:126
uint_t n_states() const noexcept
Definition gym_walk.h:69
std::string get_url() const
Get the full URL for this environment endpoint on the server.
Definition gym_walk.h:77
static const std::string URI
Definition gym_walk.h:47
static const std::string name
Definition gym_walk.h:46
EnvBase< TimeStep< uint_t >, ScalarDiscreteEnv< state_size, 1, 0, 0 > > base_type
Definition gym_walk.h:49
virtual time_step_type reset() override final
Reset the environment to an initial state using the reset options specified during make.
Definition gym_walk.h:191
base_type::time_step_type time_step_type
Definition gym_walk.h:50
GymWalk(network::RESTRLEnvClient &api_server)
Constructor.
Definition gym_walk.h:96
base_type::action_type action_type
Definition gym_walk.h:53
uint_t n_copies() const
Definition gym_walk.h:171
virtual void close() override final
Close and release any acquired environment resources.
Definition gym_walk.h:178
dynamics_t p(uint_t sidx, uint_t aidx) const
Definition gym_walk.h:213
base_type::state_space_type state_space_type
Definition gym_walk.h:51
std::vector< std::tuple< real_t, uint_t, real_t, bool > > dynamics_t
Definition gym_walk.h:55
Utility class to facilitate HTTP requests between the environments REST API and C++ drivers.
Definition rest_rl_env_client.h:29
std::string get_env_url(const std::string &name) const noexcept
Return the url for the environment with the given name.
Definition rest_rl_env_client.cpp:86
OutT resolve(const std::string &name, const std::map< std::string, std::any > &input)
Definition std_map_utils.h:25
Definition bitrl_consts.h:14
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
static TimeStepTp time_step_type_from_int(int aidx)
Definition time_step_type.cpp:31
Definition env_types.h:13