bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
time_step.h
Go to the documentation of this file.
1#ifndef TIME_STEP_H
2#define TIME_STEP_H
3
#include "bitrl/bitrl_types.h"
// NOTE(review): the listing this was recovered from omits original lines 5-6,
// presumably further project includes -- restore them from the real header.

#include <any>
#include <ostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
14
15namespace bitrl
16{
17
21template <typename StateTp> class TimeStep
22{
23 public:
27 typedef StateTp state_type;
28
33
38
42 TimeStep(TimeStepTp type, real_t reward, const state_type &obs, real_t discount_factor);
43
47 TimeStep(TimeStepTp type, real_t reward, const state_type &obs, real_t discount_factor,
48 std::unordered_map<std::string, std::any> &&extra);
49
54 TimeStep(const TimeStep &other);
55
59 TimeStep &operator=(const TimeStep &other);
60
65 TimeStep(TimeStep &&other) noexcept;
66
72 TimeStep &operator=(TimeStep &&other) noexcept;
73
78 bool first() const noexcept { return type_ == TimeStepTp::FIRST; }
79
84 bool mid() const noexcept { return type_ == TimeStepTp::MID; }
85
90 bool last() const noexcept { return type_ == TimeStepTp::LAST; }
91
96 TimeStepTp type() const noexcept { return type_; }
97
102 state_type observation() const { return obs_; }
103
108 real_t reward() const noexcept { return reward_; }
109
113 real_t discount() const noexcept { return discount_; }
114
119 bool done() const noexcept { return type_ == TimeStepTp::LAST; }
120
124 void clear() noexcept;
125
129 template <typename T> const T &get_extra(std::string name) const;
130
135 const std::unordered_map<std::string, std::any> &info() const noexcept { return extra_; }
136
141 std::unordered_map<std::string, std::any> &info() noexcept { return extra_; }
142
143 private:
147 TimeStepTp type_;
148
152 real_t reward_;
153
157 state_type obs_;
158
162 real_t discount_;
163
167 std::unordered_map<std::string, std::any> extra_;
168};
169
170template <typename StateTp>
172 : type_(TimeStepTp::INVALID_TYPE), reward_(0.0), obs_(), discount_(1.0), extra_()
173{
174}
175
176template <typename StateTp>
178 real_t discount_factor)
179 : type_(type), reward_(reward), obs_(obs), discount_(discount_factor), extra_()
180{
181}
182
183template <typename StateTp>
185 : TimeStep<StateTp>(type, reward, obs, 1.0)
186{
187}
188
189template <typename StateTp>
191 real_t discount_factor,
192 std::unordered_map<std::string, std::any> &&extra)
193 : type_(type), reward_(reward), obs_(obs), discount_(discount_factor), extra_(extra)
194{
195}
196
197template <typename StateTp>
199 : type_(other.type_), reward_(other.reward_), obs_(other.obs_), discount_(other.discount_),
200 extra_(other.extra_)
201{
202}
203
204template <typename StateTp>
206{
207
208 type_ = other.type_;
209 reward_ = other.reward_;
210 obs_ = other.obs_;
211 discount_ = other.discount_;
212 extra_ = other.extra_;
213 return *this;
214}
215
216template <typename StateTp>
218 : type_(other.type_), reward_(other.reward_), obs_(other.obs_), discount_(other.discount_),
219 extra_(other.extra_)
220{
221 other.clear();
222}
223
224template <typename StateTp>
226{
227
228 type_ = other.type_;
229 reward_ = other.reward_;
230 obs_ = other.obs_;
231 discount_ = other.discount_;
232 extra_ = other.extra_;
233 other.clear();
234 return *this;
235}
236
237template <typename StateTp> void TimeStep<StateTp>::clear() noexcept
238{
239
241 reward_ = 0.0;
242 obs_ = state_type();
243 discount_ = 1.0;
244 extra_.clear();
245}
246
247template <typename StateTp>
248template <typename T>
249const T &TimeStep<StateTp>::get_extra(std::string name) const
250{
251
252 auto itr = extra_.find(name);
253
254 if (itr == extra_.end())
255 {
256 throw std::logic_error("Property " + name + " does not exist");
257 }
258
259 return std::any_cast<const T &>(itr->second);
260}
261
262template <typename StateTp>
263inline std::ostream &operator<<(std::ostream &out, const TimeStep<StateTp> &step)
264{
265
266 out << "Step type....." << TimeStepEnumUtils::to_string(step.type()) << std::endl;
267 out << "Reward........" << step.reward() << std::endl;
268 out << "Observation..." << step.observation() << std::endl;
269 out << "Discount..... " << step.discount() << std::endl;
270 return out;
271}
272
273template <typename T>
274std::ostream &operator<<(std::ostream &out, const TimeStep<std::vector<T>> &step)
275{
276
277 out << "Step type....." << TimeStepEnumUtils::to_string(step.type()) << std::endl;
278 out << "Reward........" << step.reward() << std::endl;
279
280 auto obs = step.observation();
281
282 out << "Observation...";
284
285 out << "Discount..... " << step.discount() << std::endl;
286 return out;
287}
288
289} // namespace bitrl
290
291#endif // TIME_STEP_H
Forward declaration.
Definition time_step.h:22
TimeStep(TimeStepTp type, real_t reward, const state_type &obs, real_t discount_factor, std::unordered_map< std::string, std::any > &&extra)
TimeStep. Constructor.
Definition time_step.h:190
state_type observation() const
observation
Definition time_step.h:102
bool last() const noexcept
last
Definition time_step.h:90
const std::unordered_map< std::string, std::any > & info() const noexcept
info
Definition time_step.h:135
void clear() noexcept
clear
Definition time_step.h:237
TimeStep(TimeStepTp type, real_t reward, const state_type &obs, real_t discount_factor)
TimeStep. Constructor.
Definition time_step.h:177
TimeStep(TimeStep &&other) noexcept
TimeStep.
Definition time_step.h:217
bool done() const noexcept
done
Definition time_step.h:119
real_t reward() const noexcept
reward
Definition time_step.h:108
std::unordered_map< std::string, std::any > & info() noexcept
info
Definition time_step.h:141
TimeStep & operator=(TimeStep &&other) noexcept
Move assignment operator.
Definition time_step.h:225
TimeStep(TimeStepTp type, real_t reward, const state_type &obs)
TimeStep. Constructor.
Definition time_step.h:184
TimeStep & operator=(const TimeStep &other)
Assignment operator.
Definition time_step.h:205
bool mid() const noexcept
mid
Definition time_step.h:84
StateTp state_type
state_type. Type of the state (observation).
Definition time_step.h:27
real_t discount() const noexcept
discount. Returns the discount factor
Definition time_step.h:113
TimeStep(const TimeStep &other)
TimeStep.
Definition time_step.h:198
bool first() const noexcept
first
Definition time_step.h:78
const T & get_extra(std::string name) const
get_extra
Definition time_step.h:249
TimeStep()
TimeStep.
Definition time_step.h:171
TimeStepTp type() const noexcept
type
Definition time_step.h:96
std::ostream & print_vector(std::ostream &out, const std::vector< T > &obs)
Definition io_utils.h:23
Definition bitrl_consts.h:14
std::ostream & operator<<(std::ostream &out, const Null &)
Definition bitrl_types.h:165
double real_t
real_t
Definition bitrl_types.h:23
TimeStepTp
The TimeStepTp enum.
Definition time_step_type.h:16
static std::string to_string(TimeStepTp type)
Definition time_step_type.cpp:37