bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
vector_time_step.h
Go to the documentation of this file.
1#ifndef VECTOR_TIME_STEP_H
2#define VECTOR_TIME_STEP_H
3
#include "bitrl/bitrl_types.h"
#include "bitrl/extern/nlohmann/json/json.hpp"

#include <algorithm>
#include <any>
#include <numeric>
#include <ostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
13
14namespace bitrl
15{
16
20template <typename StateType> class VectorTimeStep
21{
22
23 public:
27 typedef StateType state_type;
28
32 VectorTimeStep() = default;
33
37 VectorTimeStep(const std::vector<TimeStepTp> &types, const std::vector<real_t> &rewards,
38 const std::vector<state_type> &obs);
39
43 VectorTimeStep(const std::vector<TimeStepTp> &types, const std::vector<real_t> &rewards,
44 const std::vector<state_type> &obs, const std::vector<real_t> &discount_factors);
45
46 VectorTimeStep(const std::vector<TimeStepTp> &types, const std::vector<real_t> &rewards,
47 const std::vector<state_type> &obs, const std::vector<real_t> &discount_factors,
48 std::unordered_map<std::string, std::any> &&extra);
49
55
60
65 VectorTimeStep(VectorTimeStep &&other) noexcept;
66
73
78 const std::vector<TimeStepTp> &types() const noexcept { return types_; }
79
84 const std::vector<state_type> &observations() const { return obs_; }
85
90 const std::vector<real_t> &rewards() const noexcept { return rewards_; }
91
95 real_t reward() const noexcept;
96
100 bool done() const noexcept;
101
105 const std::vector<real_t> &discounts() const noexcept { return discounts_; }
106
111 bool last() const noexcept;
112
113 private:
117 std::vector<TimeStepTp> types_;
118
122 std::vector<real_t> rewards_;
123
127 std::vector<state_type> obs_;
128
132 std::vector<real_t> discounts_;
133
137 std::unordered_map<std::string, std::any> extra_;
138};
139
140template <typename StateType>
141VectorTimeStep<StateType>::VectorTimeStep(const std::vector<TimeStepTp> &types,
142 const std::vector<real_t> &rewards,
143 const std::vector<state_type> &obs,
144 const std::vector<real_t> &discount_factors,
145 std::unordered_map<std::string, std::any> &&extra)
146 : types_(types), rewards_(rewards), obs_(obs), discounts_(discount_factors), extra_(extra)
147{
148}
149
150template <typename StateType>
152 const std::vector<real_t> &rewards,
153 const std::vector<state_type> &obs,
154 const std::vector<real_t> &discount_factors)
155 : types_(types), rewards_(rewards), obs_(obs), discounts_(discount_factors)
156{
157}
158
159template <typename StateType>
160VectorTimeStep<StateType>::VectorTimeStep(const std::vector<TimeStepTp> &types,
161 const std::vector<real_t> &rewards,
162 const std::vector<state_type> &obs)
163 : VectorTimeStep<StateType>(types, rewards, obs, std::vector<real_t>())
164{
165}
166
167template <typename StateType>
169 : types_(other.types_), rewards_(other.rewards_), obs_(other.obs_),
170 discounts_(other.discounts_), extra_(other.extra_)
171{
172}
173
174template <typename StateType>
177{
178
179 types_ = other.types_;
180 rewards_ = other.rewards_;
181 obs_ = other.obs_;
182 discounts_ = other.discounts_;
183 extra_ = other.extra_;
184 return *this;
185}
186
187template <typename StateType>
189 : types_(other.types_), rewards_(other.rewards_), obs_(other.obs_),
190 discounts_(other.discounts_), extra_(other.extra_)
191{
192 // other.clear();
193}
194
195template <typename StateType>
198{
199
200 types_ = other.types_;
201 rewards_ = other.rewards_;
202 obs_ = other.obs_;
203 discounts_ = other.discounts_;
204 extra_ = other.extra_;
205 // other.clear();
206 return *this;
207}
208
209template <typename StateType> real_t VectorTimeStep<StateType>::reward() const noexcept
210{
211
212 auto sum_ = 0.0;
213 sum_ = std::accumulate(rewards_.begin(), rewards_.end(), sum_);
214 return sum_;
215}
216
217template <typename StateType> bool VectorTimeStep<StateType>::done() const noexcept
218{
219 auto done_ = false;
220
221 for (auto step_type : types_)
222 {
223 if (step_type == TimeStepTp::LAST)
224 {
225 done_ = true;
226 break;
227 }
228 }
229
230 return done_;
231}
232
233template <typename StateType> bool VectorTimeStep<StateType>::last() const noexcept
234{
235 return done();
236}
237
238template <typename StateTp>
239inline std::ostream &operator<<(std::ostream &out, const VectorTimeStep<StateTp> &step)
240{
241
242 using json = nlohmann::json;
243 json j;
244
245 auto types = step.types();
246 std::vector<std::string> step_to_str(types.size());
247
248 for (uint_t i = 0; i < step_to_str.size(); ++i)
249 {
250 step_to_str[i] = TimeStepEnumUtils::to_string(types[i]);
251 }
252
253 j["step_types"] = step_to_str;
254 j["rewards"] = step.rewards();
255 j["observations"] = step.observations();
256 j["discounts"] = step.discounts();
257
258 out << j << std::endl;
259 return out;
260}
261
262} // namespace bitrl
263
264#endif // VECTOR_TIME_STEP_H
Forward declaration.
Definition vector_time_step.h:21
VectorTimeStep & operator=(const VectorTimeStep &other)
Assignment operator.
Definition vector_time_step.h:176
const std::vector< state_type > & observations() const
observation
Definition vector_time_step.h:84
const std::vector< real_t > & rewards() const noexcept
reward
Definition vector_time_step.h:90
const std::vector< real_t > & discounts() const noexcept
discount. Returns the discount factor
Definition vector_time_step.h:105
VectorTimeStep(const std::vector< TimeStepTp > &types, const std::vector< real_t > &rewards, const std::vector< state_type > &obs, const std::vector< real_t > &discount_factors)
VectorTimeStep. Constructor.
Definition vector_time_step.h:151
VectorTimeStep(const VectorTimeStep &other)
Copy constructor.
Definition vector_time_step.h:168
bool last() const noexcept
last
Definition vector_time_step.h:233
VectorTimeStep & operator=(VectorTimeStep &&other) noexcept
Move assignment operator.
Definition vector_time_step.h:197
StateType state_type
state_type. Type of the state.
Definition vector_time_step.h:27
real_t reward() const noexcept
Returns the sum of the rewards received.
Definition vector_time_step.h:209
VectorTimeStep(const std::vector< TimeStepTp > &types, const std::vector< real_t > &rewards, const std::vector< state_type > &obs, const std::vector< real_t > &discount_factors, std::unordered_map< std::string, std::any > &&extra)
Definition vector_time_step.h:141
bool done() const noexcept
Returns true if any time step is LAST.
Definition vector_time_step.h:217
VectorTimeStep(VectorTimeStep &&other) noexcept
Move constructor.
Definition vector_time_step.h:188
const std::vector< TimeStepTp > & types() const noexcept
type
Definition vector_time_step.h:78
VectorTimeStep()=default
Default constructor.
VectorTimeStep(const std::vector< TimeStepTp > &types, const std::vector< real_t > &rewards, const std::vector< state_type > &obs)
VectorTimeStep. Constructor.
Definition vector_time_step.h:160
Definition bitrl_consts.h:14
std::ostream & operator<<(std::ostream &out, const Null &)
Definition bitrl_types.h:165
double real_t
real_t
Definition bitrl_types.h:23
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
TimeStepTp
The TimeStepTp enum.
Definition time_step_type.h:16
static std::string to_string(TimeStepTp type)
Definition time_step_type.cpp:37