bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
utils.h
Go to the documentation of this file.
1#ifndef UTILS_H
2#define UTILS_H
3
8
11
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <tuple>
#include <vector>
13
14namespace cuberl{
15namespace rl::algos
16{
17
22 template<typename WorldTp>
23 auto state_actions_from_v(const WorldTp& env,
24 const DynVec<real_t>& v,
25 real_t gamma,
26 uint_t state) -> DynVec<real_t>{
27
28 auto q = DynVec<real_t>(env.n_actions());
29 std::for_each(q.begin(),
30 q.end(),
31 [](auto& item){item = 0.0;});
32
33
34 for(uint_t a=0; a < env.n_actions(); ++a){
35
36 const auto& transition_dyn = env.p(state, a);
37
38 for(auto& dyn: transition_dyn){
39 auto prob = std::get<0>(dyn);
40 auto next_state = std::get<1>(dyn);
41 auto reward = std::get<2>(dyn);
42 //auto done = std::get<3>(dyn);
43 q[a] += prob * (reward + gamma * v[next_state]);
44 }
45 }
46
47 return q;
48 }
49
50
///
/// \brief create_discounts_array. Build the vector
/// {base^0, base^1, ..., base^(npoints-1)}.
///
/// \param base    The base of the powers (typically a discount factor).
/// \param npoints Number of points to generate. Note: uint_t is an alias
///                of std::size_t (bitrl_types.h); spelled explicitly here.
/// \return Vector of npoints successive powers of base; empty if npoints == 0.
///
template<typename T>
std::vector<T> create_discounts_array(T base, std::size_t npoints)
{
    std::vector<T> points;
    points.reserve(npoints);

    // Maintain the power incrementally instead of calling std::pow
    // once per element; base^0 == 1 by convention (as std::pow does).
    T power = T(1);
    for(std::size_t i = 0; i < npoints; ++i){
        points.push_back(power);
        power *= base;
    }
    return points;
}
68
69
///
/// \brief Create a vector where element t is the product
/// $$\gamma^t * rewards[t]$$.
///
/// \param rewards Per-step rewards.
/// \param gamma   Discount factor.
/// \return Vector of the same size as rewards; empty if rewards is empty.
///
template<typename T>
std::vector<T>
calculate_discounted_return_vector(const std::vector<T>& rewards,
                                   T gamma)
{
    std::vector<T> returns;
    returns.reserve(rewards.size());

    // gamma^t maintained incrementally; avoids one std::pow call per step.
    T discount = T(1);
    for(const auto reward : rewards){
        returns.push_back(discount * reward);
        discount *= gamma;
    }

    return returns;
}
86
///
/// \brief Create a vector where element t is the product
/// $$\gamma^t * trajectory[t].reward()$$.
///
/// \param trajectory Sequence of time steps, each exposing reward().
/// \param gamma      Discount factor.
/// \return Vector of discounted rewards, same size as trajectory.
///
template<typename TimeStepType, typename T>
std::vector<T>
calculate_discounted_return_vector(const std::vector<TimeStepType>& trajectory,
                                   T gamma){

    // Named 'discounted' (the previous local was misleadingly called
    // 'rewards' although it held the discounted values).
    std::vector<T> discounted;
    discounted.reserve(trajectory.size());

    // gamma^t maintained incrementally; avoids one std::pow call per step.
    T discount = T(1);
    for(const auto& step : trajectory){
        discounted.push_back(discount * step.reward());
        discount *= gamma;
    }

    return discounted;
}
104
112 template<typename T>
113 T
114 calculate_discounted_return(const std::vector<T>& rewards, T gamma)
115 {
116 auto discounted_vector = calculate_discounted_return_vector(rewards, gamma);
117 return cuberl::maths::sum(discounted_vector);
118 }
119
127 template<typename T>
128 T
129 calculate_mean_discounted_return(const std::vector<T>& rewards, T gamma)
130 {
131 auto discounted_vector = calculate_discounted_return_vector(rewards, gamma);
132 return cuberl::maths::mean(discounted_vector);
133 }
134
138 template<typename TimeStepType, typename T>
139 T
140 calculate_discounted_return(const std::vector<TimeStepType>& trajectory, T gamma){
141
142 auto discounted_vector = calculate_discounted_return_vector(trajectory, gamma);
143 return cuberl::maths::sum(discounted_vector);
144 }
145
146 template<typename TimeStepType, typename T>
147 T
148 calculate_mean_discounted_return(const std::vector<TimeStepType>& trajectory, T gamma){
149 auto discounted_vector = calculate_discounted_return_vector(trajectory, gamma);
150 return cuberl::maths::mean(discounted_vector);
151
152 }
153
///
/// \brief Given an array of rewards, compute for every step t the
/// discounted return of the remaining suffix:
/// $$G_t = \sum_{k=t}^{T-1} \gamma^{k-t} rewards[k]$$.
///
/// Uses the backward recursion G_t = rewards[t] + gamma * G_{t+1},
/// which is O(n) instead of the O(n^2) per-step inner pass.
/// (Floating-point rounding may differ from the forward summation in
/// the last bits, as the addition order changes.)
///
/// \param rewards Per-step rewards.
/// \param gamma   Discount factor.
/// \return Vector of per-step returns, same size as rewards.
///
template<typename T>
std::vector<T>
calculate_step_discounted_return(const std::vector<T>& rewards, T gamma)
{
    std::vector<T> discounted_returns(rewards.size());

    // Walk the rewards backwards, accumulating the suffix return.
    T running = T(0);
    for(std::size_t i = rewards.size(); i-- > 0;){
        running = rewards[i] + gamma * running;
        discounted_returns[i] = running;
    }

    return discounted_returns;
}
180}
181}
182
183#endif // UTILS_H
double real_t
real_t
Definition bitrl_types.h:23
Eigen::RowVectorX< T > DynVec
Dynamically sized row vector.
Definition bitrl_types.h:74
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
real_t mean(IteratorType begin, IteratorType end, bool parallel=true)
mean Compute the mean value of the values in the provided iterator range
Definition vector_math.h:126
std::iterator_traits< IteratorType >::value_type sum(IteratorType begin, IteratorType end, bool parallel=true)
Definition vector_math.h:98
std::vector< T > create_discounts_array(T base, uint_t npoints)
create_discounts_array
Definition utils.h:60
T calculate_discounted_return(const std::vector< T > &rewards, T gamma)
calculate_discounted_return. Calculates the sum of the discounted rewards for the given rewards array...
Definition utils.h:114
auto state_actions_from_v(const WorldTp &env, const DynVec< real_t > &v, real_t gamma, uint_t state) -> DynVec< real_t >
Given the state index returns the list of actions under the provided value functions.
Definition utils.h:23
std::vector< T > calculate_step_discounted_return(const std::vector< T > &rewards, T gamma)
Given an array of rewards, for each entry t calculate the discounted return of the suffix starting at t: $$G_t = \sum_{k=t+1}^{T} \gamma^{k-t-1} R_k$$.
Definition utils.h:161
std::vector< T > calculate_discounted_return_vector(const std::vector< T > &rewards, T gamma)
Create a vector where element i is the product $$\gamma^i * rewards[i]$$.
Definition utils.h:76
T calculate_mean_discounted_return(const std::vector< T > &rewards, T gamma)
calculate_mean_discounted_return. Same as calculate_discounted_return but the result is averaged, i.e. weighted by 1/N where N is the number of rewards.
Definition utils.h:129
Various utilities used when working with RL problems.
Definition cuberl_types.h:16