22 template<
typename WorldTp>
29 std::for_each(q.begin(),
31 [](
auto& item){item = 0.0;});
34 for(
uint_t a=0; a < env.n_actions(); ++a){
36 const auto& transition_dyn = env.p(state, a);
38 for(
auto& dyn: transition_dyn){
39 auto prob = std::get<0>(dyn);
40 auto next_state = std::get<1>(dyn);
41 auto reward = std::get<2>(dyn);
43 q[a] += prob * (reward + gamma * v[next_state]);
62 std::vector<T> points(npoints, 0.0);
63 for(
uint_t i=0; i<npoints; ++i){
64 points[i] = std::pow(base, i);
79 std::vector<T> returns(rewards.size(), 0.0);
80 for(
uint_t t=0; t<rewards.size(); ++t){
81 returns[t] = std::pow(gamma, t)*rewards[t];
91 template<
typename TimeStepType,
typename T>
96 std::vector<T> rewards(trajectory.size());
97 for(
uint_t t =0; t<trajectory.size(); ++t){
98 rewards[t] = std::pow(gamma, t)*trajectory[t].reward();
138 template<
typename TimeStepType,
typename T>
146 template<
typename TimeStepType,
typename T>
163 std::vector<T> discounted_returns(rewards.size());
164 for(
uint_t t=0; t<rewards.size(); ++t){
167 auto begin = rewards.begin();
169 std::advance(begin, t);
171 for(; begin != rewards.end(); ++begin){
172 G += std::pow(gamma, counter++) * (*begin);
175 discounted_returns[t] = G;
178 return discounted_returns;
double real_t
real_t
Definition bitrl_types.h:23
Eigen::RowVectorX< T > DynVec
Dynamically sized row vector.
Definition bitrl_types.h:74
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
real_t mean(IteratorType begin, IteratorType end, bool parallel=true)
mean Compute the mean value of the values in the provided iterator range
Definition vector_math.h:126
std::iterator_traits< IteratorType >::value_type sum(IteratorType begin, IteratorType end, bool parallel=true)
Definition vector_math.h:98
std::vector< T > create_discounts_array(T base, uint_t npoints)
create_discounts_array
Definition utils.h:60
T calculate_discounted_return(const std::vector< T > &rewards, T gamma)
calculate_discounted_return. Calculates the sum of the discounted rewards for the given rewards array.
Definition utils.h:114
auto state_actions_from_v(const WorldTp &env, const DynVec< real_t > &v, real_t gamma, uint_t state) -> DynVec< real_t >
Given the state index returns the list of actions under the provided value functions.
Definition utils.h:23
std::vector< T > calculate_step_discounted_return(const std::vector< T > &rewards, T gamma)
Given an array of rewards, for each entry calculate the following: $$G_t = \sum_{k=t+1}^{T} \gamma^{k-t-1} R_k$$
Definition utils.h:161
std::vector< T > calculate_discounted_return_vector(const std::vector< T > &rewards, T gamma)
Create a vector where element i is the product $$\gamma^i * rewards[i]$$.
Definition utils.h:76
T calculate_mean_discounted_return(const std::vector< T > &rewards, T gamma)
calculate_mean_discounted_return. Same as calculate_discounted_return but the result is weighted by 1/N, where N is the number of rewards.
Definition utils.h:129
Various utilities used when working with RL problems.
Definition cuberl_types.h:16