22 template<
typename WorldTp>
29 std::for_each(q.begin(),
31 [](
auto& item){item = 0.0;});
34 for(
uint_t a=0; a < env.n_actions(); ++a){
36 const auto& transition_dyn = env.p(state, a);
38 for(
auto& dyn: transition_dyn){
39 auto prob = std::get<0>(dyn);
40 auto next_state = std::get<1>(dyn);
41 auto reward = std::get<2>(dyn);
43 q[a] += prob * (reward + gamma * v[next_state]);
62 std::vector<T> points(npoints, 0.0);
63 for(
uint_t i=0; i<npoints; ++i){
64 points[i] = std::pow(base, i);
79 std::vector<T> returns(rewards.size(), 0.0);
80 for(
uint_t t=0; t<rewards.size(); ++t){
81 returns[t] = std::pow(gamma, t)*rewards[t];
91 template<
typename TimeStepType,
typename T>
96 std::vector<T> rewards(trajectory.size());
97 for(
uint_t t =0; t<trajectory.size(); ++t){
98 rewards[t] = std::pow(gamma, t)*trajectory[t].reward();
138 template<
typename TimeStepType,
typename T>
146 template<
typename TimeStepType,
typename T>
163 std::vector<T> discounted_returns(rewards.size());
164 for(
uint_t t=0; t<rewards.size(); ++t){
167 auto begin = rewards.begin();
169 std::advance(begin, t);
171 for(; begin != rewards.end(); ++begin){
172 G += std::pow(gamma, counter++) * (*begin);
175 discounted_returns[t] = G;
178 return discounted_returns;
double real_t
real_t
Definition bitrl_types.h:23
Eigen::RowVectorX< T > DynVec
Dynamically sized row vector.
Definition bitrl_types.h:74
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
real_t mean(IteratorType begin, IteratorType end, bool parallel=true)
mean Compute the mean value of the values in the provided iterator range
Definition vector_math.h:126
std::iterator_traits< IteratorType >::value_type sum(IteratorType begin, IteratorType end, bool parallel=true)
Definition vector_math.h:98
std::vector< T > create_discounts_array(T base, uint_t npoints)
create_discounts_array
Definition utils.h:60
T calculate_discounted_return(const std::vector< T > &rewards, T gamma)
calculate_discounted_return. Calculates the sum of the discounted rewards for the given rewards array.
Definition utils.h:114
auto state_actions_from_v(const WorldTp &env, const DynVec< real_t > &v, real_t gamma, uint_t state) -> DynVec< real_t >
Given the state index returns the list of actions under the provided value functions.
Definition utils.h:23
std::vector< T > calculate_step_discounted_return(const std::vector< T > &rewards, T gamma)
Given an array of rewards, for each entry calculate the following: $$G_t = \sum_{k=t+1}^{T} \gamma^{k-t-1} R_k$$
Definition utils.h:161
std::vector< T > calculate_discounted_return_vector(const std::vector< T > &rewards, T gamma)
Create a vector where element i is the product $$\gamma^i * rewards[i]$$.
Definition utils.h:76
T calculate_mean_discounted_return(const std::vector< T > &rewards, T gamma)
calculate_mean_discounted_return. Same as calculate_discounted_return but the result is weighted by 1/N, where N is the number of rewards.
Definition utils.h:129
Various utilities used when working with RL problems.
Definition cuberl_types.h:16