bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
cuberl::rl::algos Namespace Reference

Namespaces

namespace  dp
 
namespace  mc
 
namespace  pg
 
namespace  td
 

Classes

struct  RLAlgoConfig
 The RLAlgoConfig struct. More...
 
class  RLSolverBase
 RLSolverBase. Base class for RL algorithms. More...
 

Functions

template<typename WorldTp >
auto state_actions_from_v (const WorldTp &env, const DynVec< real_t > &v, real_t gamma, uint_t state) -> DynVec< real_t >
 Given the state index returns the list of actions under the provided value functions.
 
template<typename T >
std::vector< T > create_discounts_array (T base, uint_t npoints)
 create_discounts_array
 
template<typename T >
std::vector< T > calculate_discounted_return_vector (const std::vector< T > &rewards, T gamma)
 Create a vector where element i is the product $$\gamma^i * rewards[i]$$.
 
template<typename TimeStepType , typename T >
std::vector< T > calculate_discounted_return_vector (const std::vector< TimeStepType > &trajectory, T gamma)
 calculate_discounted_return_vector. Creates the discounted return vector for the given trajectory
 
template<typename T >
T calculate_discounted_return (const std::vector< T > &rewards, T gamma)
 calculate_discounted_return. Calculates the sum of the discounted rewards for the given rewards array using the given gamma
 
template<typename T >
T calculate_mean_discounted_return (const std::vector< T > &rewards, T gamma)
 calculate_mean_discounted_return. Same as calculate_discounted_return but the result is weighted by 1/N where N is the size of the given rewards array
 
template<typename TimeStepType , typename T >
T calculate_discounted_return (const std::vector< TimeStepType > &trajectory, T gamma)
 Calculate the discounted return from the given trajectory.
 
template<typename TimeStepType , typename T >
T calculate_mean_discounted_return (const std::vector< TimeStepType > &trajectory, T gamma)
 Calculate the mean discounted return from the given trajectory; same as calculate_discounted_return but the result is weighted by 1/N where N is the size of the trajectory.
template<typename T >
std::vector< T > calculate_step_discounted_return (const std::vector< T > &rewards, T gamma)
 Given an array of rewards, for each entry calculate the following: $$G = \sum_{k=t+1}^T \gamma^{k-t-1}R_k$$.
 

Function Documentation

◆ calculate_discounted_return() [1/2]

template<typename T >
T cuberl::rl::algos::calculate_discounted_return ( const std::vector< T > &  rewards,
T  gamma 
)

calculate_discounted_return. Calculates the sum of the discounted rewards for the given rewards array using the given gamma

Parameters
rewards
gamma
Returns

◆ calculate_discounted_return() [2/2]

template<typename TimeStepType , typename T >
T cuberl::rl::algos::calculate_discounted_return ( const std::vector< TimeStepType > &  trajectory,
T  gamma 
)

Calculate the discounted return from the given trajectory.

◆ calculate_discounted_return_vector() [1/2]

template<typename T >
std::vector< T > cuberl::rl::algos::calculate_discounted_return_vector ( const std::vector< T > &  rewards,
T  gamma 
)

Create a vector where element i is the product $$\gamma^i * rewards[i]$$.

◆ calculate_discounted_return_vector() [2/2]

template<typename TimeStepType , typename T >
std::vector< T > cuberl::rl::algos::calculate_discounted_return_vector ( const std::vector< TimeStepType > &  trajectory,
T  gamma 
)

calculate_discounted_return_vector. Creates the discounted return vector for the given trajectory

◆ calculate_mean_discounted_return() [1/2]

template<typename T >
T cuberl::rl::algos::calculate_mean_discounted_return ( const std::vector< T > &  rewards,
T  gamma 
)

calculate_mean_discounted_return. Same as calculate_discounted_return but the result is weighted by 1/N where N is the size of the given rewards array

Parameters
rewards
gamma
Returns

◆ calculate_mean_discounted_return() [2/2]

template<typename TimeStepType , typename T >
T cuberl::rl::algos::calculate_mean_discounted_return ( const std::vector< TimeStepType > &  trajectory,
T  gamma 
)

Calculate the mean discounted return from the given trajectory; same as calculate_discounted_return but the result is weighted by 1/N where N is the size of the trajectory.
◆ calculate_step_discounted_return()

template<typename T >
std::vector< T > cuberl::rl::algos::calculate_step_discounted_return ( const std::vector< T > &  rewards,
T  gamma 
)

Given an array of rewards, for each entry calculate the following: $$G = \sum_{k=t+1}^T \gamma^{k-t-1}R_k$$.

◆ create_discounts_array()

template<typename T >
std::vector< T > cuberl::rl::algos::create_discounts_array ( T  base,
uint_t  npoints 
)

create_discounts_array

Parameters
base
npoints
Returns

◆ state_actions_from_v()

template<typename WorldTp >
auto cuberl::rl::algos::state_actions_from_v ( const WorldTp &  env,
const DynVec< real_t > &  v,
real_t  gamma,
uint_t  state 
) -> DynVec<real_t>

Given the state index returns the list of actions under the provided value functions.