bitrl_cuberl_docs/softmax__policy_8h_source.html

#ifndef SOFTMAX_POLICY_H

#define SOFTMAX_POLICY_H


#include "cuberl/base/cuberl_types.h"

#include "cuberl/rl/policies/max_tabular_policy.h"

#include "cuberl/maths/vector_math.h"


namespace cuberl {

namespace rl {

namespace policies {


/// Tabular policy that transforms the supplied values with
/// maths::softmax_vec and hands the result to a MaxTabularPolicy
/// to pick the action.
class MaxTabularSoftmaxPolicy
{
public:

    /// The output type of operator().
    using output_type = uint_t;

    /// Constructor.
    /// @param tau Stored in tau_ and forwarded as the last argument of
    ///            maths::softmax_vec (presumably the softmax temperature
    ///            -- confirm against vector_math.h).
    MaxTabularSoftmaxPolicy(real_t tau=1.0);

    /// Choose an action from the table q_map for the state index state_idx.
    /// NOTE(review): no definition of this overload is visible in this
    /// header -- confirm it is provided elsewhere before relying on it.
    /// @param q_map     Table of values; presumably one row per state.
    /// @param state_idx Index of the state to select an action for.
    template<typename MatType>
    output_type operator()(const MatType& q_map, uint_t state_idx) const;

    /// Choose an action from a single sequence of values.
    /// @param q_map Container exposing begin()/end() over the values.
    template<typename VecTp>
    output_type operator()(const VecTp& q_map) const;

    /// Any actions the policy should perform on the given episode index.
    /// This policy does nothing here.
    void on_episode(uint_t) noexcept {}

    /// Reset the policy. This policy does nothing here.
    void reset() noexcept {}

private:

    /// Value forwarded to maths::softmax_vec on every call.
    real_t tau_;

    /// Policy that picks the action from the softmax output.
    MaxTabularPolicy max_policy_;
};


inline


MaxTabularSoftmaxPolicy::MaxTabularSoftmaxPolicy(real_t tau)

:

tau_(tau)

{}


template<typename VecTp>

MaxTabularSoftmaxPolicy::output_type


MaxTabularSoftmaxPolicy::operator()(const VecTp& q_map)const{


    auto softmax_vec = maths::softmax_vec(q_map.begin(), q_map.end(), tau_);

    return max_policy_.get_action(softmax_vec);

}


}

}

}


#endif // SOFTMAX_POLICY_H

cuberl::rl::policies::MaxTabularPolicy
class MaxTabularPolicy
Definition max_tabular_policy.h:30

cuberl::rl::policies::MaxTabularPolicy::get_action
static output_type get_action(const MatType &q_map, uint_t state_idx)
get_action. Given a

cuberl::rl::policies::MaxTabularSoftmaxPolicy
Definition softmax_policy.h:18

cuberl::rl::policies::MaxTabularSoftmaxPolicy::on_episode
void on_episode(uint_t) noexcept
Any actions the policy should perform on the given episode index.
Definition softmax_policy.h:49

cuberl::rl::policies::MaxTabularSoftmaxPolicy::reset
void reset() noexcept
Reset the policy.
Definition softmax_policy.h:54

cuberl::rl::policies::MaxTabularSoftmaxPolicy::MaxTabularSoftmaxPolicy
MaxTabularSoftmaxPolicy(real_t tau=1.0)
Constructor.
Definition softmax_policy.h:68

cuberl::rl::policies::MaxTabularSoftmaxPolicy::output_type
uint_t output_type
The output type of operator()
Definition softmax_policy.h:24

cuberl::rl::policies::MaxTabularSoftmaxPolicy::operator()
output_type operator()(const MatType &q_map, uint_t state_idx) const
operator(). Given a

cuberl_types.h

max_tabular_policy.h

bitrl::real_t
double real_t
real_t
Definition bitrl_types.h:23

bitrl::uint_t
std::size_t uint_t
uint_t
Definition bitrl_types.h:43

cuberl::maths::softmax_vec
std::vector< T > softmax_vec(const std::vector< T > &vec, real_t tau=1.0)
Applies the softmax operation to the elements of the vector and returns a vector with the result.
Definition vector_math.h:342

cuberl
Various utilities used when working with RL problems.
Definition cuberl_types.h:16

vector_math.h