bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
random_tabular_policy.h
Go to the documentation of this file.
1#ifndef RANDOM_TABULAR_POLICY_H
2#define RANDOM_TABULAR_POLICY_H
3
5#include "cuberl/base/cubeai_config.h"
7
8
9#ifdef USE_PYTORCH
10#include <torch/torch.h>
11#endif
12
13#include <random>
14
15namespace cuberl {
16namespace rl {
17namespace policies {
18
23{
24
25public:
26
31
36
41
45 template<typename MatType>
46 output_type operator()(const MatType& q_map, uint_t state_idx)const;
47
48#ifdef USE_PYTORCH
49 output_type operator()(const torch_tensor_t& vec)const;
50#endif
51
57 template<typename VecTp>
58 output_type operator()(const VecTp& vec)const;
59
60
64 template<typename MatType>
65 output_type get_action(const MatType& q_map, uint_t state_idx);
66
72 template<typename VecTp>
73 output_type get_action(const VecTp& q_map);
74
79 void on_episode(uint_t)noexcept{}
80
84 void reset()noexcept{}
85
86private:
87
91 //std::random_device rd_;
92
96 mutable std::mt19937 generator_;
97};
98
99#ifdef USE_PYTORCH
100inline
102RandomTabularPolicy::operator()(const torch_tensor_t& vec)const{
103
104 auto vector = cuberl::utils::pytorch::TorchAdaptor::to_vector<real_t>(vec);
105 //std::discrete_distribution<int> distribution(vector.begin(), vector.end());
106 std::uniform_int_distribution<uint_t> distribution(0, vector.size()-1);
107 return distribution(generator_);
108
109}
110#endif
111
112template<typename VecTp>
114RandomTabularPolicy::operator()(const VecTp& vec)const{
115
116 //std::discrete_distribution<int> distribution(vec.begin(), vec.end());
117 std::uniform_int_distribution<uint_t> distribution(0, vec.size()-1);
118 return distribution(generator_);
119
120}
121
122template<typename VecTp>
125 std::uniform_int_distribution<uint_t> distribution(0, vec.size()-1);
126 return distribution(generator_);
127}
128
129}
130}
131}
132
133#endif // RANDOM_TABULAR_POLICY_H
class RandomTabularPolicy
Definition random_tabular_policy.h:23
RandomTabularPolicy(uint_t seed)
Constructor. Initialize with a seed.
uint_t output_type
The type returned when calling this->operator()
Definition random_tabular_policy.h:30
output_type operator()(const MatType &q_map, uint_t state_idx) const
operator(). Given a q_map and a state index, returns an action of type output_type.
void reset() noexcept
Reset the policy.
Definition random_tabular_policy.h:84
void on_episode(uint_t) noexcept
Any actions the policy should perform on the given episode index. The implementation shown in this header is empty (no-op).
Definition random_tabular_policy.h:79
output_type get_action(const MatType &q_map, uint_t state_idx)
get_action. Given a q_map and a state index, returns an action of type output_type.
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
Various utilities used when working with RL problems.
Definition cuberl_types.h:16