bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
policy_stochastic_adaptor.h
Go to the documentation of this file.
1#ifndef STOCHASTIC_ADAPTOR_POLICY_H
2#define STOCHASTIC_ADAPTOR_POLICY_H
3
#include "cuberl/base/cubeai_config.h"

#ifdef CUBERL_DEBUG
#include <cassert>
#endif

#include <any>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
15
16namespace cuberl {
17namespace rl {
18namespace policies {
19
20class DiscretePolicyBase;
21
25template<typename PolicyType>
26class StochasticAdaptorPolicy/*: public DiscretePolicyAdaptorBase*/
27{
28
29public:
30
31 typedef PolicyType policy_type;
32
37 StochasticAdaptorPolicy(uint_t state_space_size, uint_t action_space_size,
38 policy_type& policy);
39
44
50 virtual policy_type& operator()(const std::map<std::string, std::any>& options);
51
52private:
53
57 uint_t state_space_size_;
58
62 uint_t action_space_size_;
63
67 policy_type& policy_;
68};
69
70template<typename PolicyType>
72 uint_t action_space_size, policy_type& policy)
73 :
74 //DiscretePolicyAdaptorBase(),
75 state_space_size_(state_space_size),
76 action_space_size_(action_space_size),
77 policy_(policy)
78{}
79
80
81
82template<typename PolicyType>
84StochasticAdaptorPolicy<PolicyType>::operator()(const std::map<std::string, std::any>& options){
85
86 auto state = std::any_cast<uint_t>(options.find("state")->second);
87 auto state_actions = std::any_cast<DynVec<real_t>>(options.find("state_actions")->second);
88 auto best_actions = maths::max_indices(state_actions);
89
90#ifdef CUBERL_DEBUG
91 assert(best_actions.size() <= action_space_size_ && "Incompatible number of best actions. Cannot exccedd the action space size");
92#endif
93
94 std::vector<std::pair<uint_t, real_t>> best_actions_vals(best_actions.size());
95
96 for(uint_t i=0; i<best_actions.size(); ++i){
97 best_actions_vals[i] = {best_actions[i], 1.0/best_actions.size()};
98 }
99
100 auto& state_action_vals = this->policy_.state_actions_values();
101
102 auto& view = state_action_vals[state];
103
104 //collect all the actions in a map
105 auto act_val_map = std::unordered_map<uint_t, real_t>();
106
107 for(uint_t a=0; a<best_actions_vals.size(); ++a){
108 act_val_map.insert({best_actions_vals[a].first, best_actions_vals[a].second});
109 }
110
111 for(uint_t a=0; a<view.size(); ++a){
112 auto action = view[a].first;
113
114 if(act_val_map.contains(action)){
115 view[a].second = act_val_map[action];
116 }
117 else{
118 view[a].second = 0.0;
119 }
120 }
121
122 return this->policy_;
123}
124
125
126
127
128}
129
130}
131
132}
133
134#endif // STOCHASTIC_ADAPTOR_POLICY_H
The StochasticAdaptorPolicy class.
Definition policy_stochastic_adaptor.h:27
virtual policy_type & operator()(const std::map< std::string, std::any > &options)
operator ()
Definition policy_stochastic_adaptor.h:84
StochasticAdaptorPolicy(uint_t state_space_size, uint_t action_space_size, policy_type &policy)
StochasticAdaptorPolicy.
Definition policy_stochastic_adaptor.h:71
PolicyType policy_type
Definition policy_stochastic_adaptor.h:31
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
std::vector< uint_t > max_indices(const DynVec< T > &vec)
Definition vector_math.h:432
Various utilities used when working with RL problems.
Definition cuberl_types.h:16