bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
a2c_monitor.h
Go to the documentation of this file.
1#ifndef A2C_MONITOR_H
2#define A2C_MONITOR_H
3
4#include "cuberl/base/cubeai_config.h"
5
6#ifdef USE_PYTORCH
7
9#include "cuberl/data_structs/experience_buffer.h"
10
11
12#include <vector>
13#include <tuple>
14
15namespace cuberl {
16namespace rl {
17namespace algos {
18namespace pg {
19
20template<typename ActionType, typename StateType>
21struct A2CMonitor
22{
23
24 typedef StateType state_type;
25 typedef ActionType action_type;
26 typedef std::tuple<state_type, // the state observed
27 action_type, // the action taken
28 real_t, // the reward received
29 bool, // done?
30 torch_tensor_t, // log prob
31 torch_tensor_t // critic values
32 > experience_tuple_type;
33
34 typedef cuberl::containers::ExperienceBuffer<experience_tuple_type> experience_buffer_type;
35
37 std::vector<real_t> rewards;
38 std::vector<real_t> policy_loss_values;
39 std::vector<real_t> critic_loss_values;
40 std::vector<uint_t> episode_duration;
41
42
43 void reset()noexcept;
44
45 template<typename T, uint_t index>
46 std::vector<T>
47 get(const std::vector<experience_tuple_type>& experience)const;
48
49};
50
51
52template<typename ActionType, typename StateType>
53template<typename T, uint_t index>
54std::vector<T>
55A2CMonitor<ActionType, StateType>::get(const std::vector<experience_tuple_type>& experience)const{
56
57 std::vector<T> result;
58 result.reserve(experience.size());
59
60 auto b = experience.begin();
61 auto e = experience.end();
62
63 for(; b != e; ++b){
64 auto item = *b;
65 result.push_back(std::get<index>(item));
66 }
67
68 return result;
69}
70
71template<typename ActionType, typename StateType>
72void
73A2CMonitor<ActionType, StateType>::reset()noexcept{
74
75 policy_loss_values.clear();
76 rewards.clear();
77 episode_duration.clear();
78}
79
80}
81}
82}
83}
84
85
86
87
88#endif
89#endif
double real_t
real_t
Definition bitrl_types.h:23
Various utilities used when working with RL problems.
Definition cuberl_types.h:16
std::pair< uint_t, uint_t > state_type
Definition example_15.cpp:28