bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
envs_utils.h
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2024 <copyright holder> <email>
2// SPDX-License-Identifier: Apache-2.0
3
4#ifndef ENVS_UTILS_H
5#define ENVS_UTILS_H
6
7#include "bitrl/bitrl_config.h"
8#include "bitrl/bitrl_types.h"
9
10#include <vector>
11
12#ifdef BITRL_DEBUG
13#include <cassert>
14#endif
15
16namespace bitrl
17{
18namespace envs
19{
20
25template <typename EnvType, typename ActionSelector>
26std::vector<typename EnvType::time_step_type>
27create_trajectory(EnvType &env, ActionSelector &action_selector, uint_t max_steps)
28{
29
30#ifdef BITRL_DEBUG
31 assert(max_steps >= 1 && "Attempt to generate environment trajector with max_steps < 1");
32#endif
33
34 typedef typename EnvType::time_step_type time_step_type;
35
36 std::vector<time_step_type> trajectory;
37 trajectory.reserve(max_steps);
38
39 auto done = false;
40
41 while (!done)
42 {
43 auto time_step = env.reset();
44
45 // loop infinitely
46 uint_t counter = 0;
47 while (true)
48 {
49
50 // select the action given the observation
51 auto action = action_selector(time_step.observation());
52
53 auto new_time_step = env.step(action);
54 done = new_time_step.done();
55 trajectory.push_back(new_time_step);
56
57 if (new_time_step.done())
58 {
59 break;
60 }
61
62 if (counter >= max_steps - 1)
63 {
64 trajectory.clear();
65 break;
66 }
67
68 time_step = new_time_step;
69 }
70 }
71
72 return trajectory;
73}
74
75} // namespace envs
76} // namespace bitrl
77
78#endif // ENVS_UTILS_H
std::vector< typename EnvType::time_step_type > create_trajectory(EnvType &env, ActionSelector &action_selector, uint_t max_steps)
Definition envs_utils.h:27
Definition bitrl_consts.h:14
std::size_t uint_t
uint_t
Definition bitrl_types.h:43