95 std::ostream&
print(std::ostream& out)
const;
106 return opts.
print(out);
double real_t
real_t
Definition bitrl_types.h:23
std::size_t uint_t
uint_t
Definition bitrl_types.h:43
DeviceType
Enumeration of various device types.
Definition bitrl_types.h:159
std::ostream & operator<<(std::ostream &out, const A2CConfig &opts)
Definition a2c_config.h:115
Various utilities used when working with RL problems.
Definition cuberl_types.h:16
The PPOConfig struct. Configuration for PPOSolver class.
Definition ppo_config.h:23
std::ostream & print(std::ostream &out) const
print
DeviceType device_type
Definition ppo_config.h:83
void load_from_json(const std::string &filename)
Load the configuration from the given json file.
uint_t buffer_size
How large the experience buffer should be.
Definition ppo_config.h:68
real_t max_grad_norm_policy
The value to clip the gradient for the policy.
Definition ppo_config.h:48
std::string save_model_path
Definition ppo_config.h:88
uint_t max_itrs_per_episode
Number of iterations per episode.
Definition ppo_config.h:63
bool clip_critic_grad
Flag indicating whether to clip the critic grad.
Definition ppo_config.h:43
real_t gamma
Discount factor.
Definition ppo_config.h:28
uint_t max_passes_over_batch
Definition ppo_config.h:70
uint_t n_episodes
Number of training episodes.
Definition ppo_config.h:58
bool clip_policy_grad
Flag indicating whether to clip the policy grad.
Definition ppo_config.h:38
real_t clip_epsilon
Definition ppo_config.h:73
bool normalize_advantages
Definition ppo_config.h:78
real_t epsilon
The epsilon factor to use.
Definition ppo_config.h:33
real_t max_grad_norm_critic
The value to clip the gradient for the critic.
Definition ppo_config.h:53