bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
cuberl::rl::algos::pg::PPOConfig Struct Reference

The PPOConfig struct. Configuration for the PPOSolver class.

#include <ppo_config.h>

Public Member Functions

std::ostream & print (std::ostream &out) const
 Print the configuration to the given output stream.
 
void load_from_json (const std::string &filename)
 Load the configuration from the given json file.
 

Public Attributes

real_t gamma {0.99}
 Discount factor.
 
real_t epsilon {0.01}
 The epsilon factor to use.
 
bool clip_policy_grad {false}
 Flag indicating whether to clip the policy grad.
 
bool clip_critic_grad {false}
 Flag indicating whether to clip the critic grad.
 
real_t max_grad_norm_policy {1.0}
 The maximum norm at which to clip the policy gradient.
 
real_t max_grad_norm_critic {1.0}
 The maximum norm at which to clip the critic gradient.
 
uint_t n_episodes {100}
 Number of training episodes.
 
uint_t max_itrs_per_episode {100}
 Number of iterations per episode.
 
uint_t buffer_size {100}
 How large the experience buffer should be.
 
uint_t max_passes_over_batch {4}
 Number of optimization passes over each collected batch.
 
real_t clip_epsilon {0.5}
 The clipping parameter for the PPO surrogate objective.
 
bool normalize_advantages {true}
 Flag indicating whether to normalize the advantages.
 
DeviceType device_type {DeviceType::CPU}
 The device on which training runs.
 
std::string save_model_path {""}
 The path at which to save the trained model.
 

Detailed Description

The PPOConfig struct. Configuration for the PPOSolver class.
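For orientation, a minimal usage sketch is shown below. The include path and the `main` scaffolding are illustrative assumptions, not taken from the library; only the member names and `print()` come from this page — check the actual headers before use.

```cpp
#include <iostream>

// Illustrative include path; the actual location of ppo_config.h may differ.
#include "cuberl/rl/algos/pg/ppo_config.h"

int main() {
    using cuberl::rl::algos::pg::PPOConfig;

    PPOConfig config;

    // Override selected defaults before handing the config to a PPOSolver.
    config.gamma = 0.995;              // discount factor
    config.n_episodes = 500;           // train longer than the default 100
    config.clip_epsilon = 0.2;         // PPO surrogate-objective clipping
    config.clip_policy_grad = true;    // enable policy-gradient clipping
    config.max_grad_norm_policy = 0.5; // clip policy gradients at this norm

    // Inspect the effective configuration on stdout.
    config.print(std::cout);
    return 0;
}
```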

Member Function Documentation

◆ load_from_json()

void cuberl::rl::algos::pg::PPOConfig::load_from_json ( const std::string &  filename)

Load the configuration from the given json file.
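The JSON schema is not documented on this page. The fragment below is a plausible configuration file under the assumption that the keys mirror the public member names one-to-one; the source does not confirm this, so verify against the parsing code in ppo_config.h.

```json
{
  "gamma": 0.99,
  "epsilon": 0.01,
  "clip_policy_grad": false,
  "clip_critic_grad": false,
  "max_grad_norm_policy": 1.0,
  "max_grad_norm_critic": 1.0,
  "n_episodes": 100,
  "max_itrs_per_episode": 100,
  "buffer_size": 100,
  "max_passes_over_batch": 4,
  "clip_epsilon": 0.5,
  "normalize_advantages": true,
  "save_model_path": ""
}
```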

◆ print()

std::ostream & cuberl::rl::algos::pg::PPOConfig::print ( std::ostream &  out) const

Print the configuration to the given output stream.

Parameters
out	The output stream to write the configuration to.
Returns
A reference to the output stream.

Member Data Documentation

◆ buffer_size

uint_t cuberl::rl::algos::pg::PPOConfig::buffer_size {100}

How large the experience buffer should be.

◆ clip_critic_grad

bool cuberl::rl::algos::pg::PPOConfig::clip_critic_grad {false}

Flag indicating whether to clip the critic grad.

◆ clip_epsilon

real_t cuberl::rl::algos::pg::PPOConfig::clip_epsilon {0.5}

The clipping parameter for the PPO surrogate objective.

◆ clip_policy_grad

bool cuberl::rl::algos::pg::PPOConfig::clip_policy_grad {false}

Flag indicating whether to clip the policy grad.

◆ device_type

DeviceType cuberl::rl::algos::pg::PPOConfig::device_type {DeviceType::CPU}

The device on which training runs.

◆ epsilon

real_t cuberl::rl::algos::pg::PPOConfig::epsilon {0.01}

The epsilon factor to use.

◆ gamma

real_t cuberl::rl::algos::pg::PPOConfig::gamma {0.99}

Discount factor.

◆ max_grad_norm_critic

real_t cuberl::rl::algos::pg::PPOConfig::max_grad_norm_critic {1.0}

The maximum norm at which to clip the critic gradient.

◆ max_grad_norm_policy

real_t cuberl::rl::algos::pg::PPOConfig::max_grad_norm_policy {1.0}

The maximum norm at which to clip the policy gradient.

◆ max_itrs_per_episode

uint_t cuberl::rl::algos::pg::PPOConfig::max_itrs_per_episode {100}

Number of iterations per episode.

◆ max_passes_over_batch

uint_t cuberl::rl::algos::pg::PPOConfig::max_passes_over_batch {4}

Number of optimization passes over each collected batch.

◆ n_episodes

uint_t cuberl::rl::algos::pg::PPOConfig::n_episodes {100}

Number of training episodes.

◆ normalize_advantages

bool cuberl::rl::algos::pg::PPOConfig::normalize_advantages {true}

Flag indicating whether to normalize the advantages.

◆ save_model_path

std::string cuberl::rl::algos::pg::PPOConfig::save_model_path {""}

The path at which to save the trained model.

The documentation for this struct was generated from the following file:
ppo_config.h