1#ifndef REINFORCE_LOSS_H
2#define REINFORCE_LOSS_H
4#include "cuberl/base/cubeai_config.h"
23std::vector<torch_tensor_t>
24compute_loss_item(
const std::vector<real_t>& rewards,
25 const std::vector<torch_tensor_t>& log_probs);
32compute_baseline_with_constant(
const std::vector<real_t>& rewards,
41compute_baseline_with_mean(
const std::vector<real_t>& rewards);
48compute_baseline_with_standardization(
const std::vector<real_t>& rewards,
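// Cross-reference notes for symbols used in this header:
//   - TOLERANCE (declared as `const real_t TOLERANCE`): tolerance used around
//     the library. Definition: bitrl_consts.h:31
//   - real_t (declared as `double real_t`). Definition: bitrl_types.h:23
//   - cuberl_types.h:16 (symbol name not shown here): "Various utilities used
//     when working with RL problems."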