bitrl_cuberl_docs/reinforce__monitor_8h_source.html

#ifndef REINFORCE_MONITOR_H

#define REINFORCE_MONITOR_H


#include "cuberl/base/cubeai_config.h"


#ifdef USE_PYTORCH


#include "cuberl/base/cuberl_types.h"

#include "cuberl/data_structs/experience_buffer.h"


#include <vector>

#include <tuple>


namespace cuberl {

namespace rl {

namespace algos {

namespace pg {


template<typename ActionType, typename StateType>

struct ReinforceMonitor

{


    typedef StateType state_type;

    typedef ActionType action_type;

    typedef std::tuple<state_type, // the state observed

                       action_type,  // the action taken

                       real_t, // the reward received

                       bool, // done?

                       torch_tensor_t // log prob

                       > experience_tuple_type;


    typedef cuberl::containers::ExperienceBuffer<experience_tuple_type> experience_buffer_type;


    std::vector<real_t> rewards;

    std::vector<real_t> policy_loss_values;

    std::vector<uint_t> episode_duration;


    void reset()noexcept;


    template<typename T, uint_t index>

    std::vector<T>

    get(const std::vector<experience_tuple_type>& experience)const;


};


template<typename ActionType, typename StateType>

template<typename T, uint_t index>

std::vector<T>

ReinforceMonitor<ActionType, StateType>::get(const std::vector<experience_tuple_type>& experience)const{


    std::vector<T> result;

    result.reserve(experience.size());


    auto b = experience.begin();

    auto e = experience.end();


    for(; b != e; ++b){

        auto item = *b;

        result.push_back(std::get<index>(item));

    }


    return result;

}


template<typename ActionType, typename StateType>

void

ReinforceMonitor<ActionType, StateType>::reset()noexcept{


    policy_loss_values.clear();

    rewards.clear();

    episode_duration.clear();

}


}

}

}

}


#endif

#endif

cuberl_types.h

bitrl::real_t
double real_t
real_t
Definition bitrl_types.h:23

cuberl
Various utilities used when working with RL problems.
Definition cuberl_types.h:16

example::state_type
std::pair< uint_t, uint_t > state_type
Definition example_15.cpp:28