bitrl & cuberl Documentation
Simulation engine for reinforcement learning agents
Loading...
Searching...
No Matches
exe Namespace Reference

Functions

DynMat< real_t > create_transition_matrix ()
 
DynMat< real_t > compute_matrix_power (const DynMat< real_t > &mat, uint_t power)
 
void print_matrix (const DynMat< real_t > &mat)
 
real_t get_reward (real_t prob, uint_t n=10)
 
void update_record (std::vector< std::vector< real_t > > &records, uint_t action, real_t r)
 
uint_t get_best_arm (const std::vector< std::vector< real_t > > &records)
 
std::vector< real_t > get_probs (uint_t n)
 
DynVec< real_t > extract_part (const std::vector< std::vector< real_t > > &values)
 

Variables

const uint_t N = 10
 
const auto N_EXPERIMENTS = 500
 
const auto TAU = 0.7
 
const uint SEED = 42
 

Detailed Description

Solve the multi-armed bandit problem using a soft-max policy. When using a soft-max policy we get a distribution of probabilities over the actions. We select the action with the highest probability. For this example we will solve a 10-armed bandit problem, so N=10.

This example is taken from the book Reinforcement Learning in Action, published by Manning Publications.

Function Documentation

◆ compute_matrix_power()

DynMat< real_t > exe::compute_matrix_power ( const DynMat< real_t > &  mat,
uint_t  power 
)

◆ create_transition_matrix()

DynMat< real_t > exe::create_transition_matrix ( )

◆ extract_part()

DynVec< real_t > exe::extract_part ( const std::vector< std::vector< real_t > > &  values)

◆ get_best_arm()

uint_t exe::get_best_arm ( const std::vector< std::vector< real_t > > &  records)

◆ get_probs()

std::vector< real_t > exe::get_probs ( uint_t  n)

◆ get_reward()

real_t exe::get_reward ( real_t  prob,
uint_t  n = 10 
)

◆ print_matrix()

void exe::print_matrix ( const DynMat< real_t > &  mat)

◆ update_record()

void exe::update_record ( std::vector< std::vector< real_t > > &  records,
uint_t  action,
real_t  r 
)

Variable Documentation

◆ N

const uint_t exe::N = 10

◆ N_EXPERIMENTS

const auto exe::N_EXPERIMENTS = 500

◆ SEED

const uint exe::SEED = 42

◆ TAU

const auto exe::TAU = 0.7