86 auto state = std::any_cast<uint_t>(options.find(
"state")->second);
87 auto state_actions = std::any_cast<DynVec<real_t>>(options.find(
"state_actions")->second);
91 assert(best_actions.size() <= action_space_size_ &&
"Incompatible number of best actions. Cannot exccedd the action space size");
94 std::vector<std::pair<uint_t, real_t>> best_actions_vals(best_actions.size());
96 for(
uint_t i=0; i<best_actions.size(); ++i){
97 best_actions_vals[i] = {best_actions[i], 1.0/best_actions.size()};
100 auto& state_action_vals = this->policy_.state_actions_values();
102 auto& view = state_action_vals[state];
105 auto act_val_map = std::unordered_map<uint_t, real_t>();
107 for(
uint_t a=0; a<best_actions_vals.size(); ++a){
108 act_val_map.insert({best_actions_vals[a].first, best_actions_vals[a].second});
111 for(
uint_t a=0; a<view.size(); ++a){
112 auto action = view[a].first;
114 if(act_val_map.contains(action)){
115 view[a].second = act_val_map[action];
118 view[a].second = 0.0;
122 return this->policy_;