doxygen/modules/LibtorchDRLControlTrainer_8h_source.html

 //* This file is part of the MOOSE framework
 //* https://mooseframework.inl.gov
 //*
 //* All rights reserved, see COPYRIGHT for full restrictions
 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
 //*
 //* Licensed under LGPL 2.1, please see LICENSE for details
 //* https://www.gnu.org/licenses/lgpl-2.1.html

 #ifdef LIBTORCH_ENABLED

 #pragma once

 #include <torch/torch.h>
 #include "LibtorchArtificialNeuralNet.h"

 #include "libmesh/utility.h"
 #include "SurrogateTrainer.h"

 /**
  * This trainer is responsible for training neural networks that efficiently control
  * different processes. It holds two networks, a control (or actor) net and a critic net,
  * and collects responses, control signals, log probabilities and rewards through reporters.
  */
 class LibtorchDRLControlTrainer : public SurrogateTrainerBase
 {
 public:
   /// Returns the InputParameters object associated with this class
   static InputParameters validParams();

   /// Construct using input parameters
   LibtorchDRLControlTrainer(const InputParameters & parameters);

   /// Override of the base-class execute(); the implementation lives in the source file
   virtual void execute() override;

   /**
    * Function which returns the current average episodic reward.
    * Const-qualified so it can be queried through const references to the trainer.
    */
   Real averageEpisodeReward() const { return _average_episode_reward; }

   /// The condensed training function
   void trainController();

   /**
    * Read-only access to the control (or actor) neural net.
    * Dereferences _control_nn without a null check, so the pointer must be set
    * before this is called.
    */
   const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const { return *_control_nn; }

 protected:
   /// Compute the average episodic reward
   void computeAverageEpisodeReward();

   /**
    * Function to convert input/output data from std::vector<std::vector> to torch::tensor
    * @param vector_data The nested-vector data to convert
    * @param tensor_data The tensor receiving the converted data
    * @param detach NOTE(review): presumably requests a tensor detached from the autograd
    *               graph; confirm against the implementation
    */
   void convertDataToTensor(std::vector<std::vector<Real>> & vector_data,
                            torch::Tensor & tensor_data,
                            const bool detach = false);

   /// Function which evaluates the critic to get the value (discounted reward)
   torch::Tensor evaluateValue(torch::Tensor & input);

   /**
    * Function which evaluates the control net and then computes the logarithmic
    * probability of the action.
    * NOTE(review): presumably `input` holds the net inputs and `output` the actions whose
    * log probability is evaluated; confirm against the implementation
    */
   torch::Tensor evaluateAction(torch::Tensor & input, torch::Tensor & output);

   /// Compute the return value by discounting the rewards and summing them
   void computeRewardToGo();

   /// Reset data after updating the neural network
   void resetData();

   /// Response reporter names
   const std::vector<ReporterName> _response_names;

   /// Pointers to the current values of the responses
   std::vector<const std::vector<Real> *> _response_value_pointers;

   /// Shifting constants for the responses
   const std::vector<Real> _response_shift_factors;

   /// Scaling constants for the responses
   const std::vector<Real> _response_scaling_factors;

   /// Control reporter names
   const std::vector<ReporterName> _control_names;

   /// Pointers to the current values of the control signals
   std::vector<const std::vector<Real> *> _control_value_pointers;

   /// Log probability reporter names
   const std::vector<ReporterName> _log_probability_names;

   /// Pointers to the current values of the control log probabilities
   std::vector<const std::vector<Real> *> _log_probability_value_pointers;

   /// Reward reporter name
   const ReporterName _reward_name;

   /// Pointer to the current values of the reward
   const std::vector<Real> * _reward_value_pointer;

   /// Number of timesteps to fetch from the reporters to be the input of the neural nets
   const unsigned int _input_timesteps;

   /// Number of inputs for the control and critic neural nets
   unsigned int _num_inputs;
   /// Number of outputs for the control neural network
   unsigned int _num_outputs;

   ///@{
   /// Nested-vector storage for the input, output and log probability data
   /// (presumably the std::vector counterparts of the tensors below; confirm in the source file)
   std::vector<std::vector<Real>> _input_data;
   std::vector<std::vector<Real>> _output_data;
   std::vector<std::vector<Real>> _log_probability_data;
   ///@}

   ///@{
   /// Storage for the reward values and for the return values
   /// (presumably the returns are the output of computeRewardToGo(); confirm in the source file)
   std::vector<Real> _reward_data;
   std::vector<Real> _return_data;
   ///@}

   /// Number of epochs for the training of the emulator
   const unsigned int _num_epochs;

   /// Number of neurons within the hidden layers in the critic neural net
   const std::vector<unsigned int> _num_critic_neurons_per_layer;

   /// The learning rate for the optimization algorithm for the critic
   const Real _critic_learning_rate;

   /// Number of neurons within the hidden layers in the control neural net
   const std::vector<unsigned int> _num_control_neurons_per_layer;

   /// The learning rate for the optimization algorithm for the control
   const Real _control_learning_rate;

   /// Number of transients to run and collect data from before updating the controller neural net
   const unsigned int _update_frequency;

   /// The clip parameter used while clamping the advantage value
   const Real _clip_param;

   /// Decaying factor that is used when calculating the return from the reward
   const Real _decay_factor;

   /// Standard deviation for the actions
   const std::vector<Real> _action_std;

   /// Name of the pytorch output file
   const std::string _filename_base;

   /// Switch indicating if an already existing neural net should be read from a file or not
   const bool _read_from_file;

   /// Currently, the controls are executed after the user objects at initial in moose.
   /// NOTE(review): presumably this switch shifts the collected outputs to compensate; confirm
   const bool _shift_outputs;

   /// Storage for the current average episode reward
   Real _average_episode_reward;

   /// Switch to enable the standardization of the advantages
   const bool _standardize_advantage;

   /// The frequency the loss should be printed
   const unsigned int _loss_print_frequency;

   /// Pointer to the control (or actor) neural net object
   std::shared_ptr<Moose::LibtorchArtificialNeuralNet> _control_nn;
   /// Pointer to the critic neural net object
   std::shared_ptr<Moose::LibtorchArtificialNeuralNet> _critic_nn;

   /// Standard deviation in a tensor format for sampling the actual control value
   torch::Tensor _std;

   ///@{
   /// Torch::tensor version of the input and action data; the return and log probability
   /// tensors presumably mirror _return_data and _log_probability_data (confirm in the source file)
   torch::Tensor _input_tensor;
   torch::Tensor _output_tensor;
   torch::Tensor _return_tensor;
   torch::Tensor _log_probability_tensor;
   ///@}

 private:
   /**
    * Extract the response values from the postprocessors of the controlled system
    * @param data The container receiving the extracted values
    * @param reporter_links Pointers to the reporter value vectors to read from
    * @param num_timesteps Number of timesteps to use (presumably how many past
    *                      timesteps form one input row; confirm in the source file)
    */
   void getInputDataFromReporter(std::vector<std::vector<Real>> & data,
                                 const std::vector<const std::vector<Real> *> & reporter_links,
                                 const unsigned int num_timesteps);

   /**
    * Extract the output (actions, logarithmic probabilities) values from the
    * postprocessors of the controlled system
    * @param data The container receiving the extracted values
    * @param reporter_links Pointers to the reporter value vectors to read from
    */
   void getOutputDataFromReporter(std::vector<std::vector<Real>> & data,
                                  const std::vector<const std::vector<Real> *> & reporter_links);

   /**
    * Extract the reward values from the postprocessors of the controlled system
    * @param data The container receiving the extracted values
    * @param reporter_link Pointer to the reporter value vector to read from
    */
   void getRewardDataFromReporter(std::vector<Real> & data,
                                  const std::vector<Real> * const reporter_link);

   /**
    * Getting reporter pointers with given names
    * @param reporter_names The names of the reporters to look up
    * @param pointer_storage The container receiving the pointers
    */
   void getReporterPointers(const std::vector<ReporterName> & reporter_names,
                            std::vector<const std::vector<Real> *> & pointer_storage);

   /// Counter for number of transient simulations that have been run before updating the controller
   unsigned int _update_counter;
 };

 #endif
LibtorchDRLControlTrainer::computeAverageEpisodeReward
void computeAverageEpisodeReward()
Compute the average episodic reward.
Definition: LibtorchDRLControlTrainer.C:278

LibtorchDRLControlTrainer::_log_probability_tensor
torch::Tensor _log_probability_tensor
Definition: LibtorchDRLControlTrainer.h:198

LibtorchDRLControlTrainer::averageEpisodeReward
Real averageEpisodeReward()
Function which returns the current average episodic reward.
Definition: LibtorchDRLControlTrainer.h:42

LibtorchDRLControlTrainer::_input_data
std::vector< std::vector< Real > > _input_data
Definition: LibtorchDRLControlTrainer.h:125

LibtorchDRLControlTrainer::_average_episode_reward
Real _average_episode_reward
Storage for the current average episode reward.
Definition: LibtorchDRLControlTrainer.h:178

LibtorchDRLControlTrainer::_input_tensor
torch::Tensor _input_tensor
Torch::tensor version of the input and action data.
Definition: LibtorchDRLControlTrainer.h:195

LibtorchDRLControlTrainer::_control_names
const std::vector< ReporterName > _control_names
Control reporter names.
Definition: LibtorchDRLControlTrainer.h:98

LibtorchDRLControlTrainer::getRewardDataFromReporter
void getRewardDataFromReporter(std::vector< Real > &data, const std::vector< Real > *const reporter_link)
Extract the reward values from the postprocessors of the controlled system. This assumes that they are...
Definition: LibtorchDRLControlTrainer.C:471

LibtorchDRLControlTrainer::_action_std
const std::vector< Real > _action_std
Standard deviation for the actions.
Definition: LibtorchDRLControlTrainer.h:161

LibtorchDRLControlTrainer::_shift_outputs
const bool _shift_outputs
Currently, the controls are executed after the user objects at initial in moose.
Definition: LibtorchDRLControlTrainer.h:175

LibtorchDRLControlTrainer::getReporterPointers
void getReporterPointers(const std::vector< ReporterName > &reporter_names, std::vector< const std::vector< Real > *> &pointer_storage)
Getting reporter pointers with given names.
Definition: LibtorchDRLControlTrainer.C:479

LibtorchDRLControlTrainer::evaluateAction
torch::Tensor evaluateAction(torch::Tensor &input, torch::Tensor &output)
Function which evaluates the control net and then computes the logarithmic probability of the action...
Definition: LibtorchDRLControlTrainer.C:399

LibtorchDRLControlTrainer::_num_control_neurons_per_layer
const std::vector< unsigned int > _num_control_neurons_per_layer
Number of neurons within the hidden layers in the control neural net.
Definition: LibtorchDRLControlTrainer.h:146

LibtorchDRLControlTrainer::_num_inputs
unsigned int _num_inputs
Number of inputs for the control and critic neural nets.
Definition: LibtorchDRLControlTrainer.h:119

LibtorchDRLControlTrainer::convertDataToTensor
void convertDataToTensor(std::vector< std::vector< Real >> &vector_data, torch::Tensor &tensor_data, const bool detach=false)
Function to convert input/output data from std::vector<std::vector> to torch::tensor.
Definition: LibtorchDRLControlTrainer.C:373

Moose::LibtorchArtificialNeuralNet

LibtorchDRLControlTrainer::trainController
void trainController()
The condensed training function.
Definition: LibtorchDRLControlTrainer.C:312

LibtorchDRLControlTrainer::_response_value_pointers
std::vector< const std::vector< Real > * > _response_value_pointers
Pointers to the current values of the responses.
Definition: LibtorchDRLControlTrainer.h:89

LibtorchDRLControlTrainer::_reward_name
const ReporterName _reward_name
Reward reporter name.
Definition: LibtorchDRLControlTrainer.h:110

LibtorchDRLControlTrainer::_response_shift_factors
const std::vector< Real > _response_shift_factors
Shifting constants for the responses.
Definition: LibtorchDRLControlTrainer.h:92

LibtorchDRLControlTrainer::_clip_param
const Real _clip_param
The clip parameter used while clamping the advantage value.
Definition: LibtorchDRLControlTrainer.h:155

LibtorchDRLControlTrainer::getInputDataFromReporter
void getInputDataFromReporter(std::vector< std::vector< Real >> &data, const std::vector< const std::vector< Real > *> &reporter_links, const unsigned int num_timesteps)
Extract the response values from the postprocessors of the controlled system.
Definition: LibtorchDRLControlTrainer.C:426

LibtorchDRLControlTrainer::_response_names
const std::vector< ReporterName > _response_names
Response reporter names.
Definition: LibtorchDRLControlTrainer.h:86

LibtorchArtificialNeuralNet.h

LibtorchDRLControlTrainer::_log_probability_value_pointers
std::vector< const std::vector< Real > * > _log_probability_value_pointers
Pointers to the current values of the control log probabilities.
Definition: LibtorchDRLControlTrainer.h:107

LibtorchDRLControlTrainer::_input_timesteps
const unsigned int _input_timesteps
Number of timesteps to fetch from the reporters to be the input of the neural nets.
Definition: LibtorchDRLControlTrainer.h:116

InputParameters

LibtorchDRLControlTrainer::_output_data
std::vector< std::vector< Real > > _output_data
Definition: LibtorchDRLControlTrainer.h:126

LibtorchDRLControlTrainer::_std
torch::Tensor _std
standard deviation in a tensor format for sampling the actual control value
Definition: LibtorchDRLControlTrainer.h:192

LibtorchDRLControlTrainer::_control_learning_rate
const Real _control_learning_rate
The learning rate for the optimization algorithm for the control.
Definition: LibtorchDRLControlTrainer.h:149

LibtorchDRLControlTrainer::execute
virtual void execute() override
Definition: LibtorchDRLControlTrainer.C:242

LibtorchDRLControlTrainer::_num_outputs
unsigned int _num_outputs
Number of outputs for the control neural network.
Definition: LibtorchDRLControlTrainer.h:121

LibtorchDRLControlTrainer::controlNeuralNet
const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const
Definition: LibtorchDRLControlTrainer.h:47

LibtorchDRLControlTrainer::_decay_factor
const Real _decay_factor
Decaying factor that is used when calculating the return from the reward.
Definition: LibtorchDRLControlTrainer.h:158

LibtorchDRLControlTrainer::_reward_data
std::vector< Real > _reward_data
Definition: LibtorchDRLControlTrainer.h:132

LibtorchDRLControlTrainer::_num_critic_neurons_per_layer
const std::vector< unsigned int > _num_critic_neurons_per_layer
Number of neurons within the hidden layers in the critic neural net.
Definition: LibtorchDRLControlTrainer.h:140

LibtorchDRLControlTrainer::_return_tensor
torch::Tensor _return_tensor
Definition: LibtorchDRLControlTrainer.h:197

LibtorchDRLControlTrainer::_output_tensor
torch::Tensor _output_tensor
Definition: LibtorchDRLControlTrainer.h:196

LibtorchDRLControlTrainer::_critic_learning_rate
const Real _critic_learning_rate
The learning rate for the optimization algorithm for the critic.
Definition: LibtorchDRLControlTrainer.h:143

LibtorchDRLControlTrainer::LibtorchDRLControlTrainer
LibtorchDRLControlTrainer(const InputParameters &parameters)
construct using input parameters
Definition: LibtorchDRLControlTrainer.C:128

LibtorchDRLControlTrainer::_log_probability_data
std::vector< std::vector< Real > > _log_probability_data
Definition: LibtorchDRLControlTrainer.h:127

LibtorchDRLControlTrainer::_loss_print_frequency
const unsigned int _loss_print_frequency
The frequency the loss should be printed.
Definition: LibtorchDRLControlTrainer.h:184

LibtorchDRLControlTrainer::_num_epochs
const unsigned int _num_epochs
Number of epochs for the training of the emulator.
Definition: LibtorchDRLControlTrainer.h:137

LibtorchDRLControlTrainer::_response_scaling_factors
const std::vector< Real > _response_scaling_factors
Scaling constants for the responses.
Definition: LibtorchDRLControlTrainer.h:95

LibtorchDRLControlTrainer::resetData
void resetData()
Reset data after updating the neural network.
Definition: LibtorchDRLControlTrainer.C:410

LibtorchDRLControlTrainer::_critic_nn
std::shared_ptr< Moose::LibtorchArtificialNeuralNet > _critic_nn
Pointer to the critic neural net object.
Definition: LibtorchDRLControlTrainer.h:189

LibtorchDRLControlTrainer::validParams
static InputParameters validParams()
Definition: LibtorchDRLControlTrainer.C:22

Real
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real

LibtorchDRLControlTrainer::evaluateValue
torch::Tensor evaluateValue(torch::Tensor &input)
Function which evaluates the critic to get the value (discounted reward)
Definition: LibtorchDRLControlTrainer.C:393

LibtorchDRLControlTrainer::getOutputDataFromReporter
void getOutputDataFromReporter(std::vector< std::vector< Real >> &data, const std::vector< const std::vector< Real > *> &reporter_links)
Extract the output (actions, logarithmic probabilities) values from the postprocessors of the control...
Definition: LibtorchDRLControlTrainer.C:459

LibtorchDRLControlTrainer::_read_from_file
const bool _read_from_file
Switch indicating if an already existing neural net should be read from a file or not...
Definition: LibtorchDRLControlTrainer.h:170

LibtorchDRLControlTrainer::_return_data
std::vector< Real > _return_data
Definition: LibtorchDRLControlTrainer.h:133

LibtorchDRLControlTrainer::_update_frequency
const unsigned int _update_frequency
Number of transients to run and collect data from before updating the controller neural net...
Definition: LibtorchDRLControlTrainer.h:152

LibtorchDRLControlTrainer::_filename_base
const std::string _filename_base
Name of the pytorch output file.
Definition: LibtorchDRLControlTrainer.h:165

LibtorchDRLControlTrainer::computeRewardToGo
void computeRewardToGo()
Compute the return value by discounting the rewards and summing them.
Definition: LibtorchDRLControlTrainer.C:288

GeneralUserObject::parameters
const InputParameters & parameters() const

LibtorchDRLControlTrainer
This trainer is responsible for training neural networks that efficiently control different processes...
Definition: LibtorchDRLControlTrainer.h:28

SurrogateTrainer.h

SurrogateTrainerBase
This is the base trainer class whose main functionality is the API for declaring model data...
Definition: SurrogateTrainer.h:32

LibtorchDRLControlTrainer::_update_counter
unsigned int _update_counter
Counter for number of transient simulations that have been run before updating the controller...
Definition: LibtorchDRLControlTrainer.h:234

LibtorchDRLControlTrainer::_standardize_advantage
const bool _standardize_advantage
Switch to enable the standardization of the advantages.
Definition: LibtorchDRLControlTrainer.h:181

LibtorchDRLControlTrainer::_control_nn
std::shared_ptr< Moose::LibtorchArtificialNeuralNet > _control_nn
Pointer to the control (or actor) neural net object.
Definition: LibtorchDRLControlTrainer.h:187

LibtorchDRLControlTrainer::_log_probability_names
const std::vector< ReporterName > _log_probability_names
Log probability reporter names.
Definition: LibtorchDRLControlTrainer.h:104

LibtorchDRLControlTrainer::_control_value_pointers
std::vector< const std::vector< Real > * > _control_value_pointers
Pointers to the current values of the control signals.
Definition: LibtorchDRLControlTrainer.h:101

ReporterName

LibtorchDRLControlTrainer::_reward_value_pointer
const std::vector< Real > * _reward_value_pointer
Pointer to the current values of the reward.
Definition: LibtorchDRLControlTrainer.h:113