https://mooseframework.inl.gov
LibtorchDRLControlTrainer.h
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://mooseframework.inl.gov
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
10 #ifdef LIBTORCH_ENABLED
11 
12 #pragma once
13 
14 #include <torch/torch.h>
16 
17 #include "libmesh/utility.h"
18 #include "SurrogateTrainer.h"
19 
29 {
30 public:
32 
35 
36  virtual void execute() override;
37 
43 
45  void trainController();
46 
48 
49 protected:
52 
59  void convertDataToTensor(std::vector<std::vector<Real>> & vector_data,
60  torch::Tensor & tensor_data,
61  const bool detach = false);
62 
68  torch::Tensor evaluateValue(torch::Tensor & input);
69 
77  torch::Tensor evaluateAction(torch::Tensor & input, torch::Tensor & output);
78 
80  void computeRewardToGo();
81 
83  void resetData();
84 
86  const std::vector<ReporterName> _response_names;
87 
89  std::vector<const std::vector<Real> *> _response_value_pointers;
90 
92  const std::vector<Real> _response_shift_factors;
93 
95  const std::vector<Real> _response_scaling_factors;
96 
98  const std::vector<ReporterName> _control_names;
99 
101  std::vector<const std::vector<Real> *> _control_value_pointers;
102 
104  const std::vector<ReporterName> _log_probability_names;
105 
107  std::vector<const std::vector<Real> *> _log_probability_value_pointers;
108 
111 
113  const std::vector<Real> * _reward_value_pointer;
114 
116  const unsigned int _input_timesteps;
117 
119  unsigned int _num_inputs;
121  unsigned int _num_outputs;
122 
125  std::vector<std::vector<Real>> _input_data;
126  std::vector<std::vector<Real>> _output_data;
127  std::vector<std::vector<Real>> _log_probability_data;
129 
132  std::vector<Real> _reward_data;
133  std::vector<Real> _return_data;
135 
137  const unsigned int _num_epochs;
138 
140  const std::vector<unsigned int> _num_critic_neurons_per_layer;
141 
144 
146  const std::vector<unsigned int> _num_control_neurons_per_layer;
147 
150 
152  const unsigned int _update_frequency;
153 
156 
159 
161  const std::vector<Real> _action_std;
162 
165  const std::string _filename_base;
166 
170  const bool _read_from_file;
171 
175  const bool _shift_outputs;
176 
179 
182 
184  const unsigned int _loss_print_frequency;
185 
187  std::shared_ptr<Moose::LibtorchArtificialNeuralNet> _control_nn;
189  std::shared_ptr<Moose::LibtorchArtificialNeuralNet> _critic_nn;
190 
192  torch::Tensor _std;
193 
195  torch::Tensor _input_tensor;
196  torch::Tensor _output_tensor;
197  torch::Tensor _return_tensor;
198  torch::Tensor _log_probability_tensor;
199 
200 private:
208  void getInputDataFromReporter(std::vector<std::vector<Real>> & data,
209  const std::vector<const std::vector<Real> *> & reporter_links,
210  const unsigned int num_timesteps);
217  void getOutputDataFromReporter(std::vector<std::vector<Real>> & data,
218  const std::vector<const std::vector<Real> *> & reporter_links);
219 
226  void getRewardDataFromReporter(std::vector<Real> & data,
227  const std::vector<Real> * const reporter_link);
228 
230  void getReporterPointers(const std::vector<ReporterName> & reporter_names,
231  std::vector<const std::vector<Real> *> & pointer_storage);
232 
234  unsigned int _update_counter;
235 };
236 
237 #endif
void computeAverageEpisodeReward()
Compute the average episodic reward.
Real averageEpisodeReward()
Function which returns the current average episodic reward.
std::vector< std::vector< Real > > _input_data
Real _average_episode_reward
Storage for the current average episode reward.
torch::Tensor _input_tensor
torch::Tensor version of the input and action data.
const std::vector< ReporterName > _control_names
Control reporter names.
void getRewardDataFromReporter(std::vector< Real > &data, const std::vector< Real > *const reporter_link)
Extract the reward values from the postprocessors of the controlled system. This assumes that they are...
const std::vector< Real > _action_std
Standard deviation for the actions.
const bool _shift_outputs
Currently, the controls are executed after the user objects at initial in moose.
void getReporterPointers(const std::vector< ReporterName > &reporter_names, std::vector< const std::vector< Real > *> &pointer_storage)
Getting reporter pointers with given names.
torch::Tensor evaluateAction(torch::Tensor &input, torch::Tensor &output)
Function which evaluates the control net and then computes the logarithmic probability of the action...
const std::vector< unsigned int > _num_control_neurons_per_layer
Number of neurons within the hidden layers in the control neural net.
unsigned int _num_inputs
Number of inputs for the control and critic neural nets.
void convertDataToTensor(std::vector< std::vector< Real >> &vector_data, torch::Tensor &tensor_data, const bool detach=false)
Function to convert input/output data from std::vector<std::vector> to torch::Tensor.
void trainController()
The condensed training function.
std::vector< const std::vector< Real > * > _response_value_pointers
Pointers to the current values of the responses.
const ReporterName _reward_name
Reward reporter name.
const std::vector< Real > _response_shift_factors
Shifting constants for the responses.
const Real _clip_param
The clip parameter used while clamping the advantage value.
void getInputDataFromReporter(std::vector< std::vector< Real >> &data, const std::vector< const std::vector< Real > *> &reporter_links, const unsigned int num_timesteps)
Extract the response values from the postprocessors of the controlled system.
const std::vector< ReporterName > _response_names
Response reporter names.
std::vector< const std::vector< Real > * > _log_probability_value_pointers
Pointers to the current values of the control log probabilities.
const unsigned int _input_timesteps
Number of timesteps to fetch from the reporters to be the input of the neural nets.
std::vector< std::vector< Real > > _output_data
torch::Tensor _std
standard deviation in a tensor format for sampling the actual control value
const Real _control_learning_rate
The learning rate for the optimization algorithm for the control.
unsigned int _num_outputs
Number of outputs for the control neural network.
const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const
const Real _decay_factor
Decaying factor that is used when calculating the return from the reward.
const std::vector< unsigned int > _num_critic_neurons_per_layer
Number of neurons within the hidden layers in the critic neural net.
const Real _critic_learning_rate
The learning rate for the optimization algorithm for the critic.
LibtorchDRLControlTrainer(const InputParameters &parameters)
construct using input parameters
std::vector< std::vector< Real > > _log_probability_data
const unsigned int _loss_print_frequency
The frequency the loss should be printed.
const unsigned int _num_epochs
Number of epochs for the training of the emulator.
const std::vector< Real > _response_scaling_factors
Scaling constants for the responses.
void resetData()
Reset data after updating the neural network.
std::shared_ptr< Moose::LibtorchArtificialNeuralNet > _critic_nn
Pointer to the critic neural net object.
static InputParameters validParams()
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
torch::Tensor evaluateValue(torch::Tensor &input)
Function which evaluates the critic to get the value (discounted reward).
void getOutputDataFromReporter(std::vector< std::vector< Real >> &data, const std::vector< const std::vector< Real > *> &reporter_links)
Extract the output (actions, logarithmic probabilities) values from the postprocessors of the control...
const bool _read_from_file
Switch indicating if an already existing neural net should be read from a file or not...
const unsigned int _update_frequency
Number of transients to run and collect data from before updating the controller neural net...
const std::string _filename_base
Name of the pytorch output file.
void computeRewardToGo()
Compute the return value by discounting the rewards and summing them.
const InputParameters & parameters() const
This trainer is responsible for training neural networks that efficiently control different processes...
This is the base trainer class whose main functionality is the API for declaring model data...
unsigned int _update_counter
Counter for number of transient simulations that have been run before updating the controller...
const bool _standardize_advantage
Switch to enable the standardization of the advantages.
std::shared_ptr< Moose::LibtorchArtificialNeuralNet > _control_nn
Pointer to the control (or actor) neural net object.
const std::vector< ReporterName > _log_probability_names
Log probability reporter names.
std::vector< const std::vector< Real > * > _control_value_pointers
Pointers to the current values of the control signals.
const std::vector< Real > * _reward_value_pointer
Pointer to the current values of the reward.