https://mooseframework.inl.gov
LibtorchDRLControlTrainer.C
//* This file is part of the MOOSE framework
//* https://mooseframework.inl.gov
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#ifdef LIBTORCH_ENABLED

#include "LibtorchDataset.h"
#include "LibtorchDRLControlTrainer.h"
#include "LibtorchUtils.h"
#include "LibtorchArtificialNeuralNet.h"
#include "Sampler.h"
#include "Function.h"

registerMooseObject("StochasticToolsApp", LibtorchDRLControlTrainer);
InputParameters
LibtorchDRLControlTrainer::validParams()
{
  InputParameters params = SurrogateTrainerBase::validParams();

  params.addClassDescription(
      "Trains a neural network controller using the Proximal Policy Optimization (PPO) algorithm.");

  params.addRequiredParam<std::vector<ReporterName>>(
      "response", "Reporter values containing the response values from the model.");
  params.addParam<std::vector<Real>>(
      "response_shift_factors",
      "A shift constant which will be used to shift the response values. This is used for the "
      "manipulation of the neural net inputs for better training efficiency.");
  params.addParam<std::vector<Real>>(
      "response_scaling_factors",
      "A normalization constant which will be used to divide the response values. This is used for "
      "the manipulation of the neural net inputs for better training efficiency.");
  params.addRequiredParam<std::vector<ReporterName>>(
      "control",
      "Reporters containing the values of the controlled quantities (control signals) from the "
      "model simulations.");
  params.addRequiredParam<std::vector<ReporterName>>(
      "log_probability",
      "Reporters containing the log probabilities of the actions taken during the simulations.");
  params.addRequiredParam<ReporterName>(
      "reward", "Reporter containing the earned time-dependent rewards from the simulation.");
  params.addRangeCheckedParam<unsigned int>(
      "input_timesteps",
      1,
      "1<=input_timesteps",
      "Number of time steps to use in the input data; if larger than 1, "
      "data from the previous time steps will be used as inputs in the training.");
  params.addParam<unsigned int>("skip_num_rows",
                                1,
                                "Number of rows to ignore from training. We usually skip the 1st "
                                "row from the reporter since it contains only initial values.");

  params.addRequiredParam<unsigned int>("num_epochs", "Number of epochs for the training.");

  params.addRequiredRangeCheckedParam<Real>(
      "critic_learning_rate",
      "0<critic_learning_rate",
      "Learning rate (relaxation) for the emulator training.");
  params.addRequiredParam<std::vector<unsigned int>>(
      "num_critic_neurons_per_layer", "Number of neurons per layer in the emulator neural net.");
  params.addParam<std::vector<std::string>>(
      "critic_activation_functions",
      std::vector<std::string>({"relu"}),
      "The type of activation functions to use in the emulator neural net. It is either one value "
      "or one value per hidden layer.");

  params.addRequiredRangeCheckedParam<Real>(
      "control_learning_rate",
      "0<control_learning_rate",
      "Learning rate (relaxation) for the control neural net training.");
  params.addRequiredParam<std::vector<unsigned int>>(
      "num_control_neurons_per_layer",
      "Number of neurons per layer for the control neural network.");
  params.addParam<std::vector<std::string>>(
      "control_activation_functions",
      std::vector<std::string>({"relu"}),
      "The type of activation functions to use in the control neural net. It is either one value "
      "or one value per hidden layer.");

  params.addParam<std::string>("filename_base",
                               "Filename used to output the neural net parameters.");

  params.addParam<unsigned int>(
      "seed", 11, "Random number generator seed for stochastic optimizers.");

  params.addRequiredParam<std::vector<Real>>(
      "action_standard_deviations", "Standard deviation value used while sampling the actions.");

  params.addParam<Real>(
      "clip_parameter",
      0.2,
      "Clip parameter used while clamping the probability ratio in the PPO algorithm.");
  params.addRangeCheckedParam<unsigned int>(
      "update_frequency",
      1,
      "1<=update_frequency",
      "Number of transient simulation data to collect for updating the controller neural network.");

  params.addRangeCheckedParam<Real>(
      "decay_factor",
      1.0,
      "0.0<=decay_factor<=1.0",
      "Decay factor for calculating the return. This accounts for decreased "
      "reward values from the later steps.");

  params.addParam<bool>(
      "read_from_file", false, "Switch to read the neural network parameters from a file.");
  params.addParam<bool>(
      "shift_outputs",
      true,
      "If we would like to shift the outputs to realign the input-output pairs.");
  params.addParam<bool>(
      "standardize_advantage",
      true,
      "Switch to enable the shifting and normalization of the advantages in the PPO algorithm.");
  params.addParam<unsigned int>("loss_print_frequency",
                                0,
                                "The frequency at which the loss values are printed. If 0, the "
                                "loss values are not printed.");
  return params;
}
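
// --- Editor's note: illustrative sketch, not part of the original file. ---
// The "0<critic_learning_rate"-style strings above are parsed boolean expressions
// over the parameter name; MOOSE evaluates them when the parameter is set and
// rejects out-of-range values. A hypothetical object declaring its own
// range-checked parameter would follow the same pattern:

#include "InputParameters.h"

InputParameters
exampleValidParams()
{
  InputParameters params = emptyInputParameters();
  // Rejected when the input is parsed unless 0 < learning_rate holds
  params.addRangeCheckedParam<Real>(
      "learning_rate", 1e-3, "0<learning_rate", "Step size for the optimizer.");
  return params;
}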

LibtorchDRLControlTrainer::LibtorchDRLControlTrainer(const InputParameters & parameters)
  : SurrogateTrainerBase(parameters),
    _response_names(getParam<std::vector<ReporterName>>("response")),
    _response_shift_factors(isParamValid("response_shift_factors")
                                ? getParam<std::vector<Real>>("response_shift_factors")
                                : std::vector<Real>(_response_names.size(), 0.0)),
    _response_scaling_factors(isParamValid("response_scaling_factors")
                                  ? getParam<std::vector<Real>>("response_scaling_factors")
                                  : std::vector<Real>(_response_names.size(), 1.0)),
    _control_names(getParam<std::vector<ReporterName>>("control")),
    _log_probability_names(getParam<std::vector<ReporterName>>("log_probability")),
    _reward_name(getParam<ReporterName>("reward")),
    _reward_value_pointer(&getReporterValueByName<std::vector<Real>>(_reward_name)),
    _input_timesteps(getParam<unsigned int>("input_timesteps")),
    _num_inputs(_input_timesteps * _response_names.size()),
    _num_outputs(_control_names.size()),
    _input_data(std::vector<std::vector<Real>>(_num_inputs)),
    _output_data(std::vector<std::vector<Real>>(_num_outputs)),
    _log_probability_data(std::vector<std::vector<Real>>(_num_outputs)),
    _num_epochs(getParam<unsigned int>("num_epochs")),
    _num_critic_neurons_per_layer(
        getParam<std::vector<unsigned int>>("num_critic_neurons_per_layer")),
    _critic_learning_rate(getParam<Real>("critic_learning_rate")),
    _num_control_neurons_per_layer(
        getParam<std::vector<unsigned int>>("num_control_neurons_per_layer")),
    _control_learning_rate(getParam<Real>("control_learning_rate")),
    _update_frequency(getParam<unsigned int>("update_frequency")),
    _clip_param(getParam<Real>("clip_parameter")),
    _decay_factor(getParam<Real>("decay_factor")),
    _action_std(getParam<std::vector<Real>>("action_standard_deviations")),
    _filename_base(isParamValid("filename_base") ? getParam<std::string>("filename_base") : ""),
    _read_from_file(getParam<bool>("read_from_file")),
    _shift_outputs(getParam<bool>("shift_outputs")),
    _standardize_advantage(getParam<bool>("standardize_advantage")),
    _loss_print_frequency(getParam<unsigned int>("loss_print_frequency")),
    _update_counter(_update_frequency)
{
  if (_response_names.size() != _response_shift_factors.size())
    paramError("response_shift_factors",
               "The number of shift factors is not the same as the number of responses!");

  if (_response_names.size() != _response_scaling_factors.size())
    paramError(
        "response_scaling_factors",
        "The number of normalization coefficients is not the same as the number of responses!");

  // We establish the links with the chosen reporters
  getReporterPointers(_response_names, _response_value_pointers);
  getReporterPointers(_control_names, _control_value_pointers);
  getReporterPointers(_log_probability_names, _log_probability_value_pointers);

  // Fixing the RNG seed to make sure every experiment is the same.
  // Otherwise sampling / stochastic gradient descent would be different.
  torch::manual_seed(getParam<unsigned int>("seed"));

  // Convert the user input standard deviations to a diagonal tensor
  _std = torch::eye(_control_names.size());
  for (unsigned int i = 0; i < _control_names.size(); ++i)
    _std[i][i] = _action_std[i];

  bool filename_valid = isParamValid("filename_base");

  // Initializing the control neural net so that the control can grab it right away
  _control_nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(
      filename_valid ? _filename_base + "_control.net" : "control.net",
      _num_inputs,
      _num_outputs,
      _num_control_neurons_per_layer,
      getParam<std::vector<std::string>>("control_activation_functions"));

  // We read parameters for the control neural net if it is requested
  if (_read_from_file)
  {
    try
    {
      torch::load(_control_nn, _control_nn->name());
      _console << "Loaded requested .pt file." << std::endl;
    }
    catch (const c10::Error & e)
    {
      mooseError("The requested pytorch file could not be loaded for the control neural net.\n",
                 e.msg());
    }
  }
  else if (filename_valid)
    torch::save(_control_nn, _control_nn->name());

  // Initialize the critic neural net
  _critic_nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(
      filename_valid ? _filename_base + "_critic.net" : "critic.net",
      _num_inputs,
      1,
      _num_critic_neurons_per_layer,
      getParam<std::vector<std::string>>("critic_activation_functions"));

  // We read parameters for the critic neural net if it is requested
  if (_read_from_file)
  {
    try
    {
      torch::load(_critic_nn, _critic_nn->name());
      _console << "Loaded requested .pt file." << std::endl;
    }
    catch (const c10::Error & e)
    {
      mooseError("The requested pytorch file could not be loaded for the critic neural net.\n",
                 e.msg());
    }
  }
  else if (filename_valid)
    torch::save(_critic_nn, _critic_nn->name());
}
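
// --- Editor's note: illustrative sketch, not part of the original file. ---
// The load/save branches above are plain libtorch serialization. A minimal,
// self-contained round trip, using a stock torch::nn::Sequential in place of
// Moose::LibtorchArtificialNeuralNet (the file name is hypothetical):

#include <torch/torch.h>

int serializationSketch()
{
  torch::nn::Sequential net(torch::nn::Linear(4, 8), torch::nn::ReLU(), torch::nn::Linear(8, 2));

  // Mirrors the `else if (filename_valid)` branch: persist the freshly created net
  torch::save(net, "control_sketch.net");

  // Mirrors the `_read_from_file` branch: restore parameters into an identically
  // shaped net, guarding against a missing or incompatible file
  torch::nn::Sequential loaded(
      torch::nn::Linear(4, 8), torch::nn::ReLU(), torch::nn::Linear(8, 2));
  try
  {
    torch::load(loaded, "control_sketch.net");
  }
  catch (const c10::Error &)
  {
    return 1; // the constructor above turns this case into a mooseError()
  }
  return 0;
}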

void
LibtorchDRLControlTrainer::train()
{
  // Extract data from the reporters
  getInputDataFromReporter(_input_data, _response_value_pointers, _input_timesteps);
  getOutputDataFromReporter(_output_data, _control_value_pointers);
  getOutputDataFromReporter(_log_probability_data, _log_probability_value_pointers);
  getRewardDataFromReporter(_reward_data, _reward_value_pointer);

  // Calculate return from the reward (discounting the reward)
  computeRewardToGo();

  _update_counter--;

  // Only update the NNs when the prescribed number of transients has been collected
  if (_update_counter == 0)
  {
    // We compute the average reward first
    computeAverageEpisodeReward();

    // Transform input/output/return data to torch::Tensor
    convertDataToTensor(_input_data, _input_tensor);
    convertDataToTensor(_output_data, _output_tensor);
    convertDataToTensor(_log_probability_data, _log_probability_tensor);

    // Discard (detach) the gradient info for return data
    LibtorchUtils::vectorToTensor(_return_data, _return_tensor, true);

    // We train the controller using the emulator to get a good control strategy
    trainController();

    // We clean the training data after controller update and reset the counter
    resetData();
  }
}
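
// --- Editor's note: illustrative sketch, not part of the original file. ---
// train() is called once per transient simulation, but the networks are only
// updated after `update_frequency` transients have been collected. A compact,
// standalone version of this accumulate-then-update pattern (all names here
// are hypothetical):

#include <vector>

struct BatchingSketch
{
  unsigned int update_frequency = 3;
  unsigned int update_counter = 3;
  std::vector<double> return_data; // accumulated across episodes

  void onEpisodeEnd(const std::vector<double> & episode_returns)
  {
    return_data.insert(return_data.end(), episode_returns.begin(), episode_returns.end());
    if (--update_counter == 0)
    {
      // ... update the actor/critic on the accumulated batch here ...
      return_data.clear();
      update_counter = update_frequency; // same reset that resetData() performs
    }
  }
};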

void
LibtorchDRLControlTrainer::computeAverageEpisodeReward()
{
  if (_reward_data.size())
    _average_episode_reward =
        std::accumulate(_reward_data.begin(), _reward_data.end(), 0.0) / _reward_data.size();
  else
    _average_episode_reward = 0.0;
}

void
LibtorchDRLControlTrainer::computeRewardToGo()
{
  // Get reward data from one simulation
  std::vector<Real> reward_data_per_sim;
  std::vector<Real> return_data_per_sim;
  getRewardDataFromReporter(reward_data_per_sim, _reward_value_pointer);

  // Discount the reward to get the return value; we need this to be able to anticipate
  // rewards based on the current behavior.
  Real discounted_reward(0.0);
  for (int i = reward_data_per_sim.size() - 1; i >= 0; --i)
  {
    discounted_reward = reward_data_per_sim[i] + discounted_reward * _decay_factor;

    // We insert at the front of the vector and push the rest back; this ensures that the
    // first element of the vector is the discounted reward for the whole transient
    return_data_per_sim.insert(return_data_per_sim.begin(), discounted_reward);
  }

  // Save and accumulate the return values
  _return_data.insert(_return_data.end(), return_data_per_sim.begin(), return_data_per_sim.end());
}
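
// --- Editor's note: illustrative sketch, not part of the original file. ---
// The loop above is the standard reward-to-go recursion R_i = r_i + gamma * R_{i+1}.
// Filling the result back-to-front produces the same values as the insert-at-front
// of the original (which is quadratic in the number of time steps) in linear time.
// With rewards {1, 1, 1} and a decay factor of 0.9 it yields {2.71, 1.9, 1.0}:

#include <vector>

std::vector<double> rewardToGoSketch(const std::vector<double> & rewards, const double gamma)
{
  std::vector<double> returns(rewards.size());
  double discounted = 0.0;
  for (int i = rewards.size() - 1; i >= 0; --i)
    returns[i] = discounted = rewards[i] + gamma * discounted;
  return returns;
}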

void
LibtorchDRLControlTrainer::trainController()
{
  // Define the optimizers for the training
  torch::optim::Adam actor_optimizer(_control_nn->parameters(),
                                     torch::optim::AdamOptions(_control_learning_rate));

  torch::optim::Adam critic_optimizer(_critic_nn->parameters(),
                                      torch::optim::AdamOptions(_critic_learning_rate));

  // Compute the approximate value (return) from the critic neural net and use it to compute an
  // advantage
  auto value = evaluateValue(_input_tensor).detach();
  auto advantage = _return_tensor - value;

  // If requested, standardize the advantage
  if (_standardize_advantage)
    advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10);

  for (unsigned int epoch = 0; epoch < _num_epochs; ++epoch)
  {
    // Get the approximate return from the neural net again (this one does have an associated
    // gradient)
    value = evaluateValue(_input_tensor);
    // Get the approximate logarithmic action probability using the control neural net
    auto curr_log_probability = evaluateAction(_input_tensor, _output_tensor);

    // Prepare the ratio by using the e^(log x - log y) = x/y identity
    auto ratio = (curr_log_probability - _log_probability_tensor).exp();

    // Clamp the ratio to limit the size of the policy update
    auto surr1 = ratio * advantage;
    auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage;

    // Compute loss values for the critic and the control neural net
    auto actor_loss = -torch::min(surr1, surr2).mean();
    auto critic_loss = torch::mse_loss(value, _return_tensor);

    // Update the weights in the neural nets
    actor_optimizer.zero_grad();
    actor_loss.backward();
    actor_optimizer.step();

    critic_optimizer.zero_grad();
    critic_loss.backward();
    critic_optimizer.step();

    // Print the loss per epoch, if requested
    if (_loss_print_frequency)
      if (epoch % _loss_print_frequency == 0)
        _console << "Epoch: " << epoch << " | Actor Loss: " << COLOR_GREEN
                 << actor_loss.item<double>() << COLOR_DEFAULT << " | Critic Loss: " << COLOR_GREEN
                 << critic_loss.item<double>() << COLOR_DEFAULT << std::endl;
  }

  // Save the controller neural net so our controller can read it; we also save the critic if we
  // want to continue training
  torch::save(_control_nn, _control_nn->name());
  torch::save(_critic_nn, _critic_nn->name());
}
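
// --- Editor's note: illustrative sketch, not part of the original file. ---
// The actor update above is the clipped PPO surrogate objective
//   L = -mean(min(ratio * A, clamp(ratio, 1 - eps, 1 + eps) * A)).
// The standalone toy below traces one evaluation with concrete numbers,
// assuming only libtorch:

#include <torch/torch.h>
#include <iostream>

int main()
{
  torch::Tensor old_log_p = torch::tensor({-1.0}); // stored during the simulation
  torch::Tensor new_log_p = torch::tensor({-0.5}); // current policy
  torch::Tensor advantage = torch::tensor({2.0});
  const double clip = 0.2;

  auto ratio = (new_log_p - old_log_p).exp(); // e^(-0.5 + 1.0) ~ 1.649
  auto surr1 = ratio * advantage;             // ~ 3.297
  auto surr2 = torch::clamp(ratio, 1.0 - clip, 1.0 + clip) * advantage; // 1.2 * 2 = 2.4
  auto actor_loss = -torch::min(surr1, surr2).mean();                   // -2.4

  std::cout << actor_loss.item<double>() << std::endl;
  return 0;
}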

void
LibtorchDRLControlTrainer::convertDataToTensor(std::vector<std::vector<Real>> & vector_data,
                                               torch::Tensor & tensor_data,
                                               const bool detach)
{
  for (unsigned int i = 0; i < vector_data.size(); ++i)
  {
    torch::Tensor input_row;
    LibtorchUtils::vectorToTensor(vector_data[i], input_row, detach);

    if (i == 0)
      tensor_data = input_row;
    else
      tensor_data = torch::cat({tensor_data, input_row}, 1);
  }

  // Note: detach() is not in-place, so the result must be assigned back
  if (detach)
    tensor_data = tensor_data.detach();
}
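
// --- Editor's note: illustrative sketch, not part of the original file. ---
// Each entry of vector_data becomes one column of the resulting tensor, assuming
// LibtorchUtils::vectorToTensor yields a column tensor with one entry per sample
// (which is what the concatenation along dimension 1 implies). Minimal sketch:

#include <torch/torch.h>
#include <iostream>

int main()
{
  auto col_a = torch::tensor({1.0, 2.0, 3.0}).reshape({3, 1});
  auto col_b = torch::tensor({4.0, 5.0, 6.0}).reshape({3, 1});

  // One row per sample, one column per feature: [[1, 4], [2, 5], [3, 6]]
  auto batch = torch::cat({col_a, col_b}, 1);
  std::cout << batch << std::endl;
  return 0;
}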

torch::Tensor
LibtorchDRLControlTrainer::evaluateValue(torch::Tensor & input)
{
  return _critic_nn->forward(input);
}

torch::Tensor
LibtorchDRLControlTrainer::evaluateAction(torch::Tensor & input, torch::Tensor & output)
{
  torch::Tensor var = torch::matmul(_std, _std);

  // Compute an action and get its logarithmic probability based on an assumed Gaussian distribution
  torch::Tensor action = _control_nn->forward(input);
  return -((action - output) * (action - output)) / (2 * var) - torch::log(_std) -
         std::log(std::sqrt(2 * M_PI));
}
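
// --- Editor's note: illustrative sketch, not part of the original file. ---
// The returned expression is the elementwise log-density of a Gaussian whose mean
// is the network output. A scalar version of the same formula; at x = mu with
// sigma = 1 it gives -log(sqrt(2*pi)) ~ -0.9189:

#include <cmath>
#include <cstdio>

double gaussianLogProbSketch(const double x, const double mu, const double sigma)
{
  return -(x - mu) * (x - mu) / (2 * sigma * sigma) - std::log(sigma) -
         std::log(std::sqrt(2 * M_PI));
}

int main()
{
  std::printf("%f\n", gaussianLogProbSketch(0.0, 0.0, 1.0));
  return 0;
}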

void
LibtorchDRLControlTrainer::resetData()
{
  for (auto & data : _input_data)
    data.clear();
  for (auto & data : _output_data)
    data.clear();
  for (auto & data : _log_probability_data)
    data.clear();

  _reward_data.clear();
  _return_data.clear();

  _update_counter = _update_frequency;
}

void
LibtorchDRLControlTrainer::getInputDataFromReporter(
    std::vector<std::vector<Real>> & data,
    const std::vector<const std::vector<Real> *> & reporter_links,
    const unsigned int num_timesteps)
{
  for (const auto & rep_i : index_range(reporter_links))
  {
    std::vector<Real> reporter_data = *reporter_links[rep_i];

    // We shift and scale the inputs to get better training efficiency
    std::transform(
        reporter_data.begin(),
        reporter_data.end(),
        reporter_data.begin(),
        [this, &rep_i](Real value) -> Real
        { return (value - _response_shift_factors[rep_i]) * _response_scaling_factors[rep_i]; });

    // Fill the corresponding containers
    for (const auto & start_step : make_range(num_timesteps))
    {
      unsigned int row = reporter_links.size() * start_step + rep_i;
      for (unsigned int fill_i = 1; fill_i < num_timesteps - start_step; ++fill_i)
        data[row].push_back(reporter_data[0]);

      data[row].insert(data[row].end(),
                       reporter_data.begin(),
                       reporter_data.begin() + start_step + reporter_data.size() -
                           (num_timesteps - 1) - _shift_outputs);
    }
  }
}
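
// --- Editor's note: illustrative sketch, not part of the original file. ---
// The index arithmetic above is easiest to see on a concrete case: one response
// reporter with hypothetical values {10, 11, 12, 13}, num_timesteps = 2, and
// shift_outputs = true. Row 0 is front-padded with the initial value and holds the
// lagged responses, row 1 holds the current ones, and the last reporter value is
// dropped so each input lines up with the control action it produced:

#include <cassert>
#include <vector>

int main()
{
  const std::vector<double> reporter_data = {10.0, 11.0, 12.0, 13.0};
  const unsigned int num_timesteps = 2;
  const bool shift_outputs = true;

  std::vector<std::vector<double>> data(num_timesteps);
  for (unsigned int start_step = 0; start_step < num_timesteps; ++start_step)
  {
    for (unsigned int fill_i = 1; fill_i < num_timesteps - start_step; ++fill_i)
      data[start_step].push_back(reporter_data[0]); // pad missing history

    data[start_step].insert(data[start_step].end(),
                            reporter_data.begin(),
                            reporter_data.begin() + start_step + reporter_data.size() -
                                (num_timesteps - 1) - shift_outputs);
  }

  assert((data[0] == std::vector<double>{10.0, 10.0, 11.0})); // lagged responses
  assert((data[1] == std::vector<double>{10.0, 11.0, 12.0})); // current responses
  return 0;
}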

void
LibtorchDRLControlTrainer::getOutputDataFromReporter(
    std::vector<std::vector<Real>> & data,
    const std::vector<const std::vector<Real> *> & reporter_links)
{
  for (const auto & rep_i : index_range(reporter_links))
    // Fill the corresponding containers
    data[rep_i].insert(data[rep_i].end(),
                       reporter_links[rep_i]->begin() + _shift_outputs,
                       reporter_links[rep_i]->end());
}

void
LibtorchDRLControlTrainer::getRewardDataFromReporter(std::vector<Real> & data,
                                                     const std::vector<Real> * const reporter_link)
{
  // Fill the corresponding container
  data.insert(data.end(), reporter_link->begin() + _shift_outputs, reporter_link->end());
}

void
LibtorchDRLControlTrainer::getReporterPointers(
    const std::vector<ReporterName> & reporter_names,
    std::vector<const std::vector<Real> *> & pointer_storage)
{
  pointer_storage.clear();
  for (const auto & name : reporter_names)
    pointer_storage.push_back(&getReporterValueByName<std::vector<Real>>(name));
}

#endif