//* This file is part of the MOOSE framework
//* https://mooseframework.inl.gov
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#ifdef MOOSE_LIBTORCH_ENABLED

#pragma once

#include <torch/torch.h>
#include "LibtorchArtificialNeuralNet.h"

#include "libmesh/utility.h"
#include "SurrogateTrainer.h"

/**
 * This trainer is responsible for training neural networks that efficiently control
 * different processes. It utilizes the Proximal Policy Optimization (PPO) algorithm. For more
 * information on the algorithm, see the following resources:
 * Schulman, John, et al. "Proximal policy optimization algorithms." arXiv preprint
 * arXiv:1707.06347 (2017).
 * https://medium.com/analytics-vidhya/coding-ppo-from-scratch-with-pytorch-part-1-4-613dfc1b14c8
 * https://stable-baselines.readthedocs.io/en/master/modules/ppo2.html
 */
class LibtorchDRLControlTrainer : public SurrogateTrainerBase
{
public:
  static InputParameters validParams();

  /// Construct using input parameters
  LibtorchDRLControlTrainer(const InputParameters & parameters);

  virtual void execute() override;

  /**
   * Function which returns the current average episodic reward. It is only updated
   * at the end of every episode.
   */
  Real averageEpisodeReward() { return _average_episode_reward; }

  /// The condensed training function
  void trainController();
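
  // As an illustrative sketch only: the condensed training step optimizes the clipped PPO
  // surrogate objective for the control (actor) network, while the critic is fitted to the
  // discounted returns. The variable names below are hypothetical, not members of this class:
  //
  //   auto ratio       = torch::exp(curr_log_prob - _log_probability_tensor);
  //   auto surr1       = ratio * advantage;
  //   auto surr2       = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage;
  //   auto actor_loss  = -torch::min(surr1, surr2).mean();
  //   auto critic_loss = torch::mse_loss(value, _return_tensor);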

  const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const { return *_control_nn; }

protected:
  /// Compute the average episodic reward
  void computeAverageEpisodeReward();

  /**
   * Function to convert input/output data from std::vector<std::vector<Real>> to torch::Tensor
   * @param vector_data The input data in vector-of-vectors format
   * @param tensor_data The tensor where we would like to save the results
   * @param detach If the gradient info needs to be detached from the tensor
   */
  void convertDataToTensor(std::vector<std::vector<Real>> & vector_data,
                           torch::Tensor & tensor_data,
                           const bool detach = false);
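
  // A minimal conversion sketch, assuming one row per quantity of interest (illustrative only,
  // not necessarily the member implementation):
  //
  //   std::vector<torch::Tensor> rows;
  //   for (const auto & row : vector_data)
  //     rows.push_back(torch::tensor(row, torch::kFloat64));
  //   tensor_data = torch::stack(rows);
  //   if (detach)
  //     tensor_data = tensor_data.detach();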

  /**
   * Function which evaluates the critic to get the value (discounted reward)
   * @param input The observation values (responses)
   * @return The estimated value
   */
  torch::Tensor evaluateValue(torch::Tensor & input);

  /**
   * Function which evaluates the control net and then computes the logarithmic probability of
   * the action
   * @param input The observation values (responses)
   * @param output The actions corresponding to the observations
   * @return The estimated value for the logarithmic probability
   */
  torch::Tensor evaluateAction(torch::Tensor & input, torch::Tensor & output);
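
  // For reference, the log-probability of an action under a diagonal Gaussian policy with mean
  // given by the control network and standard deviation _std could be computed along these
  // lines (an assumed formulation, variable names illustrative):
  //
  //   auto mean  = _control_nn->forward(input);
  //   auto var   = _std * _std;
  //   auto log_p = -(output - mean) * (output - mean) / (2.0 * var)
  //                - torch::log(_std) - 0.5 * std::log(2.0 * M_PI);
  //   return log_p.sum(1);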

  /// Compute the return value by discounting the rewards and summing them
  void computeRewardToGo();
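
  // Sketch of a standard reward-to-go recursion, R_t = r_t + gamma * R_{t+1}, iterated
  // backwards over the collected rewards (gamma corresponds to _decay_factor; illustrative
  // only):
  //
  //   Real discounted_reward = 0.0;
  //   _return_data.assign(_reward_data.size(), 0.0);
  //   for (int i = _reward_data.size() - 1; i >= 0; --i)
  //   {
  //     discounted_reward = _reward_data[i] + _decay_factor * discounted_reward;
  //     _return_data[i] = discounted_reward;
  //   }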

  /// Reset data after updating the neural network
  void resetData();

  /// Response reporter names
  const std::vector<ReporterName> _response_names;

  /// Pointers to the current values of the responses
  std::vector<const std::vector<Real> *> _response_value_pointers;

  /// Shifting constants for the responses
  const std::vector<Real> _response_shift_factors;

  /// Scaling constants for the responses
  const std::vector<Real> _response_scaling_factors;

  /// Control reporter names
  const std::vector<ReporterName> _control_names;

  /// Pointers to the current values of the control signals
  std::vector<const std::vector<Real> *> _control_value_pointers;

  /// Log probability reporter names
  const std::vector<ReporterName> _log_probability_names;

  /// Pointers to the current values of the control log probabilities
  std::vector<const std::vector<Real> *> _log_probability_value_pointers;

  /// Reward reporter name
  const ReporterName _reward_name;

  /// Pointer to the current values of the reward
  const std::vector<Real> * _reward_value_pointer;

  /// Number of timesteps to fetch from the reporters to be the input of the neural nets
  const unsigned int _input_timesteps;

  /// Number of inputs for the control and critic neural nets
  unsigned int _num_inputs;
  /// Number of outputs for the control neural network
  unsigned int _num_outputs;

  ///@{
  /// The gathered data from the reporters, each row represents one QoI, each column represents one time step
  std::vector<std::vector<Real>> _input_data;
  std::vector<std::vector<Real>> _output_data;
  std::vector<std::vector<Real>> _log_probability_data;
  ///@}

  ///@{
  /// The reward and return data. The return is calculated using the _reward_data
  std::vector<Real> _reward_data;
  std::vector<Real> _return_data;
  ///@}

  /// Number of epochs used for training the neural networks
  const unsigned int _num_epochs;

  /// Number of neurons within the hidden layers in the critic neural net
  const std::vector<unsigned int> _num_critic_neurons_per_layer;

  /// The learning rate for the optimization algorithm for the critic
  const Real _critic_learning_rate;

  /// Number of neurons within the hidden layers in the control neural net
  const std::vector<unsigned int> _num_control_neurons_per_layer;

  /// The learning rate for the optimization algorithm for the control
  const Real _control_learning_rate;

  /// Number of transients to run and collect data from before updating the controller neural net.
  const unsigned int _update_frequency;

  /// The clip parameter used while clamping the advantage value
  const Real _clip_param;

  /// Decaying factor that is used when calculating the return from the reward
  const Real _decay_factor;

  /// Standard deviation for the actions
  const std::vector<Real> _action_std;

  /// Name of the pytorch output file. This is used for loading and storing
  /// already existing data
  const std::string _filename_base;

  /// Switch indicating if an already existing neural net should be read from a
  /// file or not. This can be used to load existing torch files (from previous
  /// MOOSE runs for retraining and further manipulation)
  const bool _read_from_file;

  /// Currently, in MOOSE, the controls are executed after the user objects on INITIAL. Using a
  /// shift can therefore realign the corresponding input-output values while reading the
  /// reporters.
  const bool _shift_outputs;

  /// Storage for the current average episode reward
  Real _average_episode_reward;

  /// Switch to enable the standardization of the advantages
  const bool _standardize_advantage;
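
  // When enabled, the advantages are typically standardized before the policy update, along
  // the lines of the sketch below (assumed form, the epsilon is purely illustrative):
  //
  //   advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10);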

  /// The frequency with which the loss should be printed
  const unsigned int _loss_print_frequency;

  /// Pointer to the control (or actor) neural net object
  std::shared_ptr<Moose::LibtorchArtificialNeuralNet> _control_nn;
  /// Pointer to the critic neural net object
  std::shared_ptr<Moose::LibtorchArtificialNeuralNet> _critic_nn;

  /// Standard deviation in a tensor format for sampling the actual control value
  torch::Tensor _std;

  /// torch::Tensor versions of the input, action, return, and log-probability data
  torch::Tensor _input_tensor;
  torch::Tensor _output_tensor;
  torch::Tensor _return_tensor;
  torch::Tensor _log_probability_tensor;

private:
  /**
   * Extract the response values from the postprocessors of the controlled system.
   * This assumes that they are stored in an AccumulateReporter.
   * @param data The data where we would like to store the response values
   * @param reporter_links Pointers to the reporter values which need to be extracted
   * @param num_timesteps The number of timesteps we want to use for training
   */
  void getInputDataFromReporter(std::vector<std::vector<Real>> & data,
                                const std::vector<const std::vector<Real> *> & reporter_links,
                                const unsigned int num_timesteps);
  /**
   * Extract the output (actions, logarithmic probabilities) values from the postprocessors
   * of the controlled system. This assumes that they are stored in an AccumulateReporter.
   * @param data The data where we would like to store the output values
   * @param reporter_links Pointers to the reporter values which need to be extracted
   */
  void getOutputDataFromReporter(std::vector<std::vector<Real>> & data,
                                 const std::vector<const std::vector<Real> *> & reporter_links);

  /**
   * Extract the reward values from the postprocessors of the controlled system.
   * This assumes that they are stored in an AccumulateReporter.
   * @param data The data where we would like to store the reward values
   * @param reporter_link Pointer to the reporter values which need to be extracted
   */
  void getRewardDataFromReporter(std::vector<Real> & data,
                                 const std::vector<Real> * const reporter_link);
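
  // A simplified sketch of how such an extraction could look, assuming each linked reporter
  // vector holds one accumulated entry per completed time step (illustrative only):
  //
  //   for (const auto & reporter : reporter_links)
  //     data.emplace_back(reporter->end() - num_timesteps, reporter->end());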

  /// Get the reporter pointers for the given reporter names
  void getReporterPointers(const std::vector<ReporterName> & reporter_names,
                           std::vector<const std::vector<Real> *> & pointer_storage);

  /// Counter for the number of transient simulations that have been run before updating the controller
  unsigned int _update_counter;
};

#endif
