//* This file is part of the MOOSE framework
//* https://mooseframework.inl.gov
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#ifdef MOOSE_LIBTORCH_ENABLED

#pragma once

#include <torch/torch.h>
#include "LibtorchArtificialNeuralNet.h"

#include "libmesh/utility.h"
#include "SurrogateTrainer.h"

/**
 * This trainer is responsible for training neural networks that efficiently control
 * different processes. It utilizes the Proximal Policy Optimization (PPO) algorithm. For more
 * information on the algorithm, see the following resources:
 * Schulman, John, et al. "Proximal policy optimization algorithms." arXiv preprint
 * arXiv:1707.06347 (2017).
 * https://medium.com/analytics-vidhya/coding-ppo-from-scratch-with-pytorch-part-1-4-613dfc1b14c8
 * https://stable-baselines.readthedocs.io/en/master/modules/ppo2.html
 */
class LibtorchDRLControlTrainer : public SurrogateTrainerBase
{
public:
  static InputParameters validParams();

  /// Construct using input parameters
  LibtorchDRLControlTrainer(const InputParameters & parameters);

  virtual void execute() override;

  /**
   * Function which returns the current average episodic reward. It is only updated
   * at the end of every episode.
   */
  Real averageEpisodeReward() { return _average_episode_reward; }

  /// The condensed training function
  void trainController();

  const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const { return *_control_nn; }

protected:
  /// Compute the average episodic reward
  void computeAverageEpisodeReward();

  /**
   * Function to convert input/output data from std::vector<std::vector<Real>> to torch::Tensor
   * @param vector_data The input data in vector-of-vectors format
   * @param tensor_data The tensor where we would like to save the results
   * @param detach Whether the gradient info needs to be detached from the tensor
   */
  void convertDataToTensor(std::vector<std::vector<Real>> & vector_data,
                           torch::Tensor & tensor_data,
                           const bool detach = false);

  /**
   * Function which evaluates the critic to get the value (discounted reward)
   * @param input The observation values (responses)
   * @return The estimated value
   */
  torch::Tensor evaluateValue(torch::Tensor & input);

  /**
   * Function which evaluates the control net and then computes the logarithmic probability of the
   * action
   * @param input The observation values (responses)
   * @param output The actions corresponding to the observations
   * @return The estimated value of the logarithmic probability
   */
  torch::Tensor evaluateAction(torch::Tensor & input, torch::Tensor & output);

  /// Compute the return value by discounting the rewards and summing them
  void computeRewardToGo();
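  // Note: the reward-to-go above follows the standard discounted-return recurrence
  // G_t = r_t + gamma * G_{t+1}, accumulated backwards over the collected rewards.
  // A minimal sketch, assuming the rewards are stored oldest-first and gamma plays
  // the role of _decay_factor (illustrative only; see the source file for the actual logic):
  //
  //   Real running_return = 0.0;
  //   for (int i = rewards.size() - 1; i >= 0; --i)
  //   {
  //     running_return = rewards[i] + decay_factor * running_return;
  //     returns[i] = running_return;
  //   }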
  /// Reset data after updating the neural network
  void resetData();

  /// Response reporter names
  const std::vector<ReporterName> _response_names;

  /// Pointers to the current values of the responses
  std::vector<const std::vector<Real> *> _response_value_pointers;

  /// Shifting constants for the responses
  const std::vector<Real> _response_shift_factors;

  /// Scaling constants for the responses
  const std::vector<Real> _response_scaling_factors;

  /// Control reporter names
  const std::vector<ReporterName> _control_names;

  /// Pointers to the current values of the control signals
  std::vector<const std::vector<Real> *> _control_value_pointers;

  /// Log probability reporter names
  const std::vector<ReporterName> _log_probability_names;

  /// Pointers to the current values of the control log probabilities
  std::vector<const std::vector<Real> *> _log_probability_value_pointers;

  /// Reward reporter name
  const ReporterName _reward_name;

  /// Pointer to the current values of the reward
  const std::vector<Real> * _reward_value_pointer;

  /// Number of timesteps to fetch from the reporters to serve as the input of the neural nets
  const unsigned int _input_timesteps;

  /// Number of inputs for the control and critic neural nets
  unsigned int _num_inputs;
  /// Number of outputs for the control neural network
  unsigned int _num_outputs;

  ///@{
  /// The gathered data from the reporters; each row represents one QoI, each column one time step
  std::vector<std::vector<Real>> _input_data;
  std::vector<std::vector<Real>> _output_data;
  std::vector<std::vector<Real>> _log_probability_data;
  ///@}

  ///@{
  /// The reward and return data. The return is calculated using the _reward_data
  std::vector<Real> _reward_data;
  std::vector<Real> _return_data;
  ///@}

  /// Number of epochs for the training of the emulator
  const unsigned int _num_epochs;

  /// Number of neurons within the hidden layers of the critic neural net
  const std::vector<unsigned int> _num_critic_neurons_per_layer;

  /// The learning rate for the optimization algorithm of the critic
  const Real _critic_learning_rate;

  /// Number of neurons within the hidden layers of the control neural net
  const std::vector<unsigned int> _num_control_neurons_per_layer;

  /// The learning rate for the optimization algorithm of the control
  const Real _control_learning_rate;

  /// Number of transients to run and collect data from before updating the controller neural net
  const unsigned int _update_frequency;

  /// The clip parameter used while clamping the advantage value
  const Real _clip_param;

  /// Decaying factor that is used when calculating the return from the reward
  const Real _decay_factor;

  /// Standard deviation for the actions
  const std::vector<Real> _action_std;

  /// Name of the pytorch output file. This is used for loading and storing
  /// already existing data
  const std::string _filename_base;

  /// Switch indicating if an already existing neural net should be read from a
  /// file or not. This can be used to load existing torch files (from previous
  /// MOOSE runs) for retraining and further manipulation
  const bool _read_from_file;
  /// Currently, the controls are executed after the user objects at INITIAL in MOOSE,
  /// so using a shift can realign the corresponding input-output values while reading the
  /// reporters
  const bool _shift_outputs;

  /// Storage for the current average episode reward
  Real _average_episode_reward;

  /// Switch to enable the standardization of the advantages
  const bool _standardize_advantage;

  /// The frequency with which the loss should be printed
  const unsigned int _loss_print_frequency;

  /// Pointer to the control (or actor) neural net object
  std::shared_ptr<Moose::LibtorchArtificialNeuralNet> _control_nn;
  /// Pointer to the critic neural net object
  std::shared_ptr<Moose::LibtorchArtificialNeuralNet> _critic_nn;

  /// Standard deviation in a tensor format for sampling the actual control value
  torch::Tensor _std;

  /// torch::Tensor versions of the input and action data
  torch::Tensor _input_tensor;
  torch::Tensor _output_tensor;
  torch::Tensor _return_tensor;
  torch::Tensor _log_probability_tensor;

private:
  /**
   * Extract the response values from the postprocessors of the controlled system.
   * This assumes that they are stored in an AccumulateReporter.
   * @param data The data where we would like to store the response values
   * @param reporter_links Pointers to the reporters whose values need to be extracted
   * @param num_timesteps The number of timesteps we want to use for training
   */
  void getInputDataFromReporter(std::vector<std::vector<Real>> & data,
                                const std::vector<const std::vector<Real> *> & reporter_links,
                                const unsigned int num_timesteps);
  /**
   * Extract the output values (actions, logarithmic probabilities) from the postprocessors
   * of the controlled system. This assumes that they are stored in an AccumulateReporter.
   * @param data The data where we would like to store the output values
   * @param reporter_links Pointers to the reporters whose values need to be extracted
   */
  void getOutputDataFromReporter(std::vector<std::vector<Real>> & data,
                                 const std::vector<const std::vector<Real> *> & reporter_links);

  /**
   * Extract the reward values from the postprocessors of the controlled system.
   * This assumes that they are stored in an AccumulateReporter.
   * @param data The data where we would like to store the reward values
   * @param reporter_link Pointer to the reporter whose values need to be extracted
   */
  void getRewardDataFromReporter(std::vector<Real> & data,
                                 const std::vector<Real> * const reporter_link);

  /// Get the reporter pointers for the given reporter names
  void getReporterPointers(const std::vector<ReporterName> & reporter_names,
                           std::vector<const std::vector<Real> *> & pointer_storage);

  /// Counter for the number of transient simulations that have been run before updating the controller
  unsigned int _update_counter;
};

#endif
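// For orientation, the PPO update referenced in the class documentation above optimizes a
// clipped surrogate objective using _clip_param. A minimal libtorch sketch, assuming the
// new/old log probabilities, advantages, values, and returns have already been assembled
// as tensors (illustrative only; the actual loss is computed in trainController()):
//
//   torch::Tensor ratio = torch::exp(new_log_probs - old_log_probs);
//   torch::Tensor surr1 = ratio * advantage;
//   torch::Tensor surr2 = torch::clamp(ratio, 1.0 - clip_param, 1.0 + clip_param) * advantage;
//   torch::Tensor actor_loss = -torch::min(surr1, surr2).mean();
//   torch::Tensor critic_loss = torch::mse_loss(value, return_to_go);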