10 #ifdef LIBTORCH_ENABLED 27 "Trains a neural network controller using the Proximal Policy Optimization (PPO) algorithm.");
30 "response",
"Reporter values containing the response values from the model.");
32 "response_shift_factors",
33 "A shift constant which will be used to shift the response values. This is used for the " 34 "manipulation of the neural net inputs for better training efficiency.");
36 "response_scaling_factors",
37 "A normalization constant which will be used to divide the response values. This is used for " 38 "the manipulation of the neural net inputs for better training efficiency.");
41 "Reporters containing the values of the controlled quantities (control signals) from the " 42 "model simulations.");
45 "Reporters containing the log probabilities of the actions taken during the simulations.");
47 "reward",
"Reporter containing the earned time-dependent rewards from the simulation.");
52 "Number of time steps to use in the input data, if larger than 1, " 53 "data from the previous timesteps will be used as inputs in the training.");
54 params.
addParam<
unsigned int>(
"skip_num_rows",
56 "Number of rows to ignore from training. We usually skip the 1st " 57 "row from the reporter since it contains only initial values.");
59 params.
addRequiredParam<
unsigned int>(
"num_epochs",
"Number of epochs for the training.");
62 "critic_learning_rate",
63 "0<critic_learning_rate",
64 "Learning rate (relaxation) for the emulator training.");
66 "num_critic_neurons_per_layer",
"Number of neurons per layer in the emulator neural net.");
67 params.
addParam<std::vector<std::string>>(
68 "critic_activation_functions",
69 std::vector<std::string>({
"relu"}),
70 "The type of activation functions to use in the emulator neural net. It is either one value " 71 "or one value per hidden layer.");
74 "control_learning_rate",
75 "0<control_learning_rate",
76 "Learning rate (relaxation) for the control neural net training.");
78 "num_control_neurons_per_layer",
79 "Number of neurons per layer for the control neural network.");
80 params.
addParam<std::vector<std::string>>(
81 "control_activation_functions",
82 std::vector<std::string>({
"relu"}),
83 "The type of activation functions to use in the control neural net. It " 84 "is either one value " 85 "or one value per hidden layer.");
87 params.
addParam<std::string>(
"filename_base",
88 "Filename used to output the neural net parameters.");
91 "seed", 11,
"Random number generator seed for stochastic optimizers.");
94 "action_standard_deviations",
"Standard deviation value used while sampling the actions.");
97 "clip_parameter", 0.2,
"Clip parameter used while clamping the advantage value.");
101 "1<=update_frequency",
102 "Number of transient simulation data to collect for updating the controller neural network.");
107 "0.0<=decay_factor<=1.0",
108 "Decay factor for calculating the return. This accounts for decreased " 109 "reward values from the later steps.");
112 "read_from_file",
false,
"Switch to read the neural network parameters from a file.");
116 "If we would like to shift the outputs the realign the input-output pairs.");
118 "standardize_advantage",
120 "Switch to enable the shifting and normalization of the advantages in the PPO algorithm.");
121 params.
addParam<
unsigned int>(
"loss_print_frequency",
123 "The frequency which is used to print the loss values. If 0, the " 124 "loss values are not printed.");
131 _response_shift_factors(isParamValid(
"response_shift_factors")
132 ? getParam<
std::vector<
Real>>(
"response_shift_factors")
133 :
std::vector<
Real>(_response_names.size(), 0.0)),
134 _response_scaling_factors(isParamValid(
"response_scaling_factors")
135 ? getParam<
std::vector<
Real>>(
"response_scaling_factors")
136 :
std::vector<
Real>(_response_names.size(), 1.0)),
138 _log_probability_names(getParam<
std::vector<
ReporterName>>(
"log_probability")),
140 _reward_value_pointer(&getReporterValueByName<
std::vector<
Real>>(_reward_name)),
141 _input_timesteps(getParam<unsigned
int>(
"input_timesteps")),
142 _num_inputs(_input_timesteps * _response_names.size()),
143 _num_outputs(_control_names.size()),
144 _input_data(
std::vector<
std::vector<
Real>>(_num_inputs)),
145 _output_data(
std::vector<
std::vector<
Real>>(_num_outputs)),
146 _log_probability_data(
std::vector<
std::vector<
Real>>(_num_outputs)),
147 _num_epochs(getParam<unsigned
int>(
"num_epochs")),
148 _num_critic_neurons_per_layer(
149 getParam<
std::vector<unsigned
int>>(
"num_critic_neurons_per_layer")),
150 _critic_learning_rate(getParam<
Real>(
"critic_learning_rate")),
151 _num_control_neurons_per_layer(
152 getParam<
std::vector<unsigned
int>>(
"num_control_neurons_per_layer")),
153 _control_learning_rate(getParam<
Real>(
"control_learning_rate")),
154 _update_frequency(getParam<unsigned
int>(
"update_frequency")),
155 _clip_param(getParam<
Real>(
"clip_parameter")),
156 _decay_factor(getParam<
Real>(
"decay_factor")),
157 _action_std(getParam<
std::vector<
Real>>(
"action_standard_deviations")),
158 _filename_base(isParamValid(
"filename_base") ? getParam<
std::string>(
"filename_base") :
""),
159 _read_from_file(getParam<bool>(
"read_from_file")),
160 _shift_outputs(getParam<bool>(
"shift_outputs")),
161 _standardize_advantage(getParam<bool>(
"standardize_advantage")),
162 _loss_print_frequency(getParam<unsigned
int>(
"loss_print_frequency")),
163 _update_counter(_update_frequency)
167 "The number of shift factors is not the same as the number of responses!");
171 "response_scaling_factors",
172 "The number of normalization coefficients is not the same as the number of responses!");
181 torch::manual_seed(getParam<unsigned int>(
"seed"));
191 _control_nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(
196 getParam<std::vector<std::string>>(
"control_activation_functions"));
204 _console <<
"Loaded requested .pt file." << std::endl;
206 catch (
const c10::Error & e)
208 mooseError(
"The requested pytorch file could not be loaded for the control neural net.\n",
212 else if (filename_valid)
216 _critic_nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(
221 getParam<std::vector<std::string>>(
"critic_activation_functions"));
229 _console <<
"Loaded requested .pt file." << std::endl;
231 catch (
const c10::Error & e)
233 mooseError(
"The requested pytorch file could not be loaded for the critic neural net.\n",
237 else if (filename_valid)
291 std::vector<Real> reward_data_per_sim;
292 std::vector<Real> return_data_per_sim;
297 Real discounted_reward(0.0);
298 for (
int i = reward_data_per_sim.size() - 1; i >= 0; --i)
300 discounted_reward = reward_data_per_sim[i] + discounted_reward *
_decay_factor;
304 return_data_per_sim.insert(return_data_per_sim.begin(), discounted_reward);
315 torch::optim::Adam actor_optimizer(
_control_nn->parameters(),
318 torch::optim::Adam critic_optimizer(
_critic_nn->parameters(),
328 advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10);
330 for (
unsigned int epoch = 0; epoch <
_num_epochs; ++epoch)
342 auto surr1 = ratio * advantage;
346 auto actor_loss = -torch::min(surr1, surr2).mean();
350 actor_optimizer.zero_grad();
351 actor_loss.backward();
352 actor_optimizer.step();
354 critic_optimizer.zero_grad();
355 critic_loss.backward();
356 critic_optimizer.step();
361 _console <<
"Epoch: " << epoch <<
" | Actor Loss: " << COLOR_GREEN
362 << actor_loss.item<
double>() << COLOR_DEFAULT <<
" | Critic Loss: " << COLOR_GREEN
363 << critic_loss.item<
double>() << COLOR_DEFAULT << std::endl;
374 torch::Tensor & tensor_data,
377 for (
unsigned int i = 0; i < vector_data.size(); ++i)
379 torch::Tensor input_row;
383 tensor_data = input_row;
385 tensor_data = torch::cat({tensor_data, input_row}, 1);
389 tensor_data.detach();
401 torch::Tensor var = torch::matmul(
_std,
_std);
404 torch::Tensor action =
_control_nn->forward(input);
405 return -((action - output) * (action - output)) / (2 * var) - torch::log(
_std) -
406 std::log(std::sqrt(2 * M_PI));
427 std::vector<std::vector<Real>> & data,
428 const std::vector<
const std::vector<Real> *> & reporter_links,
429 const unsigned int num_timesteps)
431 for (
const auto & rep_i :
index_range(reporter_links))
433 std::vector<Real> reporter_data = *reporter_links[rep_i];
437 reporter_data.begin(),
439 reporter_data.begin(),
444 for (
const auto & start_step :
make_range(num_timesteps))
446 unsigned int row = reporter_links.size() * start_step + rep_i;
447 for (
unsigned int fill_i = 1; fill_i < num_timesteps - start_step; ++fill_i)
448 data[row].push_back(reporter_data[0]);
450 data[row].insert(data[row].end(),
451 reporter_data.begin(),
452 reporter_data.begin() + start_step + reporter_data.size() -
460 std::vector<std::vector<Real>> & data,
461 const std::vector<
const std::vector<Real> *> & reporter_links)
463 for (
const auto & rep_i :
index_range(reporter_links))
465 data[rep_i].insert(data[rep_i].end(),
467 reporter_links[rep_i]->end());
472 const std::vector<Real> *
const reporter_link)
475 data.insert(data.end(), reporter_link->begin() +
_shift_outputs, reporter_link->end());
480 const std::vector<ReporterName> & reporter_names,
481 std::vector<
const std::vector<Real> *> & pointer_storage)
483 pointer_storage.clear();
484 for (
const auto &
name : reporter_names)
void computeAverageEpisodeReward()
Compute the average episodic reward.
torch::Tensor _log_probability_tensor
std::vector< std::vector< Real > > _input_data
Real _average_episode_reward
Storage for the current average episode reward.
torch::Tensor _input_tensor
Torch::tensor version of the input and action data.
const std::vector< ReporterName > _control_names
Control reporter names.
void getRewardDataFromReporter(std::vector< Real > &data, const std::vector< Real > *const reporter_link)
Extract the reward values from the postprocessors of the controlled system. This assumes that they are...
const std::vector< Real > _action_std
Standard deviation for the actions.
const bool _shift_outputs
Currently, the controls are executed after the user objects at initial in moose.
const T & getReporterValueByName(const ReporterName &reporter_name, const std::size_t time_index=0)
void getReporterPointers(const std::vector< ReporterName > &reporter_names, std::vector< const std::vector< Real > *> &pointer_storage)
Getting reporter pointers with given names.
torch::Tensor evaluateAction(torch::Tensor &input, torch::Tensor &output)
Function which evaluates the control net and then computes the logarithmic probability of the action...
const std::vector< unsigned int > _num_control_neurons_per_layer
Number of neurons within the hidden layers in the control neural net.
registerMooseObject("StochasticToolsApp", LibtorchDRLControlTrainer)
unsigned int _num_inputs
Number of inputs for the control and critic neural nets.
void convertDataToTensor(std::vector< std::vector< Real >> &vector_data, torch::Tensor &tensor_data, const bool detach=false)
Function to convert input/output data from std::vector<std::vector> to torch::tensor.
void trainController()
The condensed training function.
std::vector< const std::vector< Real > * > _response_value_pointers
Pointers to the current values of the responses.
const std::vector< Real > _response_shift_factors
Shifting constants for the responses.
const Real _clip_param
The clip parameter used while clamping the advantage value.
virtual const std::string & name() const
void getInputDataFromReporter(std::vector< std::vector< Real >> &data, const std::vector< const std::vector< Real > *> &reporter_links, const unsigned int num_timesteps)
Extract the response values from the postprocessors of the controlled system.
const std::vector< ReporterName > _response_names
Response reporter names.
static InputParameters validParams()
bool isParamValid(const std::string &name) const
std::vector< const std::vector< Real > * > _log_probability_value_pointers
Pointers to the current values of the control log probabilities.
const unsigned int _input_timesteps
Number of timesteps to fetch from the reporters to be the input of the neural nets.
std::vector< std::vector< Real > > _output_data
torch::Tensor _std
Standard deviation in a tensor format for sampling the actual control value.
const Real _control_learning_rate
The learning rate for the optimization algorithm for the control.
virtual void execute() override
Real value(unsigned n, unsigned alpha, unsigned beta, Real x)
unsigned int _num_outputs
Number of outputs for the control neural network.
const Real _decay_factor
Decaying factor that is used when calculating the return from the reward.
std::vector< Real > _reward_data
const std::vector< unsigned int > _num_critic_neurons_per_layer
Number of neurons within the hidden layers in the critic neural net.
torch::Tensor _return_tensor
torch::Tensor _output_tensor
const Real _critic_learning_rate
The learning rate for the optimization algorithm for the critic.
LibtorchDRLControlTrainer(const InputParameters &parameters)
construct using input parameters
void vectorToTensor(std::vector< DataType > &vector, torch::Tensor &tensor, const bool detach=false)
std::vector< std::vector< Real > > _log_probability_data
const unsigned int _loss_print_frequency
The frequency the loss should be printed.
void paramError(const std::string &param, Args... args) const
const unsigned int _num_epochs
Number of epochs for the training of the emulator.
const std::vector< Real > _response_scaling_factors
Scaling constants for the responses.
void resetData()
Reset data after updating the neural network.
template void vectorToTensor< Real >(std::vector< Real > &vector, torch::Tensor &tensor, const bool detach)
std::shared_ptr< Moose::LibtorchArtificialNeuralNet > _critic_nn
Pointer to the critic neural net object.
static InputParameters validParams()
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
torch::Tensor evaluateValue(torch::Tensor &input)
Function which evaluates the critic to get the value (discounted reward)
void getOutputDataFromReporter(std::vector< std::vector< Real >> &data, const std::vector< const std::vector< Real > *> &reporter_links)
Extract the output (actions, logarithmic probabilities) values from the postprocessors of the control...
const bool _read_from_file
Switch indicating if an already existing neural net should be read from a file or not...
std::vector< Real > _return_data
const unsigned int _update_frequency
Number of transients to run and collect data from before updating the controller neural net...
IntRange< T > make_range(T beg, T end)
const std::string _filename_base
Name of the pytorch output file.
void computeRewardToGo()
Compute the return value by discounting the rewards and summing them.
void mooseError(Args &&... args) const
This trainer is responsible for training neural networks that efficiently control different processes...
const ConsoleStream _console
This is the base trainer class whose main functionality is the API for declaring model data...
unsigned int _update_counter
Counter for number of transient simulations that have been run before updating the controller...
const bool _standardize_advantage
Switch to enable the standardization of the advantages.
std::shared_ptr< Moose::LibtorchArtificialNeuralNet > _control_nn
Pointer to the control (or actor) neural net object.
const std::vector< ReporterName > _log_probability_names
Log probability reporter names.
std::vector< const std::vector< Real > * > _control_value_pointers
Pointers to the current values of the control signals.
void ErrorVector unsigned int
auto index_range(const T &sizable)
const std::vector< Real > * _reward_value_pointer
Pointer to the current values of the reward.