10 #ifdef LIBTORCH_ENABLED 14 #include <torch/torch.h> 17 #include "libmesh/utility.h" 36 virtual void execute()
override;
60 torch::Tensor & tensor_data,
61 const bool detach =
false);
77 torch::Tensor
evaluateAction(torch::Tensor & input, torch::Tensor & output);
189 std::shared_ptr<Moose::LibtorchArtificialNeuralNet>
_critic_nn;
209 const std::vector<
const std::vector<Real> *> & reporter_links,
210 const unsigned int num_timesteps);
218 const std::vector<
const std::vector<Real> *> & reporter_links);
227 const std::vector<Real> *
const reporter_link);
231 std::vector<
const std::vector<Real> *> & pointer_storage);
void computeAverageEpisodeReward()
Compute the average episodic reward.
torch::Tensor _log_probability_tensor
Real averageEpisodeReward()
Function which returns the current average episodic reward.
std::vector< std::vector< Real > > _input_data
Real _average_episode_reward
Storage for the current average episode reward.
torch::Tensor _input_tensor
Torch::tensor version of the input and action data.
const std::vector< ReporterName > _control_names
Control reporter names.
void getRewardDataFromReporter(std::vector< Real > &data, const std::vector< Real > *const reporter_link)
Extract the reward values from the postprocessors of the controlled system. This assumes that they are...
const std::vector< Real > _action_std
Standard deviation for the actions.
const bool _shift_outputs
Currently, the controls are executed after the user objects at initial in moose.
void getReporterPointers(const std::vector< ReporterName > &reporter_names, std::vector< const std::vector< Real > *> &pointer_storage)
Getting reporter pointers with given names.
torch::Tensor evaluateAction(torch::Tensor &input, torch::Tensor &output)
Function which evaluates the control net and then computes the logarithmic probability of the action...
const std::vector< unsigned int > _num_control_neurons_per_layer
Number of neurons within the hidden layers in the control neural net.
unsigned int _num_inputs
Number of inputs for the control and critic neural nets.
void convertDataToTensor(std::vector< std::vector< Real >> &vector_data, torch::Tensor &tensor_data, const bool detach=false)
Function to convert input/output data from std::vector<std::vector> to torch::tensor.
void trainController()
The condensed training function.
std::vector< const std::vector< Real > * > _response_value_pointers
Pointers to the current values of the responses.
const ReporterName _reward_name
Reward reporter name.
const std::vector< Real > _response_shift_factors
Shifting constants for the responses.
const Real _clip_param
The clip parameter used while clamping the advantage value.
void getInputDataFromReporter(std::vector< std::vector< Real >> &data, const std::vector< const std::vector< Real > *> &reporter_links, const unsigned int num_timesteps)
Extract the response values from the postprocessors of the controlled system.
const std::vector< ReporterName > _response_names
Response reporter names.
std::vector< const std::vector< Real > * > _log_probability_value_pointers
Pointers to the current values of the control log probabilities.
const unsigned int _input_timesteps
Number of timesteps to fetch from the reporters to be the input of the neural nets.
std::vector< std::vector< Real > > _output_data
torch::Tensor _std
standard deviation in a tensor format for sampling the actual control value
const Real _control_learning_rate
The learning rate for the optimization algorithm for the control.
virtual void execute() override
unsigned int _num_outputs
Number of outputs for the control neural network.
const Moose::LibtorchArtificialNeuralNet & controlNeuralNet() const
const Real _decay_factor
Decaying factor that is used when calculating the return from the reward.
std::vector< Real > _reward_data
const std::vector< unsigned int > _num_critic_neurons_per_layer
Number of neurons within the hidden layers in the critic neural net.
torch::Tensor _return_tensor
torch::Tensor _output_tensor
const Real _critic_learning_rate
The learning rate for the optimization algorithm for the critic.
LibtorchDRLControlTrainer(const InputParameters &parameters)
construct using input parameters
std::vector< std::vector< Real > > _log_probability_data
const unsigned int _loss_print_frequency
The frequency the loss should be printed.
const unsigned int _num_epochs
Number of epochs for the training of the emulator.
const std::vector< Real > _response_scaling_factors
Scaling constants for the responses.
void resetData()
Reset data after updating the neural network.
std::shared_ptr< Moose::LibtorchArtificialNeuralNet > _critic_nn
Pointer to the critic neural net object.
static InputParameters validParams()
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
torch::Tensor evaluateValue(torch::Tensor &input)
Function which evaluates the critic to get the value (discounted reward)
void getOutputDataFromReporter(std::vector< std::vector< Real >> &data, const std::vector< const std::vector< Real > *> &reporter_links)
Extract the output (actions, logarithmic probabilities) values from the postprocessors of the control...
const bool _read_from_file
Switch indicating if an already existing neural net should be read from a file or not...
std::vector< Real > _return_data
const unsigned int _update_frequency
Number of transients to run and collect data from before updating the controller neural net...
const std::string _filename_base
Name of the pytorch output file.
void computeRewardToGo()
Compute the return value by discounting the rewards and summing them.
const InputParameters & parameters() const
This trainer is responsible for training neural networks that efficiently control different processes...
This is the base trainer class whose main functionality is the API for declaring model data...
unsigned int _update_counter
Counter for number of transient simulations that have been run before updating the controller...
const bool _standardize_advantage
Switch to enable the standardization of the advantages.
std::shared_ptr< Moose::LibtorchArtificialNeuralNet > _control_nn
Pointer to the control (or actor) neural net object.
const std::vector< ReporterName > _log_probability_names
Log probability reporter names.
std::vector< const std::vector< Real > * > _control_value_pointers
Pointers to the current values of the control signals.
const std::vector< Real > * _reward_value_pointer
Pointer to the current values of the reward.