This trainer is responsible for training neural networks that efficiently control different processes. More...

#include <LibtorchDRLControlTrainer.h>

Inheritance diagram for LibtorchDRLControlTrainer:

Public Types
typedef DataFileName	DataFileParameterType

Public Member Functions
	LibtorchDRLControlTrainer (const InputParameters &parameters)
	construct using input parameters More...

virtual void	execute () override

Real	averageEpisodeReward ()
	Function which returns the current average episodic reward. More...

void	trainController ()
	The condensed training function. More...

const Moose::LibtorchArtificialNeuralNet &	controlNeuralNet () const

virtual void	initialize ()

virtual void	finalize ()

virtual void	threadJoin (const UserObject &) final

SubProblem &	getSubProblem () const

bool	shouldDuplicateInitialExecution () const

virtual Real	spatialValue (const Point &) const

virtual const std::vector< Point >	spatialPoints () const

void	gatherSum (T &value)

void	gatherMax (T &value)

void	gatherMin (T &value)

void	gatherProxyValueMax (T1 &proxy, T2 &value)

void	gatherProxyValueMin (T1 &proxy, T2 &value)

void	setPrimaryThreadCopy (UserObject *primary)

UserObject *	primaryThreadCopy ()

std::set< UserObjectName >	getDependObjects () const

virtual bool	needThreadedCopy () const

const std::set< std::string > &	getRequestedItems () override

const std::set< std::string > &	getSuppliedItems () override

unsigned int	systemNumber () const

virtual bool	enabled () const

std::shared_ptr< MooseObject >	getSharedPtr ()

std::shared_ptr< const MooseObject >	getSharedPtr () const

MooseApp &	getMooseApp () const

const std::string &	type () const

virtual const std::string &	name () const

std::string	typeAndName () const

std::string	errorPrefix (const std::string &error_type) const

void	callMooseError (std::string msg, const bool with_prefix) const

MooseObjectParameterName	uniqueParameterName (const std::string &parameter_name) const

const InputParameters &	parameters () const

MooseObjectName	uniqueName () const

const T &	getParam (const std::string &name) const

std::vector< std::pair< T1, T2 > >	getParam (const std::string &param1, const std::string &param2) const

const T *	queryParam (const std::string &name) const

const T &	getRenamedParam (const std::string &old_name, const std::string &new_name) const

T	getCheckedPointerParam (const std::string &name, const std::string &error_string="") const

bool	isParamValid (const std::string &name) const

bool	isParamSetByUser (const std::string &nm) const

void	paramError (const std::string &param, Args... args) const

void	paramWarning (const std::string &param, Args... args) const

void	paramInfo (const std::string &param, Args... args) const

void	connectControllableParams (const std::string &parameter, const std::string &object_type, const std::string &object_name, const std::string &object_parameter) const

void	mooseError (Args &&... args) const

void	mooseErrorNonPrefixed (Args &&... args) const

void	mooseDocumentedError (const std::string &repo_name, const unsigned int issue_num, Args &&... args) const

void	mooseWarning (Args &&... args) const

void	mooseWarningNonPrefixed (Args &&... args) const

void	mooseDeprecated (Args &&... args) const

void	mooseInfo (Args &&... args) const

std::string	getDataFileName (const std::string &param) const

std::string	getDataFileNameByName (const std::string &relative_path) const

std::string	getDataFilePath (const std::string &relative_path) const

virtual void	initialSetup ()

virtual void	timestepSetup ()

virtual void	jacobianSetup ()

virtual void	residualSetup ()

virtual void	customSetup (const ExecFlagType &)

const ExecFlagEnum &	getExecuteOnEnum () const

UserObjectName	getUserObjectName (const std::string &param_name) const

const T &	getUserObject (const std::string &param_name, bool is_dependency=true) const

const T &	getUserObjectByName (const UserObjectName &object_name, bool is_dependency=true) const

const UserObject &	getUserObjectBase (const std::string &param_name, bool is_dependency=true) const

const UserObject &	getUserObjectBaseByName (const UserObjectName &object_name, bool is_dependency=true) const

const std::vector< MooseVariableScalar *> &	getCoupledMooseScalarVars ()

const std::set< TagID > &	getScalarVariableCoupleableVectorTags () const

const std::set< TagID > &	getScalarVariableCoupleableMatrixTags () const

const GenericMaterialProperty< T, is_ad > &	getGenericMaterialProperty (const std::string &name, MaterialData &material_data, const unsigned int state=0)

const GenericMaterialProperty< T, is_ad > &	getGenericMaterialProperty (const std::string &name, const unsigned int state=0)

const GenericMaterialProperty< T, is_ad > &	getGenericMaterialProperty (const std::string &name, const unsigned int state=0)

const MaterialProperty< T > &	getMaterialProperty (const std::string &name, MaterialData &material_data, const unsigned int state=0)

const MaterialProperty< T > &	getMaterialProperty (const std::string &name, const unsigned int state=0)

const MaterialProperty< T > &	getMaterialProperty (const std::string &name, const unsigned int state=0)

const ADMaterialProperty< T > &	getADMaterialProperty (const std::string &name, MaterialData &material_data)

const ADMaterialProperty< T > &	getADMaterialProperty (const std::string &name)

const ADMaterialProperty< T > &	getADMaterialProperty (const std::string &name)

const MaterialProperty< T > &	getMaterialPropertyOld (const std::string &name, MaterialData &material_data)

const MaterialProperty< T > &	getMaterialPropertyOld (const std::string &name)

const MaterialProperty< T > &	getMaterialPropertyOld (const std::string &name)

const MaterialProperty< T > &	getMaterialPropertyOlder (const std::string &name, MaterialData &material_data)

const MaterialProperty< T > &	getMaterialPropertyOlder (const std::string &name)

const MaterialProperty< T > &	getMaterialPropertyOlder (const std::string &name)

const GenericMaterialProperty< T, is_ad > &	getGenericMaterialPropertyByName (const MaterialPropertyName &name, MaterialData &material_data, const unsigned int state)

const GenericMaterialProperty< T, is_ad > &	getGenericMaterialPropertyByName (const MaterialPropertyName &name, const unsigned int state=0)

const GenericMaterialProperty< T, is_ad > &	getGenericMaterialPropertyByName (const MaterialPropertyName &name, const unsigned int state=0)

const MaterialProperty< T > &	getMaterialPropertyByName (const MaterialPropertyName &name, MaterialData &material_data, const unsigned int state=0)

const MaterialProperty< T > &	getMaterialPropertyByName (const MaterialPropertyName &name, const unsigned int state=0)

const MaterialProperty< T > &	getMaterialPropertyByName (const MaterialPropertyName &name, const unsigned int state=0)

const ADMaterialProperty< T > &	getADMaterialPropertyByName (const MaterialPropertyName &name, MaterialData &material_data)

const ADMaterialProperty< T > &	getADMaterialPropertyByName (const MaterialPropertyName &name)

const ADMaterialProperty< T > &	getADMaterialPropertyByName (const MaterialPropertyName &name)

const MaterialProperty< T > &	getMaterialPropertyOldByName (const MaterialPropertyName &name, MaterialData &material_data)

const MaterialProperty< T > &	getMaterialPropertyOldByName (const MaterialPropertyName &name)

const MaterialProperty< T > &	getMaterialPropertyOldByName (const MaterialPropertyName &name)

const MaterialProperty< T > &	getMaterialPropertyOlderByName (const MaterialPropertyName &name, MaterialData &material_data)

const MaterialProperty< T > &	getMaterialPropertyOlderByName (const MaterialPropertyName &name)

const MaterialProperty< T > &	getMaterialPropertyOlderByName (const MaterialPropertyName &name)

std::pair< const MaterialProperty< T > *, std::set< SubdomainID > >	getBlockMaterialProperty (const MaterialPropertyName &name)

const GenericMaterialProperty< T, is_ad > &	getGenericZeroMaterialProperty (const std::string &name)

const GenericMaterialProperty< T, is_ad > &	getGenericZeroMaterialProperty ()

const GenericMaterialProperty< T, is_ad > &	getGenericZeroMaterialPropertyByName (const std::string &prop_name)

const MaterialProperty< T > &	getZeroMaterialProperty (Ts... args)

std::set< SubdomainID >	getMaterialPropertyBlocks (const std::string &name)

std::vector< SubdomainName >	getMaterialPropertyBlockNames (const std::string &name)

std::set< BoundaryID >	getMaterialPropertyBoundaryIDs (const std::string &name)

std::vector< BoundaryName >	getMaterialPropertyBoundaryNames (const std::string &name)

void	checkBlockAndBoundaryCompatibility (std::shared_ptr< MaterialBase > discrete)

std::unordered_map< SubdomainID, std::vector< MaterialBase *> >	buildRequiredMaterials (bool allow_stateful=true)

void	statefulPropertiesAllowed (bool)

bool	getMaterialPropertyCalled () const

virtual const std::unordered_set< unsigned int > &	getMatPropDependencies () const

virtual void	resolveOptionalProperties ()

const GenericMaterialProperty< T, is_ad > &	getPossiblyConstantGenericMaterialPropertyByName (const MaterialPropertyName &prop_name, MaterialData &material_data, const unsigned int state)

bool	isImplicit ()

Moose::StateArg	determineState () const

virtual void	subdomainSetup () override

virtual void	subdomainSetup () override

bool	hasUserObject (const std::string &param_name) const

bool	hasUserObject (const std::string &param_name) const

bool	hasUserObject (const std::string &param_name) const

bool	hasUserObject (const std::string &param_name) const

bool	hasUserObjectByName (const UserObjectName &object_name) const

bool	hasUserObjectByName (const UserObjectName &object_name) const

bool	hasUserObjectByName (const UserObjectName &object_name) const

bool	hasUserObjectByName (const UserObjectName &object_name) const

const GenericOptionalMaterialProperty< T, is_ad > &	getGenericOptionalMaterialProperty (const std::string &name, const unsigned int state=0)

const GenericOptionalMaterialProperty< T, is_ad > &	getGenericOptionalMaterialProperty (const std::string &name, const unsigned int state=0)

const OptionalMaterialProperty< T > &	getOptionalMaterialProperty (const std::string &name, const unsigned int state=0)

const OptionalMaterialProperty< T > &	getOptionalMaterialProperty (const std::string &name, const unsigned int state=0)

const OptionalADMaterialProperty< T > &	getOptionalADMaterialProperty (const std::string &name)

const OptionalADMaterialProperty< T > &	getOptionalADMaterialProperty (const std::string &name)

const OptionalMaterialProperty< T > &	getOptionalMaterialPropertyOld (const std::string &name)

const OptionalMaterialProperty< T > &	getOptionalMaterialPropertyOld (const std::string &name)

const OptionalMaterialProperty< T > &	getOptionalMaterialPropertyOlder (const std::string &name)

const OptionalMaterialProperty< T > &	getOptionalMaterialPropertyOlder (const std::string &name)

MaterialBase &	getMaterial (const std::string &name)

MaterialBase &	getMaterial (const std::string &name)

MaterialBase &	getMaterialByName (const std::string &name, bool no_warn=false)

MaterialBase &	getMaterialByName (const std::string &name, bool no_warn=false)

bool	hasMaterialProperty (const std::string &name)

bool	hasMaterialProperty (const std::string &name)

bool	hasMaterialPropertyByName (const std::string &name)

bool	hasMaterialPropertyByName (const std::string &name)

bool	hasADMaterialProperty (const std::string &name)

bool	hasADMaterialProperty (const std::string &name)

bool	hasADMaterialPropertyByName (const std::string &name)

bool	hasADMaterialPropertyByName (const std::string &name)

bool	hasGenericMaterialProperty (const std::string &name)

bool	hasGenericMaterialProperty (const std::string &name)

bool	hasGenericMaterialPropertyByName (const std::string &name)

bool	hasGenericMaterialPropertyByName (const std::string &name)

const Function &	getFunction (const std::string &name) const

const Function &	getFunctionByName (const FunctionName &name) const

bool	hasFunction (const std::string &param_name) const

bool	hasFunctionByName (const FunctionName &name) const

bool	isDefaultPostprocessorValue (const std::string &param_name, const unsigned int index=0) const

bool	hasPostprocessor (const std::string &param_name, const unsigned int index=0) const

bool	hasPostprocessorByName (const PostprocessorName &name) const

std::size_t	coupledPostprocessors (const std::string &param_name) const

const PostprocessorName &	getPostprocessorName (const std::string &param_name, const unsigned int index=0) const

const VectorPostprocessorValue &	getVectorPostprocessorValue (const std::string &param_name, const std::string &vector_name) const

const VectorPostprocessorValue &	getVectorPostprocessorValue (const std::string &param_name, const std::string &vector_name, bool needs_broadcast) const

const VectorPostprocessorValue &	getVectorPostprocessorValueByName (const VectorPostprocessorName &name, const std::string &vector_name) const

const VectorPostprocessorValue &	getVectorPostprocessorValueByName (const VectorPostprocessorName &name, const std::string &vector_name, bool needs_broadcast) const

const VectorPostprocessorValue &	getVectorPostprocessorValueOld (const std::string &param_name, const std::string &vector_name) const

const VectorPostprocessorValue &	getVectorPostprocessorValueOld (const std::string &param_name, const std::string &vector_name, bool needs_broadcast) const

const VectorPostprocessorValue &	getVectorPostprocessorValueOldByName (const VectorPostprocessorName &name, const std::string &vector_name) const

const VectorPostprocessorValue &	getVectorPostprocessorValueOldByName (const VectorPostprocessorName &name, const std::string &vector_name, bool needs_broadcast) const

const ScatterVectorPostprocessorValue &	getScatterVectorPostprocessorValue (const std::string &param_name, const std::string &vector_name) const

const ScatterVectorPostprocessorValue &	getScatterVectorPostprocessorValueByName (const VectorPostprocessorName &name, const std::string &vector_name) const

const ScatterVectorPostprocessorValue &	getScatterVectorPostprocessorValueOld (const std::string &param_name, const std::string &vector_name) const

const ScatterVectorPostprocessorValue &	getScatterVectorPostprocessorValueOldByName (const VectorPostprocessorName &name, const std::string &vector_name) const

bool	hasVectorPostprocessor (const std::string &param_name, const std::string &vector_name) const

bool	hasVectorPostprocessor (const std::string &param_name) const

bool	hasVectorPostprocessorByName (const VectorPostprocessorName &name, const std::string &vector_name) const

bool	hasVectorPostprocessorByName (const VectorPostprocessorName &name) const

const VectorPostprocessorName &	getVectorPostprocessorName (const std::string &param_name) const

T &	getSampler (const std::string &name)

Sampler &	getSampler (const std::string &name)

T &	getSamplerByName (const SamplerName &name)

Sampler &	getSamplerByName (const SamplerName &name)

virtual void	meshChanged ()

virtual void	meshDisplaced ()

PerfGraph &	perfGraph ()

const PostprocessorValue &	getPostprocessorValue (const std::string &param_name, const unsigned int index=0) const

const PostprocessorValue &	getPostprocessorValue (const std::string &param_name, const unsigned int index=0) const

const PostprocessorValue &	getPostprocessorValueOld (const std::string &param_name, const unsigned int index=0) const

const PostprocessorValue &	getPostprocessorValueOld (const std::string &param_name, const unsigned int index=0) const

const PostprocessorValue &	getPostprocessorValueOlder (const std::string &param_name, const unsigned int index=0) const

const PostprocessorValue &	getPostprocessorValueOlder (const std::string &param_name, const unsigned int index=0) const

virtual const PostprocessorValue &	getPostprocessorValueByName (const PostprocessorName &name) const

virtual const PostprocessorValue &	getPostprocessorValueByName (const PostprocessorName &name) const

const PostprocessorValue &	getPostprocessorValueOldByName (const PostprocessorName &name) const

const PostprocessorValue &	getPostprocessorValueOldByName (const PostprocessorName &name) const

const PostprocessorValue &	getPostprocessorValueOlderByName (const PostprocessorName &name) const

const PostprocessorValue &	getPostprocessorValueOlderByName (const PostprocessorName &name) const

bool	isVectorPostprocessorDistributed (const std::string &param_name) const

bool	isVectorPostprocessorDistributed (const std::string &param_name) const

bool	isVectorPostprocessorDistributedByName (const VectorPostprocessorName &name) const

bool	isVectorPostprocessorDistributedByName (const VectorPostprocessorName &name) const

const Distribution &	getDistribution (const std::string &name) const

const T &	getDistribution (const std::string &name) const

const Distribution &	getDistribution (const std::string &name) const

const T &	getDistribution (const std::string &name) const

const Distribution &	getDistributionByName (const DistributionName &name) const

const T &	getDistributionByName (const std::string &name) const

const Distribution &	getDistributionByName (const DistributionName &name) const

const T &	getDistributionByName (const std::string &name) const

const Parallel::Communicator &	comm () const

processor_id_type	n_processors () const

processor_id_type	processor_id () const

const std::string &	modelMetaDataName () const
	Accessor for the name of the model meta data. More...

const FileName &	getModelDataFileName () const
	Get the associated filename. More...

bool	hasModelData () const
	Check if we need to load model data (if the filename parameter is used) More...


template<typename T , typename... Args>
T &	declareModelData (const std::string &data_name, Args &&... args)
	Declare model data for loading from file as well as restart. More...


template<typename T , typename... Args>
const T &	getModelData (const std::string &data_name, Args &&... args) const
	Retrieve model data from the interface. More...

Static Public Member Functions
static InputParameters	validParams ()

static void	sort (typename std::vector< T > &vector)

static void	sortDFS (typename std::vector< T > &vector)

static void	cyclicDependencyError (CyclicDependencyException< T2 > &e, const std::string &header)

Public Attributes
const ConsoleStream	_console

Static Public Attributes
static constexpr PropertyValue::id_type	default_property_id

static constexpr PropertyValue::id_type	zero_property_id

static constexpr auto	SYSTEM

static constexpr auto	NAME

Protected Member Functions
void	computeAverageEpisodeReward ()
	Compute the average episodic reward. More...

void	convertDataToTensor (std::vector< std::vector< Real >> &vector_data, torch::Tensor &tensor_data, const bool detach=false)
	Function to convert input/output data from std::vector<std::vector> to torch::tensor. More...

torch::Tensor	evaluateValue (torch::Tensor &input)
	Function which evaluates the critic to get the value (discounted reward). More...

torch::Tensor	evaluateAction (torch::Tensor &input, torch::Tensor &output)
	Function which evaluates the control net and then computes the logarithmic probability of the action. More...

void	computeRewardToGo ()
	Compute the return value by discounting the rewards and summing them. More...

void	resetData ()
	Reset data after updating the neural network. More...

virtual void	addPostprocessorDependencyHelper (const PostprocessorName &name) const override

virtual void	addVectorPostprocessorDependencyHelper (const VectorPostprocessorName &name) const override

virtual void	addUserObjectDependencyHelper (const UserObject &uo) const override

void	addReporterDependencyHelper (const ReporterName &reporter_name) override

const ReporterName &	getReporterName (const std::string &param_name) const

T &	declareRestartableData (const std::string &data_name, Args &&... args)

ManagedValue< T >	declareManagedRestartableDataWithContext (const std::string &data_name, void *context, Args &&... args)

const T &	getRestartableData (const std::string &data_name) const

T &	declareRestartableDataWithContext (const std::string &data_name, void *context, Args &&... args)

T &	declareRecoverableData (const std::string &data_name, Args &&... args)

T &	declareRestartableDataWithObjectName (const std::string &data_name, const std::string &object_name, Args &&... args)

T &	declareRestartableDataWithObjectNameWithContext (const std::string &data_name, const std::string &object_name, void *context, Args &&... args)

std::string	restartableName (const std::string &data_name) const

const T &	getMeshProperty (const std::string &data_name, const std::string &prefix)

const T &	getMeshProperty (const std::string &data_name)

bool	hasMeshProperty (const std::string &data_name, const std::string &prefix) const

bool	hasMeshProperty (const std::string &data_name, const std::string &prefix) const

bool	hasMeshProperty (const std::string &data_name) const

bool	hasMeshProperty (const std::string &data_name) const

std::string	meshPropertyName (const std::string &data_name) const

PerfID	registerTimedSection (const std::string &section_name, const unsigned int level) const

PerfID	registerTimedSection (const std::string &section_name, const unsigned int level, const std::string &live_message, const bool print_dots=true) const

std::string	timedSectionName (const std::string &section_name) const

bool	isCoupledScalar (const std::string &var_name, unsigned int i=0) const

unsigned int	coupledScalarComponents (const std::string &var_name) const

unsigned int	coupledScalar (const std::string &var_name, unsigned int comp=0) const

libMesh::Order	coupledScalarOrder (const std::string &var_name, unsigned int comp=0) const

const VariableValue &	coupledScalarValue (const std::string &var_name, unsigned int comp=0) const

const ADVariableValue &	adCoupledScalarValue (const std::string &var_name, unsigned int comp=0) const

const GenericVariableValue< is_ad > &	coupledGenericScalarValue (const std::string &var_name, unsigned int comp=0) const

const GenericVariableValue< false > &	coupledGenericScalarValue (const std::string &var_name, const unsigned int comp) const

const GenericVariableValue< true > &	coupledGenericScalarValue (const std::string &var_name, const unsigned int comp) const

const VariableValue &	coupledVectorTagScalarValue (const std::string &var_name, TagID tag, unsigned int comp=0) const

const VariableValue &	coupledMatrixTagScalarValue (const std::string &var_name, TagID tag, unsigned int comp=0) const

const VariableValue &	coupledScalarValueOld (const std::string &var_name, unsigned int comp=0) const

const VariableValue &	coupledScalarValueOlder (const std::string &var_name, unsigned int comp=0) const

const VariableValue &	coupledScalarDot (const std::string &var_name, unsigned int comp=0) const

const ADVariableValue &	adCoupledScalarDot (const std::string &var_name, unsigned int comp=0) const

const VariableValue &	coupledScalarDotDot (const std::string &var_name, unsigned int comp=0) const

const VariableValue &	coupledScalarDotOld (const std::string &var_name, unsigned int comp=0) const

const VariableValue &	coupledScalarDotDotOld (const std::string &var_name, unsigned int comp=0) const

const VariableValue &	coupledScalarDotDu (const std::string &var_name, unsigned int comp=0) const

const VariableValue &	coupledScalarDotDotDu (const std::string &var_name, unsigned int comp=0) const

const MooseVariableScalar *	getScalarVar (const std::string &var_name, unsigned int comp) const

virtual void	checkMaterialProperty (const std::string &name, const unsigned int state)

void	markMatPropRequested (const std::string &)

MaterialPropertyName	getMaterialPropertyName (const std::string &name) const

void	checkExecutionStage ()

const T &	getReporterValue (const std::string &param_name, const std::size_t time_index=0)

const T &	getReporterValue (const std::string &param_name, ReporterMode mode, const std::size_t time_index=0)

const T &	getReporterValue (const std::string &param_name, const std::size_t time_index=0)

const T &	getReporterValue (const std::string &param_name, ReporterMode mode, const std::size_t time_index=0)

const T &	getReporterValueByName (const ReporterName &reporter_name, const std::size_t time_index=0)

const T &	getReporterValueByName (const ReporterName &reporter_name, ReporterMode mode, const std::size_t time_index=0)

const T &	getReporterValueByName (const ReporterName &reporter_name, const std::size_t time_index=0)

const T &	getReporterValueByName (const ReporterName &reporter_name, ReporterMode mode, const std::size_t time_index=0)

bool	hasReporterValue (const std::string &param_name) const

bool	hasReporterValue (const std::string &param_name) const

bool	hasReporterValue (const std::string &param_name) const

bool	hasReporterValue (const std::string &param_name) const

bool	hasReporterValueByName (const ReporterName &reporter_name) const

bool	hasReporterValueByName (const ReporterName &reporter_name) const

bool	hasReporterValueByName (const ReporterName &reporter_name) const

bool	hasReporterValueByName (const ReporterName &reporter_name) const

const GenericMaterialProperty< T, is_ad > *	defaultGenericMaterialProperty (const std::string &name)

const GenericMaterialProperty< T, is_ad > *	defaultGenericMaterialProperty (const std::string &name)

const MaterialProperty< T > *	defaultMaterialProperty (const std::string &name)

const MaterialProperty< T > *	defaultMaterialProperty (const std::string &name)

const ADMaterialProperty< T > *	defaultADMaterialProperty (const std::string &name)

const ADMaterialProperty< T > *	defaultADMaterialProperty (const std::string &name)

Static Protected Member Functions
static std::string	meshPropertyName (const std::string &data_name, const std::string &prefix)

Protected Attributes
const std::vector< ReporterName >	_response_names
	Response reporter names. More...

std::vector< const std::vector< Real > * >	_response_value_pointers
	Pointers to the current values of the responses. More...

const std::vector< Real >	_response_shift_factors
	Shifting constants for the responses. More...

const std::vector< Real >	_response_scaling_factors
	Scaling constants for the responses. More...

const std::vector< ReporterName >	_control_names
	Control reporter names. More...

std::vector< const std::vector< Real > * >	_control_value_pointers
	Pointers to the current values of the control signals. More...

const std::vector< ReporterName >	_log_probability_names
	Log probability reporter names. More...

std::vector< const std::vector< Real > * >	_log_probability_value_pointers
	Pointers to the current values of the control log probabilities. More...

const ReporterName	_reward_name
	Reward reporter name. More...

const std::vector< Real > *	_reward_value_pointer
	Pointer to the current values of the reward. More...

const unsigned int	_input_timesteps
	Number of timesteps to fetch from the reporters to be the input of the neural nets. More...

unsigned int	_num_inputs
	Number of inputs for the control and critic neural nets. More...

unsigned int	_num_outputs
	Number of outputs for the control neural network. More...

const unsigned int	_num_epochs
	Number of epochs for the training of the emulator. More...

const std::vector< unsigned int >	_num_critic_neurons_per_layer
	Number of neurons within the hidden layers in the critic neural net. More...

const Real	_critic_learning_rate
	The learning rate for the optimization algorithm for the critic. More...

const std::vector< unsigned int >	_num_control_neurons_per_layer
	Number of neurons within the hidden layers in the control neural net. More...

const Real	_control_learning_rate
	The learning rate for the optimization algorithm for the control. More...

const unsigned int	_update_frequency
	Number of transients to run and collect data from before updating the controller neural net. More...

const Real	_clip_param
	The clip parameter used while clamping the advantage value. More...

const Real	_decay_factor
	Decaying factor that is used when calculating the return from the reward. More...

const std::vector< Real >	_action_std
	Standard deviation for the actions. More...

const std::string	_filename_base
	Name of the pytorch output file. More...

const bool	_read_from_file
	Switch indicating if an already existing neural net should be read from a file or not. More...

const bool	_shift_outputs
	Currently, the controls are executed after the user objects at initial in moose. More...

Real	_average_episode_reward
	Storage for the current average episode reward. More...

const bool	_standardize_advantage
	Switch to enable the standardization of the advantages. More...

const unsigned int	_loss_print_frequency
	The frequency the loss should be printed. More...

std::shared_ptr< Moose::LibtorchArtificialNeuralNet >	_control_nn
	Pointer to the control (or actor) neural net object. More...

std::shared_ptr< Moose::LibtorchArtificialNeuralNet >	_critic_nn
	Pointer to the critic neural net object. More...

torch::Tensor	_std
	Standard deviation in a tensor format for sampling the actual control value. More...

torch::Tensor	_input_tensor
	Torch::tensor version of the input and action data. More...

torch::Tensor	_output_tensor

torch::Tensor	_return_tensor

torch::Tensor	_log_probability_tensor

SubProblem &	_subproblem

FEProblemBase &	_fe_problem

SystemBase &	_sys

const THREAD_ID	_tid

Assembly &	_assembly

const Moose::CoordinateSystemType &	_coord_sys

const bool	_duplicate_initial_execution

std::set< std::string >	_depend_uo

const bool &	_enabled

MooseApp &	_app

const std::string	_type

const std::string	_name

const InputParameters &	_pars

Factory &	_factory

ActionFactory &	_action_factory

const ExecFlagEnum &	_execute_enum

const ExecFlagType &	_current_execute_flag

MooseApp &	_restartable_app

const std::string	_restartable_system_name

const THREAD_ID	_restartable_tid

const bool	_restartable_read_only

FEProblemBase &	_mci_feproblem

FEProblemBase &	_mdi_feproblem

MooseApp &	_pg_moose_app

const std::string	_prefix

FEProblemBase &	_sc_fe_problem

const THREAD_ID	_sc_tid

const Real &	_real_zero

const VariableValue &	_scalar_zero

const Point &	_point_zero

const InputParameters &	_mi_params

const std::string	_mi_name

const MooseObjectName	_mi_moose_object_name

FEProblemBase &	_mi_feproblem

SubProblem &	_mi_subproblem

const THREAD_ID	_mi_tid

const Moose::MaterialDataType	_material_data_type

MaterialData &	_material_data

bool	_stateful_allowed

bool	_get_material_property_called

std::vector< std::unique_ptr< PropertyValue > >	_default_properties

std::unordered_set< unsigned int >	_material_property_dependencies

const MaterialPropertyName	_get_suffix

const bool	_use_interpolated_state

const InputParameters &	_ti_params

FEProblemBase &	_ti_feproblem

bool	_is_implicit

Real &	_t

const Real &	_t_old

int &	_t_step

Real &	_dt

Real &	_dt_old

bool	_is_transient

const Parallel::Communicator &	_communicator


std::vector< std::vector< Real > >	_input_data

std::vector< std::vector< Real > >	_output_data

std::vector< std::vector< Real > >	_log_probability_data


std::vector< Real >	_reward_data

std::vector< Real >	_return_data

Static Protected Attributes
static const std::string	_interpolated_old

static const std::string	_interpolated_older

Private Member Functions
void	getInputDataFromReporter (std::vector< std::vector< Real >> &data, const std::vector< const std::vector< Real > *> &reporter_links, const unsigned int num_timesteps)
	Extract the response values from the postprocessors of the controlled system. More...

void	getOutputDataFromReporter (std::vector< std::vector< Real >> &data, const std::vector< const std::vector< Real > *> &reporter_links)
	Extract the output (actions, logarithmic probabilities) values from the postprocessors of the controlled system. More...

void	getRewardDataFromReporter (std::vector< Real > &data, const std::vector< Real > *const reporter_link)
	Extract the reward values from the postprocessors of the controlled system. This assumes that they are stored in an AccumulateReporter. More...

void	getReporterPointers (const std::vector< ReporterName > &reporter_names, std::vector< const std::vector< Real > *> &pointer_storage)
	Getting reporter pointers with given names. More...

Private Attributes
unsigned int	_update_counter
	Counter for number of transient simulations that have been run before updating the controller. More...

Detailed Description

This trainer is responsible for training neural networks that efficiently control different processes.

It utilizes the Proximal Policy Optimization algorithms. For more information on the algorithm, see the following resources: Schulman, John, et al. "Proximal policy optimization algorithms." arXiv preprint arXiv:1707.06347 (2017). https://medium.com/analytics-vidhya/coding-ppo-from-scratch-with-pytorch-part-1-4-613dfc1b14c8 https://stable-baselines.readthedocs.io/en/master/modules/ppo2.html

Definition at line 28 of file LibtorchDRLControlTrainer.h.

Constructor & Destructor Documentation

◆ LibtorchDRLControlTrainer()

LibtorchDRLControlTrainer::LibtorchDRLControlTrainer ( const InputParameters & parameters )

construct using input parameters

Definition at line 128 of file LibtorchDRLControlTrainer.C.

   : SurrogateTrainerBase(parameters),
     _response_names(getParam<std::vector<ReporterName>>("response")),
     _response_shift_factors(isParamValid("response_shift_factors")
                                 ? getParam<std::vector<Real>>("response_shift_factors")
                                 : std::vector<Real>(_response_names.size(), 0.0)),
     _response_scaling_factors(isParamValid("response_scaling_factors")
                                   ? getParam<std::vector<Real>>("response_scaling_factors")
                                   : std::vector<Real>(_response_names.size(), 1.0)),
     _control_names(getParam<std::vector<ReporterName>>("control")),
     _log_probability_names(getParam<std::vector<ReporterName>>("log_probability")),
     _reward_name(getParam<ReporterName>("reward")),
     _reward_value_pointer(&getReporterValueByName<std::vector<Real>>(_reward_name)),
     _input_timesteps(getParam<unsigned int>("input_timesteps")),
     _num_inputs(_input_timesteps * _response_names.size()),
     _num_outputs(_control_names.size()),
     _input_data(std::vector<std::vector<Real>>(_num_inputs)),
     _output_data(std::vector<std::vector<Real>>(_num_outputs)),
     _log_probability_data(std::vector<std::vector<Real>>(_num_outputs)),
     _num_epochs(getParam<unsigned int>("num_epochs")),
     _num_critic_neurons_per_layer(
         getParam<std::vector<unsigned int>>("num_critic_neurons_per_layer")),
     _critic_learning_rate(getParam<Real>("critic_learning_rate")),
     _num_control_neurons_per_layer(
         getParam<std::vector<unsigned int>>("num_control_neurons_per_layer")),
     _control_learning_rate(getParam<Real>("control_learning_rate")),
     _update_frequency(getParam<unsigned int>("update_frequency")),
     _clip_param(getParam<Real>("clip_parameter")),
     _decay_factor(getParam<Real>("decay_factor")),
     _action_std(getParam<std::vector<Real>>("action_standard_deviations")),
     _filename_base(isParamValid("filename_base") ? getParam<std::string>("filename_base") : ""),
     _read_from_file(getParam<bool>("read_from_file")),
     _shift_outputs(getParam<bool>("shift_outputs")),
     _standardize_advantage(getParam<bool>("standardize_advantage")),
     _loss_print_frequency(getParam<unsigned int>("loss_print_frequency")),
     _update_counter(_update_frequency)
 {
   if (_response_names.size() != _response_shift_factors.size())
     paramError("response_shift_factors",
                "The number of shift factors is not the same as the number of responses!");
 
   if (_response_names.size() != _response_scaling_factors.size())
     paramError(
         "response_scaling_factors",
         "The number of normalization coefficients is not the same as the number of responses!");
 
   // We establish the links with the chosen reporters
   getReporterPointers(_response_names, _response_value_pointers);
   getReporterPointers(_control_names, _control_value_pointers);
   getReporterPointers(_log_probability_names, _log_probability_value_pointers);
 
   // Fixing the RNG seed to make sure every experiment is the same.
   // Otherwise sampling / stochastic gradient descent would be different.
   torch::manual_seed(getParam<unsigned int>("seed"));
 
   // Convert the user input standard deviations to a diagonal tensor
   _std = torch::eye(_control_names.size());
   for (unsigned int i = 0; i < _control_names.size(); ++i)
     _std[i][i] = _action_std[i];
 
   bool filename_valid = isParamValid("filename_base");
 
   // Initializing the control neural net so that the control can grab it right away
   _control_nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(
       filename_valid ? _filename_base + "_control.net" : "control.net",
       _num_inputs,
       _num_outputs,
       _num_control_neurons_per_layer,
       getParam<std::vector<std::string>>("control_activation_functions"));
 
   // We read parameters for the control neural net if it is requested
   if (_read_from_file)
   {
     try
     {
       torch::load(_control_nn, _control_nn->name());
       _console << "Loaded requested .pt file." << std::endl;
     }
     catch (const c10::Error & e)
     {
       mooseError("The requested pytorch file could not be loaded for the control neural net.\n",
                  e.msg());
     }
   }
   else if (filename_valid)
     torch::save(_control_nn, _control_nn->name());
 
   // Initialize the critic neural net
   _critic_nn = std::make_shared<Moose::LibtorchArtificialNeuralNet>(
       filename_valid ? _filename_base + "_ctiric.net" : "ctiric.net",
       _num_inputs,
       1,
       _num_critic_neurons_per_layer,
       getParam<std::vector<std::string>>("critic_activation_functions"));
 
   // We read parameters for the critic neural net if it is requested
   if (_read_from_file)
   {
     try
     {
       torch::load(_critic_nn, _critic_nn->name());
       _console << "Loaded requested .pt file." << std::endl;
     }
     catch (const c10::Error & e)
     {
       mooseError("The requested pytorch file could not be loaded for the critic neural net.\n",
                  e.msg());
     }
   }
   else if (filename_valid)
     torch::save(_critic_nn, _critic_nn->name());
 }

Member Function Documentation

◆ averageEpisodeReward()

Real LibtorchDRLControlTrainer::averageEpisodeReward ( )

inline

Function which returns the current average episodic reward.

It is only updated at the end of every episode.

Definition at line 42 of file LibtorchDRLControlTrainer.h.

Referenced by DRLRewardReporter::execute().

42 { return _average_episode_reward; }

LibtorchDRLControlTrainer::_average_episode_reward

Real _average_episode_reward

Storage for the current average episode reward.

Definition: LibtorchDRLControlTrainer.h:178

◆ computeAverageEpisodeReward()

void LibtorchDRLControlTrainer::computeAverageEpisodeReward ( )

protected

Compute the average episodic reward.

Definition at line 278 of file LibtorchDRLControlTrainer.C.

Referenced by execute().

 {
   if (_reward_data.size())
     _average_episode_reward =
         std::accumulate(_reward_data.begin(), _reward_data.end(), 0.0) / _reward_data.size();
   else
     _average_episode_reward = 0.0;
 }

◆ computeRewardToGo()

void LibtorchDRLControlTrainer::computeRewardToGo ( )

protected

Compute the return value by discounting the rewards and summing them.

Definition at line 288 of file LibtorchDRLControlTrainer.C.

Referenced by execute().

 {
   // Get reward data from one simulation
   std::vector<Real> reward_data_per_sim;
   std::vector<Real> return_data_per_sim;
   getRewardDataFromReporter(reward_data_per_sim, _reward_value_pointer);
 
   // Discount the reward to get the return value, we need this to be able to anticipate
   // rewards based on the current behavior.
   Real discounted_reward(0.0);
   for (int i = reward_data_per_sim.size() - 1; i >= 0; --i)
   {
     discounted_reward = reward_data_per_sim[i] + discounted_reward * _decay_factor;
 
     // We are inserting to the front of the vector and push the rest back, this will
     // ensure that the first element of the vector is the discounter reward for the whole transient
     return_data_per_sim.insert(return_data_per_sim.begin(), discounted_reward);
   }
 
   // Save and accumulate the return values
   _return_data.insert(_return_data.end(), return_data_per_sim.begin(), return_data_per_sim.end());
 }

◆ controlNeuralNet()

const Moose::LibtorchArtificialNeuralNet& LibtorchDRLControlTrainer::controlNeuralNet ( ) const

inline

Definition at line 47 of file LibtorchDRLControlTrainer.h.

Referenced by LibtorchNeuralNetControlTransfer::execute().

47 { return *_control_nn; }

LibtorchDRLControlTrainer::_control_nn

std::shared_ptr< Moose::LibtorchArtificialNeuralNet > _control_nn

Pointer to the control (or actor) neural net object.

Definition: LibtorchDRLControlTrainer.h:187

◆ convertDataToTensor()

void LibtorchDRLControlTrainer::convertDataToTensor	(	std::vector< std::vector< Real >> &	vector_data,
		torch::Tensor &	tensor_data,
		const bool	detach = `false`
	)

protected

Function to convert input/output data from std::vector<std::vector> to torch::tensor.

Parameters

vector_data	The input data in vector-vectors format
tensor_data	The tensor where we would like to save the results
detach	If the gradient info needs to be detached from the tensor

Definition at line 373 of file LibtorchDRLControlTrainer.C.

Referenced by execute().

 {
   for (unsigned int i = 0; i < vector_data.size(); ++i)
   {
     torch::Tensor input_row;
     LibtorchUtils::vectorToTensor(vector_data[i], input_row, detach);
 
     if (i == 0)
       tensor_data = input_row;
     else
       tensor_data = torch::cat({tensor_data, input_row}, 1);
   }
 
   if (detach)
     tensor_data.detach();
 }

◆ declareModelData()

template<typename T , typename... Args>

T & RestartableModelInterface::declareModelData	(	const std::string &	data_name,
		Args &&...	args
	)

inherited

Declare model data for loading from file as well as restart.

Definition at line 78 of file RestartableModelInterface.h.

 {
   return _model_restartable.declareRestartableData<T>(data_name, std::forward<Args>(args)...);
 }

◆ evaluateAction()

torch::Tensor LibtorchDRLControlTrainer::evaluateAction	(	torch::Tensor &	input,
		torch::Tensor &	output
	)

protected

Function which evaluates the control net and then computes the logarithmic probability of the action.

Parameters

input	The observation values (responses)
output	The actions corresponding to the observations

Returns: The estimated value for the logarithmic probability

Definition at line 399 of file LibtorchDRLControlTrainer.C.

Referenced by trainController().

 {
   torch::Tensor var = torch::matmul(_std, _std);
 
   // Compute an action and get it's logarithmic proability based on an assumed Gaussian distribution
   torch::Tensor action = _control_nn->forward(input);
   return -((action - output) * (action - output)) / (2 * var) - torch::log(_std) -
          std::log(std::sqrt(2 * M_PI));
 }

◆ evaluateValue()

torch::Tensor LibtorchDRLControlTrainer::evaluateValue ( torch::Tensor & input )

protected

Function which evaluates the critic to get the value (discounted reward)

Parameters

input The observation values (responses)

Returns: The estimated value

Definition at line 393 of file LibtorchDRLControlTrainer.C.

Referenced by trainController().

 {
   return _critic_nn->forward(input);
 }

◆ execute()

void LibtorchDRLControlTrainer::execute ( )

overridevirtual

Implements GeneralUserObject.

Definition at line 242 of file LibtorchDRLControlTrainer.C.

 {
   // Extract data from the reporters
   getInputDataFromReporter(_input_data, _response_value_pointers, _input_timesteps);
   getOutputDataFromReporter(_output_data, _control_value_pointers);
   getOutputDataFromReporter(_log_probability_data, _log_probability_value_pointers);
   getRewardDataFromReporter(_reward_data, _reward_value_pointer);
 
   // Calculate return from the reward (discounting the reward)
   computeRewardToGo();
 
   _update_counter--;
 
   // Only update the NNs when
   if (_update_counter == 0)
   {
     // We compute the average reward first
     computeAverageEpisodeReward();
 
     // Transform input/output/return data to torch::Tensor
     convertDataToTensor(_input_data, _input_tensor);
     convertDataToTensor(_output_data, _output_tensor);
     convertDataToTensor(_log_probability_data, _log_probability_tensor);
 
     // Discard (detach) the gradient info for return data
     LibtorchUtils::vectorToTensor<Real>(_return_data, _return_tensor, true);
 
     // We train the controller using the emulator to get a good control strategy
     trainController();
 
     // We clean the training data after contoller update and reset the counter
     resetData();
   }
 }

◆ finalize()

virtual void SurrogateTrainerBase::finalize ( )

inlinevirtualinherited

Implements GeneralUserObject.

Reimplemented in SurrogateTrainer, and PODReducedBasisTrainer.

Definition at line 39 of file SurrogateTrainer.h.

39 {} // not required, but available

◆ getInputDataFromReporter()

void LibtorchDRLControlTrainer::getInputDataFromReporter	(	std::vector< std::vector< Real >> &	data,
		const std::vector< const std::vector< Real > *> &	reporter_links,
		const unsigned int	num_timesteps
	)

private

Extract the response values from the postprocessors of the controlled system.

This assumes that they are stored in an AccumulateReporter

Parameters

data	The data where we would like to store the response values
reporter_links	Pointers to the reporter values which need to be extracted
num_timesteps	The number of timesteps we want to use for training

Definition at line 426 of file LibtorchDRLControlTrainer.C.

Referenced by execute().

 {
   // Appends the shifted and scaled reporter values to the rows of `data`. Every reporter
   // contributes `num_timesteps` rows; all of these rows grow by
   // `reporter_data.size() - _shift_outputs` entries per call.
   for (const auto & rep_i : index_range(reporter_links))
   {
     // Work on a copy so the values stored behind the reporter link are not modified
     std::vector<Real> reporter_data = *reporter_links[rep_i];

     // We shift and scale the inputs to get better training efficiency
     std::transform(
         reporter_data.begin(),
         reporter_data.end(),
         reporter_data.begin(),
         [this, &rep_i](Real value) -> Real
         { return (value - _response_shift_factors[rep_i]) * _response_scaling_factors[rep_i]; });

     // Fill the corresponding containers
     for (const auto & start_step : make_range(num_timesteps))
     {
       // Rows are grouped by start_step first, then by reporter index
       unsigned int row = reporter_links.size() * start_step + rep_i;

       // Pad the front of the row with (num_timesteps - 1 - start_step) copies of the
       // first value.
       // NOTE(review): reporter_data[0] assumes the reporter holds at least one value
       // whenever padding is needed; no check is visible here - confirm with the caller.
       for (unsigned int fill_i = 1; fill_i < num_timesteps - start_step; ++fill_i)
         data[row].push_back(reporter_data[0]);

       // Append the leading part of the series so that padding plus appended values add up
       // to reporter_data.size() - _shift_outputs entries.
       // NOTE(review): the end iterator is only valid if the reporter holds at least
       // (num_timesteps - 1 - start_step) + _shift_outputs values - TODO confirm.
       data[row].insert(data[row].end(),
                        reporter_data.begin(),
                        reporter_data.begin() + start_step + reporter_data.size() -
                            (num_timesteps - 1) - _shift_outputs);
     }
   }
 }

◆ getModelData()

template<typename T , typename... Args>

const T & RestartableModelInterface::getModelData	(	const std::string &	data_name,
		Args &&...	args
	)		const

inherited

Retrieve model data from the interface.

Definition at line 85 of file RestartableModelInterface.h.

 {
   return _model_restartable.getRestartableData<T>(data_name, std::forward<Args>(args)...);
 }

◆ getModelDataFileName()

const FileName & RestartableModelInterface::getModelDataFileName ( ) const

inherited

Get the associated filename.

Definition at line 33 of file RestartableModelInterface.C.

 {
   return _model_object.getParam<FileName>("filename");
 }

◆ getOutputDataFromReporter()

void LibtorchDRLControlTrainer::getOutputDataFromReporter	(	std::vector< std::vector< Real >> &	data,
		const std::vector< const std::vector< Real > *> &	reporter_links
	)

private

Extract the output (actions, logarithmic probabilities) values from the postprocessors of the controlled system.

This assumes that they are stored in an AccumulateReporter

Parameters

data	The data where we would like to store the output values
reporter_links	Pointers to the reporter values which need to be extracted

Definition at line 459 of file LibtorchDRLControlTrainer.C.

Referenced by execute().

 {
   for (const auto & rep_i : index_range(reporter_links))
     // Fill the corresponding containers
     data[rep_i].insert(data[rep_i].end(),
                        reporter_links[rep_i]->begin() + _shift_outputs,
                        reporter_links[rep_i]->end());
 }

◆ getReporterPointers()

void LibtorchDRLControlTrainer::getReporterPointers	(	const std::vector< ReporterName > &	reporter_names,
		std::vector< const std::vector< Real > *> &	pointer_storage
	)

private

Getting reporter pointers with given names.

Definition at line 479 of file LibtorchDRLControlTrainer.C.

Referenced by LibtorchDRLControlTrainer().

 {
   pointer_storage.clear();
   for (const auto & name : reporter_names)
     pointer_storage.push_back(&getReporterValueByName<std::vector<Real>>(name));
 }

◆ getRewardDataFromReporter()

void LibtorchDRLControlTrainer::getRewardDataFromReporter	(	std::vector< Real > &	data,
		const std::vector< Real > *const	reporter_link
	)

private

Extract the reward values from the postprocessors of the controlled system. This assumes that they are stored in an AccumulateReporter.

Parameters

data	The data where we would like to store the reward values
reporter_link	Pointer to the reporter value which needs to be extracted

Definition at line 471 of file LibtorchDRLControlTrainer.C.

Referenced by computeRewardToGo(), and execute().

 {
   // Fill the corresponding container
   data.insert(data.end(), reporter_link->begin() + _shift_outputs, reporter_link->end());
 }

◆ hasModelData()

bool RestartableModelInterface::hasModelData ( ) const

inherited

Check if we need to load model data (if the filename parameter is used)

Definition at line 39 of file RestartableModelInterface.C.

 {
   return _model_object.isParamValid("filename");
 }

◆ initialize()

virtual void SurrogateTrainerBase::initialize ( )

inlinevirtualinherited

Implements GeneralUserObject.

Reimplemented in SurrogateTrainer, ActiveLearningGaussianProcess, and PODReducedBasisTrainer.

Definition at line 38 of file SurrogateTrainer.h.

38 {} // not required, but available

◆ modelMetaDataName()

const std::string& RestartableModelInterface::modelMetaDataName ( ) const

inlineinherited

Accessor for the name of the model meta data.

Definition at line 47 of file RestartableModelInterface.h.

Referenced by SurrogateTrainerOutput::output(), and MappingOutput::output().

47 { return _model_meta_data_name; }

RestartableModelInterface::_model_meta_data_name

const std::string _model_meta_data_name

The model meta data name.

Definition: RestartableModelInterface.h:61

◆ resetData()

void LibtorchDRLControlTrainer::resetData ( )

protected

Reset data after updating the neural network.

Definition at line 410 of file LibtorchDRLControlTrainer.C.

Referenced by execute().

 {
   for (auto & data : _input_data)
     data.clear();
   for (auto & data : _output_data)
     data.clear();
   for (auto & data : _log_probability_data)
     data.clear();
 
   _reward_data.clear();
   _return_data.clear();
 
   _update_counter = _update_frequency;
 }

◆ threadJoin()

virtual void SurrogateTrainerBase::threadJoin ( const UserObject & )

inlinefinalvirtualinherited

Reimplemented from GeneralUserObject.

Definition at line 40 of file SurrogateTrainer.h.

40 {} // GeneralUserObjects are not threaded

◆ trainController()

void LibtorchDRLControlTrainer::trainController ( )

The condensed training function.

Definition at line 312 of file LibtorchDRLControlTrainer.C.

Referenced by execute().

 {
   // Define the optimizers for the training
   torch::optim::Adam actor_optimizer(_control_nn->parameters(),
                                      torch::optim::AdamOptions(_control_learning_rate));
 
   torch::optim::Adam critic_optimizer(_critic_nn->parameters(),
                                       torch::optim::AdamOptions(_critic_learning_rate));
 
   // Compute the approximate value (return) from the critic neural net and use it to compute an
   // advantage
   auto value = evaluateValue(_input_tensor).detach();
   auto advantage = _return_tensor - value;
 
   // If requested, standardize the advantage
   if (_standardize_advantage)
     advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-10);
 
   for (unsigned int epoch = 0; epoch < _num_epochs; ++epoch)
   {
     // Get the approximate return from the neural net again (this one does have an associated
     // gradient)
     value = evaluateValue(_input_tensor);
     // Get the approximate logarithmic action probability using the control neural net
     auto curr_log_probability = evaluateAction(_input_tensor, _output_tensor);
 
     // Prepare the ratio by using the e^(logx-logy)=x/y expression
     auto ratio = (curr_log_probability - _log_probability_tensor).exp();
 
     // Use clamping for limiting
     auto surr1 = ratio * advantage;
     auto surr2 = torch::clamp(ratio, 1.0 - _clip_param, 1.0 + _clip_param) * advantage;
 
     // Compute loss values for the critic and the control neural net
     auto actor_loss = -torch::min(surr1, surr2).mean();
     auto critic_loss = torch::mse_loss(value, _return_tensor);
 
     // Update the weights in the neural nets
     actor_optimizer.zero_grad();
     actor_loss.backward();
     actor_optimizer.step();
 
     critic_optimizer.zero_grad();
     critic_loss.backward();
     critic_optimizer.step();
 
     // print loss per epoch
     if (_loss_print_frequency)
       if (epoch % _loss_print_frequency == 0)
         _console << "Epoch: " << epoch << " | Actor Loss: " << COLOR_GREEN
                  << actor_loss.item<double>() << COLOR_DEFAULT << " | Critic Loss: " << COLOR_GREEN
                  << critic_loss.item<double>() << COLOR_DEFAULT << std::endl;
   }
 
   // Save the controller neural net so our controller can read it, we also save the critic if we
   // want to continue training
   torch::save(_control_nn, _control_nn->name());
   torch::save(_critic_nn, _critic_nn->name());
 }

◆ validParams()

InputParameters LibtorchDRLControlTrainer::validParams ( )

static

Definition at line 22 of file LibtorchDRLControlTrainer.C.

 {
   InputParameters params = SurrogateTrainerBase::validParams();
 
   params.addClassDescription(
       "Trains a neural network controller using the Proximal Policy Optimization (PPO) algorithm.");
 
   params.addRequiredParam<std::vector<ReporterName>>(
       "response", "Reporter values containing the response values from the model.");
   params.addParam<std::vector<Real>>(
       "response_shift_factors",
       "A shift constant which will be used to shift the response values. This is used for the "
       "manipulation of the neural net inputs for better training efficiency.");
   params.addParam<std::vector<Real>>(
       "response_scaling_factors",
       "A normalization constant which will be used to divide the response values. This is used for "
       "the manipulation of the neural net inputs for better training efficiency.");
   params.addRequiredParam<std::vector<ReporterName>>(
       "control",
       "Reporters containing the values of the controlled quantities (control signals) from the "
       "model simulations.");
   params.addRequiredParam<std::vector<ReporterName>>(
       "log_probability",
       "Reporters containing the log probabilities of the actions taken during the simulations.");
   params.addRequiredParam<ReporterName>(
       "reward", "Reporter containing the earned time-dependent rewards from the simulation.");
   params.addRangeCheckedParam<unsigned int>(
       "input_timesteps",
       1,
       "1<=input_timesteps",
       "Number of time steps to use in the input data, if larger than 1, "
       "data from the previous timesteps will be used as inputs in the training.");
   params.addParam<unsigned int>("skip_num_rows",
                                 1,
                                 "Number of rows to ignore from training. We usually skip the 1st "
                                 "row from the reporter since it contains only initial values.");
 
   params.addRequiredParam<unsigned int>("num_epochs", "Number of epochs for the training.");
 
   params.addRequiredRangeCheckedParam<Real>(
       "critic_learning_rate",
       "0<critic_learning_rate",
       "Learning rate (relaxation) for the emulator training.");
   params.addRequiredParam<std::vector<unsigned int>>(
       "num_critic_neurons_per_layer", "Number of neurons per layer in the emulator neural net.");
   params.addParam<std::vector<std::string>>(
       "critic_activation_functions",
       std::vector<std::string>({"relu"}),
       "The type of activation functions to use in the emulator neural net. It is either one value "
       "or one value per hidden layer.");
 
   params.addRequiredRangeCheckedParam<Real>(
       "control_learning_rate",
       "0<control_learning_rate",
       "Learning rate (relaxation) for the control neural net training.");
   params.addRequiredParam<std::vector<unsigned int>>(
       "num_control_neurons_per_layer",
       "Number of neurons per layer for the control neural network.");
   params.addParam<std::vector<std::string>>(
       "control_activation_functions",
       std::vector<std::string>({"relu"}),
       "The type of activation functions to use in the control neural net. It "
       "is either one value "
       "or one value per hidden layer.");
 
   params.addParam<std::string>("filename_base",
                                "Filename used to output the neural net parameters.");
 
   params.addParam<unsigned int>(
       "seed", 11, "Random number generator seed for stochastic optimizers.");
 
   params.addRequiredParam<std::vector<Real>>(
       "action_standard_deviations", "Standard deviation value used while sampling the actions.");
 
   params.addParam<Real>(
       "clip_parameter", 0.2, "Clip parameter used while clamping the advantage value.");
   params.addRangeCheckedParam<unsigned int>(
       "update_frequency",
       1,
       "1<=update_frequency",
       "Number of transient simulation data to collect for updating the controller neural network.");
 
   params.addRangeCheckedParam<Real>(
       "decay_factor",
       1.0,
       "0.0<=decay_factor<=1.0",
       "Decay factor for calculating the return. This accounts for decreased "
       "reward values from the later steps.");
 
   params.addParam<bool>(
       "read_from_file", false, "Switch to read the neural network parameters from a file.");
   params.addParam<bool>(
       "shift_outputs",
       true,
       "If we would like to shift the outputs the realign the input-output pairs.");
   params.addParam<bool>(
       "standardize_advantage",
       true,
       "Switch to enable the shifting and normalization of the advantages in the PPO algorithm.");
   params.addParam<unsigned int>("loss_print_frequency",
                                 0,
                                 "The frequency which is used to print the loss values. If 0, the "
                                 "loss values are not printed.");
   return params;
 }

Member Data Documentation

◆ _action_std

const std::vector<Real> LibtorchDRLControlTrainer::_action_std

protected

Standard deviation for the actions.

Definition at line 161 of file LibtorchDRLControlTrainer.h.

Referenced by LibtorchDRLControlTrainer().

◆ _average_episode_reward

Real LibtorchDRLControlTrainer::_average_episode_reward

protected

Storage for the current average episode reward.

Definition at line 178 of file LibtorchDRLControlTrainer.h.

Referenced by averageEpisodeReward(), and computeAverageEpisodeReward().

◆ _clip_param

const Real LibtorchDRLControlTrainer::_clip_param

protected

The clip parameter used while clamping the advantage value.

Definition at line 155 of file LibtorchDRLControlTrainer.h.

Referenced by trainController().

◆ _control_learning_rate

const Real LibtorchDRLControlTrainer::_control_learning_rate

protected

The learning rate for the optimization algorithm for the control.

Definition at line 149 of file LibtorchDRLControlTrainer.h.

Referenced by trainController().

◆ _control_names

const std::vector<ReporterName> LibtorchDRLControlTrainer::_control_names

protected

Control reporter names.

Definition at line 98 of file LibtorchDRLControlTrainer.h.

Referenced by LibtorchDRLControlTrainer().

◆ _control_nn

std::shared_ptr<Moose::LibtorchArtificialNeuralNet> LibtorchDRLControlTrainer::_control_nn

protected

Pointer to the control (or actor) neural net object.

Definition at line 187 of file LibtorchDRLControlTrainer.h.

Referenced by controlNeuralNet(), evaluateAction(), LibtorchDRLControlTrainer(), and trainController().

◆ _control_value_pointers

std::vector<const std::vector<Real> *> LibtorchDRLControlTrainer::_control_value_pointers

protected

Pointers to the current values of the control signals.

Definition at line 101 of file LibtorchDRLControlTrainer.h.

Referenced by execute(), and LibtorchDRLControlTrainer().

◆ _critic_learning_rate

const Real LibtorchDRLControlTrainer::_critic_learning_rate

protected

The learning rate for the optimization algorithm for the critic.

Definition at line 143 of file LibtorchDRLControlTrainer.h.

Referenced by trainController().

◆ _critic_nn

std::shared_ptr<Moose::LibtorchArtificialNeuralNet> LibtorchDRLControlTrainer::_critic_nn

protected

Pointer to the critic neural net object.

Definition at line 189 of file LibtorchDRLControlTrainer.h.

Referenced by evaluateValue(), LibtorchDRLControlTrainer(), and trainController().

◆ _decay_factor

const Real LibtorchDRLControlTrainer::_decay_factor

protected

Decaying factor that is used when calculating the return from the reward.

Definition at line 158 of file LibtorchDRLControlTrainer.h.

Referenced by computeRewardToGo().

◆ _filename_base

const std::string LibtorchDRLControlTrainer::_filename_base

protected

Name of the pytorch output file.

This is used for loading and storing already existing data

Definition at line 165 of file LibtorchDRLControlTrainer.h.

Referenced by LibtorchDRLControlTrainer().

◆ _input_data

std::vector<std::vector<Real> > LibtorchDRLControlTrainer::_input_data

protected

The gathered data from the reporters, each row represents one QoI, each column represents one time step

Definition at line 125 of file LibtorchDRLControlTrainer.h.

Referenced by execute(), and resetData().

◆ _input_tensor

torch::Tensor LibtorchDRLControlTrainer::_input_tensor

protected

Torch::tensor version of the input and action data.

Definition at line 195 of file LibtorchDRLControlTrainer.h.

Referenced by execute(), and trainController().

◆ _input_timesteps

const unsigned int LibtorchDRLControlTrainer::_input_timesteps

protected

Number of timesteps to fetch from the reporters to be the input of the neural nets.

Definition at line 116 of file LibtorchDRLControlTrainer.h.

Referenced by execute().

◆ _log_probability_data

std::vector<std::vector<Real> > LibtorchDRLControlTrainer::_log_probability_data

protected

Definition at line 127 of file LibtorchDRLControlTrainer.h.

Referenced by execute(), and resetData().

◆ _log_probability_names

const std::vector<ReporterName> LibtorchDRLControlTrainer::_log_probability_names

protected

Log probability reporter names.

Definition at line 104 of file LibtorchDRLControlTrainer.h.

Referenced by LibtorchDRLControlTrainer().

◆ _log_probability_tensor

torch::Tensor LibtorchDRLControlTrainer::_log_probability_tensor

protected

Definition at line 198 of file LibtorchDRLControlTrainer.h.

Referenced by execute(), and trainController().

◆ _log_probability_value_pointers

std::vector<const std::vector<Real> *> LibtorchDRLControlTrainer::_log_probability_value_pointers

protected

Pointers to the current values of the control log probabilities.

Definition at line 107 of file LibtorchDRLControlTrainer.h.

Referenced by execute(), and LibtorchDRLControlTrainer().

◆ _loss_print_frequency

const unsigned int LibtorchDRLControlTrainer::_loss_print_frequency

protected

The frequency the loss should be printed.

Definition at line 184 of file LibtorchDRLControlTrainer.h.

Referenced by trainController().

◆ _num_control_neurons_per_layer

const std::vector<unsigned int> LibtorchDRLControlTrainer::_num_control_neurons_per_layer

protected

Number of neurons within the hidden layers in the control neural net.

Definition at line 146 of file LibtorchDRLControlTrainer.h.

Referenced by LibtorchDRLControlTrainer().

◆ _num_critic_neurons_per_layer

const std::vector<unsigned int> LibtorchDRLControlTrainer::_num_critic_neurons_per_layer

protected

Number of neurons within the hidden layers in the critic neural net.

Definition at line 140 of file LibtorchDRLControlTrainer.h.

Referenced by LibtorchDRLControlTrainer().

◆ _num_epochs

const unsigned int LibtorchDRLControlTrainer::_num_epochs

protected

Number of epochs for the training of the neural nets.

Definition at line 137 of file LibtorchDRLControlTrainer.h.

Referenced by trainController().

◆ _num_inputs

unsigned int LibtorchDRLControlTrainer::_num_inputs

protected

Number of inputs for the control and critic neural nets.

Definition at line 119 of file LibtorchDRLControlTrainer.h.

Referenced by LibtorchDRLControlTrainer().

◆ _num_outputs

unsigned int LibtorchDRLControlTrainer::_num_outputs

protected

Number of outputs for the control neural network.

Definition at line 121 of file LibtorchDRLControlTrainer.h.

Referenced by LibtorchDRLControlTrainer().

◆ _output_data

std::vector<std::vector<Real> > LibtorchDRLControlTrainer::_output_data

protected

Definition at line 126 of file LibtorchDRLControlTrainer.h.

Referenced by execute(), and resetData().

◆ _output_tensor

torch::Tensor LibtorchDRLControlTrainer::_output_tensor

protected

Definition at line 196 of file LibtorchDRLControlTrainer.h.

Referenced by execute(), and trainController().

◆ _read_from_file

const bool LibtorchDRLControlTrainer::_read_from_file

protected

Switch indicating if an already existing neural net should be read from a file or not.

This can be used to load existing torch files (from previous MOOSE runs) for retraining and further manipulation.

Definition at line 170 of file LibtorchDRLControlTrainer.h.

Referenced by LibtorchDRLControlTrainer().

◆ _response_names

const std::vector<ReporterName> LibtorchDRLControlTrainer::_response_names

protected

Response reporter names.

Definition at line 86 of file LibtorchDRLControlTrainer.h.

Referenced by LibtorchDRLControlTrainer().

◆ _response_scaling_factors

const std::vector<Real> LibtorchDRLControlTrainer::_response_scaling_factors

protected

Scaling constants for the responses.

Definition at line 95 of file LibtorchDRLControlTrainer.h.

Referenced by getInputDataFromReporter(), and LibtorchDRLControlTrainer().

◆ _response_shift_factors

const std::vector<Real> LibtorchDRLControlTrainer::_response_shift_factors

protected

Shifting constants for the responses.

Definition at line 92 of file LibtorchDRLControlTrainer.h.

Referenced by getInputDataFromReporter(), and LibtorchDRLControlTrainer().

◆ _response_value_pointers

std::vector<const std::vector<Real> *> LibtorchDRLControlTrainer::_response_value_pointers

protected

Pointers to the current values of the responses.

Definition at line 89 of file LibtorchDRLControlTrainer.h.

Referenced by execute(), and LibtorchDRLControlTrainer().

◆ _return_data

std::vector<Real> LibtorchDRLControlTrainer::_return_data

protected

Definition at line 133 of file LibtorchDRLControlTrainer.h.

Referenced by computeRewardToGo(), execute(), and resetData().

◆ _return_tensor

torch::Tensor LibtorchDRLControlTrainer::_return_tensor

protected

Definition at line 197 of file LibtorchDRLControlTrainer.h.

Referenced by execute(), and trainController().

◆ _reward_data

std::vector<Real> LibtorchDRLControlTrainer::_reward_data

protected

The reward and return data. The return is calculated using _reward_data.

Definition at line 132 of file LibtorchDRLControlTrainer.h.

Referenced by computeAverageEpisodeReward(), execute(), and resetData().

◆ _reward_name

const ReporterName LibtorchDRLControlTrainer::_reward_name

protected

Reward reporter name.

Definition at line 110 of file LibtorchDRLControlTrainer.h.

◆ _reward_value_pointer

const std::vector<Real>* LibtorchDRLControlTrainer::_reward_value_pointer

protected

Pointer to the current values of the reward.

Definition at line 113 of file LibtorchDRLControlTrainer.h.

Referenced by computeRewardToGo(), and execute().

◆ _shift_outputs

const bool LibtorchDRLControlTrainer::_shift_outputs

protected

Currently, the controls are executed after the user objects at initial in MOOSE.

Using a shift can therefore realign the corresponding input-output values while reading the reporters.

Definition at line 175 of file LibtorchDRLControlTrainer.h.

Referenced by getInputDataFromReporter(), getOutputDataFromReporter(), and getRewardDataFromReporter().

◆ _standardize_advantage

const bool LibtorchDRLControlTrainer::_standardize_advantage

protected

Switch to enable the standardization of the advantages.

Definition at line 181 of file LibtorchDRLControlTrainer.h.

Referenced by trainController().

◆ _std

torch::Tensor LibtorchDRLControlTrainer::_std

protected

Standard deviation in a tensor format for sampling the actual control value.

Definition at line 192 of file LibtorchDRLControlTrainer.h.

Referenced by evaluateAction(), and LibtorchDRLControlTrainer().

◆ _update_counter

unsigned int LibtorchDRLControlTrainer::_update_counter

private

Counter for number of transient simulations that have been run before updating the controller.

Definition at line 234 of file LibtorchDRLControlTrainer.h.

Referenced by execute(), and resetData().

◆ _update_frequency

const unsigned int LibtorchDRLControlTrainer::_update_frequency

protected

Number of transients to run and collect data from before updating the controller neural net.

Definition at line 152 of file LibtorchDRLControlTrainer.h.

Referenced by resetData().

The documentation for this class was generated from the following files:

stochastic_tools/include/libtorch/surrogates/LibtorchDRLControlTrainer.h
stochastic_tools/src/libtorch/trainers/LibtorchDRLControlTrainer.C

Public Types

Public Member Functions

Static Public Member Functions

Public Attributes

Static Public Attributes

Protected Member Functions

Static Protected Member Functions

Protected Attributes

Static Protected Attributes

Private Member Functions

Private Attributes

Detailed Description

Constructor & Destructor Documentation

◆ LibtorchDRLControlTrainer()

Member Function Documentation

◆ averageEpisodeReward()

◆ computeAverageEpisodeReward()

◆ computeRewardToGo()

◆ controlNeuralNet()

◆ convertDataToTensor()

◆ declareModelData()

◆ evaluateAction()

◆ evaluateValue()

◆ execute()

◆ finalize()

◆ getInputDataFromReporter()

◆ getModelData()

◆ getModelDataFileName()

◆ getOutputDataFromReporter()

◆ getReporterPointers()

◆ getRewardDataFromReporter()

◆ hasModelData()

◆ initialize()

◆ modelMetaDataName()

◆ resetData()

◆ threadJoin()

◆ trainController()

◆ validParams()

Member Data Documentation

◆ _action_std

◆ _average_episode_reward

◆ _clip_param

◆ _control_learning_rate

◆ _control_names

◆ _control_nn

◆ _control_value_pointers

◆ _critic_learning_rate

◆ _critic_nn

◆ _decay_factor

◆ _filename_base

◆ _input_data

◆ _input_tensor

◆ _input_timesteps

◆ _log_probability_data

◆ _log_probability_names

◆ _log_probability_tensor

◆ _log_probability_value_pointers

◆ _loss_print_frequency

◆ _num_control_neurons_per_layer

◆ _num_critic_neurons_per_layer

◆ _num_epochs

◆ _num_inputs

◆ _num_outputs

◆ _output_data

◆ _output_tensor

◆ _read_from_file

◆ _response_names

◆ _response_scaling_factors

◆ _response_shift_factors

◆ _response_value_pointers

◆ _return_data

◆ _return_tensor

◆ _reward_data

◆ _reward_name

◆ _reward_value_pointer

◆ _shift_outputs

◆ _standardize_advantage

◆ _std

◆ _update_counter

◆ _update_frequency