GaussianProcessHandler
Overview
The GaussianProcessHandler collects the data structures and functions commonly needed by every object that uses or modifies Gaussian Processes, such as GaussianProcessTrainer, GaussianProcess, or Active Learning objects. It provides access to the covariance function and stores the covariance matrix together with its decomposition and the corresponding inverse action.
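Concretely, a consuming object funnels all covariance-related work through the handler. The following is a minimal sketch of the typical call sequence, assuming an existing covariance function pointer and training matrices; the free function and the names covar, params, data, and tuning_algorithm are placeholders, not part of the API:
#include "GaussianProcessHandler.h"

// Minimal sketch (not a complete MOOSE object); covar, params, data, and
// tuning_algorithm are assumed to be provided by the surrounding object.
void
sketchHandlerUsage(CovarianceFunctionBase * covar,
                   const RealEigenMatrix & params,
                   const RealEigenMatrix & data,
                   const MooseEnum & tuning_algorithm)
{
  StochasticTools::GaussianProcessHandler gp_handler;
  // Attach the covariance function (skipping hyperparameter tuning setup).
  gp_handler.linkCovarianceFunction(covar);
  // Build K, its Cholesky decomposition, and the inverse action on the data.
  gp_handler.setupCovarianceMatrix(params, data, tuning_algorithm, "", false);
  // The stored results are then available through accessors such as
  // gp_handler.getKCholeskyDecomp() and gp_handler.getKResultsSolve().
}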
Initializing
Any object that requires access to Gaussian Process-related functionality should hold the handler as a member variable; this is important for enabling restart capabilities:
StochasticTools::GaussianProcessHandler & _gp_handler;
(modules/stochastic_tools/include/surrogates/GaussianProcessTrainer.h)
An important step is the initialization of the covariance function. This can either be done using the initialize function, as in GaussianProcessTrainer:
_gp_handler.initialize(
getCovarianceFunctionByName(parameters.get<UserObjectName>("covariance_function")),
tune_parameters,
lower_bounds,
upper_bounds);
(modules/stochastic_tools/src/surrogates/GaussianProcessTrainer.C)
Or by linking it directly, as in GaussianProcess:
_gp_handler.linkCovarianceFunction(getCovarianceFunctionByName(covar_name));
(modules/stochastic_tools/src/surrogates/GaussianProcess.C)
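The restart capability mentioned above comes from the fact that, in GaussianProcessTrainer, the handler reference is bound to declared model data, so it is serialized and deserialized through the dataStore/dataLoad specializations shown at the end of GaussianProcessHandler.C:
_gp_handler(declareModelData<StochasticTools::GaussianProcessHandler>("_gp_handler")),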
Creating a covariance matrix
Once the covariance function has been added to the handler, one can use it to create a covariance matrix, either by using setupCovarianceMatrix as in GaussianProcessTrainer:
_gp_handler.setupCovarianceMatrix(
_training_params,
_training_data,
_tuning_algorithm,
isParamValid("tao_options") ? getParam<std::string>("tao_options") : "",
isParamValid("show_tao") ? getParam<bool>("show_tao") : false);
(modules/stochastic_tools/src/surrogates/GaussianProcessTrainer.C)
Or by simply calling the covariance matrix builder, as in GaussianProcess:
_gp_handler.getCovarFunction().computeCovarianceMatrix(K_test, test_points, test_points, true);
(modules/stochastic_tools/src/surrogates/GaussianProcess.C)
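For reference, GaussianProcess::evaluate (listed below) assembles these matrices into the standard GP predictive equations. With training inputs X, training data y, a test point x_*, and K = K(X, X), the returned mean and variance are
\[
m(x_*) = K(X, x_*)^\top K^{-1} y,
\qquad
\sigma^2(x_*) = K(x_*, x_*) - K(X, x_*)^\top K^{-1} K(X, x_*),
\]
where K^{-1} y is the stored inverse action (getKResultsSolve()) and the second term reuses the stored decomposition (getKCholeskyDecomp().solve(...)).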
Optimizing hyperparameters
As described in GaussianProcessTrainer, the covariance function may have hyperparameters that need to be optimized to obtain accurate predictions from the corresponding Gaussian Process.
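When TAO tuning is requested, the handler minimizes the negative log marginal likelihood of the n training samples; formFunctionGradient in GaussianProcessHandler.C (listed below) evaluates the objective and its gradient as
\[
f(\theta) = \frac{1}{2}\left(y^\top K^{-1} y + \log\det K + n \log 2\pi\right),
\qquad
\frac{\partial f}{\partial \theta_j} = -\frac{1}{2}\,\operatorname{tr}\!\left[\left(\alpha\alpha^\top - K^{-1}\right)\frac{\partial K}{\partial \theta_j}\right],
\qquad \alpha = K^{-1} y.
\]
These hyperparameters can then be optimized during the generation of the covariance matrix: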
_gp_handler.setupCovarianceMatrix(
_training_params,
_training_data,
_tuning_algorithm,
isParamValid("tao_options") ? getParam<std::string>("tao_options") : "",
isParamValid("show_tao") ? getParam<bool>("show_tao") : false);
(modules/stochastic_tools/src/surrogates/GaussianProcessTrainer.C)
Or by simply calling the optimizer:
if (tuneHyperParamsTAO(training_params, training_data, tao_options, show_tao))
(modules/stochastic_tools/src/utils/GaussianProcessHandler.C)
(modules/stochastic_tools/include/surrogates/GaussianProcessTrainer.h)
// This file is part of the MOOSE framework
// https://www.mooseframework.org
//
// All rights reserved, see COPYRIGHT for full restrictions
// https://github.com/idaholab/moose/blob/master/COPYRIGHT
//
// Licensed under LGPL 2.1, please see LICENSE for details
// https://www.gnu.org/licenses/lgpl-2.1.html
#pragma once
#include "SurrogateTrainer.h"
#include "Standardizer.h"
#include <Eigen/Dense>
#include "Distribution.h"
#include "CovarianceFunctionBase.h"
#include "CovarianceInterface.h"
#include "GaussianProcessHandler.h"
class GaussianProcessTrainer : public SurrogateTrainer, public CovarianceInterface
{
public:
static InputParameters validParams();
GaussianProcessTrainer(const InputParameters & parameters);
virtual void preTrain() override;
virtual void train() override;
virtual void postTrain() override;
StochasticTools::GaussianProcessHandler & gpHandler() { return _gp_handler; }
const StochasticTools::GaussianProcessHandler & getGPHandler() const { return _gp_handler; }
private:
StochasticTools::GaussianProcessHandler & _gp_handler;
/// Parameters (x) used for training, along with statistics
RealEigenMatrix & _training_params;
/// Switch for training param (x) standardization
bool _standardize_params;
/// Switch for training data(y) standardization
bool _standardize_data;
/// Flag to toggle hyperparameter tuning/optimization
bool _do_tuning;
/// Enum which contains the hyperparameter optimization type requested by the user
MooseEnum _tuning_algorithm;
/// Data from the current sampler row
const std::vector<Real> & _sampler_row;
/// Response value
const Real & _rval;
/// Predictor values from reporters
std::vector<const Real *> _pvals;
/// Columns from sampler for predictors
std::vector<unsigned int> _pcols;
/// Total number of parameters/dimensions
unsigned int _n_params;
/// Data (y) used for training
RealEigenMatrix _training_data;
};
(modules/stochastic_tools/src/surrogates/GaussianProcessTrainer.C)
// This file is part of the MOOSE framework
// https://www.mooseframework.org
//
// All rights reserved, see COPYRIGHT for full restrictions
// https://github.com/idaholab/moose/blob/master/COPYRIGHT
//
// Licensed under LGPL 2.1, please see LICENSE for details
// https://www.gnu.org/licenses/lgpl-2.1.html
#include "GaussianProcessTrainer.h"
#include "Sampler.h"
#include "CartesianProduct.h"
#include <petsctao.h>
#include <petscdmda.h>
#include "libmesh/petsc_vector.h"
#include "libmesh/petsc_matrix.h"
#include <math.h>
registerMooseObject("StochasticToolsApp", GaussianProcessTrainer);
InputParameters
GaussianProcessTrainer::validParams()
{
InputParameters params = SurrogateTrainer::validParams();
params.addClassDescription(
"Provides data preperation and training for a Gaussian Process surrogate model.");
params.addRequiredParam<ReporterName>(
"response", "Reporter value of response results, can be vpp with <vpp_name>/<vector_name>.");
params.addParam<std::vector<ReporterName>>(
"predictors",
std::vector<ReporterName>(),
"Reporter values used as the independent random variables, If 'predictors' and "
"'predictor_cols' are both empty, all sampler columns are used.");
params.addParam<std::vector<unsigned int>>(
"predictor_cols",
std::vector<unsigned int>(),
"Sampler columns used as the independent random variables, If 'predictors' and "
"'predictor_cols' are both empty, all sampler columns are used.");
params.addRequiredParam<UserObjectName>("covariance_function", "Name of covariance function.");
params.addParam<bool>(
"standardize_params", true, "Standardize (center and scale) training parameters (x values)");
params.addParam<bool>(
"standardize_data", true, "Standardize (center and scale) training data (y values)");
// Already preparing to use ADAM here
MooseEnum tuning_type("tao none", "none");
params.addParam<MooseEnum>(
"tuning_algorithm", tuning_type, "Hyper parameter optimizaton algorithm");
params.addParam<std::string>("tao_options",
"Command line options for PETSc/TAO hyperparameter optimization");
params.addParam<bool>("show_tao", "Switch to show TAO solver results");
params.addParam<std::vector<std::string>>("tune_parameters",
"Select hyperparameters to be tuned");
params.addParam<std::vector<Real>>("tuning_min", "Minimum allowable tuning value");
params.addParam<std::vector<Real>>("tuning_max", "Maximum allowable tuning value");
return params;
}
GaussianProcessTrainer::GaussianProcessTrainer(const InputParameters & parameters)
: SurrogateTrainer(parameters),
CovarianceInterface(parameters),
_gp_handler(declareModelData<StochasticTools::GaussianProcessHandler>("_gp_handler")),
_training_params(declareModelData<RealEigenMatrix>("_training_params")),
_standardize_params(getParam<bool>("standardize_params")),
_standardize_data(getParam<bool>("standardize_data")),
_do_tuning(isParamValid("tune_parameters")),
_tuning_algorithm(getParam<MooseEnum>("tuning_algorithm")),
_sampler_row(getSamplerData()),
_rval(getTrainingData<Real>(getParam<ReporterName>("response"))),
_pvals(getParam<std::vector<ReporterName>>("predictors").size()),
_pcols(getParam<std::vector<unsigned int>>("predictor_cols")),
_n_params((_pvals.empty() && _pcols.empty()) ? _sampler.getNumberOfCols()
: (_pvals.size() + _pcols.size()))
{
const auto & pnames = getParam<std::vector<ReporterName>>("predictors");
for (unsigned int i = 0; i < pnames.size(); ++i)
_pvals[i] = &getTrainingData<Real>(pnames[i]);
// If predictors and predictor_cols are empty, use all sampler columns
if (_pvals.empty() && _pcols.empty())
{
_pcols.resize(_sampler.getNumberOfCols());
std::iota(_pcols.begin(), _pcols.end(), 0);
}
if (_do_tuning && _tuning_algorithm == "none")
paramError("tuning_algorithm",
"No tuning algorithm is selected for the hyper parameter optimization!");
std::vector<std::string> tune_parameters(getParam<std::vector<std::string>>("tune_parameters"));
// Error Checking
if (isParamValid("tuning_min") &&
(getParam<std::vector<Real>>("tuning_min").size() != tune_parameters.size()))
mooseError("tuning_min size does not match tune_parameters");
if (isParamValid("tuning_max") &&
(getParam<std::vector<Real>>("tuning_max").size() != tune_parameters.size()))
mooseError("tuning_max size does not match tune_parameters");
std::vector<Real> lower_bounds, upper_bounds;
if (isParamValid("tuning_min"))
lower_bounds = getParam<std::vector<Real>>("tuning_min");
if (isParamValid("tuning_max"))
upper_bounds = getParam<std::vector<Real>>("tuning_max");
_gp_handler.initialize(
getCovarianceFunctionByName(parameters.get<UserObjectName>("covariance_function")),
tune_parameters,
lower_bounds,
upper_bounds);
}
void
GaussianProcessTrainer::preTrain()
{
_training_params.setZero(_sampler.getNumberOfRows(), _n_params);
_training_data.setZero(_sampler.getNumberOfRows(), 1);
}
void
GaussianProcessTrainer::train()
{
unsigned int d = 0;
for (const auto & val : _pvals)
_training_params(_row, d++) = *val;
for (const auto & col : _pcols)
_training_params(_row, d++) = _sampler_row[col];
// Loading result data from response reporter
_training_data(_row, 0) = _rval;
}
void
GaussianProcessTrainer::postTrain()
{
for (unsigned int ii = 0; ii < _training_params.rows(); ++ii)
{
for (unsigned int jj = 0; jj < _training_params.cols(); ++jj)
gatherSum(_training_params(ii, jj));
gatherSum(_training_data(ii, 0));
}
// Standardize (center and scale) training params
if (_standardize_params)
_gp_handler.standardizeParameters(_training_params);
// if not standardizing data set mean=0, std=1 for use in surrogate
else
_gp_handler.paramStandardizer().set(0, 1, _n_params);
// Standardize (center and scale) training data
if (_standardize_data)
_gp_handler.standardizeData(_training_data);
// if not standardizing data set mean=0, std=1 for use in surrogate
else
_gp_handler.dataStandardizer().set(0, 1, _n_params);
// Setup the covariance
_gp_handler.setupCovarianceMatrix(
_training_params,
_training_data,
_tuning_algorithm,
isParamValid("tao_options") ? getParam<std::string>("tao_options") : "",
isParamValid("show_tao") ? getParam<bool>("show_tao") : false);
}
(modules/stochastic_tools/src/surrogates/GaussianProcess.C)
// This file is part of the MOOSE framework
// https://www.mooseframework.org
//
// All rights reserved, see COPYRIGHT for full restrictions
// https://github.com/idaholab/moose/blob/master/COPYRIGHT
//
// Licensed under LGPL 2.1, please see LICENSE for details
// https://www.gnu.org/licenses/lgpl-2.1.html
#include "GaussianProcess.h"
#include "Sampler.h"
#include "CovarianceFunctionBase.h"
registerMooseObject("StochasticToolsApp", GaussianProcess);
InputParameters
GaussianProcess::validParams()
{
InputParameters params = SurrogateModel::validParams();
params.addClassDescription("Computes and evaluates Gaussian Process surrogate model.");
return params;
}
GaussianProcess::GaussianProcess(const InputParameters & parameters)
: SurrogateModel(parameters),
CovarianceInterface(parameters),
_gp_handler(
isParamValid("trainer")
? dynamic_cast<GaussianProcessTrainer &>(getSurrogateTrainer("trainer")).gpHandler()
: setModelData<StochasticTools::GaussianProcessHandler>("_gp_handler")),
_training_params(getModelData<RealEigenMatrix>("_training_params"))
{
}
void
GaussianProcess::setupCovariance(UserObjectName covar_name)
{
if (_gp_handler.getCovarFunctionPtr() != nullptr)
::mooseError("Attempting to redefine covariance function using setupCovariance.");
_gp_handler.linkCovarianceFunction(getCovarianceFunctionByName(covar_name));
}
Real
GaussianProcess::evaluate(const std::vector<Real> & x) const
{
// Overload of evaluate to maintain general compatibility. Only returns the mean
Real dummy = 0;
return this->evaluate(x, dummy);
}
Real
GaussianProcess::evaluate(const std::vector<Real> & x, Real & std_dev) const
{
unsigned int _n_params = _training_params.cols();
unsigned int _num_tests = 1;
mooseAssert(x.size() == _n_params,
"Number of parameters provided for evaluation does not match number of parameters "
"used for training.");
RealEigenMatrix test_points(_num_tests, _n_params);
for (unsigned int ii = 0; ii < _n_params; ++ii)
test_points(0, ii) = x[ii];
_gp_handler.getParamStandardizer().getStandardized(test_points);
RealEigenMatrix K_train_test(_training_params.rows(), test_points.rows());
_gp_handler.getCovarFunction().computeCovarianceMatrix(
K_train_test, _training_params, test_points, false);
RealEigenMatrix K_test(test_points.rows(), test_points.rows());
_gp_handler.getCovarFunction().computeCovarianceMatrix(K_test, test_points, test_points, true);
// Compute the predicted mean value (centered)
RealEigenMatrix pred_value = (K_train_test.transpose() * _gp_handler.getKResultsSolve());
// De-center/scale the value and store for return
_gp_handler.getDataStandardizer().getDestandardized(pred_value);
RealEigenMatrix pred_var =
K_test - (K_train_test.transpose() * _gp_handler.getKCholeskyDecomp().solve(K_train_test));
// Variance computed, take sqrt for standard deviation, scale up by training data std and store
RealEigenMatrix std_dev_mat = pred_var.array().sqrt();
_gp_handler.getDataStandardizer().getDescaled(std_dev_mat);
std_dev = std_dev_mat(0, 0);
return pred_value(0, 0);
}
(modules/stochastic_tools/src/utils/GaussianProcessHandler.C)
// This file is part of the MOOSE framework
// https://www.mooseframework.org
//
// All rights reserved, see COPYRIGHT for full restrictions
// https://github.com/idaholab/moose/blob/master/COPYRIGHT
//
// Licensed under LGPL 2.1, please see LICENSE for details
// https://www.gnu.org/licenses/lgpl-2.1.html
#include "GaussianProcessHandler.h"
#include "FEProblemBase.h"
#include <petsctao.h>
#include <petscdmda.h>
#include "libmesh/petsc_vector.h"
#include "libmesh/petsc_matrix.h"
#include <math.h>
namespace StochasticTools
{
GaussianProcessHandler::GaussianProcessHandler() : _tao_comm(MPI_COMM_SELF) {}
void
GaussianProcessHandler::initialize(CovarianceFunctionBase * covariance_function,
const std::vector<std::string> params_to_tune,
std::vector<Real> min,
std::vector<Real> max)
{
linkCovarianceFunction(covariance_function);
generateTuningMap(params_to_tune, min, max);
}
void
GaussianProcessHandler::linkCovarianceFunction(CovarianceFunctionBase * covariance_function)
{
_covariance_function = covariance_function;
_covar_type = _covariance_function->type();
}
void
GaussianProcessHandler::setupCovarianceMatrix(const RealEigenMatrix & training_params,
const RealEigenMatrix & training_data,
MooseEnum opt_type,
std::string tao_options,
bool show_tao)
{
_K.resize(training_params.rows(), training_params.rows());
// This already accounts for the future addition of an ADAM optimizer
if (opt_type == "tao")
if (tuneHyperParamsTAO(training_params, training_data, tao_options, show_tao))
::mooseError("PETSc/TAO error in hyperparameter tuning.");
_covariance_function->computeCovarianceMatrix(_K, training_params, training_params, true);
// Compute the Cholesky decomposition and inverse action of the covariance matrix
setupStoredMatrices(training_data);
_covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
}
void
GaussianProcessHandler::setupStoredMatrices(const RealEigenMatrix & input)
{
_K_cho_decomp = _K.llt();
_K_results_solve = _K_cho_decomp.solve(input);
}
void
GaussianProcessHandler::generateTuningMap(const std::vector<std::string> params_to_tune,
std::vector<Real> min_vector,
std::vector<Real> max_vector)
{
_num_tunable = 0;
bool upper_bounds_specified = false;
bool lower_bounds_specified = false;
if (min_vector.size())
lower_bounds_specified = true;
if (max_vector.size())
upper_bounds_specified = true;
for (unsigned int param_i = 0; param_i < params_to_tune.size(); ++param_i)
{
const auto & hp = params_to_tune[param_i];
if (_covariance_function->isTunable(hp))
{
unsigned int size;
Real min;
Real max;
// Get size and default min/max
_covariance_function->getTuningData(hp, size, min, max);
// Check for overridden min/max
min = lower_bounds_specified ? min_vector[param_i] : min;
max = upper_bounds_specified ? max_vector[param_i] : max;
// Save data in tuple
_tuning_data[hp] = std::make_tuple(_num_tunable, size, min, max);
_num_tunable += size;
}
}
}
void
GaussianProcessHandler::standardizeParameters(RealEigenMatrix & data, bool keep_moments)
{
if (!keep_moments)
_param_standardizer.computeSet(data);
_param_standardizer.getStandardized(data);
}
void
GaussianProcessHandler::standardizeData(RealEigenMatrix & data, bool keep_moments)
{
if (!keep_moments)
_data_standardizer.computeSet(data);
_data_standardizer.getStandardized(data);
}
PetscErrorCode
GaussianProcessHandler::tuneHyperParamsTAO(const RealEigenMatrix & training_params,
const RealEigenMatrix & training_data,
std::string tao_options,
bool show_tao)
{
PetscErrorCode ierr;
Tao tao;
_training_params = &training_params;
_training_data = &training_data;
// Setup Tao optimization problem
ierr = TaoCreate(_tao_comm.get(), &tao);
CHKERRQ(ierr);
ierr = PetscOptionsSetValue(NULL, "-tao_type", "bncg");
CHKERRQ(ierr);
ierr = PetscOptionsInsertString(NULL, tao_options.c_str());
CHKERRQ(ierr);
ierr = TaoSetFromOptions(tao);
CHKERRQ(ierr);
// Define a PETSc vector to hold the tunable hyper-params
libMesh::PetscVector<Number> theta(_tao_comm, _num_tunable);
ierr = formInitialGuessTAO(theta.vec());
CHKERRQ(ierr);
ierr = TaoSetInitialVector(tao, theta.vec());
CHKERRQ(ierr);
// Get Hyperparameter bounds.
libMesh::PetscVector<Number> lower(_tao_comm, _num_tunable);
libMesh::PetscVector<Number> upper(_tao_comm, _num_tunable);
buildHyperParamBoundsTAO(lower, upper);
CHKERRQ(ierr);
ierr = TaoSetVariableBounds(tao, lower.vec(), upper.vec());
CHKERRQ(ierr);
// Set Objective and Gradient Callback
ierr = TaoSetObjectiveAndGradientRoutine(tao, formFunctionGradientWrapper, (void *)this);
CHKERRQ(ierr);
// Solve
ierr = TaoSolve(tao);
CHKERRQ(ierr);
//
if (show_tao)
{
ierr = TaoView(tao, PETSC_VIEWER_STDOUT_WORLD);
theta.print();
}
_covariance_function->loadHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
ierr = TaoDestroy(&tao);
CHKERRQ(ierr);
return 0;
}
PetscErrorCode
GaussianProcessHandler::formInitialGuessTAO(Vec theta_vec)
{
libMesh::PetscVector<Number> theta(theta_vec, _tao_comm);
_covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
mapToPetscVec(_tuning_data, _hyperparam_map, _hyperparam_vec_map, theta);
return 0;
}
void
GaussianProcessHandler::buildHyperParamBoundsTAO(libMesh::PetscVector<Number> & theta_l,
libMesh::PetscVector<Number> & theta_u) const
{
for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
{
for (unsigned int ii = 0; ii < std::get<1>(iter->second); ++ii)
{
theta_l.set(std::get<0>(iter->second) + ii, std::get<2>(iter->second));
theta_u.set(std::get<0>(iter->second) + ii, std::get<3>(iter->second));
}
}
}
PetscErrorCode
GaussianProcessHandler::formFunctionGradientWrapper(
Tao tao, Vec theta_vec, PetscReal * f, Vec grad_vec, void * ptr)
{
GaussianProcessHandler * GP_ptr = (GaussianProcessHandler *)ptr;
GP_ptr->formFunctionGradient(tao, theta_vec, f, grad_vec);
return 0;
}
void
GaussianProcessHandler::formFunctionGradient(Tao /*tao*/,
Vec theta_vec,
PetscReal * f,
Vec grad_vec)
{
libMesh::PetscVector<Number> theta(theta_vec, _tao_comm);
libMesh::PetscVector<Number> grad(grad_vec, _tao_comm);
petscVecToMap(_tuning_data, _hyperparam_map, _hyperparam_vec_map, theta);
_covariance_function->loadHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
_covariance_function->computeCovarianceMatrix(_K, *_training_params, *_training_params, true);
setupStoredMatrices(*_training_data);
// Compute the gradient of the objective with respect to each tunable hyperparameter
RealEigenMatrix dKdhp(_training_params->rows(), _training_params->rows());
RealEigenMatrix alpha = _K_results_solve * _K_results_solve.transpose();
for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
{
std::string hyper_param_name = iter->first;
for (unsigned int ii = 0; ii < std::get<1>(iter->second); ++ii)
{
_covariance_function->computedKdhyper(dKdhp, *_training_params, hyper_param_name, ii);
RealEigenMatrix tmp = alpha * dKdhp - _K_cho_decomp.solve(dKdhp);
grad.set(std::get<0>(iter->second) + ii, -tmp.trace() / 2.0);
}
}
//
Real log_likelihood = 0;
log_likelihood += -(_training_data->transpose() * _K_results_solve)(0, 0);
log_likelihood += -std::log(_K.determinant());
log_likelihood += -_training_data->rows() * std::log(2 * M_PI);
log_likelihood = -log_likelihood / 2;
*f = log_likelihood;
}
void
GaussianProcessHandler::mapToPetscVec(
const std::unordered_map<std::string, std::tuple<unsigned int, unsigned int, Real, Real>> &
tuning_data,
const std::unordered_map<std::string, Real> & scalar_map,
const std::unordered_map<std::string, std::vector<Real>> & vector_map,
libMesh::PetscVector<Number> & petsc_vec)
{
for (auto iter = tuning_data.begin(); iter != tuning_data.end(); ++iter)
{
std::string param_name = iter->first;
const auto scalar_it = scalar_map.find(param_name);
if (scalar_it != scalar_map.end())
petsc_vec.set(std::get<0>(iter->second), scalar_it->second);
else
{
const auto vector_it = vector_map.find(param_name);
if (vector_it != vector_map.end())
for (unsigned int ii = 0; ii < std::get<1>(iter->second); ++ii)
petsc_vec.set(std::get<0>(iter->second) + ii, (vector_it->second)[ii]);
}
}
}
void
GaussianProcessHandler::petscVecToMap(
const std::unordered_map<std::string, std::tuple<unsigned int, unsigned int, Real, Real>> &
tuning_data,
std::unordered_map<std::string, Real> & scalar_map,
std::unordered_map<std::string, std::vector<Real>> & vector_map,
const libMesh::PetscVector<Number> & petsc_vec)
{
for (auto iter = tuning_data.begin(); iter != tuning_data.end(); ++iter)
{
std::string param_name = iter->first;
if (scalar_map.find(param_name) != scalar_map.end())
scalar_map[param_name] = petsc_vec(std::get<0>(iter->second));
else if (vector_map.find(param_name) != vector_map.end())
for (unsigned int ii = 0; ii < std::get<1>(iter->second); ++ii)
vector_map[param_name][ii] = petsc_vec(std::get<0>(iter->second) + ii);
}
}
} // StochasticTools namespace
template <>
void
dataStore(std::ostream & stream, Eigen::LLT<RealEigenMatrix> & decomp, void * context)
{
// Store the L matrix as opposed to the full matrix to avoid compounding
// roundoff error and decomposition error
RealEigenMatrix L(decomp.matrixL());
dataStore(stream, L, context);
}
template <>
void
dataLoad(std::istream & stream, Eigen::LLT<RealEigenMatrix> & decomp, void * context)
{
RealEigenMatrix L;
dataLoad(stream, L, context);
decomp.compute(L * L.transpose());
}
template <>
void
dataStore(std::ostream & stream, StochasticTools::GaussianProcessHandler & gp_utils, void * context)
{
dataStore(stream, gp_utils.hyperparamMap(), context);
dataStore(stream, gp_utils.hyperparamVectorMap(), context);
dataStore(stream, gp_utils.covarType(), context);
dataStore(stream, gp_utils.K(), context);
dataStore(stream, gp_utils.KResultsSolve(), context);
dataStore(stream, gp_utils.KCholeskyDecomp(), context);
dataStore(stream, gp_utils.paramStandardizer(), context);
dataStore(stream, gp_utils.dataStandardizer(), context);
}
template <>
void
dataLoad(std::istream & stream, StochasticTools::GaussianProcessHandler & gp_utils, void * context)
{
dataLoad(stream, gp_utils.hyperparamMap(), context);
dataLoad(stream, gp_utils.hyperparamVectorMap(), context);
dataLoad(stream, gp_utils.covarType(), context);
dataLoad(stream, gp_utils.K(), context);
dataLoad(stream, gp_utils.KResultsSolve(), context);
dataLoad(stream, gp_utils.KCholeskyDecomp(), context);
dataLoad(stream, gp_utils.paramStandardizer(), context);
dataLoad(stream, gp_utils.dataStandardizer(), context);
}