GaussianProcessHandler
Overview
The GaussianProcessHandler collects the data structures and functions commonly needed by every object that uses or modifies Gaussian Processes, such as GaussianProcessTrainer, GaussianProcess, or Active Learning objects. It provides access to the covariance function and stores the covariance matrix together with its decomposition and the corresponding inverse action.
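Concretely, a consuming object funnels all covariance-related work through the handler. The following is a minimal sketch of the typical call sequence, assuming an existing covariance function pointer and training matrices; the free function and the names covar, params, data, and tuning_algorithm are placeholders, not part of the API:
#include "GaussianProcessHandler.h"

// Minimal sketch (not a complete MOOSE object); covar, params, data, and
// tuning_algorithm are assumed to be provided by the surrounding object.
void
sketchHandlerUsage(CovarianceFunctionBase * covar,
                   const RealEigenMatrix & params,
                   const RealEigenMatrix & data,
                   const MooseEnum & tuning_algorithm)
{
  StochasticTools::GaussianProcessHandler gp_handler;
  // Attach the covariance function (skipping hyperparameter tuning setup).
  gp_handler.linkCovarianceFunction(covar);
  // Build K, its Cholesky decomposition, and the inverse action on the data.
  gp_handler.setupCovarianceMatrix(params, data, tuning_algorithm, "", false);
  // The stored results are then available through accessors such as
  // gp_handler.getKCholeskyDecomp() and gp_handler.getKResultsSolve().
}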
Initializing
Any object that requires access to Gaussian Process-related functionality should hold the handler as a member variable; this is important for enabling restart capabilities:
StochasticTools::GaussianProcessHandler & _gp_handler;
(modules/stochastic_tools/include/surrogates/GaussianProcessTrainer.h)
An important step is the initialization of the covariance function. This can either be done using the initialize function, as in GaussianProcessTrainer:
_gp_handler.initialize(
getCovarianceFunctionByName(parameters.get<UserObjectName>("covariance_function")),
tune_parameters,
lower_bounds,
upper_bounds);
(modules/stochastic_tools/src/surrogates/GaussianProcessTrainer.C)
Or by linking it directly, as in GaussianProcess:
_gp_handler.linkCovarianceFunction(getCovarianceFunctionByName(covar_name));
(modules/stochastic_tools/src/surrogates/GaussianProcess.C)
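The restart capability mentioned above comes from the fact that, in GaussianProcessTrainer, the handler reference is bound to declared model data, so it is serialized and deserialized through the dataStore/dataLoad specializations shown at the end of GaussianProcessHandler.C:
_gp_handler(declareModelData<StochasticTools::GaussianProcessHandler>("_gp_handler")),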
Creating a covariance matrix
Once the covariance function has been added to the handler, one can use it to create a covariance matrix, either by using setupCovarianceMatrix as in GaussianProcessTrainer:
_gp_handler.setupCovarianceMatrix(
_training_params,
_training_data,
_tuning_algorithm,
isParamValid("tao_options") ? getParam<std::string>("tao_options") : "",
isParamValid("show_tao") ? getParam<bool>("show_tao") : false);
(modules/stochastic_tools/src/surrogates/GaussianProcessTrainer.C)
Or by simply calling the covariance matrix builder, as in GaussianProcess:
_gp_handler.getCovarFunction().computeCovarianceMatrix(K_test, test_points, test_points, true);
(modules/stochastic_tools/src/surrogates/GaussianProcess.C)
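For reference, GaussianProcess::evaluate (listed below) assembles these matrices into the standard GP predictive equations. With training inputs X, training data y, a test point x_*, and K = K(X, X), the returned mean and variance are
\[
m(x_*) = K(X, x_*)^\top K^{-1} y,
\qquad
\sigma^2(x_*) = K(x_*, x_*) - K(X, x_*)^\top K^{-1} K(X, x_*),
\]
where K^{-1} y is the stored inverse action (getKResultsSolve()) and the second term reuses the stored decomposition (getKCholeskyDecomp().solve(...)).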
Optimizing hyperparameters
As described in GaussianProcessTrainer, the covariance function may have hyperparameters that need to be optimized to obtain accurate predictions from the corresponding Gaussian Process.
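When TAO tuning is requested, the handler minimizes the negative log marginal likelihood of the n training samples; formFunctionGradient in GaussianProcessHandler.C (listed below) evaluates the objective and its gradient as
\[
f(\theta) = \frac{1}{2}\left(y^\top K^{-1} y + \log\det K + n \log 2\pi\right),
\qquad
\frac{\partial f}{\partial \theta_j} = -\frac{1}{2}\,\operatorname{tr}\!\left[\left(\alpha\alpha^\top - K^{-1}\right)\frac{\partial K}{\partial \theta_j}\right],
\qquad \alpha = K^{-1} y.
\]
These hyperparameters can then be optimized during the generation of the covariance matrix: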
_gp_handler.setupCovarianceMatrix(
_training_params,
_training_data,
_tuning_algorithm,
isParamValid("tao_options") ? getParam<std::string>("tao_options") : "",
isParamValid("show_tao") ? getParam<bool>("show_tao") : false);
(modules/stochastic_tools/src/surrogates/GaussianProcessTrainer.C)
Or by simply calling the optimizer:
if (tuneHyperParamsTAO(training_params, training_data, tao_options, show_tao))
(modules/stochastic_tools/src/utils/GaussianProcessHandler.C)
(modules/stochastic_tools/include/surrogates/GaussianProcessTrainer.h)
// This file is part of the MOOSE framework
// https://www.mooseframework.org
//
// All rights reserved, see COPYRIGHT for full restrictions
// https://github.com/idaholab/moose/blob/master/COPYRIGHT
//
// Licensed under LGPL 2.1, please see LICENSE for details
// https://www.gnu.org/licenses/lgpl-2.1.html
#pragma once
#include "SurrogateTrainer.h"
#include "Standardizer.h"
#include <Eigen/Dense>
#include "Distribution.h"
#include "CovarianceFunctionBase.h"
#include "CovarianceInterface.h"
#include "GaussianProcessHandler.h"
class GaussianProcessTrainer : public SurrogateTrainer, public CovarianceInterface
{
public:
static InputParameters validParams();
GaussianProcessTrainer(const InputParameters & parameters);
virtual void preTrain() override;
virtual void train() override;
virtual void postTrain() override;
StochasticTools::GaussianProcessHandler & gpHandler() { return _gp_handler; }
const StochasticTools::GaussianProcessHandler & getGPHandler() const { return _gp_handler; }
private:
StochasticTools::GaussianProcessHandler & _gp_handler;
/// Parameters (x) used for training, along with statistics
RealEigenMatrix & _training_params;
/// Switch for training param (x) standardization
bool _standardize_params;
/// Switch for training data(y) standardization
bool _standardize_data;
/// Flag to toggle hyperparameter tuning/optimization
bool _do_tuning;
/// Enum which contains the hyperparameter optimization type requested by the user
MooseEnum _tuning_algorithm;
/// Data from the current sampler row
const std::vector<Real> & _sampler_row;
/// Response value
const Real & _rval;
/// Predictor values from reporters
std::vector<const Real *> _pvals;
/// Columns from sampler for predictors
std::vector<unsigned int> _pcols;
/// Total number of parameters/dimensions
unsigned int _n_params;
/// Data (y) used for training
RealEigenMatrix _training_data;
};
(modules/stochastic_tools/src/surrogates/GaussianProcessTrainer.C)
// This file is part of the MOOSE framework
// https://www.mooseframework.org
//
// All rights reserved, see COPYRIGHT for full restrictions
// https://github.com/idaholab/moose/blob/master/COPYRIGHT
//
// Licensed under LGPL 2.1, please see LICENSE for details
// https://www.gnu.org/licenses/lgpl-2.1.html
#include "GaussianProcessTrainer.h"
#include "Sampler.h"
#include "CartesianProduct.h"
#include <petsctao.h>
#include <petscdmda.h>
#include "libmesh/petsc_vector.h"
#include "libmesh/petsc_matrix.h"
#include <math.h>
registerMooseObject("StochasticToolsApp", GaussianProcessTrainer);
InputParameters
GaussianProcessTrainer::validParams()
{
InputParameters params = SurrogateTrainer::validParams();
params.addClassDescription(
"Provides data preperation and training for a Gaussian Process surrogate model.");
params.addRequiredParam<ReporterName>(
"response", "Reporter value of response results, can be vpp with <vpp_name>/<vector_name>.");
params.addParam<std::vector<ReporterName>>(
"predictors",
std::vector<ReporterName>(),
"Reporter values used as the independent random variables, If 'predictors' and "
"'predictor_cols' are both empty, all sampler columns are used.");
params.addParam<std::vector<unsigned int>>(
"predictor_cols",
std::vector<unsigned int>(),
"Sampler columns used as the independent random variables, If 'predictors' and "
"'predictor_cols' are both empty, all sampler columns are used.");
params.addRequiredParam<UserObjectName>("covariance_function", "Name of covariance function.");
params.addParam<bool>(
"standardize_params", true, "Standardize (center and scale) training parameters (x values)");
params.addParam<bool>(
"standardize_data", true, "Standardize (center and scale) training data (y values)");
// Already preparing to use ADAM here
MooseEnum tuning_type("tao none", "none");
params.addParam<MooseEnum>(
"tuning_algorithm", tuning_type, "Hyper parameter optimizaton algorithm");
params.addParam<std::string>("tao_options",
"Command line options for PETSc/TAO hyperparameter optimization");
params.addParam<bool>("show_tao", "Switch to show TAO solver results");
params.addParam<std::vector<std::string>>("tune_parameters",
"Select hyperparameters to be tuned");
params.addParam<std::vector<Real>>("tuning_min", "Minimum allowable tuning value");
params.addParam<std::vector<Real>>("tuning_max", "Maximum allowable tuning value");
return params;
}
GaussianProcessTrainer::GaussianProcessTrainer(const InputParameters & parameters)
: SurrogateTrainer(parameters),
CovarianceInterface(parameters),
_gp_handler(declareModelData<StochasticTools::GaussianProcessHandler>("_gp_handler")),
_training_params(declareModelData<RealEigenMatrix>("_training_params")),
_standardize_params(getParam<bool>("standardize_params")),
_standardize_data(getParam<bool>("standardize_data")),
_do_tuning(isParamValid("tune_parameters")),
_tuning_algorithm(getParam<MooseEnum>("tuning_algorithm")),
_sampler_row(getSamplerData()),
_rval(getTrainingData<Real>(getParam<ReporterName>("response"))),
_pvals(getParam<std::vector<ReporterName>>("predictors").size()),
_pcols(getParam<std::vector<unsigned int>>("predictor_cols")),
_n_params((_pvals.empty() && _pcols.empty()) ? _sampler.getNumberOfCols()
: (_pvals.size() + _pcols.size()))
{
const auto & pnames = getParam<std::vector<ReporterName>>("predictors");
for (unsigned int i = 0; i < pnames.size(); ++i)
_pvals[i] = &getTrainingData<Real>(pnames[i]);
// If predictors and predictor_cols are empty, use all sampler columns
if (_pvals.empty() && _pcols.empty())
{
_pcols.resize(_sampler.getNumberOfCols());
std::iota(_pcols.begin(), _pcols.end(), 0);
}
if (_do_tuning && _tuning_algorithm == "none")
paramError("tuning_algorithm",
"No tuning algorithm is selected for the hyper parameter optimization!");
std::vector<std::string> tune_parameters(getParam<std::vector<std::string>>("tune_parameters"));
// Error Checking
if (isParamValid("tuning_min") &&
(getParam<std::vector<Real>>("tuning_min").size() != tune_parameters.size()))
mooseError("tuning_min size does not match tune_parameters");
if (isParamValid("tuning_max") &&
(getParam<std::vector<Real>>("tuning_max").size() != tune_parameters.size()))
mooseError("tuning_max size does not match tune_parameters");
std::vector<Real> lower_bounds, upper_bounds;
if (isParamValid("tuning_min"))
lower_bounds = getParam<std::vector<Real>>("tuning_min");
if (isParamValid("tuning_max"))
upper_bounds = getParam<std::vector<Real>>("tuning_max");
_gp_handler.initialize(
getCovarianceFunctionByName(parameters.get<UserObjectName>("covariance_function")),
tune_parameters,
lower_bounds,
upper_bounds);
}
void
GaussianProcessTrainer::preTrain()
{
_training_params.setZero(_sampler.getNumberOfRows(), _n_params);
_training_data.setZero(_sampler.getNumberOfRows(), 1);
}
void
GaussianProcessTrainer::train()
{
unsigned int d = 0;
for (const auto & val : _pvals)
_training_params(_row, d++) = *val;
for (const auto & col : _pcols)
_training_params(_row, d++) = _sampler_row[col];
// Loading result data from response reporter
_training_data(_row, 0) = _rval;
}
void
GaussianProcessTrainer::postTrain()
{
for (unsigned int ii = 0; ii < _training_params.rows(); ++ii)
{
for (unsigned int jj = 0; jj < _training_params.cols(); ++jj)
gatherSum(_training_params(ii, jj));
gatherSum(_training_data(ii, 0));
}
// Standardize (center and scale) training params
if (_standardize_params)
_gp_handler.standardizeParameters(_training_params);
// if not standardizing data set mean=0, std=1 for use in surrogate
else
_gp_handler.paramStandardizer().set(0, 1, _n_params);
// Standardize (center and scale) training data
if (_standardize_data)
_gp_handler.standardizeData(_training_data);
// if not standardizing data set mean=0, std=1 for use in surrogate
else
_gp_handler.dataStandardizer().set(0, 1, _n_params);
// Setup the covariance
_gp_handler.setupCovarianceMatrix(
_training_params,
_training_data,
_tuning_algorithm,
isParamValid("tao_options") ? getParam<std::string>("tao_options") : "",
isParamValid("show_tao") ? getParam<bool>("show_tao") : false);
}
(modules/stochastic_tools/src/surrogates/GaussianProcess.C)
// This file is part of the MOOSE framework
// https://www.mooseframework.org
//
// All rights reserved, see COPYRIGHT for full restrictions
// https://github.com/idaholab/moose/blob/master/COPYRIGHT
//
// Licensed under LGPL 2.1, please see LICENSE for details
// https://www.gnu.org/licenses/lgpl-2.1.html
#include "GaussianProcess.h"
#include "Sampler.h"
#include "CovarianceFunctionBase.h"
registerMooseObject("StochasticToolsApp", GaussianProcess);
InputParameters
GaussianProcess::validParams()
{
InputParameters params = SurrogateModel::validParams();
params.addClassDescription("Computes and evaluates Gaussian Process surrogate model.");
return params;
}
GaussianProcess::GaussianProcess(const InputParameters & parameters)
: SurrogateModel(parameters),
CovarianceInterface(parameters),
_gp_handler(
isParamValid("trainer")
? dynamic_cast<GaussianProcessTrainer &>(getSurrogateTrainer("trainer")).gpHandler()
: setModelData<StochasticTools::GaussianProcessHandler>("_gp_handler")),
_training_params(getModelData<RealEigenMatrix>("_training_params"))
{
}
void
GaussianProcess::setupCovariance(UserObjectName covar_name)
{
if (_gp_handler.getCovarFunctionPtr() != nullptr)
::mooseError("Attempting to redefine covariance function using setupCovariance.");
_gp_handler.linkCovarianceFunction(getCovarianceFunctionByName(covar_name));
}
Real
GaussianProcess::evaluate(const std::vector<Real> & x) const
{
// Overload of evaluate to maintain general compatibility. Only returns the mean
Real dummy = 0;
return this->evaluate(x, dummy);
}
Real
GaussianProcess::evaluate(const std::vector<Real> & x, Real & std_dev) const
{
unsigned int _n_params = _training_params.cols();
unsigned int _num_tests = 1;
mooseAssert(x.size() == _n_params,
"Number of parameters provided for evaluation does not match number of parameters "
"used for training.");
RealEigenMatrix test_points(_num_tests, _n_params);
for (unsigned int ii = 0; ii < _n_params; ++ii)
test_points(0, ii) = x[ii];
_gp_handler.getParamStandardizer().getStandardized(test_points);
RealEigenMatrix K_train_test(_training_params.rows(), test_points.rows());
_gp_handler.getCovarFunction().computeCovarianceMatrix(
K_train_test, _training_params, test_points, false);
RealEigenMatrix K_test(test_points.rows(), test_points.rows());
_gp_handler.getCovarFunction().computeCovarianceMatrix(K_test, test_points, test_points, true);
// Compute the predicted mean value (centered)
RealEigenMatrix pred_value = (K_train_test.transpose() * _gp_handler.getKResultsSolve());
// De-center/scale the value and store for return
_gp_handler.getDataStandardizer().getDestandardized(pred_value);
RealEigenMatrix pred_var =
K_test - (K_train_test.transpose() * _gp_handler.getKCholeskyDecomp().solve(K_train_test));
// Variance computed, take sqrt for standard deviation, scale up by training data std and store
RealEigenMatrix std_dev_mat = pred_var.array().sqrt();
_gp_handler.getDataStandardizer().getDescaled(std_dev_mat);
std_dev = std_dev_mat(0, 0);
return pred_value(0, 0);
}
(modules/stochastic_tools/src/utils/GaussianProcessHandler.C)
// This file is part of the MOOSE framework
// https://www.mooseframework.org
//
// All rights reserved, see COPYRIGHT for full restrictions
// https://github.com/idaholab/moose/blob/master/COPYRIGHT
//
// Licensed under LGPL 2.1, please see LICENSE for details
// https://www.gnu.org/licenses/lgpl-2.1.html
#include "GaussianProcessHandler.h"
#include "FEProblemBase.h"
#include <petsctao.h>
#include <petscdmda.h>
#include "libmesh/petsc_vector.h"
#include "libmesh/petsc_matrix.h"
#include <math.h>
namespace StochasticTools
{
GaussianProcessHandler::GaussianProcessHandler() : _tao_comm(MPI_COMM_SELF) {}
void
GaussianProcessHandler::initialize(CovarianceFunctionBase * covariance_function,
const std::vector<std::string> params_to_tune,
std::vector<Real> min,
std::vector<Real> max)
{
linkCovarianceFunction(covariance_function);
generateTuningMap(params_to_tune, min, max);
}
void
GaussianProcessHandler::linkCovarianceFunction(CovarianceFunctionBase * covariance_function)
{
_covariance_function = covariance_function;
_covar_type = _covariance_function->type();
}
void
GaussianProcessHandler::setupCovarianceMatrix(const RealEigenMatrix & training_params,
const RealEigenMatrix & training_data,
MooseEnum opt_type,
std::string tao_options,
bool show_tao)
{
_K.resize(training_params.rows(), training_params.rows());
// This already accounts for the future addition of an ADAM optimizer
if (opt_type == "tao")
if (tuneHyperParamsTAO(training_params, training_data, tao_options, show_tao))
::mooseError("PETSc/TAO error in hyperparameter tuning.");
_covariance_function->computeCovarianceMatrix(_K, training_params, training_params, true);
// Compute the Cholesky decomposition and inverse action of the covariance matrix
setupStoredMatrices(training_data);
_covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
}
void
GaussianProcessHandler::setupStoredMatrices(const RealEigenMatrix & input)
{
_K_cho_decomp = _K.llt();
_K_results_solve = _K_cho_decomp.solve(input);
}
void
GaussianProcessHandler::generateTuningMap(const std::vector<std::string> params_to_tune,
std::vector<Real> min_vector,
std::vector<Real> max_vector)
{
_num_tunable = 0;
bool upper_bounds_specified = false;
bool lower_bounds_specified = false;
if (min_vector.size())
lower_bounds_specified = true;
if (max_vector.size())
upper_bounds_specified = true;
for (unsigned int param_i = 0; param_i < params_to_tune.size(); ++param_i)
{
const auto & hp = params_to_tune[param_i];
if (_covariance_function->isTunable(hp))
{
unsigned int size;
Real min;
Real max;
// Get size and default min/max
_covariance_function->getTuningData(hp, size, min, max);
// Check for overridden min/max
min = lower_bounds_specified ? min_vector[param_i] : min;
max = upper_bounds_specified ? max_vector[param_i] : max;
// Save data in tuple
_tuning_data[hp] = std::make_tuple(_num_tunable, size, min, max);
_num_tunable += size;
}
}
}
void
GaussianProcessHandler::standardizeParameters(RealEigenMatrix & data, bool keep_moments)
{
if (!keep_moments)
_param_standardizer.computeSet(data);
_param_standardizer.getStandardized(data);
}
void
GaussianProcessHandler::standardizeData(RealEigenMatrix & data, bool keep_moments)
{
if (!keep_moments)
_data_standardizer.computeSet(data);
_data_standardizer.getStandardized(data);
}
PetscErrorCode
GaussianProcessHandler::tuneHyperParamsTAO(const RealEigenMatrix & training_params,
const RealEigenMatrix & training_data,
std::string tao_options,
bool show_tao)
{
PetscErrorCode ierr;
Tao tao;
_training_params = &training_params;
_training_data = &training_data;
// Setup Tao optimization problem
ierr = TaoCreate(_tao_comm.get(), &tao);
CHKERRQ(ierr);
ierr = PetscOptionsSetValue(NULL, "-tao_type", "bncg");
CHKERRQ(ierr);
ierr = PetscOptionsInsertString(NULL, tao_options.c_str());
CHKERRQ(ierr);
ierr = TaoSetFromOptions(tao);
CHKERRQ(ierr);
// Define a PETSc vector to hold the tunable hyper-params
libMesh::PetscVector<Number> theta(_tao_comm, _num_tunable);
ierr = formInitialGuessTAO(theta.vec());
CHKERRQ(ierr);
ierr = TaoSetInitialVector(tao, theta.vec());
CHKERRQ(ierr);
// Get Hyperparameter bounds.
libMesh::PetscVector<Number> lower(_tao_comm, _num_tunable);
libMesh::PetscVector<Number> upper(_tao_comm, _num_tunable);
buildHyperParamBoundsTAO(lower, upper);
CHKERRQ(ierr);
ierr = TaoSetVariableBounds(tao, lower.vec(), upper.vec());
CHKERRQ(ierr);
// Set Objective and Gradient Callback
ierr = TaoSetObjectiveAndGradientRoutine(tao, formFunctionGradientWrapper, (void *)this);
CHKERRQ(ierr);
// Solve
ierr = TaoSolve(tao);
CHKERRQ(ierr);
//
if (show_tao)
{
ierr = TaoView(tao, PETSC_VIEWER_STDOUT_WORLD);
theta.print();
}
_covariance_function->loadHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
ierr = TaoDestroy(&tao);
CHKERRQ(ierr);
return 0;
}
PetscErrorCode
GaussianProcessHandler::formInitialGuessTAO(Vec theta_vec)
{
libMesh::PetscVector<Number> theta(theta_vec, _tao_comm);
_covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
mapToPetscVec(_tuning_data, _hyperparam_map, _hyperparam_vec_map, theta);
return 0;
}
void
GaussianProcessHandler::buildHyperParamBoundsTAO(libMesh::PetscVector<Number> & theta_l,
libMesh::PetscVector<Number> & theta_u) const
{
for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
{
for (unsigned int ii = 0; ii < std::get<1>(iter->second); ++ii)
{
theta_l.set(std::get<0>(iter->second) + ii, std::get<2>(iter->second));
theta_u.set(std::get<0>(iter->second) + ii, std::get<3>(iter->second));
}
}
}
PetscErrorCode
GaussianProcessHandler::formFunctionGradientWrapper(
Tao tao, Vec theta_vec, PetscReal * f, Vec grad_vec, void * ptr)
{
GaussianProcessHandler * GP_ptr = (GaussianProcessHandler *)ptr;
GP_ptr->formFunctionGradient(tao, theta_vec, f, grad_vec);
return 0;
}
void
GaussianProcessHandler::formFunctionGradient(Tao /*tao*/,
Vec theta_vec,
PetscReal * f,
Vec grad_vec)
{
libMesh::PetscVector<Number> theta(theta_vec, _tao_comm);
libMesh::PetscVector<Number> grad(grad_vec, _tao_comm);
petscVecToMap(_tuning_data, _hyperparam_map, _hyperparam_vec_map, theta);
_covariance_function->loadHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
_covariance_function->computeCovarianceMatrix(_K, *_training_params, *_training_params, true);
setupStoredMatrices(*_training_data);
// Compute the gradient of the objective with respect to each tunable hyperparameter
RealEigenMatrix dKdhp(_training_params->rows(), _training_params->rows());
RealEigenMatrix alpha = _K_results_solve * _K_results_solve.transpose();
for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
{
std::string hyper_param_name = iter->first;
for (unsigned int ii = 0; ii < std::get<1>(iter->second); ++ii)
{
_covariance_function->computedKdhyper(dKdhp, *_training_params, hyper_param_name, ii);
RealEigenMatrix tmp = alpha * dKdhp - _K_cho_decomp.solve(dKdhp);
grad.set(std::get<0>(iter->second) + ii, -tmp.trace() / 2.0);
}
}
//
Real log_likelihood = 0;
log_likelihood += -(_training_data->transpose() * _K_results_solve)(0, 0);
log_likelihood += -std::log(_K.determinant());
log_likelihood += -_training_data->rows() * std::log(2 * M_PI);
log_likelihood = -log_likelihood / 2;
*f = log_likelihood;
}
void
GaussianProcessHandler::mapToPetscVec(
const std::unordered_map<std::string, std::tuple<unsigned int, unsigned int, Real, Real>> &
tuning_data,
const std::unordered_map<std::string, Real> & scalar_map,
const std::unordered_map<std::string, std::vector<Real>> & vector_map,
libMesh::PetscVector<Number> & petsc_vec)
{
for (auto iter = tuning_data.begin(); iter != tuning_data.end(); ++iter)
{
std::string param_name = iter->first;
const auto scalar_it = scalar_map.find(param_name);
if (scalar_it != scalar_map.end())
petsc_vec.set(std::get<0>(iter->second), scalar_it->second);
else
{
const auto vector_it = vector_map.find(param_name);
if (vector_it != vector_map.end())
for (unsigned int ii = 0; ii < std::get<1>(iter->second); ++ii)
petsc_vec.set(std::get<0>(iter->second) + ii, (vector_it->second)[ii]);
}
}
}
void
GaussianProcessHandler::petscVecToMap(
const std::unordered_map<std::string, std::tuple<unsigned int, unsigned int, Real, Real>> &
tuning_data,
std::unordered_map<std::string, Real> & scalar_map,
std::unordered_map<std::string, std::vector<Real>> & vector_map,
const libMesh::PetscVector<Number> & petsc_vec)
{
for (auto iter = tuning_data.begin(); iter != tuning_data.end(); ++iter)
{
std::string param_name = iter->first;
if (scalar_map.find(param_name) != scalar_map.end())
scalar_map[param_name] = petsc_vec(std::get<0>(iter->second));
else if (vector_map.find(param_name) != vector_map.end())
for (unsigned int ii = 0; ii < std::get<1>(iter->second); ++ii)
vector_map[param_name][ii] = petsc_vec(std::get<0>(iter->second) + ii);
}
}
} // StochasticTools namespace
template <>
void
dataStore(std::ostream & stream, Eigen::LLT<RealEigenMatrix> & decomp, void * context)
{
// Store the L matrix as opposed to the full matrix to avoid compounding
// roundoff error and decomposition error
RealEigenMatrix L(decomp.matrixL());
dataStore(stream, L, context);
}
template <>
void
dataLoad(std::istream & stream, Eigen::LLT<RealEigenMatrix> & decomp, void * context)
{
RealEigenMatrix L;
dataLoad(stream, L, context);
decomp.compute(L * L.transpose());
}
template <>
void
dataStore(std::ostream & stream, StochasticTools::GaussianProcessHandler & gp_utils, void * context)
{
dataStore(stream, gp_utils.hyperparamMap(), context);
dataStore(stream, gp_utils.hyperparamVectorMap(), context);
dataStore(stream, gp_utils.covarType(), context);
dataStore(stream, gp_utils.K(), context);
dataStore(stream, gp_utils.KResultsSolve(), context);
dataStore(stream, gp_utils.KCholeskyDecomp(), context);
dataStore(stream, gp_utils.paramStandardizer(), context);
dataStore(stream, gp_utils.dataStandardizer(), context);
}
template <>
void
dataLoad(std::istream & stream, StochasticTools::GaussianProcessHandler & gp_utils, void * context)
{
dataLoad(stream, gp_utils.hyperparamMap(), context);
dataLoad(stream, gp_utils.hyperparamVectorMap(), context);
dataLoad(stream, gp_utils.covarType(), context);
dataLoad(stream, gp_utils.K(), context);
dataLoad(stream, gp_utils.KResultsSolve(), context);
dataLoad(stream, gp_utils.KCholeskyDecomp(), context);
dataLoad(stream, gp_utils.paramStandardizer(), context);
dataLoad(stream, gp_utils.dataStandardizer(), context);
}