https://mooseframework.inl.gov
AdaptiveImportanceSampler.C
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://mooseframework.inl.gov
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
12 #include "Distribution.h"
13 #include "Normal.h"
14 #include "Uniform.h"
15 
16 registerMooseObjectAliased("StochasticToolsApp", AdaptiveImportanceSampler, "AdaptiveImportance");
17 
20 {
22  params.addClassDescription("Adaptive Importance Sampler.");
23  params.addRequiredParam<std::vector<DistributionName>>(
24  "distributions",
25  "The distribution names to be sampled, the number of distributions provided defines the "
26  "number of columns per matrix.");
27  params.addRequiredParam<ReporterName>("inputs_reporter", "Reporter with input parameters.");
28  params.addRequiredParam<std::vector<Real>>("proposal_std",
29  "Standard deviations of the proposal distributions");
30  params.addRequiredParam<Real>("output_limit", "Limiting values of the VPPs");
31  params.addRequiredParam<std::vector<Real>>(
32  "initial_values", "Initial input values to get the importance sampler started");
33  params.addRequiredRangeCheckedParam<int>(
34  "num_samples_train",
35  "num_samples_train>0",
36  "Number of samples to learn the importance distribution");
37  params.addRequiredRangeCheckedParam<int>(
38  "num_importance_sampling_steps",
39  "num_importance_sampling_steps>0",
40  "Number of importance sampling steps (after the importance distribution has been trained)");
41  params.addRequiredParam<Real>(
42  "std_factor", "Factor to be multiplied to the standard deviation of the importance samples");
43  params.addParam<bool>("use_absolute_value", false, "Use absolute value of the sub app output");
44  params.addParam<unsigned int>(
45  "num_random_seeds",
46  100000,
47  "Initialize a certain number of random seeds. Change from the default only if you have to.");
48  params.addParam<ReporterName>("flag_sample",
49  "Flag samples if the surrogate prediction was inadequate.");
50  return params;
51 }
52 
54  : Sampler(parameters),
55  TransientInterface(this),
56  _proposal_std(getParam<std::vector<Real>>("proposal_std")),
57  _initial_values(getParam<std::vector<Real>>("initial_values")),
58  _output_limit(getParam<Real>("output_limit")),
59  _num_samples_train(getParam<int>("num_samples_train")),
60  _num_importance_sampling_steps(getParam<int>("num_importance_sampling_steps")),
61  _std_factor(getParam<Real>("std_factor")),
62  _use_absolute_value(getParam<bool>("use_absolute_value")),
63  _num_random_seeds(getParam<unsigned int>("num_random_seeds")),
64  _is_sampling_completed(false),
65  _inputs(getReporterValue<std::vector<std::vector<Real>>>("inputs_reporter")),
66  _retraining_steps(0),
67  _gp_flag(isParamValid("flag_sample") ? &getReporterValue<std::vector<bool>>("flag_sample")
68  : nullptr)
69 {
70  // Filling the `distributions` vector with the user-provided distributions.
71  for (const DistributionName & name : getParam<std::vector<DistributionName>>("distributions"))
73 
74  /* Adaptive Importance Sampling (AdaptiveImportanceSampler) relies on a Markov Chain Monte Carlo
75  (MCMC) algorithm. As such, in MOOSE, any use of MCMC algorithms requires that the `num_steps`
76  parameter in the main App's executioner would control the total number of samples. Therefore,
77  the `num_rows` parameter typically used by exisiting non-MCMC samplers to set the total number
78  of samples has no use here and is fixed to 1.*/
79  setNumberOfRows(1);
80 
81  // Setting the number of columns in the sampler matrix (equal to the number of distributions).
83 
84  /* `inputs_sto` is a member variable that aids in forming the importance distribution.
85  One dimension of this variable is equal to the number of distributions. The other dimension
86  of the variable, at the last step, is equal to the number of samples the user desires.*/
87  _inputs_sto.resize(_distributions.size());
88 
89  // Mapping all the input distributions to a standard normal space
90  for (unsigned int i = 0; i < _distributions.size(); ++i)
91  _inputs_sto[i].push_back(Normal::quantile(_distributions[i]->cdf(_initial_values[i]), 0, 1));
92 
93  /* `prev_value` is a member variable for tracking the previously accepted samples in the
94  MCMC algorithm and proposing the next sample.*/
95  _prev_value.resize(_distributions.size());
96 
97  // Storage for means of input values for proposing the next sample
98  _mean_sto.resize(_distributions.size());
99 
100  // Storage for standard deviations of input values for proposing the next sample
101  _std_sto.resize(_distributions.size());
102 
105 }
106 
107 void
109 {
110  const bool sample = _t_step > 1;
111  const bool gp_flag = _gp_flag ? (*_gp_flag)[0] : false;
112 
113  if (sample && _is_sampling_completed)
114  mooseError("Internal bug: the adaptive sampling is supposed to be completed but another sample "
115  "has been requested.");
116 
118  {
119  /* This is the importance distribution training step. Markov Chains are set up
120  to sample from the importance region or the failure region using the Metropolis
121  algorithm. Given that the previous sample resulted in a model failure, the next
122  sample is proposed such that it is very likely to result in a model failure as well.
123  The `initial_values` and `proposal_std` parameters provided by the user affects the
124  formation of the importance distribution. */
125  if (sample && !gp_flag)
126  {
127  for (dof_id_type j = 0; j < _distributions.size(); ++j)
128  _prev_value[j] = Normal::quantile(_distributions[j]->cdf(_inputs[j][0]), 0.0, 1.0);
129  Real acceptance_ratio = 0.0;
130  for (dof_id_type i = 0; i < _distributions.size(); ++i)
131  acceptance_ratio += std::log(Normal::pdf(_prev_value[i], 0.0, 1.0)) -
132  std::log(Normal::pdf(_inputs_sto[i].back(), 0.0, 1.0));
133  if (acceptance_ratio > std::log(getRand(0, _t_step)))
134  {
135  for (dof_id_type i = 0; i < _distributions.size(); ++i)
136  _inputs_sto[i].push_back(_prev_value[i]);
137  }
138  else
139  {
140  for (dof_id_type i = 0; i < _distributions.size(); ++i)
141  _inputs_sto[i].push_back(_inputs_sto[i].back());
142  }
143  for (dof_id_type i = 0; i < _distributions.size(); ++i)
144  _prev_value[i] =
145  Normal::quantile(getRand(i + 1, _t_step), _inputs_sto[i].back(), _proposal_std[i]);
146  }
147  }
148  else if (sample && !gp_flag)
149  {
150  /* This is the importance sampling step using the importance distribution created
151  in the previous step. Once the importance distribution is known, sampling from
152  it is similar to a regular Monte Carlo sampling. */
153  for (dof_id_type i = 0; i < _distributions.size(); ++i)
154  {
155  if (_t_step == _num_samples_train + 1)
156  {
159  }
160  _prev_value[i] =
162  }
163 
164  // check if we have performed all the importance sampling steps
166  _is_sampling_completed = true;
167  }
168 
169  // When the GP fails, the current time step is 'wasted' and the retraining step doesn't
170  // happen until the next time step. Therefore, keep track of the number of retraining steps
171  // to increase the total number of steps taken.
172  if (sample && gp_flag && _t_step > _num_samples_train)
174 }
175 
176 Real
178 {
179  return _distributions[col_index]->quantile(Normal::cdf(_prev_value[col_index], 0.0, 1.0));
180 }
void setNumberOfRows(dof_id_type n_rows)
int _retraining_steps
Number of retraining performed.
void addRequiredRangeCheckedParam(const std::string &name, const std::string &parsed_function, const std::string &doc_string)
virtual Real cdf(const Real &x) const override
Definition: Normal.C:74
static InputParameters validParams()
const T & getParam(const std::string &name) const
void addParam(const std::string &name, const std::initializer_list< typename T::value_type > &value, const std::string &doc_string)
A class used to perform Adaptive Importance Sampling using a Markov Chain Monte Carlo algorithm...
bool _is_sampling_completed
True if the sampling is completed.
std::vector< const Distribution * > _distributions
Storage for distribution objects to be utilized.
virtual Real computeSample(dof_id_type row_index, dof_id_type col_index) override
Return the sample for the given row (the sample index) and column (the parameter index) ...
virtual void executeSetUp() override
std::vector< std::vector< Real > > _inputs_sto
Storage for previously accepted samples by the decision reporter system.
Real computeMean(const std::vector< Real > &data, const unsigned int &start_index)
compute the mean of a data vector by only considering values from a specific index.
const std::vector< std::vector< Real > > & _inputs
Storage for the inputs vector obtained from the reporter.
void addRequiredParam(const std::string &name, const std::string &doc_string)
virtual Real pdf(const Real &x) const override
Definition: Normal.C:68
std::vector< Real > _std_sto
Storage for standard deviations of input values for proposing the next sample.
AdaptiveImportanceSampler(const InputParameters &parameters)
const std::string & name() const
Real getRand(std::size_t n, unsigned int index=0) const
const int & _num_samples_train
Number of samples to train the importance sampler.
const unsigned int & _num_random_seeds
Initialize a certain number of random seeds. Change from the default only if you have to...
const int & _num_importance_sampling_steps
Number of importance sampling steps (after the importance distribution has been trained) ...
registerMooseObjectAliased("StochasticToolsApp", AdaptiveImportanceSampler, "AdaptiveImportance")
Real computeSTD(const std::vector< Real > &data, const unsigned int &start_index)
compute the standard deviation of a data vector by only considering values from a specific index...
const Real & _std_factor
Factor to be multiplied to the standard deviation of the proposal distribution.
const Distribution & getDistributionByName(const DistributionName &name) const
std::vector< Real > _mean_sto
Storage for means of input values for proposing the next sample.
void setAutoAdvanceGenerators(const bool state)
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
void setNumberOfCols(dof_id_type n_cols)
void mooseError(Args &&... args) const
const std::vector< bool > *const _gp_flag
Indicate whether GP prediction is good or bad to influence next proposed sample.
void addClassDescription(const std::string &doc_string)
const std::vector< Real > & _initial_values
Initial values values vector to start the importance sampler.
static const std::complex< double > j(0, 1)
Complex number "j" (also known as "i")
const std::vector< Real > & _proposal_std
The proposal distribution standard deviations.
static InputParameters validParams()
virtual Real quantile(const Real &p) const override
Definition: Normal.C:80
void ErrorVector unsigned int
uint8_t dof_id_type
std::vector< Real > _prev_value
For proposing the next sample in the MCMC algorithm.
void setNumberOfRandomSeeds(std::size_t number)