https://mooseframework.inl.gov
AdaptiveImportanceSampler.C
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://mooseframework.inl.gov
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
12 #include "Distribution.h"
13 #include "Normal.h"
14 #include "Uniform.h"
15 
16 registerMooseObjectAliased("StochasticToolsApp", AdaptiveImportanceSampler, "AdaptiveImportance");
17 
20 {
22  params.addClassDescription("Adaptive Importance Sampler.");
23  params.addRequiredParam<std::vector<DistributionName>>(
24  "distributions",
25  "The distribution names to be sampled, the number of distributions provided defines the "
26  "number of columns per matrix.");
27  params.addRequiredParam<ReporterName>("inputs_reporter", "Reporter with input parameters.");
28  params.addRequiredParam<std::vector<Real>>("proposal_std",
29  "Standard deviations of the proposal distributions");
30  params.addRequiredParam<Real>("output_limit", "Limiting values of the VPPs");
31  params.addRequiredParam<std::vector<Real>>(
32  "initial_values", "Initial input values to get the importance sampler started");
33  params.addRequiredRangeCheckedParam<int>(
34  "num_samples_train",
35  "num_samples_train>0",
36  "Number of samples to learn the importance distribution");
37  params.addRequiredRangeCheckedParam<int>(
38  "num_importance_sampling_steps",
39  "num_importance_sampling_steps>0",
40  "Number of importance sampling steps (after the importance distribution has been trained)");
41  params.addRequiredParam<Real>(
42  "std_factor", "Factor to be multiplied to the standard deviation of the importance samples");
43  params.addParam<bool>("use_absolute_value", false, "Use absolute value of the sub app output");
44  params.addParam<unsigned int>(
45  "num_random_seeds",
46  100000,
47  "Initialize a certain number of random seeds. Change from the default only if you have to.");
48  params.addParam<ReporterName>("flag_sample",
49  "Flag samples if the surrogate prediction was inadequate.");
50  return params;
51 }
52 
54  : Sampler(parameters),
55  TransientInterface(this),
56  _proposal_std(getParam<std::vector<Real>>("proposal_std")),
57  _initial_values(getParam<std::vector<Real>>("initial_values")),
58  _output_limit(getParam<Real>("output_limit")),
59  _num_samples_train(getParam<int>("num_samples_train")),
60  _num_importance_sampling_steps(getParam<int>("num_importance_sampling_steps")),
61  _std_factor(getParam<Real>("std_factor")),
62  _use_absolute_value(getParam<bool>("use_absolute_value")),
63  _num_random_seeds(getParam<unsigned int>("num_random_seeds")),
64  _is_sampling_completed(false),
65  _inputs(getReporterValue<std::vector<std::vector<Real>>>("inputs_reporter")),
66  _retraining_steps(0),
67  _gp_flag(isParamValid("flag_sample") ? &getReporterValue<std::vector<bool>>("flag_sample")
68  : nullptr)
69 {
70  // Filling the `distributions` vector with the user-provided distributions.
71  for (const DistributionName & name : getParam<std::vector<DistributionName>>("distributions"))
73 
74  /* Adaptive Importance Sampling (AdaptiveImportanceSampler) relies on a Markov Chain Monte Carlo
75  (MCMC) algorithm. As such, in MOOSE, any use of MCMC algorithms requires that the `num_steps`
76  parameter in the main App's executioner would control the total number of samples. Therefore,
77  the `num_rows` parameter typically used by exisiting non-MCMC samplers to set the total number
78  of samples has no use here and is fixed to 1.*/
79  setNumberOfRows(1);
80 
81  // Setting the number of columns in the sampler matrix (equal to the number of distributions).
83 
84  /* `inputs_sto` is a member variable that aids in forming the importance distribution.
85  One dimension of this variable is equal to the number of distributions. The other dimension
86  of the variable, at the last step, is equal to the number of samples the user desires.*/
87  _inputs_sto.resize(_distributions.size());
88 
89  // Mapping all the input distributions to a standard normal space
90  for (unsigned int i = 0; i < _distributions.size(); ++i)
91  _inputs_sto[i].push_back(Normal::quantile(_distributions[i]->cdf(_initial_values[i]), 0, 1));
92 
93  /* `prev_value` is a member variable for tracking the previously accepted samples in the
94  MCMC algorithm and proposing the next sample.*/
95  _prev_value.resize(_distributions.size());
96 
97  // `check_step` is a member variable for ensuring that the MCMC algorithm proceeds in a sequential
98  // fashion.
99  _check_step = 0;
100 
101  // Storage for means of input values for proposing the next sample
102  _mean_sto.resize(_distributions.size());
103 
104  // Storage for standard deviations of input values for proposing the next sample
105  _std_sto.resize(_distributions.size());
106 
108 }
109 
110 Real
112 {
113  const bool sample = _t_step > 1 && col_index == 0 && _check_step != _t_step;
114  const bool gp_flag = _gp_flag ? (*_gp_flag)[0] : false;
115 
116  if (sample && _is_sampling_completed)
117  mooseError("Internal bug: the adaptive sampling is supposed to be completed but another sample "
118  "has been requested.");
119 
121  {
122  /* This is the importance distribution training step. Markov Chains are set up
123  to sample from the importance region or the failure region using the Metropolis
124  algorithm. Given that the previous sample resulted in a model failure, the next
125  sample is proposed such that it is very likely to result in a model failure as well.
126  The `initial_values` and `proposal_std` parameters provided by the user affects the
127  formation of the importance distribution. */
128  if (sample && !gp_flag)
129  {
130  for (dof_id_type j = 0; j < _distributions.size(); ++j)
131  _prev_value[j] = Normal::quantile(_distributions[j]->cdf(_inputs[j][0]), 0.0, 1.0);
132  Real acceptance_ratio = 0.0;
133  for (dof_id_type i = 0; i < _distributions.size(); ++i)
134  acceptance_ratio += std::log(Normal::pdf(_prev_value[i], 0.0, 1.0)) -
135  std::log(Normal::pdf(_inputs_sto[i].back(), 0.0, 1.0));
136  if (acceptance_ratio > std::log(getRand(_t_step)))
137  {
138  for (dof_id_type i = 0; i < _distributions.size(); ++i)
139  _inputs_sto[i].push_back(_prev_value[i]);
140  }
141  else
142  {
143  for (dof_id_type i = 0; i < _distributions.size(); ++i)
144  _inputs_sto[i].push_back(_inputs_sto[i].back());
145  }
146  for (dof_id_type i = 0; i < _distributions.size(); ++i)
147  _prev_value[i] =
149  }
150  }
151  else if (sample && !gp_flag)
152  {
153  /* This is the importance sampling step using the importance distribution created
154  in the previous step. Once the importance distribution is known, sampling from
155  it is similar to a regular Monte Carlo sampling. */
156  for (dof_id_type i = 0; i < _distributions.size(); ++i)
157  {
158  if (_t_step == _num_samples_train + 1)
159  {
162  }
163  _prev_value[i] =
165  }
166 
167  // check if we have performed all the importance sampling steps
169  _is_sampling_completed = true;
170  }
171 
172  // When the GP fails, the current time step is 'wasted' and the retraining step doesn't
173  // happen until the next time step. Therefore, keep track of the number of retraining steps
174  // to increase the total number of steps taken.
175  if (sample && gp_flag && _t_step > _num_samples_train)
177 
179  return _distributions[col_index]->quantile(Normal::cdf(_prev_value[col_index], 0.0, 1.0));
180 }
void setNumberOfRows(dof_id_type n_rows)
int _retraining_steps
Number of retraining performed.
void addRequiredRangeCheckedParam(const std::string &name, const std::string &parsed_function, const std::string &doc_string)
int _check_step
Ensure that the MCMC algorithm proceeds in a sequential fashion.
virtual Real cdf(const Real &x) const override
Definition: Normal.C:74
static InputParameters validParams()
void addParam(const std::string &name, const std::initializer_list< typename T::value_type > &value, const std::string &doc_string)
A class used to perform Adaptive Importance Sampling using a Markov Chain Monte Carlo algorithm...
bool _is_sampling_completed
True if the sampling is completed.
std::vector< const Distribution * > _distributions
Storage for distribution objects to be utilized.
virtual Real computeSample(dof_id_type row_index, dof_id_type col_index) override
Return the sample for the given row (the sample index) and column (the parameter index) ...
std::vector< std::vector< Real > > _inputs_sto
Storage for previously accepted samples by the decision reporter system.
Real computeMean(const std::vector< Real > &data, const unsigned int &start_index)
compute the mean of a data vector by only considering values from a specific index.
Real getRand(unsigned int index=0)
virtual const std::string & name() const
const std::vector< std::vector< Real > > & _inputs
Storage for the inputs vector obtained from the reporter.
void addRequiredParam(const std::string &name, const std::string &doc_string)
virtual Real pdf(const Real &x) const override
Definition: Normal.C:68
std::vector< Real > _std_sto
Storage for standard deviations of input values for proposing the next sample.
AdaptiveImportanceSampler(const InputParameters &parameters)
const int & _num_samples_train
Number of samples to train the importance sampler.
const unsigned int & _num_random_seeds
Initialize a certain number of random seeds. Change from the default only if you have to...
const T & getParam(const std::string &name) const
const int & _num_importance_sampling_steps
Number of importance sampling steps (after the importance distribution has been trained) ...
registerMooseObjectAliased("StochasticToolsApp", AdaptiveImportanceSampler, "AdaptiveImportance")
Real computeSTD(const std::vector< Real > &data, const unsigned int &start_index)
compute the standard deviation of a data vector by only considering values from a specific index...
const Real & _std_factor
Factor to be multiplied to the standard deviation of the proposal distribution.
const Distribution & getDistributionByName(const DistributionName &name) const
std::vector< Real > _mean_sto
Storage for means of input values for proposing the next sample.
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
void setNumberOfCols(dof_id_type n_cols)
const std::vector< bool > *const _gp_flag
Indicate whether GP prediction is good or bad to influence next proposed sample.
void mooseError(Args &&... args) const
void addClassDescription(const std::string &doc_string)
const std::vector< Real > & _initial_values
Initial values values vector to start the importance sampler.
static const std::complex< double > j(0, 1)
Complex number "j" (also known as "i")
const std::vector< Real > & _proposal_std
The proposal distribution standard deviations.
static InputParameters validParams()
virtual Real quantile(const Real &p) const override
Definition: Normal.C:80
void ErrorVector unsigned int
uint8_t dof_id_type
std::vector< Real > _prev_value
For proposing the next sample in the MCMC algorithm.
void setNumberOfRandomSeeds(std::size_t number)