https://mooseframework.inl.gov
ParallelSubsetSimulation.C
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://mooseframework.inl.gov
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
12 #include "Normal.h"
13 #include "Uniform.h"
14 
15 registerMooseObject("StochasticToolsApp", ParallelSubsetSimulation);
16 
19 {
21  params.addClassDescription("Parallel Subset Simulation sampler.");
22  params.addRequiredParam<std::vector<DistributionName>>(
23  "distributions",
24  "The distribution names to be sampled, the number of distributions provided defines the "
25  "number of columns per matrix.");
26  params.addRequiredParam<ReporterName>("output_reporter",
27  "Reporter with results of samples created by the SubApp.");
28  params.addRequiredParam<ReporterName>("inputs_reporter", "Reporter with input parameters.");
29  params.addRangeCheckedParam<Real>("subset_probability",
30  0.1,
31  "subset_probability>0 & subset_probability<=1",
32  "Conditional probability of each subset");
33  params.addRequiredParam<unsigned int>("num_samplessub", "Number of samples per subset");
34  params.addRequiredParam<unsigned int>("num_subsets", "Number of desired subsets");
35  params.addParam<unsigned int>("num_parallel_chains",
36  "Number of Markov chains to run in parallel, default is based on "
37  "the number of processors used.");
38  params.addParam<bool>("use_absolute_value", false, "Use absolute value of the sub app output");
39  params.addParam<unsigned int>(
40  "num_random_seeds",
41  100000,
42  "Initialize a certain number of random seeds. Change from the default only if you have to.");
43  return params;
44 }
45 
47  : Sampler(parameters),
48  _num_samplessub(getParam<unsigned int>("num_samplessub")),
49  _num_subsets(getParam<unsigned int>("num_subsets")),
50  _use_absolute_value(getParam<bool>("use_absolute_value")),
51  _subset_probability(getParam<Real>("subset_probability")),
52  _num_random_seeds(getParam<unsigned int>("num_random_seeds")),
53  _outputs(getReporterValue<std::vector<Real>>("output_reporter")),
54  _inputs(getReporterValue<std::vector<std::vector<Real>>>("inputs_reporter")),
55  _step(getCheckedPointerParam<FEProblemBase *>("_fe_problem_base")->timeStep()),
56  _count_max(std::floor(1 / _subset_probability)),
57  _check_step(0),
58  _subset(0),
59  _is_sampling_completed(false)
60 {
61  // Fixing the number of rows to the number of processors
62  const dof_id_type nchains = isParamValid("num_parallel_chains")
63  ? getParam<unsigned int>("num_parallel_chains")
65  setNumberOfRows(nchains);
66  if ((_num_samplessub / nchains) % _count_max > 0)
67  mooseError("Number of model evaluations per chain per subset (",
68  _num_samplessub / nchains,
69  ") should be a multiple of requested chain length (",
70  _count_max,
71  ").");
72 
73  // Filling the `distributions` vector with the user-provided distributions.
74  for (const DistributionName & name : getParam<std::vector<DistributionName>>("distributions"))
76 
77  // Setting the number of columns in the sampler matrix (equal to the number of distributions).
79 
80  /* `inputs_sto` is a member variable that aids in deciding the next set of samples
81  in the Subset Simulation algorithm by storing the input parameter values*/
82  _inputs_sto.resize(_distributions.size(), std::vector<Real>(_num_samplessub, 0.0));
83  _outputs_sto.resize(_num_samplessub, 0.0);
84 
85  /* `inputs_sorted` is a member variable which also aids in deciding the next set of samples
86  in the Subset Simulation algorithm by storing the sorted input parameter values
87  by their corresponding output values*/
88  _inputs_sorted.resize(_distributions.size());
89 
90  /* `markov_seed` is a member variable to store the seed input values for proposing
91  the next set of Markov chain samples.*/
92  _markov_seed.resize(_distributions.size());
93 
95 }
96 
97 const unsigned int &
99 {
100  return _num_samplessub;
101 }
102 
103 const bool &
105 {
106  return _use_absolute_value;
107 }
108 
109 const Real &
111 {
112  return _subset_probability;
113 }
114 
115 void
117 {
118  if (_step <= 1 || _check_step == _step)
119  return;
120  _check_step = _step;
121 
123  mooseError("Internal bug: the adaptive sampling is supposed to be completed but another sample "
124  "has been requested.");
125 
127  const unsigned int sub_ind = (_step - 1) - (_num_samplessub / getNumberOfRows()) * _subset;
128  const unsigned int offset = sub_ind * getNumberOfRows();
129 
130  // Get and store the accepted samples input across all the procs from the previous step
131  for (dof_id_type j = 0; j < _distributions.size(); ++j)
132  for (dof_id_type ss = 0; ss < getNumberOfRows(); ++ss)
133  _inputs_sto[j][ss + offset] = Normal::quantile(_distributions[j]->cdf(_inputs[j][ss]), 0, 1);
134 
135  // Get the accepted sample outputs across all the procs from the previous step
136  std::vector<Real> tmp =
138  if (mode == Sampler::SampleMode::GLOBAL)
140  else
141  _local_comm.allgather(tmp);
142  for (dof_id_type ss = 0; ss < getNumberOfRows(); ++ss)
143  _outputs_sto[ss + offset] = tmp[ss];
144 
145  // These are the subsequent subsets which use Markov Chain Monte Carlo sampling scheme
146  if (_subset > 0)
147  {
148  // Check whether the subset index has changed
149  if (sub_ind == 0)
150  {
151  // _inputs_sorted contains the input values corresponding to the largest po percentile
152  // output values
155  }
156 
157  // Reinitialize the starting inputs values for the next set of Markov chains
158  if (sub_ind % _count_max == 0)
159  {
160  const unsigned int soffset = (sub_ind / _count_max) * getNumberOfRows();
161  for (dof_id_type j = 0; j < _distributions.size(); ++j)
162  _markov_seed[j].assign(_inputs_sorted[j].begin() + soffset + getLocalRowBegin(),
163  _inputs_sorted[j].begin() + soffset + getLocalRowEnd());
164  }
165  // Otherwise, use the previously accepted input values to propose the next set of input
166  // values
167  else
168  {
169  for (dof_id_type j = 0; j < _distributions.size(); ++j)
170  _markov_seed[j].assign(_inputs_sto[j].begin() + offset + getLocalRowBegin(),
171  _inputs_sto[j].begin() + offset + getLocalRowEnd());
172  }
173  }
174 
175  // check if we have completed the last sample (sub_ind == _num_samplessub /getNumberOfRows() - 1)
176  // of the last subset (_subset == _num_subsets - 1)
177  if (_subset == _num_subsets - 1 && sub_ind == _num_samplessub / getNumberOfRows() - 1)
178  _is_sampling_completed = true;
179 }
180 
181 Real
183 {
184  unsigned int seed_value = _step > 0 ? (_step - 1) * 2 : 0;
185  Real val;
186 
187  if (_subset == 0)
188  val = getRand(seed_value);
189  else
190  {
191  const dof_id_type loc_ind = row_index - getLocalRowBegin();
192  const Real rv = Normal::quantile(getRand(seed_value), _markov_seed[col_index][loc_ind], 1.0);
193  const Real acceptance_ratio = std::log(Normal::pdf(rv, 0, 1)) -
194  std::log(Normal::pdf(_markov_seed[col_index][loc_ind], 0, 1));
195  const Real new_sample = acceptance_ratio > std::log(getRand(seed_value + 1))
196  ? rv
197  : _markov_seed[col_index][loc_ind];
198  val = Normal::cdf(new_sample, 0, 1);
199  }
200 
201  return _distributions[col_index]->quantile(val);
202 }
void setNumberOfRows(dof_id_type n_rows)
void allgather(const T &send_data, std::vector< T, A > &recv_data) const
const std::vector< std::vector< Real > > & _inputs
Reporter value containing input values from decision reporter.
virtual Real cdf(const Real &x) const override
Definition: Normal.C:74
const std::vector< Real > & _outputs
Reporter value containing calculated outputs.
static InputParameters validParams()
void addParam(const std::string &name, const std::initializer_list< typename T::value_type > &value, const std::string &doc_string)
A class used to perform Parallel Subset Simulation Sampling.
std::vector< Distribution const * > _distributions
Storage for distribution objects to be utilized.
std::vector< Real > computeVectorABS(const std::vector< Real > &data)
return the absolute values in a vector.
registerMooseObject("StochasticToolsApp", ParallelSubsetSimulation)
unsigned int _subset
Track the current subset index.
const bool & getUseAbsoluteValue() const
Access use absolute value bool.
virtual Real computeSample(dof_id_type row_index, dof_id_type col_index) override
std::vector< Real > _outputs_sto
Storage for previously accepted sample outputs across all the subsets.
dof_id_type getLocalRowBegin() const
int _check_step
Ensure that the MCMC algorithm proceeds in a sequential fashion.
const Parallel::Communicator & _communicator
const unsigned int & _num_random_seeds
Initialize a certain number of random seeds. Change from the default only if you have to...
std::vector< std::vector< Real > > _markov_seed
Mean input vector for the next proposed sample inputs across several processors.
Real getRand(unsigned int index=0)
virtual const std::string & name() const
void addRequiredParam(const std::string &name, const std::string &doc_string)
virtual Real pdf(const Real &x) const override
Definition: Normal.C:68
std::vector< std::vector< Real > > _inputs_sorted
Store the sorted input samples according to their corresponding outputs.
bool isParamValid(const std::string &name) const
const unsigned int & _num_subsets
Number of subsets.
bool _is_sampling_completed
True if the sampling is completed.
processor_id_type n_processors() const
const int & _step
Track the current step of the main App.
const unsigned int & getNumSamplesSub() const
Access the number samples per subset.
const bool & _use_absolute_value
Absolute value of the model result. Use this when failure is defined as a non-exceedance rather than ...
const T & getParam(const std::string &name) const
ParallelSubsetSimulation(const InputParameters &parameters)
const Real & getSubsetProbability() const
Access the subset probability.
dof_id_type getLocalRowEnd() const
libMesh::Parallel::Communicator _local_comm
const dof_id_type _min_procs_per_row
const unsigned int _count_max
Maximum length of markov chains based on subset probability.
std::vector< std::vector< Real > > _inputs_sto
Storage for the previously accepted sample inputs across all the subsets.
dof_id_type getNumberOfRows() const
const Distribution & getDistributionByName(const DistributionName &name) const
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
void setNumberOfCols(dof_id_type n_cols)
std::vector< std::vector< Real > > sortInput(const std::vector< std::vector< Real >> &inputs, const std::vector< Real > &outputs, const unsigned int samplessub, const Real subset_prob)
return input values corresponding to the largest po percentile output values.
const Real & _subset_probability
The subset conditional failure probability.
void mooseError(Args &&... args) const
void addClassDescription(const std::string &doc_string)
static const std::complex< double > j(0, 1)
Complex number "j" (also known as "i")
void addRangeCheckedParam(const std::string &name, const T &value, const std::string &parsed_function, const std::string &doc_string)
static InputParameters validParams()
const unsigned int & _num_samplessub
Number of samples per subset.
virtual Real quantile(const Real &p) const override
Definition: Normal.C:80
void ErrorVector unsigned int
virtual void sampleSetUp(const Sampler::SampleMode mode) override
uint8_t dof_id_type
void setNumberOfRandomSeeds(std::size_t number)