https://mooseframework.inl.gov
GenericActiveLearner.h
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://www.mooseframework.org
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
10 #pragma once
11 
12 #include "GeneralReporter.h"
15 #include "GaussianProcess.h"
16 #include "SurrogateModel.h"
21 
22 // forward declarations
23 template <typename SamplerType>
25 
27 
/// A generic reporter to support parallel active learning: re-trains the GP and picks the next
/// best batch.
/// NOTE(review): the class-head line (listing lines 33-35) is absent from this extracted listing;
/// the out-of-line definitions further down name the class GenericActiveLearnerTempl<SamplerType>.
32 template <typename SamplerType>
36 
37 {
38 public:
/// Empty body: no work is done in initialize().
41  virtual void initialize() override {}
/// Empty body: no work is done in finalize().
42  virtual void finalize() override {}
/// Declared here, defined out of line later in this file.
43  virtual void execute() override;
44 
45 protected:
/// Sets up the training data for the GP model.
/// data_out holds the model outputs; data_in is indexed as data_in(sample, dimension).
51  virtual void setupGPData(const std::vector<Real> & data_out, const DenseMatrix<Real> & data_in);
52 
/// Computes the outputs of the trained GP model and stores them in eval_outputs.
57  virtual void computeGPOutput(std::vector<Real> & eval_outputs);
58 
/// Computes the convergence value during active learning.
62  virtual Real computeConvergenceValue();
63 
/// Evaluate the GP on all the test samples sent by the Sampler.
67  virtual void evaluateGPTest();
68 
/// Setup the generic variable for acquisition computation (depends on the objective:
/// optimization, UQ, etc.).
73  virtual void setupGeneric();
74 
/// Include additional inputs before evaluating the acquisition function.
80  virtual void includeAdditionalInputs();
81 
/// Output the acquisition function values (acq_new) and the ordering of the indices (indices).
87  virtual void getAcquisition(std::vector<Real> & acq_new, std::vector<unsigned int> & indices);
88 
/// The base sampler.
90  SamplerType & _al_sampler;
91 
/// The input dimension for GP, equal to Sampler columns.
93  unsigned int _n_dim;
94 
// NOTE(review): the members _props, _al_gp, _gp_eval, _acquisition_obj, _convergence_value and
// _check_step are used by the constructor and methods below, but their declaration lines
// (listing lines 96, 111, 114, 117, 123, 135) are absent from this extracted listing.
97 
/// Storage for all the proposed samples to test the GP model.
99  const std::vector<std::vector<Real>> & _inputs_test;
100 
/// Model output value from SubApp.
102  const std::vector<Real> & _output_value;
103 
/// Modified value of model output by this reporter class.
105  std::vector<Real> & _output_comm;
106 
/// The selected sample indices to evaluate the subApp.
108  std::vector<unsigned int> & _sorted_indices;
109 
112 
115 
118 
/// The acquisition function values in the current iteration.
120  std::vector<Real> & _acquisition_value;
121 
124 
/// Storage for all the modified proposed samples to test the GP model.
126  std::vector<std::vector<Real>> _inputs_test_modified;
127 
/// Transmit the required inputs to the json file.
129  std::vector<std::vector<Real>> & _inputs_required;
130 
/// Penalize acquisition to prevent clustering when operating in parallel.
132  const bool & _penalize_acquisition;
133 
136 
/// Storage for the GP re-training inputs.
138  std::vector<std::vector<Real>> _gp_inputs;
139 
/// Storage for the GP re-training outputs.
141  std::vector<Real> _gp_outputs;
142 
/// Outputs of GP model for the test samples.
144  std::vector<Real> _gp_outputs_test;
145 
/// Outputs of GP model standard deviation for the test samples.
147  std::vector<Real> _gp_std_test;
148 
/// Storage for the length scales after the GP training.
150  std::vector<Real> _length_scales;
151 
/// A generic parameter to be passed to the acquisition function.
153  std::vector<Real> _generic;
154 
/// The GP outputs from the current iteration before re-training (to evaluate convergence).
156  std::vector<Real> _eval_outputs_current;
157 };
158 
// Declares the input parameters of the reporter and returns them.
// NOTE(review): the signature line and the statement(s) that create `params` (listing lines
// 160-161 and 163-164) are absent from this extracted listing; presumably this is the body of
// the static validParams() member.
// For the addParam<ReporterValueName> calls the arguments are (name, default value, doc string),
// so each declared reporter value defaults to a name equal to its parameter name.
159 template <typename SamplerType>
162 {
165  params.addClassDescription("A generic reporter to support parallel active learning: re-trains GP "
166  "and picks the next best batch.");
// Reporter holding the model output (required).
167  params.addRequiredParam<ReporterName>("output_value",
168  "Value of the model output from the SubApp.");
169  params.addParam<ReporterValueName>(
170  "outputs_required",
171  "outputs_required",
172  "Modified value of the model output from this reporter class.");
// Objects this reporter works with: the sampler, the GP trainer and the GP evaluator (all required).
173  params.addRequiredParam<SamplerName>("sampler", "The sampler object.");
174  params.addRequiredParam<UserObjectName>("al_gp", "Active learning GP trainer.");
175  params.addRequiredParam<UserObjectName>("gp_evaluator", "Evaluator for the trained GP.");
176  params.addParam<ReporterValueName>(
177  "sorted_indices",
178  "sorted_indices",
179  "The sorted sample indices in order of importance to evaluate the subApp.");
// NOTE(review): the doc string of the next parameter spells "acquisition" as "acquistion".
// It is a runtime string, so it is left untouched here.
180  params.addParam<ReporterValueName>(
181  "acquisition_function",
182  "acquisition_function",
183  "The values of the acquistion function in the current iteration.");
184  params.addParam<ReporterValueName>(
185  "convergence_value", "convergence_value", "Value to measure convergence of active learning.");
186  params.addParam<ReporterValueName>(
187  "inputs", "inputs", "Modified value of the model inputs from this reporter class.");
// Acquisition function object (required) and the optional penalization switch, which defaults to true.
188  params.addRequiredParam<UserObjectName>("acquisition", "Name of the acquisition function.");
189  params.addParam<bool>(
190  "penalize_acquisition",
191  true,
192  "Set true to prevent clustering of the best batch inputs when operating in parallel.");
193  return params;
194 }
195 
// Constructor: binds the sampler, the reporter values, the GP trainer/evaluator and the
// acquisition object named in the input parameters, then sizes the working vectors.
// NOTE(review): the line carrying the constructor name (listing line 197), one initializer
// (listing line 201) and one body statement (listing line 223) are absent from this extracted
// listing.
196 template <typename SamplerType>
198  const InputParameters & parameters)
199  : GeneralReporter(parameters),
200  ParallelAcquisitionInterface(parameters),
202  _al_sampler(getSampler<SamplerType>("sampler")),
// Problem sizes and the test samples are all queried from the sampler.
203  _n_dim(_al_sampler.getNumberOfCols()),
204  _props(_al_sampler.getNumParallelProposals()),
205  _inputs_test(_al_sampler.getSampleTries()),
206  _output_value(getReporterValue<std::vector<Real>>("output_value", REPORTER_MODE_DISTRIBUTED)),
207  _output_comm(declareValue<std::vector<Real>>("outputs_required")),
208  _sorted_indices(declareValue<std::vector<unsigned int>>("sorted_indices")),
209  _al_gp(getUserObject<ActiveLearningGaussianProcess>("al_gp")),
210  _gp_eval(getSurrogateModel<GaussianProcessSurrogate>("gp_evaluator")),
211  _acquisition_obj(getParallelAcquisitionFunctionByName(getParam<UserObjectName>("acquisition"))),
212  _acquisition_value(declareValue<std::vector<Real>>("acquisition_function")),
213  _convergence_value(declareValue<Real>("convergence_value")),
214  _inputs_required(declareValue<std::vector<std::vector<Real>>>("inputs")),
215  _penalize_acquisition(getParam<bool>("penalize_acquisition")),
// Starts at the largest int; execute() compares this against _t_step before doing any work.
216  _check_step(std::numeric_limits<int>::max())
217 {
218  // Setting up the variable sizes to facilitate active learning.
// One GP mean and one GP standard deviation slot per test sample.
219  _gp_outputs_test.resize(_inputs_test.size());
220  _gp_std_test.resize(_inputs_test.size());
221  _acquisition_value.resize(_props);
222  _length_scales.resize(_n_dim);
224  _generic.resize(1);
// _props rows of _n_dim zeros.
225  _inputs_required.resize(_props, std::vector<Real>(_n_dim, 0.0));
// _props entries, each initialised to 1.
226  _sorted_indices.resize(_props, 1u);
227 }
228 
229 template <typename SamplerType>
230 void
231 GenericActiveLearnerTempl<SamplerType>::setupGPData(const std::vector<Real> & data_out,
232  const DenseMatrix<Real> & data_in)
233 {
234  for (unsigned int i = 0; i < data_out.size(); ++i)
235  {
236  for (unsigned int j = 0; j < _n_dim; ++j)
237  _inputs_required[i][j] = data_in(i, j);
238  _gp_inputs.push_back(_inputs_required[i]);
239  _gp_outputs.push_back(data_out[i]);
240  }
241 }
242 
// Fills every entry of eval_outputs with the GP evaluator's prediction at the training input
// that has the same index.
// NOTE(review): the signature line (listing line 245) is absent from this extracted listing;
// presumably this is computeGPOutput(std::vector<Real> & eval_outputs).
// NOTE(review): indexing starts at the front of _gp_inputs, whereas setupGPData() appends new
// points at the back; confirm that the oldest points are the ones meant to be evaluated here.
243 template <typename SamplerType>
244 void
246 {
247  for (unsigned int i = 0; i < eval_outputs.size(); ++i)
248  eval_outputs[i] = _gp_eval.evaluate(_gp_inputs[i]);
249 }
250 
// Copies the stored GP training outputs into _generic, the vector later passed to the
// acquisition function.
// NOTE(review): the signature line (listing line 253) is absent from this extracted listing;
// presumably this is setupGeneric().
251 template <typename SamplerType>
252 void
254 {
255  _generic = _gp_outputs;
256 }
257 
// Copies the sampler's test inputs into _inputs_test_modified unchanged; nothing is added here.
// NOTE(review): the signature line (listing line 260) is absent from this extracted listing;
// presumably this is includeAdditionalInputs().
258 template <typename SamplerType>
259 void
261 {
262  _inputs_test_modified = _inputs_test;
263 }
264 
// Computes the acquisition values for all test samples into acq_new and, when penalization is
// enabled, lets the acquisition object adjust acq_new and fill indices.
// NOTE(review): the first signature line (listing line 267) is absent from this extracted
// listing; presumably this is getAcquisition(std::vector<Real> & acq_new, ...).
// NOTE(review): when _penalize_acquisition is false, indices is not written in this function,
// so the caller keeps whatever it passed in; confirm that this is intended.
265 template <typename SamplerType>
266 void
268  std::vector<unsigned int> & indices)
269 {
// Scratch buffer with one acquisition value per test sample.
270  std::vector<Real> acq;
271  acq.resize(_inputs_test.size());
// Refreshes _inputs_test_modified, which is passed to the acquisition object below.
272  includeAdditionalInputs();
273  _acquisition_obj.computeAcquisition(
274  acq, _gp_outputs_test, _gp_std_test, _inputs_test_modified, _gp_inputs, _generic);
275  acq_new = acq;
276  if (_penalize_acquisition)
277  _acquisition_obj.penalizeAcquisition(
278  acq_new, indices, acq, _length_scales, _inputs_test_modified);
279 }
280 
// Returns sqrt(sum_i (_output_comm[i] - _eval_outputs_current[i])^2) / _output_comm.size().
// NOTE(review): the signature line (listing line 283) is absent from this extracted listing;
// presumably this is computeConvergenceValue().
// NOTE(review): _eval_outputs_current is indexed over the full length of _output_comm, and the
// result is divided by _output_comm.size() with no check for an empty vector.
281 template <typename SamplerType>
282 Real
284 {
285  Real convergence_value = 0.0;
286  for (unsigned int ii = 0; ii < _output_comm.size(); ++ii)
287  convergence_value += Utility::pow<2>(_output_comm[ii] - _eval_outputs_current[ii]);
288  convergence_value = std::sqrt(convergence_value) / _output_comm.size();
289  return convergence_value;
290 }
291 
// Evaluates the GP at every test sample and stores the result in _gp_outputs_test.
// _gp_std_test[i] is passed as the second argument of evaluate(); presumably it receives the
// predicted standard deviation for that sample.
// NOTE(review): the signature line (listing line 294) is absent from this extracted listing;
// presumably this is evaluateGPTest().
292 template <typename SamplerType>
293 void
295 {
296  for (unsigned int i = 0; i < _gp_outputs_test.size(); ++i)
297  _gp_outputs_test[i] = _gp_eval.evaluate(_inputs_test[i], _gp_std_test[i]);
298 }
299 
// One active-learning iteration: gathers the sampler inputs and model outputs, appends them to
// the GP training set, re-trains the GP, evaluates it on the test samples and publishes the
// first _props indices and acquisition values.
// When _t_step <= 1, only _sorted_indices is filled, with 0, 1, 2, ...
// NOTE(review): the signature line (listing line 302) is absent from this extracted listing;
// presumably this is execute().
300 template <typename SamplerType>
301 void
303 {
// Skip when this rank has no local sampler rows, or when this time step was already processed.
// NOTE(review): a rank returning here does not reach the _communicator calls below; confirm
// that every rank takes the same branch.
304  if (_al_sampler.getNumberOfLocalRows() == 0 || _check_step == _t_step)
305  {
306  _check_step = _t_step;
307  return;
308  }
309 
// Each rank writes only its local rows into the full-size matrix; the sum over the communicator
// then presumably assembles all rows (assumes rows not written locally start at zero; TODO confirm).
310  DenseMatrix<Real> data_in(_al_sampler.getNumberOfRows(), _al_sampler.getNumberOfCols());
311  for (dof_id_type ss = _al_sampler.getLocalRowBegin(); ss < _al_sampler.getLocalRowEnd(); ++ss)
312  {
313  const auto data = _al_sampler.getNextLocalRow();
314  for (unsigned int j = 0; j < _al_sampler.getNumberOfCols(); ++j)
315  data_in(ss, j) = data[j];
316  }
317  _communicator.sum(data_in.get_values());
// Start from this rank's model outputs, then gather the outputs held by the communicator's ranks.
318  _output_comm = _output_value;
319  _communicator.allgather(_output_comm);
320 
321  if (_t_step > 1)
322  {
323  // Setup the GP training data
324  setupGPData(_output_comm, data_in);
325 
326  // Compute the convergence value before re-training the GP
327  if (_t_step > 2)
328  {
329  computeGPOutput(_eval_outputs_current);
330  _convergence_value = computeConvergenceValue();
331  }
332 
333  // Retrain the GP and get the length scales
334  _al_gp.reTrain(_gp_inputs, _gp_outputs);
335  _length_scales = _al_gp.getLengthScales();
336 
337  // Evaluate the GP on all the test samples sent by the Sampler
338  evaluateGPTest();
339 
340  // Setup the generic variable for acquisition computation (depends on the objective:
341  // optimization, UQ, etc.)
342  setupGeneric();
343 
344  // Get the acquisition function values and ordering of indices as per the acquisition
345  std::vector<Real> acq_new;
346  std::vector<unsigned int> indices;
347  indices.resize(_inputs_test.size());
348  getAcquisition(acq_new, indices);
349 
350  // Output the acquisition function values and the best ordering of the indices
// NOTE(review): both copies read _props elements, so indices and acq_new (each sized from
// _inputs_test.size()) must hold at least _props entries; confirm against the sampler.
351  std::copy_n(indices.begin(), _props, _sorted_indices.begin());
352  std::copy_n(acq_new.begin(), _props, _acquisition_value.begin());
353  }
354  else
355  std::iota(_sorted_indices.begin(), _sorted_indices.end(), 0);
356 
357  // Track the current step
358  _check_step = _t_step;
359 }
A generic reporter to support parallel active learning: re-trains GP and picks the next best batch...
std::vector< std::vector< Real > > _inputs_test_modified
Storage for all the modified proposed samples to test the GP model.
std::vector< std::vector< Real > > _gp_inputs
Storage for the GP re-training inputs.
const std::vector< Real > & _output_value
Model output value from SubApp.
void addParam(const std::string &name, const std::initializer_list< typename T::value_type > &value, const std::string &doc_string)
GenericActiveLearnerTempl< GenericActiveLearningSampler > GenericActiveLearner
virtual void computeGPOutput(std::vector< Real > &eval_outputs)
Computes the outputs of the trained GP model.
std::vector< Real > _eval_outputs_current
The GP outputs from the current iteration before re-training (to evaluate convergence) ...
const InputParameters & parameters() const
std::vector< Real > & _acquisition_value
The acquisition function values in the current iteration.
const SurrogateModel & _gp_eval
The GP evaluator object that permits re-evaluations.
virtual void initialize() override
std::vector< Real > _length_scales
Storage for the length scales after the GP training.
static InputParameters validParams()
dof_id_type _props
Storage for the number of parallel proposals.
All ParallelAcquisition functions should inherit from this class.
void addRequiredParam(const std::string &name, const std::string &doc_string)
unsigned int _n_dim
The input dimension for GP, equal to Sampler columns.
auto max(const L &left, const R &right)
std::vector< unsigned int > & _sorted_indices
The selected sample indices to evaluate the subApp.
virtual void execute() override
virtual void finalize() override
const std::vector< std::vector< Real > > & _inputs_test
Storage for all the proposed samples to test the GP model.
virtual void evaluateGPTest()
Evaluate the GP on all the test samples sent by the Sampler.
const bool & _penalize_acquisition
Penalize acquisition to prevent clustering when operating in parallel.
static InputParameters validParams()
Real & _convergence_value
For monitoring convergence of active learning.
std::vector< Real > & _output_comm
Modified value of model output by this reporter class.
virtual void includeAdditionalInputs()
Include additional inputs before evaluating the acquisition function.
const ReporterMode REPORTER_MODE_DISTRIBUTED
ParallelAcquisitionFunctionBase & _acquisition_obj
Storage for the parallel acquisition object to be utilized.
virtual void setupGeneric()
Setup the generic variable for acquisition computation (depends on the objective: optimization...
GenericActiveLearnerTempl(const InputParameters &parameters)
virtual Real computeConvergenceValue()
Computes the convergence value during active learning.
virtual void setupGPData(const std::vector< Real > &data_out, const DenseMatrix< Real > &data_in)
Sets up the training data for the GP model.
virtual void getAcquisition(std::vector< Real > &acq_new, std::vector< unsigned int > &indices)
Output the acquisition function values and ordering of the indices.
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
Interface for objects that need to use samplers.
const ActiveLearningGaussianProcess & _al_gp
The active learning GP trainer that permits re-training.
void addClassDescription(const std::string &doc_string)
std::vector< Real > _gp_outputs
Storage for the GP re-training outputs.
static const std::complex< double > j(0, 1)
Complex number "j" (also known as "i")
std::vector< Real > _gp_std_test
Outputs of GP model standard deviation for the test samples.
SamplerType & _al_sampler
The base sampler.
std::vector< std::vector< Real > > & _inputs_required
Transmit the required inputs to the json file.
std::vector< Real > _generic
A generic parameter to be passed to the acquisition function.
int _check_step
Ensure that the MCMC algorithm proceeds in a sequential fashion.
void ErrorVector unsigned int
std::vector< Real > _gp_outputs_test
Outputs of GP model for the test samples.
uint8_t dof_id_type