38 "Sampler used to create predictor and response data.");
41 "Reporter value used to determine if a sample's multiapp solve converged.");
42 params.
addParam<
bool>(
"skip_unconverged_samples",
44 "True to skip samples where the multiapp did not converge, " 45 "'stochastic_reporter' is required to do this.");
51 "Reporter value of response results, can be vpp with <vpp_name>/<vector_name> or sampler " 52 "column with 'sampler/col_<index>'.");
54 params.
addParam<std::vector<ReporterName>>(
56 std::vector<ReporterName>(),
57 "Reporter values used as the independent random variables, If 'predictors' and " 58 "'predictor_cols' are both empty, all sampler columns are used.");
59 params.
addParam<std::vector<unsigned int>>(
61 std::vector<unsigned int>(),
62 "Sampler columns used as the independent random variables, If 'predictors' and " 63 "'predictor_cols' are both empty, all sampler columns are used.");
66 MooseEnum cv_type(
"none=0 k_fold=1",
"none");
70 "Cross-validation method to use for dataset. Options are 'none' or 'k_fold'.");
72 "cv_splits", 10,
"cv_splits > 1",
"Number of splits (k) to use in k-fold cross-validation.");
73 params.
addParam<UserObjectName>(
"cv_surrogate",
74 "Name of Surrogate object used for model cross-validation.");
76 "cv_n_trials", 1,
"Number of repeated trials of cross-validation to perform.");
77 params.
addParam<
unsigned int>(
"cv_seed",
78 std::numeric_limits<unsigned int>::max(),
79 "Seed used to initialize random number generator for data " 80 "splitting during cross validation.");
88 _sampler(getSampler(
"sampler")),
92 _pcols(getParam<
std::vector<unsigned
int>>(
"predictor_cols")),
93 _n_outputs(declareModelData<unsigned
int>(
"_n_outputs", 1)),
94 _row_data(_sampler.getNumberOfCols()),
95 _skip_unconverged(getParam<bool>(
"skip_unconverged_samples")),
98 _n_splits(getParam<unsigned
int>(
"cv_splits")),
99 _cv_n_trials(getParam<unsigned
int>(
"cv_n_trials")),
100 _cv_seed(getParam<unsigned
int>(
"cv_seed")),
101 _doing_cv(_cv_type !=
"none"),
102 _cv_trial_scores(declareModelData<
std::vector<
std::vector<
Real>>>(
"cv_scores"))
108 "'converged_reporter' needs to be specified to skip unconverged sample.");
109 _converged = &getTrainingData<bool>(getParam<ReporterName>(
"converged_reporter"));
116 "To perform cross-validation, the option cv_surrogate needs to be specified",
117 " to provide a Surrogate object for training and evaluation.");
121 "The specified number of splits (cv_splits = ",
124 " exceeds the number of rows in Sampler '",
125 getParam<SamplerName>(
"sampler"),
132 if (getParam<MooseEnum>(
"response_type") == 0)
133 _rval = &getTrainingData<Real>(getParam<ReporterName>(
"response"));
134 else if (getParam<MooseEnum>(
"response_type") == 1)
135 _rvecval = &getTrainingData<std::vector<Real>>(getParam<ReporterName>(
"response"));
137 const auto & pnames = getParam<std::vector<ReporterName>>(
"predictors");
138 for (
unsigned int i = 0; i < pnames.size(); ++i)
139 _pvals[i] = &getTrainingData<Real>(pnames[i]);
168 mooseError(
"Predictor reporter value ",
name,
" is not of supported mode.");
200 mooseError(
"Number of sampler columns has changed.");
213 " does not match sampler size (",
232 for (
unsigned int i = 0; i <
_row_data.size(); ++i)
237 pair.second->setCurrentIndex((pair.second->isDistributed() ?
_local_row :
_row));
254 std::vector<Real> cv_score(1, 0.0);
258 std::vector<std::vector<dof_id_type>> split_indices;
261 std::vector<dof_id_type> indices_flat(n_rows);
262 std::iota(indices_flat.begin(), indices_flat.end(), 0);
269 split_indices[
k].insert(split_indices[
k].begin(),
270 std::make_move_iterator(indices_flat.begin()),
271 std::make_move_iterator(indices_flat.begin() + num_ind));
272 std::sort(split_indices[
k].begin(), split_indices[
k].end());
273 indices_flat.erase(indices_flat.begin(), indices_flat.begin() + num_ind);
277 std::vector<dof_id_type> split_ids_buffer;
281 split_ids_buffer = split_indices[
k];
286 auto first = std::lower_bound(
288 auto last = std::upper_bound(
298 std::vector<Real> split_mse(1, 0.0);
299 std::vector<Real> row_mse(1, 0.0);
308 for (
unsigned int i = 0; i <
_row_data.size(); ++i)
312 pair.second->setCurrentIndex(
320 split_mse.resize(row_mse.size(), 0.0);
323 for (
unsigned int r = 0; r < split_mse.size(); ++r)
324 split_mse[r] += row_mse[r];
332 cv_score.resize(split_mse.size(), 0.0);
335 cv_score[r] += split_mse[r] / n_rows;
341 cv_score[r] = std::sqrt(cv_score[r]);
349 std::vector<Real> error(1, 0.0);
361 std::vector<Real> model_eval(error.size());
374 for (
const auto & val :
_pvals)
376 for (
const auto & col :
_pcols)
SurrogateTrainerBase(const InputParameters &parameters)
virtual void initialize() final
const bool _doing_cv
Set to true if cross validation is being performed, controls behavior in execute().
const Real * _rval
Response value.
unsigned int _n_dims
Dimension of predictor data - either _sampler.getNumberOfCols() or _pvals.size() + _pcols...
static InputParameters validParams()
void updatePredictorRow()
const unsigned int & _cv_n_trials
Number of repeated trials of cross validation to perform.
const ReporterMode REPORTER_MODE_ROOT
const std::vector< Real > * _rvecval
Vector response value.
void seed(std::size_t i, unsigned int seed)
std::vector< unsigned int > _pcols
Columns from sampler for predictors.
void shuffle(std::vector< T > &data, MooseRandom &generator, const std::size_t seed_index=0)
std::vector< Real > getNextLocalRow()
std::vector< const Real * > _pvals
Predictor values from reporters.
dof_id_type getLocalRowBegin() const
const Parallel::Communicator & _communicator
T & getSurrogateModel(const std::string &name) const
Get a SurrogateModel/Trainer with a given name.
MooseRandom _cv_generator
Random number generator used for shuffling sampler rows during splitting.
std::vector< std::vector< Real > > & _cv_trial_scores
RMSE scores from each CV trial - can be grabbed by VPP or Reporter.
dof_id_type getNumberOfLocalRows() const
virtual const std::string & name() const
static InputParameters validParams()
bool isParamValid(const std::string &name) const
void checkIntegrity() const
std::vector< Real > crossValidate()
const ReporterData & getReporterData() const
dof_id_type _row
During training loop, this is the row index of the data.
dof_id_type _local_row
During training loop, this is the local row index of the data.
std::vector< Real > _predictor_data
Predictor data for current row - can be combination of Sampler and Reporter values.
static InputParameters validParams()
virtual Real evaluate(const std::vector< Real > &x) const
Evaluate surrogate model given a row of parameters.
std::vector< dof_id_type > _skip_indices
void paramError(const std::string &param, Args... args) const
const ReporterMode REPORTER_MODE_DISTRIBUTED
std::unordered_map< ReporterName, std::shared_ptr< TrainingDataBase > > _training_data
Vector of reporter names and their corresponding values (to be filled by getTrainingData) ...
void broadcast(T &data, const unsigned int root_id=0, const bool identical_sizes=false) const
dof_id_type getLocalRowEnd() const
virtual std::vector< Real > evaluateModelError(const SurrogateModel &surr)
dof_id_type getNumberOfRows() const
const ReporterProducerEnum & getReporterMode(const ReporterName &reporter_name) const
typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
Interface for objects that need to use samplers.
FEProblemBase & _fe_problem
std::vector< Real > _row_data
Sampler data for the current row.
IntRange< T > make_range(T beg, T end)
unsigned int _local_sample_size
Number of samples (locally) used to train the model.
void mooseError(Args &&... args) const
unsigned int _current_sample_size
Number of samples used to train the model.
const unsigned int & _n_splits
Number of splits (k) to split sampler data into.
const bool * _converged
Whether or not the current sample has a converged solution.
const ReporterMode REPORTER_MODE_REPLICATED
SurrogateTrainer(const InputParameters &parameters)
const SurrogateModel * _cv_surrogate
SurrogateModel used to evaluate model error relative to test points.
static InputParameters validParams()
This is the base trainer class whose main functionality is the API for declaring model data...
processor_id_type processor_id() const
static const std::string k
void ErrorVector unsigned int
const bool _skip_unconverged
Whether or not we are skipping samples that have unconverged solutions.
dof_id_type getNumberOfCols() const
const unsigned int & _cv_seed
Seed used for _cv_generator.
An interface class which manages the model data save and load functionalities from moose objects (suc...
virtual void execute() final