LCOV - code coverage report
Current view: top level - src/trainers - SurrogateTrainer.C (source / functions) Hit Total Coverage
Test: idaholab/moose stochastic_tools: #32971 (54bef8) with base c6cf66 Lines: 187 196 95.4 %
Date: 2026-05-29 20:40:35 Functions: 11 11 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //* This file is part of the MOOSE framework
       2             : //* https://mooseframework.inl.gov
       3             : //*
       4             : //* All rights reserved, see COPYRIGHT for full restrictions
       5             : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
       6             : //*
       7             : //* Licensed under LGPL 2.1, please see LICENSE for details
       8             : //* https://www.gnu.org/licenses/lgpl-2.1.html
       9             : 
      10             : #include "SurrogateTrainer.h"
      11             : #include "SurrogateModel.h"
      12             : #include "Sampler.h"
      13             : #include "StochasticToolsApp.h"
      14             : #include "MooseRandom.h"
      15             : #include "Shuffle.h"
      16             : #include <algorithm>
      17             : 
      18             : InputParameters
      19        1508 : SurrogateTrainerBase::validParams()
      20             : {
      21        1508 :   InputParameters params = GeneralUserObject::validParams();
      22        1508 :   params += RestartableModelInterface::validParams();
      23        1508 :   params.registerBase("SurrogateTrainer");
      24        1508 :   return params;
      25           0 : }
      26             : 
      27         759 : SurrogateTrainerBase::SurrogateTrainerBase(const InputParameters & parameters)
      28             :   : GeneralUserObject(parameters),
      29         759 :     RestartableModelInterface(*this, /*read_only=*/false, _type + "_" + name())
      30             : {
      31         759 : }
      32             : 
      33             : InputParameters
      34        1176 : SurrogateTrainer::validParams()
      35             : {
      36        1176 :   InputParameters params = SurrogateTrainerBase::validParams();
      37        2352 :   params.addRequiredParam<SamplerName>("sampler",
      38             :                                        "Sampler used to create predictor and response data.");
      39        2352 :   params.addParam<ReporterName>(
      40             :       "converged_reporter",
      41             :       "Reporter value used to determine if a sample's multiapp solve converged.");
      42        2352 :   params.addParam<bool>("skip_unconverged_samples",
      43        2352 :                         false,
      44             :                         "True to skip samples where the multiapp did not converge, "
      45             :                         "'stochastic_reporter' is required to do this.");
      46             : 
      47             :   // Common Training Data
      48        2352 :   MooseEnum data_type("real=0 vector_real=1", "real");
      49        2352 :   params.addRequiredParam<ReporterName>(
      50             :       "response",
      51             :       "Reporter value of response results, can be vpp with <vpp_name>/<vector_name> or sampler "
      52             :       "column with 'sampler/col_<index>'.");
      53        2352 :   params.addParam<MooseEnum>("response_type", data_type, "Response data type.");
      54        1176 :   params.addParam<std::vector<ReporterName>>(
      55             :       "predictors",
      56        1176 :       std::vector<ReporterName>(),
      57             :       "Reporter values used as the independent random variables, If 'predictors' and "
      58             :       "'predictor_cols' are both empty, all sampler columns are used.");
      59        1176 :   params.addParam<std::vector<unsigned int>>(
      60             :       "predictor_cols",
      61        1176 :       std::vector<unsigned int>(),
      62             :       "Sampler columns used as the independent random variables, If 'predictors' and "
      63             :       "'predictor_cols' are both empty, all sampler columns are used.");
      64             :   // End Common Training Data
      65             : 
      66        2352 :   MooseEnum cv_type("none=0 k_fold=1", "none");
      67        2352 :   params.addParam<MooseEnum>(
      68             :       "cv_type",
      69             :       cv_type,
      70             :       "Cross-validation method to use for dataset. Options are 'none' or 'k_fold'.");
      71        3528 :   params.addRangeCheckedParam<unsigned int>(
      72        2352 :       "cv_splits", 10, "cv_splits > 1", "Number of splits (k) to use in k-fold cross-validation.");
      73        2352 :   params.addParam<UserObjectName>("cv_surrogate",
      74             :                                   "Name of Surrogate object used for model cross-validation.");
      75        2352 :   params.addParam<unsigned int>(
      76        2352 :       "cv_n_trials", 1, "Number of repeated trials of cross-validation to perform.");
      77        2352 :   params.addParam<unsigned int>("cv_seed",
      78        2352 :                                 std::numeric_limits<unsigned int>::max(),
      79             :                                 "Seed used to initialize random number generator for data "
      80             :                                 "splitting during cross validation.");
      81             : 
      82        1176 :   return params;
      83        1176 : }
      84             : 
      85         590 : SurrogateTrainer::SurrogateTrainer(const InputParameters & parameters)
      86             :   : SurrogateTrainerBase(parameters),
      87             :     SurrogateModelInterface(this),
      88         590 :     _sampler(getSampler("sampler")),
      89         590 :     _rval(nullptr),
      90         590 :     _rvecval(nullptr),
      91        1180 :     _pvals(getParam<std::vector<ReporterName>>("predictors").size()),
      92        1180 :     _pcols(getParam<std::vector<unsigned int>>("predictor_cols")),
      93        1180 :     _n_outputs(declareModelData<unsigned int>("_n_outputs", 1)),
      94         590 :     _row_data(_sampler.getNumberOfCols()),
      95        1180 :     _skip_unconverged(getParam<bool>("skip_unconverged_samples")),
      96         590 :     _converged(nullptr),
      97        1180 :     _cv_type(getParam<MooseEnum>("cv_type")),
      98        1180 :     _n_splits(getParam<unsigned int>("cv_splits")),
      99        1180 :     _cv_n_trials(getParam<unsigned int>("cv_n_trials")),
     100        1180 :     _cv_seed(getParam<unsigned int>("cv_seed")),
     101         590 :     _doing_cv(_cv_type != "none"),
     102        1770 :     _cv_trial_scores(declareModelData<std::vector<std::vector<Real>>>("cv_scores"))
     103             : {
     104         590 :   if (_skip_unconverged)
     105             :   {
     106           0 :     if (!isParamValid("converged_reporter"))
     107           0 :       paramError("skip_unconverged_samples",
     108             :                  "'converged_reporter' needs to be specified to skip unconverged sample.");
     109           0 :     _converged = &getTrainingData<bool>(getParam<ReporterName>("converged_reporter"));
     110             :   }
     111             : 
     112         590 :   if (_doing_cv)
     113             :   {
     114         168 :     if (!isParamValid("cv_surrogate"))
     115           2 :       paramError("cv_type",
     116             :                  "To perform cross-validation, the option cv_surrogate needs to be specified",
     117             :                  " to provide a Surrogate object for training and evaluation.");
     118             : 
     119          82 :     if (_n_splits > _sampler.getNumberOfRows())
     120           0 :       paramError("cv_splits",
     121             :                  "The specified number of splits (cv_splits = ",
     122           0 :                  _n_splits,
     123             :                  ")",
     124             :                  " exceeds the number of rows in Sampler '",
     125             :                  getParam<SamplerName>("sampler"),
     126             :                  "'");
     127             : 
     128          82 :     _cv_generator.seed(0, _cv_seed);
     129             :   }
     130             : 
     131             :   // Get TrainingData for responses and predictors
     132        1764 :   if (getParam<MooseEnum>("response_type") == 0)
     133        1066 :     _rval = &getTrainingData<Real>(getParam<ReporterName>("response"));
     134         165 :   else if (getParam<MooseEnum>("response_type") == 1)
     135         110 :     _rvecval = &getTrainingData<std::vector<Real>>(getParam<ReporterName>("response"));
     136             : 
     137         588 :   const auto & pnames = getParam<std::vector<ReporterName>>("predictors");
     138         602 :   for (unsigned int i = 0; i < pnames.size(); ++i)
     139          14 :     _pvals[i] = &getTrainingData<Real>(pnames[i]);
     140             : 
     141             :   // If predictors and predictor_cols are empty, use all sampler columns
     142         588 :   if (_pvals.empty() && _pcols.empty())
     143             :   {
     144         574 :     _pcols.resize(_sampler.getNumberOfCols());
     145             :     std::iota(_pcols.begin(), _pcols.end(), 0);
     146             :   }
     147         588 :   _n_dims = _pvals.size() + _pcols.size();
     148             : 
     149         588 :   _predictor_data.resize(_n_dims);
     150         588 : }
     151             : 
     152             : void
     153         586 : SurrogateTrainer::initialize()
     154             : {
     155             :   // Figure out if data is distributed
     156        1186 :   for (auto & pair : _training_data)
     157             :   {
     158         600 :     const ReporterName & name = pair.first;
     159             :     TrainingDataBase & data = *pair.second;
     160             : 
     161         600 :     const auto & mode = _fe_problem.getReporterData().getReporterMode(name);
     162         600 :     if (mode == REPORTER_MODE_DISTRIBUTED || (mode == REPORTER_MODE_ROOT && processor_id() != 0))
     163         436 :       data.isDistributed() = true;
     164         164 :     else if (mode == REPORTER_MODE_REPLICATED ||
     165         122 :              (mode == REPORTER_MODE_ROOT && processor_id() == 0))
     166         164 :       data.isDistributed() = false;
     167             :     else
     168           0 :       mooseError("Predictor reporter value ", name, " is not of supported mode.");
     169             :   }
     170             : 
     171         586 :   if (_doing_cv)
     172          82 :     _cv_surrogate = &getSurrogateModel("cv_surrogate");
     173         586 : }
     174             : 
     175             : void
     176         586 : SurrogateTrainer::execute()
     177             : {
     178         586 :   if (_doing_cv)
     179         328 :     for (const auto & trial : make_range(_cv_n_trials))
     180             :     {
     181         246 :       std::vector<Real> trial_score = crossValidate();
     182             : 
     183             :       // Expand _cv_trial_scores with more columns if necessary, then insert values.
     184         391 :       for (unsigned int r = _cv_trial_scores.size(); r < trial_score.size(); ++r)
     185         290 :         _cv_trial_scores.push_back(std::vector<Real>(_cv_n_trials, 0.0));
     186         681 :       for (auto r : make_range(trial_score.size()))
     187         435 :         _cv_trial_scores[r][trial] = trial_score[r];
     188         246 :     }
     189             : 
     190         586 :   _current_sample_size = _sampler.getNumberOfRows();
     191         586 :   _local_sample_size = _sampler.getNumberOfLocalRows();
     192         586 :   executeTraining();
     193         584 : }
     194             : 
     195             : void
     196        1078 : SurrogateTrainer::checkIntegrity() const
     197             : {
     198             :   // Check that the number of sampler columns hasn't changed
     199        1078 :   if (_row_data.size() != _sampler.getNumberOfCols())
     200           0 :     mooseError("Number of sampler columns has changed.");
     201             : 
     202             :   // Check that training data is correctly sized
     203        2168 :   for (auto & pair : _training_data)
     204             :   {
     205        1092 :     dof_id_type rsize = pair.second->size();
     206             :     dof_id_type nrow =
     207        1092 :         pair.second->isDistributed() ? _sampler.getNumberOfLocalRows() : _sampler.getNumberOfRows();
     208        1092 :     if (rsize != nrow)
     209           2 :       mooseError("Reporter value ",
     210           2 :                  pair.first,
     211             :                  " of size ",
     212             :                  rsize,
     213             :                  " does not match sampler size (",
     214             :                  nrow,
     215             :                  ").");
     216             :   }
     217        1076 : }
     218             : 
     219             : void
     220        1078 : SurrogateTrainer::executeTraining()
     221             : {
     222        1078 :   checkIntegrity();
     223        1076 :   _row = _sampler.getLocalRowBegin();
     224        1076 :   _local_row = 0;
     225             : 
     226        1076 :   preTrain();
     227             : 
     228      119491 :   for (_row = _sampler.getLocalRowBegin(); _row < _sampler.getLocalRowEnd(); ++_row)
     229             :   {
     230             :     // Need to do this manually in order to keep the iterators valid
     231      118415 :     const std::vector<Real> data = _sampler.getNextLocalRow();
     232      722170 :     for (unsigned int i = 0; i < _row_data.size(); ++i)
     233      603755 :       _row_data[i] = data[i];
     234             : 
     235             :     // Set training data
     236      242235 :     for (auto & pair : _training_data)
     237      123820 :       pair.second->setCurrentIndex((pair.second->isDistributed() ? _local_row : _row));
     238             : 
     239      118415 :     updatePredictorRow();
     240             : 
     241      118415 :     if ((!_skip_unconverged || *_converged) &&
     242      118415 :         std::find(_skip_indices.begin(), _skip_indices.end(), _row) == _skip_indices.end())
     243      115415 :       train();
     244             : 
     245      118415 :     _local_row++;
     246      118415 :   }
     247             : 
     248        1076 :   postTrain();
     249        1076 : }
     250             : 
     251             : std::vector<Real>
     252         246 : SurrogateTrainer::crossValidate()
     253             : {
     254         246 :   std::vector<Real> cv_score(1, 0.0);
     255             : 
     256             :   // Get skipped indices for each split
     257         246 :   dof_id_type n_rows = _sampler.getNumberOfRows();
     258             :   std::vector<std::vector<dof_id_type>> split_indices;
     259         246 :   if (processor_id() == 0)
     260             :   {
     261         165 :     std::vector<dof_id_type> indices_flat(n_rows);
     262             :     std::iota(indices_flat.begin(), indices_flat.end(), 0);
     263         165 :     MooseUtils::shuffle(indices_flat, _cv_generator, 0);
     264             : 
     265         165 :     split_indices.resize(_n_splits);
     266         495 :     for (const auto & k : make_range(_n_splits))
     267             :     {
     268         660 :       const dof_id_type num_ind = n_rows / _n_splits + (k < (n_rows % _n_splits) ? 1 : 0);
     269         330 :       split_indices[k].insert(split_indices[k].begin(),
     270             :                               std::make_move_iterator(indices_flat.begin()),
     271             :                               std::make_move_iterator(indices_flat.begin() + num_ind));
     272         330 :       std::sort(split_indices[k].begin(), split_indices[k].end());
     273             :       indices_flat.erase(indices_flat.begin(), indices_flat.begin() + num_ind);
     274             :     }
     275         165 :   }
     276             : 
     277             :   std::vector<dof_id_type> split_ids_buffer;
     278         738 :   for (const auto & k : make_range(_n_splits))
     279             :   {
     280         492 :     if (processor_id() == 0)
     281         330 :       split_ids_buffer = split_indices[k];
     282         492 :     _communicator.broadcast(split_ids_buffer, 0);
     283             : 
     284         492 :     _current_sample_size = _sampler.getNumberOfRows() - split_ids_buffer.size();
     285             : 
     286         492 :     auto first = std::lower_bound(
     287         492 :         split_ids_buffer.begin(), split_ids_buffer.end(), _sampler.getLocalRowBegin());
     288         492 :     auto last = std::upper_bound(
     289         492 :         split_ids_buffer.begin(), split_ids_buffer.end(), _sampler.getLocalRowEnd());
     290         492 :     _skip_indices.insert(_skip_indices.begin(), first, last);
     291             : 
     292         492 :     _local_sample_size = _sampler.getNumberOfLocalRows() - _skip_indices.size();
     293             : 
     294             :     // Train the model
     295         492 :     executeTraining();
     296             : 
     297             :     // Evaluate the model
     298         492 :     std::vector<Real> split_mse(1, 0.0);
     299         492 :     std::vector<Real> row_mse(1, 0.0);
     300             : 
     301             :     auto skipped_row = _skip_indices.begin();
     302             : 
     303        6984 :     for (dof_id_type p = _sampler.getLocalRowBegin(); p < _sampler.getLocalRowEnd(); ++p)
     304             :     {
     305        6000 :       const std::vector<Real> row = _sampler.getNextLocalRow();
     306        6000 :       if (skipped_row != _skip_indices.end() && p == *skipped_row)
     307             :       {
     308       10200 :         for (unsigned int i = 0; i < _row_data.size(); ++i)
     309        7200 :           _row_data[i] = row[i];
     310             : 
     311        6000 :         for (auto & pair : _training_data)
     312        3000 :           pair.second->setCurrentIndex(
     313        3000 :               (pair.second->isDistributed() ? p - _sampler.getLocalRowBegin() : p));
     314             : 
     315        3000 :         updatePredictorRow();
     316             : 
     317        6000 :         row_mse = evaluateModelError(*_cv_surrogate);
     318             : 
     319             :         // Expand split_mse if needed.
     320        3000 :         split_mse.resize(row_mse.size(), 0.0);
     321             : 
     322             :         // Increment errors
     323        7350 :         for (unsigned int r = 0; r < split_mse.size(); ++r)
     324        4350 :           split_mse[r] += row_mse[r];
     325             : 
     326             :         skipped_row++;
     327             :       }
     328        6000 :     }
     329             :     gatherSum(split_mse);
     330             : 
     331             :     // Expand cv_score if necessary.
     332         492 :     cv_score.resize(split_mse.size(), 0.0);
     333             : 
     334        1362 :     for (auto r : make_range(split_mse.size()))
     335         870 :       cv_score[r] += split_mse[r] / n_rows;
     336             : 
     337         492 :     _skip_indices.clear();
     338         492 :   }
     339             : 
     340         681 :   for (auto r : make_range(cv_score.size()))
     341         435 :     cv_score[r] = std::sqrt(cv_score[r]);
     342             : 
     343         246 :   return cv_score;
     344         246 : }
     345             : 
     346             : std::vector<Real>
     347        3000 : SurrogateTrainer::evaluateModelError(const SurrogateModel & surr)
     348             : {
     349        3000 :   std::vector<Real> error(1, 0.0);
     350             : 
     351        3000 :   if (_rval)
     352             :   {
     353        2850 :     Real model_eval = surr.evaluate(_predictor_data);
     354        2850 :     error[0] = MathUtils::pow(model_eval - (*_rval), 2);
     355             :   }
     356         150 :   else if (_rvecval)
     357             :   {
     358         150 :     error.resize(_rvecval->size());
     359             : 
     360             :     // Evaluate for vector response.
     361         150 :     std::vector<Real> model_eval(error.size());
     362         150 :     surr.evaluate(_predictor_data, model_eval);
     363        1650 :     for (auto r : make_range(_rvecval->size()))
     364        1500 :       error[r] = MathUtils::pow(model_eval[r] - (*_rvecval)[r], 2);
     365         150 :   }
     366             : 
     367        3000 :   return error;
     368           0 : }
     369             : 
     370             : void
     371      121415 : SurrogateTrainer::updatePredictorRow()
     372             : {
     373             :   unsigned int d = 0;
     374      126820 :   for (const auto & val : _pvals)
     375        5405 :     _predictor_data[d++] = *val;
     376      726965 :   for (const auto & col : _pcols)
     377      605550 :     _predictor_data[d++] = _row_data[col];
     378      121415 : }

Generated by: LCOV version 1.14