Line data Source code
1 : //* This file is part of the MOOSE framework
2 : //* https://mooseframework.inl.gov
3 : //*
4 : //* All rights reserved, see COPYRIGHT for full restrictions
5 : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 : //*
7 : //* Licensed under LGPL 2.1, please see LICENSE for details
8 : //* https://www.gnu.org/licenses/lgpl-2.1.html
9 :
10 : #include "GaussianProcess.h"
11 : #include "FEProblemBase.h"
12 :
13 : #include <petsctao.h>
14 : #include <petscdmda.h>
15 :
16 : #include "libmesh/petsc_vector.h"
17 : #include "libmesh/petsc_matrix.h"
18 :
19 : #include <cmath>
20 :
21 : #include "MooseRandom.h"
22 : #include "Shuffle.h"
23 :
24 : namespace StochasticTools
25 : {
26 :
27 358 : GaussianProcess::GPOptimizerOptions::GPOptimizerOptions(const bool show_every_nth_iteration,
28 : const unsigned int num_iter,
29 : const unsigned int batch_size,
30 : const Real learning_rate,
31 : const Real b1,
32 : const Real b2,
33 : const Real eps,
34 358 : const Real lambda)
35 358 : : show_every_nth_iteration(show_every_nth_iteration),
36 358 : num_iter(num_iter),
37 358 : batch_size(batch_size),
38 358 : learning_rate(learning_rate),
39 358 : b1(b1),
40 358 : b2(b2),
41 358 : eps(eps),
42 358 : lambda(lambda)
43 : {
44 358 : }
45 :
46 728 : GaussianProcess::GaussianProcess() {}
47 :
48 : void
49 354 : GaussianProcess::initialize(CovarianceFunctionBase * covariance_function,
50 : const std::vector<std::string> & params_to_tune,
51 : const std::vector<Real> & min,
52 : const std::vector<Real> & max)
53 : {
54 354 : linkCovarianceFunction(covariance_function);
55 354 : generateTuningMap(params_to_tune, min, max);
56 354 : }
57 :
58 : void
59 386 : GaussianProcess::linkCovarianceFunction(CovarianceFunctionBase * covariance_function)
60 : {
61 386 : _covariance_function = covariance_function;
62 386 : _covar_type = _covariance_function->type();
63 386 : _covar_name = _covariance_function->name();
64 386 : _covariance_function->dependentCovarianceTypes(_dependent_covar_types);
65 386 : _dependent_covar_names = _covariance_function->dependentCovarianceNames();
66 386 : _num_outputs = _covariance_function->numOutputs();
67 386 : }
68 :
69 : void
70 622 : GaussianProcess::setupCovarianceMatrix(const RealEigenMatrix & training_params,
71 : const RealEigenMatrix & training_data,
72 : const GPOptimizerOptions & opts)
73 : {
74 622 : const bool batch_decision = opts.batch_size > 0 && (opts.batch_size <= training_params.rows());
75 510 : _batch_size = batch_decision ? opts.batch_size : training_params.rows();
76 622 : _K.resize(_num_outputs * _batch_size, _num_outputs * _batch_size);
77 :
78 622 : if (_tuning_data.size())
79 398 : tuneHyperParamsAdam(training_params, training_data, opts);
80 :
81 622 : _K.resize(training_params.rows() * training_data.cols(),
82 : training_params.rows() * training_data.cols());
83 622 : _covariance_function->computeCovarianceMatrix(_K, training_params, training_params, true);
84 :
85 : RealEigenMatrix flattened_data =
86 622 : training_data.reshaped(training_params.rows() * training_data.cols(), 1);
87 :
88 : // Compute the Cholesky decomposition and inverse action of the covariance matrix
89 622 : setupStoredMatrices(flattened_data);
90 :
91 622 : _covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
92 622 : }
93 :
94 : void
95 998622 : GaussianProcess::setupStoredMatrices(const RealEigenMatrix & input)
96 : {
97 998622 : _K_cho_decomp = _K.llt();
98 998622 : _K_results_solve = _K_cho_decomp.solve(input);
99 998622 : }
100 :
101 : void
102 354 : GaussianProcess::generateTuningMap(const std::vector<std::string> & params_to_tune,
103 : const std::vector<Real> & min_vector,
104 : const std::vector<Real> & max_vector)
105 : {
106 354 : _num_tunable = 0;
107 :
108 : const bool upper_bounds_specified = min_vector.size();
109 : const bool lower_bounds_specified = max_vector.size();
110 :
111 838 : for (const auto param_i : index_range(params_to_tune))
112 : {
113 : const auto & hp = params_to_tune[param_i];
114 484 : if (_covariance_function->isTunable(hp))
115 : {
116 : unsigned int size;
117 : Real min;
118 : Real max;
119 : // Get size and default min/max
120 484 : const bool found = _covariance_function->getTuningData(hp, size, min, max);
121 :
122 484 : if (!found)
123 0 : ::mooseError("The covariance parameter ", hp, " could not be found!");
124 :
125 : // Check for overridden min/max
126 484 : min = lower_bounds_specified ? min_vector[param_i] : min;
127 484 : max = upper_bounds_specified ? max_vector[param_i] : max;
128 : // Save data in tuple
129 : _tuning_data[hp] = std::make_tuple(_num_tunable, size, min, max);
130 484 : _num_tunable += size;
131 : }
132 : }
133 354 : }
134 :
135 : void
136 622 : GaussianProcess::standardizeParameters(RealEigenMatrix & data, bool keep_moments)
137 : {
138 622 : if (!keep_moments)
139 622 : _param_standardizer.computeSet(data);
140 622 : _param_standardizer.getStandardized(data);
141 622 : }
142 :
143 : void
144 622 : GaussianProcess::standardizeData(RealEigenMatrix & data, bool keep_moments)
145 : {
146 622 : if (!keep_moments)
147 622 : _data_standardizer.computeSet(data);
148 622 : _data_standardizer.getStandardized(data);
149 622 : }
150 :
151 : void
152 398 : GaussianProcess::tuneHyperParamsAdam(const RealEigenMatrix & training_params,
153 : const RealEigenMatrix & training_data,
154 : const GPOptimizerOptions & opts)
155 : {
156 398 : std::vector<Real> theta(_num_tunable, 0.0);
157 398 : _covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
158 :
159 398 : mapToVec(_tuning_data, _hyperparam_map, _hyperparam_vec_map, theta);
160 :
161 : // Internal params for Adam; set to the recommended values in the paper
162 398 : Real b1 = opts.b1;
163 398 : Real b2 = opts.b2;
164 398 : Real eps = opts.eps;
165 :
166 398 : std::vector<Real> m0(_num_tunable, 0.0);
167 398 : std::vector<Real> v0(_num_tunable, 0.0);
168 :
169 : Real new_val;
170 : Real m_hat;
171 : Real v_hat;
172 : Real store_loss = 0.0;
173 : std::vector<Real> grad1;
174 :
175 : // Initialize randomizer
176 398 : std::vector<unsigned int> v_sequence(training_params.rows());
177 : std::iota(std::begin(v_sequence), std::end(v_sequence), 0);
178 398 : RealEigenMatrix inputs(_batch_size, training_params.cols());
179 398 : RealEigenMatrix outputs(_batch_size, training_data.cols());
180 398 : if (opts.show_every_nth_iteration)
181 : Moose::out << "OPTIMIZING GP HYPER-PARAMETERS USING Adam" << std::endl;
182 998398 : for (unsigned int ss = 0; ss < opts.num_iter; ++ss)
183 : {
184 : // Shuffle data
185 : MooseRandom generator;
186 998000 : generator.seed(0, 1980);
187 998000 : generator.saveState();
188 : MooseUtils::shuffle<unsigned int>(v_sequence, generator, 0);
189 16100000 : for (unsigned int ii = 0; ii < _batch_size; ++ii)
190 : {
191 52856000 : for (unsigned int jj = 0; jj < training_params.cols(); ++jj)
192 37754000 : inputs(ii, jj) = training_params(v_sequence[ii], jj);
193 :
194 30364000 : for (unsigned int jj = 0; jj < training_data.cols(); ++jj)
195 15262000 : outputs(ii, jj) = training_data(v_sequence[ii], jj);
196 : }
197 :
198 998000 : store_loss = getLoss(inputs, outputs);
199 998000 : if (opts.show_every_nth_iteration && ((ss + 1) % opts.show_every_nth_iteration == 0))
200 : Moose::out << "Iteration: " << ss + 1 << " LOSS: " << store_loss << std::endl;
201 1996000 : grad1 = getGradient(inputs);
202 3026000 : for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
203 : {
204 2028000 : const auto first_index = std::get<0>(iter->second);
205 2028000 : const auto num_entries = std::get<1>(iter->second);
206 5540000 : for (unsigned int ii = 0; ii < num_entries; ++ii)
207 : {
208 3512000 : const auto global_index = first_index + ii;
209 3512000 : m0[global_index] = b1 * m0[global_index] + (1 - b1) * grad1[global_index];
210 3512000 : v0[global_index] =
211 3512000 : b2 * v0[global_index] + (1 - b2) * grad1[global_index] * grad1[global_index];
212 3512000 : m_hat = m0[global_index] / (1 - std::pow(b1, (ss + 1)));
213 3512000 : v_hat = v0[global_index] / (1 - std::pow(b2, (ss + 1)));
214 3512000 : new_val = theta[global_index] - opts.learning_rate * m_hat / (std::sqrt(v_hat) + eps);
215 :
216 3512000 : const auto min_value = std::get<2>(iter->second);
217 3512000 : const auto max_value = std::get<3>(iter->second);
218 :
219 3512000 : theta[global_index] = std::min(std::max(new_val, min_value), max_value);
220 : }
221 : }
222 998000 : vecToMap(_tuning_data, _hyperparam_map, _hyperparam_vec_map, theta);
223 998000 : _covariance_function->loadHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
224 : }
225 398 : if (opts.show_every_nth_iteration)
226 : {
227 : Moose::out << "OPTIMIZED GP HYPER-PARAMETERS:" << std::endl;
228 244 : Moose::out << Moose::stringify(theta) << std::endl;
229 : Moose::out << "FINAL LOSS: " << store_loss << std::endl;
230 : }
231 398 : }
232 :
233 : Real
234 998000 : GaussianProcess::getLoss(RealEigenMatrix & inputs, RealEigenMatrix & outputs)
235 : {
236 998000 : _covariance_function->computeCovarianceMatrix(_K, inputs, inputs, true);
237 :
238 998000 : RealEigenMatrix flattened_data = outputs.reshaped(outputs.rows() * outputs.cols(), 1);
239 :
240 998000 : setupStoredMatrices(flattened_data);
241 :
242 : Real log_likelihood = 0;
243 998000 : log_likelihood += -(flattened_data.transpose() * _K_results_solve)(0, 0);
244 998000 : log_likelihood += -std::log(_K.determinant());
245 998000 : log_likelihood -= _batch_size * std::log(2 * M_PI);
246 998000 : log_likelihood = -log_likelihood / 2;
247 998000 : return log_likelihood;
248 : }
249 :
250 : std::vector<Real>
251 998000 : GaussianProcess::getGradient(RealEigenMatrix & inputs) const
252 : {
253 998000 : RealEigenMatrix dKdhp(_batch_size, _batch_size);
254 1996000 : RealEigenMatrix alpha = _K_results_solve * _K_results_solve.transpose();
255 : std::vector<Real> grad_vec;
256 998000 : grad_vec.resize(_num_tunable);
257 3026000 : for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
258 : {
259 2028000 : std::string hyper_param_name = iter->first;
260 2028000 : const auto first_index = std::get<0>(iter->second);
261 2028000 : const auto num_entries = std::get<1>(iter->second);
262 5540000 : for (unsigned int ii = 0; ii < num_entries; ++ii)
263 : {
264 3512000 : const auto global_index = first_index + ii;
265 3512000 : _covariance_function->computedKdhyper(dKdhp, inputs, hyper_param_name, ii);
266 7024000 : RealEigenMatrix tmp = alpha * dKdhp - _K_cho_decomp.solve(dKdhp);
267 3512000 : grad_vec[global_index] = -tmp.trace() / 2.0;
268 : }
269 : }
270 998000 : return grad_vec;
271 : }
272 :
273 : void
274 398 : GaussianProcess::mapToVec(
275 : const std::unordered_map<std::string, std::tuple<unsigned int, unsigned int, Real, Real>> &
276 : tuning_data,
277 : const std::unordered_map<std::string, Real> & scalar_map,
278 : const std::unordered_map<std::string, std::vector<Real>> & vector_map,
279 : std::vector<Real> & vec) const
280 : {
281 1226 : for (auto iter : tuning_data)
282 : {
283 : const std::string & param_name = iter.first;
284 : const auto scalar_it = scalar_map.find(param_name);
285 828 : if (scalar_it != scalar_map.end())
286 398 : vec[std::get<0>(iter.second)] = scalar_it->second;
287 : else
288 : {
289 : const auto vector_it = vector_map.find(param_name);
290 430 : if (vector_it != vector_map.end())
291 1528 : for (unsigned int ii = 0; ii < std::get<1>(iter.second); ++ii)
292 1098 : vec[std::get<0>(iter.second) + ii] = (vector_it->second)[ii];
293 : }
294 : }
295 398 : }
296 :
297 : void
298 998000 : GaussianProcess::vecToMap(
299 : const std::unordered_map<std::string, std::tuple<unsigned int, unsigned int, Real, Real>> &
300 : tuning_data,
301 : std::unordered_map<std::string, Real> & scalar_map,
302 : std::unordered_map<std::string, std::vector<Real>> & vector_map,
303 : const std::vector<Real> & vec) const
304 : {
305 3026000 : for (auto iter : tuning_data)
306 : {
307 : const std::string & param_name = iter.first;
308 2028000 : if (scalar_map.find(param_name) != scalar_map.end())
309 998000 : scalar_map[param_name] = vec[std::get<0>(iter.second)];
310 1030000 : else if (vector_map.find(param_name) != vector_map.end())
311 3544000 : for (unsigned int ii = 0; ii < std::get<1>(iter.second); ++ii)
312 2514000 : vector_map[param_name][ii] = vec[std::get<0>(iter.second) + ii];
313 : }
314 998000 : }
315 :
316 : } // StochasticTools namespace
317 :
318 : template <>
319 : void
320 20 : dataStore(std::ostream & stream, Eigen::LLT<RealEigenMatrix> & decomp, void * context)
321 : {
322 : // Store the L matrix as opposed to the full matrix to avoid compounding
323 : // roundoff error and decomposition error
324 20 : RealEigenMatrix L(decomp.matrixL());
325 20 : dataStore(stream, L, context);
326 20 : }
327 :
328 : template <>
329 : void
330 32 : dataLoad(std::istream & stream, Eigen::LLT<RealEigenMatrix> & decomp, void * context)
331 : {
332 : RealEigenMatrix L;
333 32 : dataLoad(stream, L, context);
334 32 : decomp.compute(L * L.transpose());
335 32 : }
336 :
337 : template <>
338 : void
339 20 : dataStore(std::ostream & stream, StochasticTools::GaussianProcess & gp_utils, void * context)
340 : {
341 20 : dataStore(stream, gp_utils.hyperparamMap(), context);
342 20 : dataStore(stream, gp_utils.hyperparamVectorMap(), context);
343 20 : dataStore(stream, gp_utils.covarType(), context);
344 20 : dataStore(stream, gp_utils.covarName(), context);
345 : dataStore(stream, gp_utils.covarNumOutputs(), context);
346 20 : dataStore(stream, gp_utils.dependentCovarNames(), context);
347 20 : dataStore(stream, gp_utils.dependentCovarTypes(), context);
348 20 : dataStore(stream, gp_utils.K(), context);
349 20 : dataStore(stream, gp_utils.KResultsSolve(), context);
350 20 : dataStore(stream, gp_utils.KCholeskyDecomp(), context);
351 20 : dataStore(stream, gp_utils.paramStandardizer(), context);
352 20 : dataStore(stream, gp_utils.dataStandardizer(), context);
353 20 : }
354 :
355 : template <>
356 : void
357 32 : dataLoad(std::istream & stream, StochasticTools::GaussianProcess & gp_utils, void * context)
358 : {
359 32 : dataLoad(stream, gp_utils.hyperparamMap(), context);
360 32 : dataLoad(stream, gp_utils.hyperparamVectorMap(), context);
361 32 : dataLoad(stream, gp_utils.covarType(), context);
362 32 : dataLoad(stream, gp_utils.covarName(), context);
363 : dataLoad(stream, gp_utils.covarNumOutputs(), context);
364 32 : dataLoad(stream, gp_utils.dependentCovarNames(), context);
365 32 : dataLoad(stream, gp_utils.dependentCovarTypes(), context);
366 32 : dataLoad(stream, gp_utils.K(), context);
367 32 : dataLoad(stream, gp_utils.KResultsSolve(), context);
368 32 : dataLoad(stream, gp_utils.KCholeskyDecomp(), context);
369 32 : dataLoad(stream, gp_utils.paramStandardizer(), context);
370 32 : dataLoad(stream, gp_utils.dataStandardizer(), context);
371 32 : }
|