Line data Source code
1 : //* This file is part of the MOOSE framework
2 : //* https://mooseframework.inl.gov
3 : //*
4 : //* All rights reserved, see COPYRIGHT for full restrictions
5 : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 : //*
7 : //* Licensed under LGPL 2.1, please see LICENSE for details
8 : //* https://www.gnu.org/licenses/lgpl-2.1.html
9 :
10 : #include "GaussianProcess.h"
11 : #include "FEProblemBase.h"
12 :
13 : #include <petsctao.h>
14 : #include <petscdmda.h>
15 :
16 : #include "libmesh/petsc_vector.h"
17 : #include "libmesh/petsc_matrix.h"
18 :
19 : #include <cmath>
20 :
21 : #include "MooseRandom.h"
22 : #include "Shuffle.h"
23 :
24 : namespace StochasticTools
25 : {
26 :
27 215 : GaussianProcess::GPOptimizerOptions::GPOptimizerOptions(const bool show_every_nth_iteration,
28 : const unsigned int num_iter,
29 : const unsigned int batch_size,
30 : const Real learning_rate,
31 : const Real b1,
32 : const Real b2,
33 : const Real eps,
34 215 : const Real lambda)
35 215 : : show_every_nth_iteration(show_every_nth_iteration),
36 215 : num_iter(num_iter),
37 215 : batch_size(batch_size),
38 215 : learning_rate(learning_rate),
39 215 : b1(b1),
40 215 : b2(b2),
41 215 : eps(eps),
42 215 : lambda(lambda)
43 : {
44 215 : }
45 :
46 435 : GaussianProcess::GaussianProcess() {}
47 :
48 : void
49 213 : GaussianProcess::initialize(CovarianceFunctionBase * covariance_function,
50 : const std::vector<std::string> & params_to_tune,
51 : const std::vector<Real> & min,
52 : const std::vector<Real> & max)
53 : {
54 213 : linkCovarianceFunction(covariance_function);
55 213 : generateTuningMap(params_to_tune, min, max);
56 213 : }
57 :
58 : void
59 227 : GaussianProcess::linkCovarianceFunction(CovarianceFunctionBase * covariance_function)
60 : {
61 227 : _covariance_function = covariance_function;
62 227 : _covar_type = _covariance_function->type();
63 227 : _covar_name = _covariance_function->name();
64 227 : _covariance_function->dependentCovarianceTypes(_dependent_covar_types);
65 227 : _dependent_covar_names = _covariance_function->dependentCovarianceNames();
66 227 : _num_outputs = _covariance_function->numOutputs();
67 227 : }
68 :
69 : void
70 442 : GaussianProcess::setupCovarianceMatrix(const RealEigenMatrix & training_params,
71 : const RealEigenMatrix & training_data,
72 : const GPOptimizerOptions & opts)
73 : {
74 442 : const bool batch_decision = opts.batch_size > 0 && (opts.batch_size <= training_params.rows());
75 393 : _batch_size = batch_decision ? opts.batch_size : training_params.rows();
76 442 : _K.resize(_num_outputs * _batch_size, _num_outputs * _batch_size);
77 :
78 442 : if (_tuning_data.size())
79 288 : tuneHyperParamsAdam(training_params, training_data, opts);
80 :
81 442 : _K.resize(training_params.rows() * training_data.cols(),
82 : training_params.rows() * training_data.cols());
83 442 : _covariance_function->computeCovarianceMatrix(_K, training_params, training_params, true);
84 :
85 : RealEigenMatrix flattened_data =
86 442 : training_data.reshaped(training_params.rows() * training_data.cols(), 1);
87 :
88 : // Compute the Cholesky decomposition and inverse action of the covariance matrix
89 442 : setupStoredMatrices(flattened_data);
90 :
91 442 : _covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
92 442 : }
93 :
94 : void
95 515442 : GaussianProcess::setupStoredMatrices(const RealEigenMatrix & input)
96 : {
97 515442 : _K_cho_decomp = _K.llt();
98 515442 : _K_results_solve = _K_cho_decomp.solve(input);
99 515442 : }
100 :
101 : void
102 213 : GaussianProcess::generateTuningMap(const std::vector<std::string> & params_to_tune,
103 : const std::vector<Real> & min_vector,
104 : const std::vector<Real> & max_vector)
105 : {
106 213 : _num_tunable = 0;
107 :
108 : const bool upper_bounds_specified = min_vector.size();
109 : const bool lower_bounds_specified = max_vector.size();
110 :
111 525 : for (const auto param_i : index_range(params_to_tune))
112 : {
113 : const auto & hp = params_to_tune[param_i];
114 312 : if (_covariance_function->isTunable(hp))
115 : {
116 : unsigned int size;
117 : Real min;
118 : Real max;
119 : // Get size and default min/max
120 312 : const bool found = _covariance_function->getTuningData(hp, size, min, max);
121 :
122 312 : if (!found)
123 0 : ::mooseError("The covariance parameter ", hp, " could not be found!");
124 :
125 : // Check for overridden min/max
126 312 : min = lower_bounds_specified ? min_vector[param_i] : min;
127 312 : max = upper_bounds_specified ? max_vector[param_i] : max;
128 : // Save data in tuple
129 : _tuning_data[hp] = std::make_tuple(_num_tunable, size, min, max);
130 312 : _num_tunable += size;
131 : }
132 : }
133 213 : }
134 :
135 : void
136 442 : GaussianProcess::standardizeParameters(RealEigenMatrix & data, bool keep_moments)
137 : {
138 442 : if (!keep_moments)
139 442 : _param_standardizer.computeSet(data);
140 442 : _param_standardizer.getStandardized(data);
141 442 : }
142 :
143 : void
144 442 : GaussianProcess::standardizeData(RealEigenMatrix & data, bool keep_moments)
145 : {
146 442 : if (!keep_moments)
147 442 : _data_standardizer.computeSet(data);
148 442 : _data_standardizer.getStandardized(data);
149 442 : }
150 :
151 : void
152 288 : GaussianProcess::tuneHyperParamsAdam(const RealEigenMatrix & training_params,
153 : const RealEigenMatrix & training_data,
154 : const GPOptimizerOptions & opts)
155 : {
156 288 : std::vector<Real> theta(_num_tunable, 0.0);
157 288 : _covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
158 :
159 288 : mapToVec(_tuning_data, _hyperparam_map, _hyperparam_vec_map, theta);
160 :
161 : // Internal params for Adam; set to the recommended values in the paper
162 288 : Real b1 = opts.b1;
163 288 : Real b2 = opts.b2;
164 288 : Real eps = opts.eps;
165 : static constexpr Real lambda = 1e-4;
166 :
167 288 : std::vector<Real> m0(_num_tunable, 0.0);
168 288 : std::vector<Real> v0(_num_tunable, 0.0);
169 :
170 : Real new_val;
171 : Real m_hat;
172 : Real v_hat;
173 : Real store_loss = 0.0;
174 : std::vector<Real> grad1;
175 :
176 : // Initialize randomizer
177 288 : std::vector<unsigned int> v_sequence(training_params.rows());
178 : std::iota(std::begin(v_sequence), std::end(v_sequence), 0);
179 288 : RealEigenMatrix inputs(_batch_size, training_params.cols());
180 288 : RealEigenMatrix outputs(_batch_size, training_data.cols());
181 288 : if (opts.show_every_nth_iteration)
182 : Moose::out << "OPTIMIZING GP HYPER-PARAMETERS USING Adam" << std::endl;
183 515288 : for (unsigned int ss = 0; ss < opts.num_iter; ++ss)
184 : {
185 : // Shuffle data
186 : MooseRandom generator;
187 515000 : generator.seed(0, 1980);
188 515000 : generator.saveState();
189 : MooseUtils::shuffle<unsigned int>(v_sequence, generator, 0);
190 8119000 : for (unsigned int ii = 0; ii < _batch_size; ++ii)
191 : {
192 26542000 : for (unsigned int jj = 0; jj < training_params.cols(); ++jj)
193 18938000 : inputs(ii, jj) = training_params(v_sequence[ii], jj);
194 :
195 15278000 : for (unsigned int jj = 0; jj < training_data.cols(); ++jj)
196 7674000 : outputs(ii, jj) = training_data(v_sequence[ii], jj);
197 : }
198 :
199 515000 : store_loss = getLoss(inputs, outputs);
200 515000 : if (opts.show_every_nth_iteration && ((ss + 1) % opts.show_every_nth_iteration == 0))
201 : Moose::out << "Iteration: " << ss + 1 << " LOSS: " << store_loss << std::endl;
202 1030000 : grad1 = getGradient(inputs);
203 1559000 : for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
204 : {
205 1044000 : const auto first_index = std::get<0>(iter->second);
206 1044000 : const auto num_entries = std::get<1>(iter->second);
207 2840000 : for (unsigned int ii = 0; ii < num_entries; ++ii)
208 : {
209 1796000 : const auto global_index = first_index + ii;
210 1796000 : m0[global_index] = b1 * m0[global_index] + (1 - b1) * grad1[global_index];
211 1796000 : v0[global_index] =
212 1796000 : b2 * v0[global_index] + (1 - b2) * grad1[global_index] * grad1[global_index];
213 1796000 : m_hat = m0[global_index] / (1 - std::pow(b1, (ss + 1)));
214 1796000 : v_hat = v0[global_index] / (1 - std::pow(b2, (ss + 1)));
215 1796000 : new_val =
216 1796000 : theta[global_index] - 1.0 * (opts.learning_rate * m_hat / (std::sqrt(v_hat) + eps) +
217 1796000 : lambda * theta[global_index]);
218 :
219 1796000 : const auto min_value = std::get<2>(iter->second);
220 1796000 : const auto max_value = std::get<3>(iter->second);
221 :
222 1796000 : theta[global_index] = std::min(std::max(new_val, min_value), max_value);
223 : }
224 : }
225 515000 : vecToMap(_tuning_data, _hyperparam_map, _hyperparam_vec_map, theta);
226 515000 : _covariance_function->loadHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
227 : }
228 288 : if (opts.show_every_nth_iteration)
229 : {
230 : Moose::out << "OPTIMIZED GP HYPER-PARAMETERS:" << std::endl;
231 116 : Moose::out << Moose::stringify(theta) << std::endl;
232 : Moose::out << "FINAL LOSS: " << store_loss << std::endl;
233 : }
234 :
235 288 : if (theta.size() > 0)
236 : {
237 : unsigned int count = 1;
238 288 : _length_scales.resize(_num_tunable - count);
239 1002 : for (unsigned int i = 0; i < _num_tunable - count; ++i)
240 714 : _length_scales[i] = theta[i + 1];
241 : }
242 288 : }
243 :
244 : Real
245 515000 : GaussianProcess::getLoss(RealEigenMatrix & inputs, RealEigenMatrix & outputs)
246 : {
247 515000 : _covariance_function->computeCovarianceMatrix(_K, inputs, inputs, true);
248 :
249 515000 : RealEigenMatrix flattened_data = outputs.reshaped(outputs.rows() * outputs.cols(), 1);
250 :
251 515000 : setupStoredMatrices(flattened_data);
252 :
253 : Real log_likelihood = 0;
254 515000 : log_likelihood += -(flattened_data.transpose() * _K_results_solve)(0, 0);
255 515000 : log_likelihood += -std::log(_K.determinant());
256 515000 : log_likelihood -= _batch_size * std::log(2 * M_PI);
257 515000 : log_likelihood = -log_likelihood / 2;
258 515000 : return log_likelihood;
259 : }
260 :
261 : std::vector<Real>
262 515000 : GaussianProcess::getGradient(RealEigenMatrix & inputs) const
263 : {
264 515000 : RealEigenMatrix dKdhp(_batch_size, _batch_size);
265 1030000 : RealEigenMatrix alpha = _K_results_solve * _K_results_solve.transpose();
266 : std::vector<Real> grad_vec;
267 515000 : grad_vec.resize(_num_tunable);
268 1559000 : for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
269 : {
270 1044000 : std::string hyper_param_name = iter->first;
271 1044000 : const auto first_index = std::get<0>(iter->second);
272 1044000 : const auto num_entries = std::get<1>(iter->second);
273 2840000 : for (unsigned int ii = 0; ii < num_entries; ++ii)
274 : {
275 1796000 : const auto global_index = first_index + ii;
276 1796000 : _covariance_function->computedKdhyper(dKdhp, inputs, hyper_param_name, ii);
277 3592000 : RealEigenMatrix tmp = alpha * dKdhp - _K_cho_decomp.solve(dKdhp);
278 1796000 : grad_vec[global_index] = -tmp.trace() / 2.0;
279 : }
280 : }
281 515000 : return grad_vec;
282 0 : }
283 :
284 : void
285 288 : GaussianProcess::mapToVec(
286 : const std::unordered_map<std::string, std::tuple<unsigned int, unsigned int, Real, Real>> &
287 : tuning_data,
288 : const std::unordered_map<std::string, Real> & scalar_map,
289 : const std::unordered_map<std::string, std::vector<Real>> & vector_map,
290 : std::vector<Real> & vec) const
291 : {
292 878 : for (auto iter : tuning_data)
293 : {
294 : const std::string & param_name = iter.first;
295 : const auto scalar_it = scalar_map.find(param_name);
296 590 : if (scalar_it != scalar_map.end())
297 288 : vec[std::get<0>(iter.second)] = scalar_it->second;
298 : else
299 : {
300 : const auto vector_it = vector_map.find(param_name);
301 302 : if (vector_it != vector_map.end())
302 1016 : for (unsigned int ii = 0; ii < std::get<1>(iter.second); ++ii)
303 714 : vec[std::get<0>(iter.second) + ii] = (vector_it->second)[ii];
304 : }
305 : }
306 288 : }
307 :
308 : void
309 515000 : GaussianProcess::vecToMap(
310 : const std::unordered_map<std::string, std::tuple<unsigned int, unsigned int, Real, Real>> &
311 : tuning_data,
312 : std::unordered_map<std::string, Real> & scalar_map,
313 : std::unordered_map<std::string, std::vector<Real>> & vector_map,
314 : const std::vector<Real> & vec) const
315 : {
316 1559000 : for (auto iter : tuning_data)
317 : {
318 : const std::string & param_name = iter.first;
319 1044000 : if (scalar_map.find(param_name) != scalar_map.end())
320 515000 : scalar_map[param_name] = vec[std::get<0>(iter.second)];
321 529000 : else if (vector_map.find(param_name) != vector_map.end())
322 1810000 : for (unsigned int ii = 0; ii < std::get<1>(iter.second); ++ii)
323 1281000 : vector_map[param_name][ii] = vec[std::get<0>(iter.second) + ii];
324 : }
325 515000 : }
326 :
327 : } // StochasticTools namespace
328 :
329 : template <>
330 : void
331 24 : dataStore(std::ostream & stream, Eigen::LLT<RealEigenMatrix> & decomp, void * context)
332 : {
333 : // Store the L matrix as opposed to the full matrix to avoid compounding
334 : // roundoff error and decomposition error
335 24 : RealEigenMatrix L(decomp.matrixL());
336 24 : dataStore(stream, L, context);
337 24 : }
338 :
339 : template <>
340 : void
341 14 : dataLoad(std::istream & stream, Eigen::LLT<RealEigenMatrix> & decomp, void * context)
342 : {
343 : RealEigenMatrix L;
344 14 : dataLoad(stream, L, context);
345 14 : decomp.compute(L * L.transpose());
346 14 : }
347 :
348 : template <>
349 : void
350 24 : dataStore(std::ostream & stream, StochasticTools::GaussianProcess & gp_utils, void * context)
351 : {
352 24 : dataStore(stream, gp_utils.hyperparamMap(), context);
353 24 : dataStore(stream, gp_utils.hyperparamVectorMap(), context);
354 24 : dataStore(stream, gp_utils.covarType(), context);
355 24 : dataStore(stream, gp_utils.covarName(), context);
356 : dataStore(stream, gp_utils.covarNumOutputs(), context);
357 24 : dataStore(stream, gp_utils.dependentCovarNames(), context);
358 24 : dataStore(stream, gp_utils.dependentCovarTypes(), context);
359 24 : dataStore(stream, gp_utils.K(), context);
360 24 : dataStore(stream, gp_utils.KResultsSolve(), context);
361 24 : dataStore(stream, gp_utils.KCholeskyDecomp(), context);
362 24 : dataStore(stream, gp_utils.paramStandardizer(), context);
363 24 : dataStore(stream, gp_utils.dataStandardizer(), context);
364 24 : }
365 :
366 : template <>
367 : void
368 14 : dataLoad(std::istream & stream, StochasticTools::GaussianProcess & gp_utils, void * context)
369 : {
370 14 : dataLoad(stream, gp_utils.hyperparamMap(), context);
371 14 : dataLoad(stream, gp_utils.hyperparamVectorMap(), context);
372 14 : dataLoad(stream, gp_utils.covarType(), context);
373 14 : dataLoad(stream, gp_utils.covarName(), context);
374 : dataLoad(stream, gp_utils.covarNumOutputs(), context);
375 14 : dataLoad(stream, gp_utils.dependentCovarNames(), context);
376 14 : dataLoad(stream, gp_utils.dependentCovarTypes(), context);
377 14 : dataLoad(stream, gp_utils.K(), context);
378 14 : dataLoad(stream, gp_utils.KResultsSolve(), context);
379 14 : dataLoad(stream, gp_utils.KCholeskyDecomp(), context);
380 14 : dataLoad(stream, gp_utils.paramStandardizer(), context);
381 14 : dataLoad(stream, gp_utils.dataStandardizer(), context);
382 14 : }
|