Line data Source code
1 : //* This file is part of the MOOSE framework
2 : //* https://mooseframework.inl.gov
3 : //*
4 : //* All rights reserved, see COPYRIGHT for full restrictions
5 : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 : //*
7 : //* Licensed under LGPL 2.1, please see LICENSE for details
8 : //* https://www.gnu.org/licenses/lgpl-2.1.html
9 :
10 : #include "GaussianProcess.h"
11 : #include "FEProblemBase.h"
12 :
13 : #include <petsctao.h>
14 : #include <petscdmda.h>
15 :
16 : #include "libmesh/petsc_vector.h"
17 : #include "libmesh/petsc_matrix.h"
18 :
19 : #include <cmath>
20 :
21 : #include "MooseRandom.h"
22 : #include "Shuffle.h"
23 :
24 : namespace StochasticTools
25 : {
26 :
27 382 : GaussianProcess::GPOptimizerOptions::GPOptimizerOptions(const bool show_every_nth_iteration,
28 : const unsigned int num_iter,
29 : const unsigned int batch_size,
30 : const Real learning_rate,
31 : const Real b1,
32 : const Real b2,
33 : const Real eps,
34 382 : const Real lambda)
35 382 : : show_every_nth_iteration(show_every_nth_iteration),
36 382 : num_iter(num_iter),
37 382 : batch_size(batch_size),
38 382 : learning_rate(learning_rate),
39 382 : b1(b1),
40 382 : b2(b2),
41 382 : eps(eps),
42 382 : lambda(lambda)
43 : {
44 382 : }
45 :
46 777 : GaussianProcess::GaussianProcess() {}
47 :
48 : void
49 378 : GaussianProcess::initialize(CovarianceFunctionBase * covariance_function,
50 : const std::vector<std::string> & params_to_tune,
51 : const std::vector<Real> & min,
52 : const std::vector<Real> & max)
53 : {
54 378 : linkCovarianceFunction(covariance_function);
55 378 : generateTuningMap(params_to_tune, min, max);
56 378 : }
57 :
58 : void
59 412 : GaussianProcess::linkCovarianceFunction(CovarianceFunctionBase * covariance_function)
60 : {
61 412 : _covariance_function = covariance_function;
62 412 : _covar_type = _covariance_function->type();
63 412 : _covar_name = _covariance_function->name();
64 412 : _covariance_function->dependentCovarianceTypes(_dependent_covar_types);
65 412 : _dependent_covar_names = _covariance_function->dependentCovarianceNames();
66 412 : _num_outputs = _covariance_function->numOutputs();
67 412 : }
68 :
69 : void
70 665 : GaussianProcess::setupCovarianceMatrix(const RealEigenMatrix & training_params,
71 : const RealEigenMatrix & training_data,
72 : const GPOptimizerOptions & opts)
73 : {
74 665 : const bool batch_decision = opts.batch_size > 0 && (opts.batch_size <= training_params.rows());
75 546 : _batch_size = batch_decision ? opts.batch_size : training_params.rows();
76 665 : _K.resize(_num_outputs * _batch_size, _num_outputs * _batch_size);
77 :
78 665 : if (_tuning_data.size())
79 427 : tuneHyperParamsAdam(training_params, training_data, opts);
80 :
81 665 : _K.resize(training_params.rows() * training_data.cols(),
82 : training_params.rows() * training_data.cols());
83 665 : _covariance_function->computeCovarianceMatrix(_K, training_params, training_params, true);
84 :
85 : RealEigenMatrix flattened_data =
86 665 : training_data.reshaped(training_params.rows() * training_data.cols(), 1);
87 :
88 : // Compute the Cholesky decomposition and inverse action of the covariance matrix
89 665 : setupStoredMatrices(flattened_data);
90 :
91 665 : _covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
92 665 : }
93 :
94 : void
95 1078665 : GaussianProcess::setupStoredMatrices(const RealEigenMatrix & input)
96 : {
97 1078665 : _K_cho_decomp = _K.llt();
98 1078665 : _K_results_solve = _K_cho_decomp.solve(input);
99 1078665 : }
100 :
101 : void
102 378 : GaussianProcess::generateTuningMap(const std::vector<std::string> & params_to_tune,
103 : const std::vector<Real> & min_vector,
104 : const std::vector<Real> & max_vector)
105 : {
106 378 : _num_tunable = 0;
107 :
108 : const bool upper_bounds_specified = min_vector.size();
109 : const bool lower_bounds_specified = max_vector.size();
110 :
111 896 : for (const auto param_i : index_range(params_to_tune))
112 : {
113 : const auto & hp = params_to_tune[param_i];
114 518 : if (_covariance_function->isTunable(hp))
115 : {
116 : unsigned int size;
117 : Real min;
118 : Real max;
119 : // Get size and default min/max
120 518 : const bool found = _covariance_function->getTuningData(hp, size, min, max);
121 :
122 518 : if (!found)
123 0 : ::mooseError("The covariance parameter ", hp, " could not be found!");
124 :
125 : // Check for overridden min/max
126 518 : min = lower_bounds_specified ? min_vector[param_i] : min;
127 518 : max = upper_bounds_specified ? max_vector[param_i] : max;
128 : // Save data in tuple
129 : _tuning_data[hp] = std::make_tuple(_num_tunable, size, min, max);
130 518 : _num_tunable += size;
131 : }
132 : }
133 378 : }
134 :
135 : void
136 665 : GaussianProcess::standardizeParameters(RealEigenMatrix & data, bool keep_moments)
137 : {
138 665 : if (!keep_moments)
139 665 : _param_standardizer.computeSet(data);
140 665 : _param_standardizer.getStandardized(data);
141 665 : }
142 :
143 : void
144 665 : GaussianProcess::standardizeData(RealEigenMatrix & data, bool keep_moments)
145 : {
146 665 : if (!keep_moments)
147 665 : _data_standardizer.computeSet(data);
148 665 : _data_standardizer.getStandardized(data);
149 665 : }
150 :
151 : void
152 427 : GaussianProcess::tuneHyperParamsAdam(const RealEigenMatrix & training_params,
153 : const RealEigenMatrix & training_data,
154 : const GPOptimizerOptions & opts)
155 : {
156 427 : std::vector<Real> theta(_num_tunable, 0.0);
157 427 : _covariance_function->buildHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
158 :
159 427 : mapToVec(_tuning_data, _hyperparam_map, _hyperparam_vec_map, theta);
160 :
161 : // Internal params for Adam; set to the recommended values in the paper
162 427 : Real b1 = opts.b1;
163 427 : Real b2 = opts.b2;
164 427 : Real eps = opts.eps;
165 :
166 427 : std::vector<Real> m0(_num_tunable, 0.0);
167 427 : std::vector<Real> v0(_num_tunable, 0.0);
168 :
169 : Real new_val;
170 : Real m_hat;
171 : Real v_hat;
172 : Real store_loss = 0.0;
173 : std::vector<Real> grad1;
174 :
175 : // Initialize randomizer
176 427 : std::vector<unsigned int> v_sequence(training_params.rows());
177 : std::iota(std::begin(v_sequence), std::end(v_sequence), 0);
178 427 : RealEigenMatrix inputs(_batch_size, training_params.cols());
179 427 : RealEigenMatrix outputs(_batch_size, training_data.cols());
180 427 : if (opts.show_every_nth_iteration)
181 : Moose::out << "OPTIMIZING GP HYPER-PARAMETERS USING Adam" << std::endl;
182 1078427 : for (unsigned int ss = 0; ss < opts.num_iter; ++ss)
183 : {
184 : // Shuffle data
185 : MooseRandom generator;
186 1078000 : generator.seed(0, 1980);
187 1078000 : generator.saveState();
188 : MooseUtils::shuffle<unsigned int>(v_sequence, generator, 0);
189 17401000 : for (unsigned int ii = 0; ii < _batch_size; ++ii)
190 : {
191 57268000 : for (unsigned int jj = 0; jj < training_params.cols(); ++jj)
192 40945000 : inputs(ii, jj) = training_params(v_sequence[ii], jj);
193 :
194 32816000 : for (unsigned int jj = 0; jj < training_data.cols(); ++jj)
195 16493000 : outputs(ii, jj) = training_data(v_sequence[ii], jj);
196 : }
197 :
198 1078000 : store_loss = getLoss(inputs, outputs);
199 1078000 : if (opts.show_every_nth_iteration && ((ss + 1) % opts.show_every_nth_iteration == 0))
200 : Moose::out << "Iteration: " << ss + 1 << " LOSS: " << store_loss << std::endl;
201 2156000 : grad1 = getGradient(inputs);
202 3268000 : for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
203 : {
204 2190000 : const auto first_index = std::get<0>(iter->second);
205 2190000 : const auto num_entries = std::get<1>(iter->second);
206 5992000 : for (unsigned int ii = 0; ii < num_entries; ++ii)
207 : {
208 3802000 : const auto global_index = first_index + ii;
209 3802000 : m0[global_index] = b1 * m0[global_index] + (1 - b1) * grad1[global_index];
210 3802000 : v0[global_index] =
211 3802000 : b2 * v0[global_index] + (1 - b2) * grad1[global_index] * grad1[global_index];
212 3802000 : m_hat = m0[global_index] / (1 - std::pow(b1, (ss + 1)));
213 3802000 : v_hat = v0[global_index] / (1 - std::pow(b2, (ss + 1)));
214 3802000 : new_val = theta[global_index] - opts.learning_rate * m_hat / (std::sqrt(v_hat) + eps);
215 :
216 3802000 : const auto min_value = std::get<2>(iter->second);
217 3802000 : const auto max_value = std::get<3>(iter->second);
218 :
219 3802000 : theta[global_index] = std::min(std::max(new_val, min_value), max_value);
220 : }
221 : }
222 1078000 : vecToMap(_tuning_data, _hyperparam_map, _hyperparam_vec_map, theta);
223 1078000 : _covariance_function->loadHyperParamMap(_hyperparam_map, _hyperparam_vec_map);
224 : }
225 427 : if (opts.show_every_nth_iteration)
226 : {
227 : Moose::out << "OPTIMIZED GP HYPER-PARAMETERS:" << std::endl;
228 266 : Moose::out << Moose::stringify(theta) << std::endl;
229 : Moose::out << "FINAL LOSS: " << store_loss << std::endl;
230 : }
231 427 : }
232 :
233 : Real
234 1078000 : GaussianProcess::getLoss(RealEigenMatrix & inputs, RealEigenMatrix & outputs)
235 : {
236 1078000 : _covariance_function->computeCovarianceMatrix(_K, inputs, inputs, true);
237 :
238 1078000 : RealEigenMatrix flattened_data = outputs.reshaped(outputs.rows() * outputs.cols(), 1);
239 :
240 1078000 : setupStoredMatrices(flattened_data);
241 :
242 : Real log_likelihood = 0;
243 1078000 : log_likelihood += -(flattened_data.transpose() * _K_results_solve)(0, 0);
244 1078000 : log_likelihood += -std::log(_K.determinant());
245 1078000 : log_likelihood -= _batch_size * std::log(2 * M_PI);
246 1078000 : log_likelihood = -log_likelihood / 2;
247 1078000 : return log_likelihood;
248 : }
249 :
250 : std::vector<Real>
251 1078000 : GaussianProcess::getGradient(RealEigenMatrix & inputs) const
252 : {
253 1078000 : RealEigenMatrix dKdhp(_batch_size, _batch_size);
254 2156000 : RealEigenMatrix alpha = _K_results_solve * _K_results_solve.transpose();
255 : std::vector<Real> grad_vec;
256 1078000 : grad_vec.resize(_num_tunable);
257 3268000 : for (auto iter = _tuning_data.begin(); iter != _tuning_data.end(); ++iter)
258 : {
259 2190000 : std::string hyper_param_name = iter->first;
260 2190000 : const auto first_index = std::get<0>(iter->second);
261 2190000 : const auto num_entries = std::get<1>(iter->second);
262 5992000 : for (unsigned int ii = 0; ii < num_entries; ++ii)
263 : {
264 3802000 : const auto global_index = first_index + ii;
265 3802000 : _covariance_function->computedKdhyper(dKdhp, inputs, hyper_param_name, ii);
266 7604000 : RealEigenMatrix tmp = alpha * dKdhp - _K_cho_decomp.solve(dKdhp);
267 3802000 : grad_vec[global_index] = -tmp.trace() / 2.0;
268 : }
269 : }
270 1078000 : return grad_vec;
271 0 : }
272 :
273 : void
274 427 : GaussianProcess::mapToVec(
275 : const std::unordered_map<std::string, std::tuple<unsigned int, unsigned int, Real, Real>> &
276 : tuning_data,
277 : const std::unordered_map<std::string, Real> & scalar_map,
278 : const std::unordered_map<std::string, std::vector<Real>> & vector_map,
279 : std::vector<Real> & vec) const
280 : {
281 1315 : for (auto iter : tuning_data)
282 : {
283 : const std::string & param_name = iter.first;
284 : const auto scalar_it = scalar_map.find(param_name);
285 888 : if (scalar_it != scalar_map.end())
286 427 : vec[std::get<0>(iter.second)] = scalar_it->second;
287 : else
288 : {
289 : const auto vector_it = vector_map.find(param_name);
290 461 : if (vector_it != vector_map.end())
291 1640 : for (unsigned int ii = 0; ii < std::get<1>(iter.second); ++ii)
292 1179 : vec[std::get<0>(iter.second) + ii] = (vector_it->second)[ii];
293 : }
294 : }
295 427 : }
296 :
297 : void
298 1078000 : GaussianProcess::vecToMap(
299 : const std::unordered_map<std::string, std::tuple<unsigned int, unsigned int, Real, Real>> &
300 : tuning_data,
301 : std::unordered_map<std::string, Real> & scalar_map,
302 : std::unordered_map<std::string, std::vector<Real>> & vector_map,
303 : const std::vector<Real> & vec) const
304 : {
305 3268000 : for (auto iter : tuning_data)
306 : {
307 : const std::string & param_name = iter.first;
308 2190000 : if (scalar_map.find(param_name) != scalar_map.end())
309 1078000 : scalar_map[param_name] = vec[std::get<0>(iter.second)];
310 1112000 : else if (vector_map.find(param_name) != vector_map.end())
311 3836000 : for (unsigned int ii = 0; ii < std::get<1>(iter.second); ++ii)
312 2724000 : vector_map[param_name][ii] = vec[std::get<0>(iter.second) + ii];
313 : }
314 1078000 : }
315 :
316 : } // StochasticTools namespace
317 :
318 : template <>
319 : void
320 22 : dataStore(std::ostream & stream, Eigen::LLT<RealEigenMatrix> & decomp, void * context)
321 : {
322 : // Store the L matrix as opposed to the full matrix to avoid compounding
323 : // roundoff error and decomposition error
324 22 : RealEigenMatrix L(decomp.matrixL());
325 22 : dataStore(stream, L, context);
326 22 : }
327 :
328 : template <>
329 : void
330 34 : dataLoad(std::istream & stream, Eigen::LLT<RealEigenMatrix> & decomp, void * context)
331 : {
332 : RealEigenMatrix L;
333 34 : dataLoad(stream, L, context);
334 34 : decomp.compute(L * L.transpose());
335 34 : }
336 :
337 : template <>
338 : void
339 22 : dataStore(std::ostream & stream, StochasticTools::GaussianProcess & gp_utils, void * context)
340 : {
341 22 : dataStore(stream, gp_utils.hyperparamMap(), context);
342 22 : dataStore(stream, gp_utils.hyperparamVectorMap(), context);
343 22 : dataStore(stream, gp_utils.covarType(), context);
344 22 : dataStore(stream, gp_utils.covarName(), context);
345 : dataStore(stream, gp_utils.covarNumOutputs(), context);
346 22 : dataStore(stream, gp_utils.dependentCovarNames(), context);
347 22 : dataStore(stream, gp_utils.dependentCovarTypes(), context);
348 22 : dataStore(stream, gp_utils.K(), context);
349 22 : dataStore(stream, gp_utils.KResultsSolve(), context);
350 22 : dataStore(stream, gp_utils.KCholeskyDecomp(), context);
351 22 : dataStore(stream, gp_utils.paramStandardizer(), context);
352 22 : dataStore(stream, gp_utils.dataStandardizer(), context);
353 22 : }
354 :
355 : template <>
356 : void
357 34 : dataLoad(std::istream & stream, StochasticTools::GaussianProcess & gp_utils, void * context)
358 : {
359 34 : dataLoad(stream, gp_utils.hyperparamMap(), context);
360 34 : dataLoad(stream, gp_utils.hyperparamVectorMap(), context);
361 34 : dataLoad(stream, gp_utils.covarType(), context);
362 34 : dataLoad(stream, gp_utils.covarName(), context);
363 : dataLoad(stream, gp_utils.covarNumOutputs(), context);
364 34 : dataLoad(stream, gp_utils.dependentCovarNames(), context);
365 34 : dataLoad(stream, gp_utils.dependentCovarTypes(), context);
366 34 : dataLoad(stream, gp_utils.K(), context);
367 34 : dataLoad(stream, gp_utils.KResultsSolve(), context);
368 34 : dataLoad(stream, gp_utils.KCholeskyDecomp(), context);
369 34 : dataLoad(stream, gp_utils.paramStandardizer(), context);
370 34 : dataLoad(stream, gp_utils.dataStandardizer(), context);
371 34 : }
|