Line data Source code
1 : //* This file is part of the MOOSE framework
2 : //* https://www.mooseframework.org
3 : //*
4 : //* All rights reserved, see COPYRIGHT for full restrictions
5 : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 : //*
7 : //* Licensed under LGPL 2.1, please see LICENSE for details
8 : //* https://www.gnu.org/licenses/lgpl-2.1.html
9 :
10 : #include "KokkosAssembly.h"
11 :
12 : #include "MooseMesh.h"
13 : #include "FEProblemBase.h"
14 : #include "NonlinearSystemBase.h"
15 : #include "AuxiliarySystem.h"
16 : #include "Assembly.h"
17 : #include "BoundaryRestrictable.h"
18 :
19 : #include "libmesh/fe_interface.h"
20 : #include "libmesh/reference_elem.h"
21 :
22 : namespace Moose
23 : {
24 : namespace Kokkos
25 : {
26 :
27 43070 : Assembly::Assembly(FEProblemBase & problem)
28 42580 : : MeshHolder(*problem.mesh().getKokkosMesh()),
29 42580 : _problem(problem),
30 42580 : _mesh(problem.mesh()),
31 127740 : _dimension(_mesh.dimension())
32 : {
33 43070 : }
34 :
35 : void
36 741 : Assembly::init()
37 : {
38 : // Cache mesh information
39 :
40 741 : auto num_subdomains = _mesh.nSubdomains();
41 :
42 741 : _coord_type.create(num_subdomains);
43 :
44 1532 : for (auto subdomain : _mesh.meshSubdomains())
45 791 : _coord_type[kokkosMesh().getContiguousSubdomainID(subdomain)] = _mesh.getCoordSystem(subdomain);
46 :
47 741 : _coord_type.copyToDevice();
48 :
49 741 : if (_mesh.usingGeneralAxisymmetricCoordAxes())
50 : {
51 0 : _rz_axis.create(num_subdomains);
52 :
53 0 : for (auto subdomain : _mesh.meshSubdomains())
54 0 : _rz_axis[kokkosMesh().getContiguousSubdomainID(subdomain)] =
55 0 : _mesh.getGeneralAxisymmetricCoordAxis(subdomain);
56 :
57 0 : _rz_axis.copyToDevice();
58 : }
59 : else
60 741 : _rz_radial_coord = _mesh.getAxisymmetricRadialCoord();
61 :
62 : // Initialize quadrature and shape data
63 :
64 741 : initQuadrature();
65 741 : initShape();
66 741 : cachePhysicalMap();
67 741 : }
68 :
69 : void
70 741 : Assembly::initQuadrature()
71 : {
72 741 : auto num_subdomains = _mesh.nSubdomains();
73 741 : auto num_elem_types = kokkosMesh().getElementTypeMap().size();
74 :
75 741 : _q_points.create(num_subdomains, num_elem_types);
76 741 : _q_points_face.create(num_subdomains, num_elem_types);
77 741 : _weights.create(num_subdomains, num_elem_types);
78 741 : _weights_face.create(num_subdomains, num_elem_types);
79 :
80 : // Find boundaries where material properties should be computed
81 :
82 178 : auto & boundary_objects =
83 563 : _problem.getKokkosMaterialPropertyStorageConsumers(Moose::BOUNDARY_MATERIAL_DATA);
84 :
85 967 : for (auto object : boundary_objects)
86 : {
87 226 : auto boundary_restriction = dynamic_cast<const BoundaryRestrictable *>(object);
88 :
89 226 : if (boundary_restriction)
90 452 : for (auto boundary : boundary_restriction->boundaryIDs())
91 226 : _material_boundaries.insert(boundary);
92 : else
93 0 : mooseError("Kokkos assembly error: ", object->name(), " is not boundary-restricted.");
94 : }
95 :
96 : // Cache quadrature data
97 :
98 741 : std::map<SubdomainID, std::map<ElemType, unsigned int>> n_qps;
99 741 : std::map<SubdomainID, std::map<ElemType, std::vector<unsigned int>>> n_qps_face;
100 :
101 741 : _max_qps_per_elem = 0;
102 :
103 1532 : for (auto subdomain : _mesh.meshSubdomains())
104 : {
105 791 : auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
106 :
107 791 : auto & assembly = _problem.assembly(0, 0);
108 791 : auto qrule = assembly.writeableQRule(_dimension, subdomain, {});
109 791 : auto qrule_face = assembly.writeableQRuleFace(_dimension, subdomain, {});
110 :
111 1580 : for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
112 : {
113 789 : auto elem = &libMesh::ReferenceElem::get(elem_type);
114 :
115 789 : _q_points_face(sid, elem_type_id).create(elem->n_sides());
116 789 : _weights_face(sid, elem_type_id).create(elem->n_sides());
117 :
118 : // Cache volume quadrature of each reference element
119 :
120 789 : qrule->init(*elem, /* p-level */ 0);
121 789 : n_qps[subdomain][elem_type] = qrule->n_points();
122 :
123 789 : _q_points(sid, elem_type_id).create(qrule->n_points());
124 789 : _weights(sid, elem_type_id).create(qrule->n_points());
125 :
126 4099 : for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
127 : {
128 3310 : _q_points(sid, elem_type_id)[qp] = qrule->qp(qp);
129 3310 : _weights(sid, elem_type_id)[qp] = qrule->w(qp);
130 : }
131 :
132 : // Cache face quadrature of each reference element
133 :
134 3717 : for (unsigned int side = 0; side < elem->n_sides(); ++side)
135 : {
136 2928 : qrule_face->init(*elem->side_ptr(side), /* p-level */ 0);
137 2928 : n_qps_face[subdomain][elem_type].push_back(qrule_face->n_points());
138 :
139 2928 : _q_points_face(sid, elem_type_id)[side].create(qrule_face->n_points());
140 2928 : _weights_face(sid, elem_type_id)[side].create(qrule_face->n_points());
141 :
142 9164 : for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
143 : {
144 6236 : _q_points_face(sid, elem_type_id)[side][qp] = qrule_face->qp(qp);
145 6236 : _weights_face(sid, elem_type_id)[side][qp] = qrule_face->w(qp);
146 : }
147 : }
148 :
149 789 : _max_qps_per_elem = std::max(_max_qps_per_elem, n_qps[subdomain][elem_type]);
150 : }
151 : }
152 :
153 741 : auto num_elems = _mesh.nActiveLocalElem();
154 :
155 741 : _n_subdomain_qps.create(num_subdomains);
156 741 : _n_subdomain_qps_face.create(num_subdomains);
157 741 : _n_subdomain_qps = 0;
158 741 : _n_subdomain_qps_face = 0;
159 :
160 741 : _n_qps.create(num_elems);
161 741 : _n_qps_face.create(_mesh.getMaxSidesPerElem(), num_elems);
162 741 : _n_qps_face = 0;
163 :
164 741 : _qp_offset.create(num_elems);
165 741 : _qp_offset_face.create(_mesh.getMaxSidesPerElem(), num_elems);
166 741 : _qp_offset_face = libMesh::DofObject::invalid_id;
167 :
168 36820 : for (auto elem : *_mesh.getActiveLocalElementRange())
169 : {
170 36079 : auto eid = kokkosMesh().getContiguousElementID(elem);
171 36079 : auto sid = kokkosMesh().getContiguousSubdomainID(elem->subdomain_id());
172 :
173 36079 : _n_qps[eid] = n_qps[elem->subdomain_id()][elem->type()];
174 36079 : _qp_offset[eid] = _n_subdomain_qps[sid];
175 36079 : _n_subdomain_qps[sid] += _n_qps[eid];
176 :
177 176095 : for (unsigned int side = 0; side < elem->n_sides(); ++side)
178 140016 : _n_qps_face(side, eid) = n_qps_face[elem->subdomain_id()][elem->type()][side];
179 : }
180 :
181 942 : for (auto boundary : _material_boundaries)
182 1520 : for (auto elem_id : _mesh.getBoundaryActiveSemiLocalElemIds(boundary))
183 : {
184 1319 : auto elem = _mesh.elemPtr(elem_id);
185 :
186 1319 : if (elem->processor_id() == _problem.processor_id())
187 : {
188 1195 : auto sid = kokkosMesh().getContiguousSubdomainID(elem->subdomain_id());
189 1195 : auto eid = kokkosMesh().getContiguousElementID(elem);
190 1195 : auto side = _mesh.sideWithBoundaryID(elem, boundary);
191 :
192 1195 : _qp_offset_face(side, eid) = _n_subdomain_qps_face[sid];
193 1195 : _n_subdomain_qps_face[sid] += _n_qps_face(side, eid);
194 : }
195 201 : }
196 :
197 741 : _q_points.copyToDeviceNested();
198 741 : _q_points_face.copyToDeviceNested();
199 741 : _weights.copyToDeviceNested();
200 741 : _weights_face.copyToDeviceNested();
201 :
202 741 : _n_qps.copyToDevice();
203 741 : _n_qps_face.copyToDevice();
204 741 : _n_subdomain_qps.copyToDevice();
205 741 : _n_subdomain_qps_face.copyToDevice();
206 741 : _qp_offset.copyToDevice();
207 741 : _qp_offset_face.copyToDevice();
208 741 : }
209 :
210 : void
211 741 : Assembly::initShape()
212 : {
213 : // Generate the list of unique FE types
214 :
215 741 : std::set<FEType> fe_types;
216 :
217 1482 : auto getFETypes = [&](::System & system)
218 : {
219 2906 : for (unsigned int var = 0; var < system.n_vars(); var++)
220 1424 : fe_types.insert(system.variable_type(var));
221 2223 : };
222 :
223 1482 : for (unsigned int nl = 0; nl < _problem.numNonlinearSystems(); ++nl)
224 741 : getFETypes(_problem.getNonlinearSystemBase(nl).system());
225 :
226 741 : getFETypes(_problem.getAuxiliarySystem().system());
227 :
228 741 : _fe_type_map.clear();
229 :
230 1613 : for (auto & fet : fe_types)
231 872 : _fe_type_map[fet] = _fe_type_map.size();
232 :
233 : // Cache reference shape data
234 :
235 741 : auto num_subdomains = _mesh.meshSubdomains().size();
236 741 : auto num_elem_types = kokkosMesh().getElementTypeMap().size();
237 :
238 741 : _phi.create(num_subdomains, num_elem_types, _fe_type_map.size());
239 741 : _phi_face.create(num_subdomains, num_elem_types, _fe_type_map.size());
240 741 : _grad_phi.create(num_subdomains, num_elem_types, _fe_type_map.size());
241 741 : _grad_phi_face.create(num_subdomains, num_elem_types, _fe_type_map.size());
242 :
243 741 : _map_phi.create(num_subdomains, num_elem_types);
244 741 : _map_phi_face.create(num_subdomains, num_elem_types);
245 741 : _map_psi_face.create(num_subdomains, num_elem_types);
246 741 : _map_grad_phi.create(num_subdomains, num_elem_types);
247 741 : _map_grad_phi_face.create(num_subdomains, num_elem_types);
248 741 : _map_grad_psi_face.create(num_subdomains, num_elem_types);
249 :
250 741 : _n_dofs.create(num_elem_types, _fe_type_map.size());
251 741 : _n_dofs = 0;
252 :
253 1532 : for (auto subdomain : _mesh.meshSubdomains())
254 : {
255 791 : auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
256 :
257 791 : auto & assembly = _problem.assembly(0, 0);
258 791 : auto qrule = assembly.writeableQRule(_dimension, subdomain, {});
259 791 : auto qrule_face = assembly.writeableQRuleFace(_dimension, subdomain, {});
260 :
261 1713 : for (auto & [fe_type, fe_type_id] : _fe_type_map)
262 : {
263 922 : std::unique_ptr<FEBase> fe(FEBase::build(_dimension, fe_type));
264 922 : std::unique_ptr<FEBase> fe_face(FEBase::build(_dimension, fe_type));
265 :
266 922 : fe->attach_quadrature_rule(qrule);
267 922 : fe_face->attach_quadrature_rule(qrule_face);
268 :
269 1842 : for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
270 : {
271 920 : auto elem = &libMesh::ReferenceElem::get(elem_type);
272 :
273 920 : auto & phi = fe->get_phi();
274 920 : auto & grad_phi = fe->get_dphi();
275 :
276 920 : fe->reinit(elem);
277 :
278 920 : _n_dofs(elem_type_id, fe_type_id) = phi.size();
279 :
280 920 : _phi(sid, elem_type_id, fe_type_id).create(phi.size(), qrule->n_points());
281 920 : _grad_phi(sid, elem_type_id, fe_type_id).create(phi.size(), qrule->n_points());
282 :
283 4397 : for (unsigned int i = 0; i < phi.size(); ++i)
284 21319 : for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
285 : {
286 17842 : _phi(sid, elem_type_id, fe_type_id)(i, qp) = phi[i][qp];
287 17842 : _grad_phi(sid, elem_type_id, fe_type_id)(i, qp) = grad_phi[i][qp];
288 : }
289 :
290 920 : _phi_face(sid, elem_type_id, fe_type_id).create(elem->n_sides());
291 920 : _grad_phi_face(sid, elem_type_id, fe_type_id).create(elem->n_sides());
292 :
293 4402 : for (unsigned int side = 0; side < elem->n_sides(); ++side)
294 : {
295 3482 : auto & phi = fe_face->get_phi();
296 3482 : auto & grad_phi = fe_face->get_dphi();
297 :
298 3482 : fe_face->reinit(elem, side);
299 :
300 3482 : _phi_face(sid, elem_type_id, fe_type_id)(side).create(phi.size(), qrule_face->n_points());
301 6128 : _grad_phi_face(sid, elem_type_id, fe_type_id)(side).create(phi.size(),
302 2646 : qrule_face->n_points());
303 :
304 17396 : for (unsigned int i = 0; i < phi.size(); ++i)
305 47612 : for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
306 : {
307 33698 : _phi_face(sid, elem_type_id, fe_type_id)(side)(i, qp) = phi[i][qp];
308 33698 : _grad_phi_face(sid, elem_type_id, fe_type_id)(side)(i, qp) = grad_phi[i][qp];
309 : }
310 : }
311 : }
312 922 : }
313 :
314 1580 : for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
315 : {
316 789 : auto elem = &libMesh::ReferenceElem::get(elem_type);
317 :
318 190 : std::unique_ptr<FEBase> fe(
319 599 : FEBase::build(_dimension, FEType(elem->default_order(), LAGRANGE)));
320 190 : std::unique_ptr<FEBase> fe_face(
321 599 : FEBase::build(_dimension, FEType(elem->default_order(), LAGRANGE)));
322 :
323 789 : fe->attach_quadrature_rule(qrule);
324 789 : fe_face->attach_quadrature_rule(qrule_face);
325 :
326 789 : auto & phi = fe->get_phi();
327 789 : auto & grad_phi = fe->get_dphi();
328 :
329 789 : fe->reinit(elem);
330 :
331 789 : _map_phi(sid, elem_type_id).create(phi.size(), qrule->n_points());
332 789 : _map_grad_phi(sid, elem_type_id).create(phi.size(), qrule->n_points());
333 :
334 3849 : for (unsigned int i = 0; i < phi.size(); ++i)
335 17392 : for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
336 : {
337 14332 : _map_phi(sid, elem_type_id)(i, qp) = phi[i][qp];
338 14332 : _map_grad_phi(sid, elem_type_id)(i, qp) = grad_phi[i][qp];
339 : }
340 :
341 789 : _map_phi_face(sid, elem_type_id).create(elem->n_sides());
342 789 : _map_grad_phi_face(sid, elem_type_id).create(elem->n_sides());
343 789 : _map_psi_face(sid, elem_type_id).create(elem->n_sides());
344 789 : _map_grad_psi_face(sid, elem_type_id).create(elem->n_sides());
345 :
346 3717 : for (unsigned int side = 0; side < elem->n_sides(); ++side)
347 : {
348 2928 : auto & phi = fe_face->get_phi();
349 2928 : auto & grad_phi = fe_face->get_dphi();
350 :
351 2928 : fe_face->reinit(elem, side);
352 :
353 2928 : _map_phi_face(sid, elem_type_id)(side).create(phi.size(), qrule_face->n_points());
354 2928 : _map_grad_phi_face(sid, elem_type_id)(side).create(phi.size(), qrule_face->n_points());
355 :
356 15144 : for (unsigned int i = 0; i < phi.size(); ++i)
357 40736 : for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
358 : {
359 28520 : _map_phi_face(sid, elem_type_id)(side)(i, qp) = phi[i][qp];
360 28520 : _map_grad_phi_face(sid, elem_type_id)(side)(i, qp) = grad_phi[i][qp];
361 : }
362 :
363 2928 : auto & psi = fe_face->get_fe_map().get_psi();
364 2928 : auto & dpsidxi = fe_face->get_fe_map().get_dpsidxi();
365 2928 : auto & dpsideta = fe_face->get_fe_map().get_dpsideta();
366 :
367 2928 : _map_psi_face(sid, elem_type_id)(side).create(psi.size(), qrule_face->n_points());
368 2928 : _map_grad_psi_face(sid, elem_type_id)(side).create(psi.size(), qrule_face->n_points());
369 :
370 8964 : for (unsigned int i = 0; i < psi.size(); ++i)
371 20080 : for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
372 : {
373 14044 : _map_psi_face(sid, elem_type_id)(side)(i, qp) = psi[i][qp];
374 14044 : if (elem->dim() > 1)
375 13744 : _map_grad_psi_face(sid, elem_type_id)(side)(i, qp)(0) = dpsidxi[i][qp];
376 14044 : if (elem->dim() > 2)
377 3456 : _map_grad_psi_face(sid, elem_type_id)(side)(i, qp)(1) = dpsideta[i][qp];
378 : }
379 : }
380 789 : }
381 : }
382 :
383 741 : _phi.copyToDeviceNested();
384 741 : _phi_face.copyToDeviceNested();
385 741 : _grad_phi.copyToDeviceNested();
386 741 : _grad_phi_face.copyToDeviceNested();
387 :
388 741 : _map_phi.copyToDeviceNested();
389 741 : _map_phi_face.copyToDeviceNested();
390 741 : _map_psi_face.copyToDeviceNested();
391 741 : _map_grad_phi.copyToDeviceNested();
392 741 : _map_grad_phi_face.copyToDeviceNested();
393 741 : _map_grad_psi_face.copyToDeviceNested();
394 :
395 741 : _n_dofs.copyToDevice();
396 741 : }
397 :
398 : void
399 741 : Assembly::cachePhysicalMap()
400 : {
401 741 : auto num_subdomains = _mesh.nSubdomains();
402 741 : auto num_elems = _mesh.nActiveLocalElem();
403 :
404 741 : _jacobian.create(num_subdomains);
405 741 : _jxw.create(num_subdomains);
406 741 : _xyz.create(num_subdomains);
407 :
408 1532 : for (auto subdomain : _mesh.meshSubdomains())
409 : {
410 791 : auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
411 :
412 791 : _jacobian[sid].createDevice(_n_subdomain_qps[sid]);
413 791 : _jxw[sid].createDevice(_n_subdomain_qps[sid]);
414 791 : _xyz[sid].createDevice(_n_subdomain_qps[sid]);
415 : }
416 :
417 741 : _jacobian.copyToDeviceNested();
418 741 : _jxw.copyToDeviceNested();
419 741 : _xyz.copyToDeviceNested();
420 :
421 741 : ::Kokkos::RangePolicy<ExecSpace, ::Kokkos::IndexType<ThreadID>> policy(0, num_elems);
422 741 : ::Kokkos::parallel_for(policy, *this);
423 741 : ::Kokkos::fence();
424 741 : }
425 :
426 : KOKKOS_FUNCTION void
427 25802 : Assembly::operator()(const ThreadID tid) const
428 : {
429 25802 : auto info = kokkosMesh().getElementInfo(tid);
430 25802 : auto offset = getQpOffset(info);
431 :
432 25802 : auto jacobian = &_jacobian[info.subdomain][offset];
433 25802 : auto jxw = &_jxw[info.subdomain][offset];
434 25802 : auto xyz = &_xyz[info.subdomain][offset];
435 :
436 148018 : for (unsigned int qp = 0; qp < getNumQps(info); ++qp)
437 122216 : computePhysicalMap(info, qp, &jacobian[qp], &jxw[qp], &xyz[qp]);
438 25802 : }
439 :
440 : } // namespace Kokkos
441 : } // namespace Moose
|