Line data Source code
1 : //* This file is part of the MOOSE framework
2 : //* https://www.mooseframework.org
3 : //*
4 : //* All rights reserved, see COPYRIGHT for full restrictions
5 : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 : //*
7 : //* Licensed under LGPL 2.1, please see LICENSE for details
8 : //* https://www.gnu.org/licenses/lgpl-2.1.html
9 :
10 : #include "KokkosAssembly.h"
11 :
12 : #include "MooseMesh.h"
13 : #include "FEProblemBase.h"
14 : #include "NonlinearSystemBase.h"
15 : #include "AuxiliarySystem.h"
16 : #include "Assembly.h"
17 : #include "BoundaryRestrictable.h"
18 :
19 : #include "libmesh/fe_interface.h"
20 : #include "libmesh/reference_elem.h"
21 :
22 : namespace Moose
23 : {
24 : namespace Kokkos
25 : {
26 :
27 42670 : Assembly::Assembly(FEProblemBase & problem)
28 42360 : : MeshHolder(*problem.mesh().getKokkosMesh()),
29 42360 : _problem(problem),
30 42360 : _mesh(problem.mesh()),
31 127080 : _dimension(_mesh.dimension())
32 : {
33 42670 : }
34 :
35 : void
36 693 : Assembly::init()
37 : {
38 : // Cache mesh information
39 :
40 693 : auto num_subdomains = _mesh.nSubdomains();
41 :
42 693 : _coord_type.create(num_subdomains);
43 :
44 1436 : for (auto subdomain : _mesh.meshSubdomains())
45 743 : _coord_type[kokkosMesh().getContiguousSubdomainID(subdomain)] = _mesh.getCoordSystem(subdomain);
46 :
47 693 : _coord_type.copyToDevice();
48 :
49 693 : if (_mesh.usingGeneralAxisymmetricCoordAxes())
50 : {
51 0 : _rz_axis.create(num_subdomains);
52 :
53 0 : for (auto subdomain : _mesh.meshSubdomains())
54 0 : _rz_axis[kokkosMesh().getContiguousSubdomainID(subdomain)] =
55 0 : _mesh.getGeneralAxisymmetricCoordAxis(subdomain);
56 :
57 0 : _rz_axis.copyToDevice();
58 : }
59 : else
60 693 : _rz_radial_coord = _mesh.getAxisymmetricRadialCoord();
61 :
62 : // Initialize quadrature and shape data
63 :
64 693 : initQuadrature();
65 693 : initShape();
66 693 : cachePhysicalMap();
67 693 : }
68 :
69 : void
70 693 : Assembly::initQuadrature()
71 : {
72 693 : auto num_subdomains = _mesh.nSubdomains();
73 693 : auto num_elem_types = kokkosMesh().getElementTypeMap().size();
74 :
75 693 : _q_points.create(num_subdomains, num_elem_types);
76 693 : _q_points_face.create(num_subdomains, num_elem_types);
77 693 : _weights.create(num_subdomains, num_elem_types);
78 693 : _weights_face.create(num_subdomains, num_elem_types);
79 :
80 : // Find boundaries where material properties should be computed
81 :
82 166 : auto & boundary_objects =
83 527 : _problem.getKokkosMaterialPropertyStorageConsumers(Moose::BOUNDARY_MATERIAL_DATA);
84 :
85 907 : for (auto object : boundary_objects)
86 : {
87 214 : auto boundary_restriction = dynamic_cast<const BoundaryRestrictable *>(object);
88 :
89 214 : if (boundary_restriction)
90 428 : for (auto boundary : boundary_restriction->boundaryIDs())
91 214 : _material_boundaries.insert(boundary);
92 : else
93 0 : mooseError("Kokkos assembly error: ", object->name(), " is not boundary-restricted.");
94 : }
95 :
96 : // Cache quadrature data
97 :
98 693 : std::map<SubdomainID, std::map<ElemType, unsigned int>> n_qps;
99 693 : std::map<SubdomainID, std::map<ElemType, std::vector<unsigned int>>> n_qps_face;
100 :
101 693 : _max_qps_per_elem = 0;
102 :
103 1436 : for (auto subdomain : _mesh.meshSubdomains())
104 : {
105 743 : auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
106 :
107 743 : auto & assembly = _problem.assembly(0, 0);
108 743 : auto qrule = assembly.writeableQRule(_dimension, subdomain, {});
109 743 : auto qrule_face = assembly.writeableQRuleFace(_dimension, subdomain, {});
110 :
111 1486 : for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
112 : {
113 743 : auto elem = &libMesh::ReferenceElem::get(elem_type);
114 :
115 743 : _q_points_face(sid, elem_type_id).create(elem->n_sides());
116 743 : _weights_face(sid, elem_type_id).create(elem->n_sides());
117 :
118 : // Cache volume quadrature of each reference element
119 :
120 743 : qrule->init(*elem, /* p-level */ 0);
121 743 : n_qps[subdomain][elem_type] = qrule->n_points();
122 :
123 743 : _q_points(sid, elem_type_id).create(qrule->n_points());
124 743 : _weights(sid, elem_type_id).create(qrule->n_points());
125 :
126 3559 : for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
127 : {
128 2816 : _q_points(sid, elem_type_id)[qp] = qrule->qp(qp);
129 2816 : _weights(sid, elem_type_id)[qp] = qrule->w(qp);
130 : }
131 :
132 : // Cache face quadrature of each reference element
133 :
134 3487 : for (unsigned int side = 0; side < elem->n_sides(); ++side)
135 : {
136 2744 : qrule_face->init(*elem->side_ptr(side), /* p-level */ 0);
137 2744 : n_qps_face[subdomain][elem_type].push_back(qrule_face->n_points());
138 :
139 2744 : _q_points_face(sid, elem_type_id)[side].create(qrule_face->n_points());
140 2744 : _weights_face(sid, elem_type_id)[side].create(qrule_face->n_points());
141 :
142 8364 : for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
143 : {
144 5620 : _q_points_face(sid, elem_type_id)[side][qp] = qrule_face->qp(qp);
145 5620 : _weights_face(sid, elem_type_id)[side][qp] = qrule_face->w(qp);
146 : }
147 : }
148 :
149 743 : _max_qps_per_elem = std::max(_max_qps_per_elem, n_qps[subdomain][elem_type]);
150 : }
151 : }
152 :
153 693 : auto num_elems = _mesh.nActiveLocalElem();
154 :
155 693 : _n_subdomain_qps.create(num_subdomains);
156 693 : _n_subdomain_qps_face.create(num_subdomains);
157 693 : _n_subdomain_qps = 0;
158 693 : _n_subdomain_qps_face = 0;
159 :
160 693 : _n_qps.create(num_elems);
161 693 : _n_qps_face.create(_mesh.getMaxSidesPerElem(), num_elems);
162 693 : _n_qps_face = 0;
163 :
164 693 : _qp_offset.create(num_elems);
165 693 : _qp_offset_face.create(_mesh.getMaxSidesPerElem(), num_elems);
166 693 : _qp_offset_face = libMesh::DofObject::invalid_id;
167 :
168 33762 : for (auto elem : *_mesh.getActiveLocalElementRange())
169 : {
170 33069 : auto eid = kokkosMesh().getContiguousElementID(elem);
171 33069 : auto sid = kokkosMesh().getContiguousSubdomainID(elem->subdomain_id());
172 :
173 33069 : _n_qps[eid] = n_qps[elem->subdomain_id()][elem->type()];
174 33069 : _qp_offset[eid] = _n_subdomain_qps[sid];
175 33069 : _n_subdomain_qps[sid] += _n_qps[eid];
176 :
177 161045 : for (unsigned int side = 0; side < elem->n_sides(); ++side)
178 127976 : _n_qps_face(side, eid) = n_qps_face[elem->subdomain_id()][elem->type()][side];
179 : }
180 :
181 894 : for (auto boundary : _material_boundaries)
182 1520 : for (auto elem_id : _mesh.getBoundaryActiveSemiLocalElemIds(boundary))
183 : {
184 1319 : auto elem = _mesh.elemPtr(elem_id);
185 :
186 1319 : if (elem->processor_id() == _problem.processor_id())
187 : {
188 1195 : auto sid = kokkosMesh().getContiguousSubdomainID(elem->subdomain_id());
189 1195 : auto eid = kokkosMesh().getContiguousElementID(elem);
190 1195 : auto side = _mesh.sideWithBoundaryID(elem, boundary);
191 :
192 1195 : _qp_offset_face(side, eid) = _n_subdomain_qps_face[sid];
193 1195 : _n_subdomain_qps_face[sid] += _n_qps_face(side, eid);
194 : }
195 201 : }
196 :
197 693 : _q_points.copyToDeviceNested();
198 693 : _q_points_face.copyToDeviceNested();
199 693 : _weights.copyToDeviceNested();
200 693 : _weights_face.copyToDeviceNested();
201 :
202 693 : _n_qps.copyToDevice();
203 693 : _n_qps_face.copyToDevice();
204 693 : _n_subdomain_qps.copyToDevice();
205 693 : _n_subdomain_qps_face.copyToDevice();
206 693 : _qp_offset.copyToDevice();
207 693 : _qp_offset_face.copyToDevice();
208 693 : }
209 :
210 : void
211 693 : Assembly::initShape()
212 : {
213 : // Generate the list of unique FE types
214 :
215 693 : std::set<FEType> fe_types;
216 :
217 1386 : auto getFETypes = [&](::System & system)
218 : {
219 2568 : for (unsigned int var = 0; var < system.n_vars(); var++)
220 1182 : fe_types.insert(system.variable_type(var));
221 2079 : };
222 :
223 1386 : for (unsigned int nl = 0; nl < _problem.numNonlinearSystems(); ++nl)
224 693 : getFETypes(_problem.getNonlinearSystemBase(nl).system());
225 :
226 693 : getFETypes(_problem.getAuxiliarySystem().system());
227 :
228 693 : _fe_type_map.clear();
229 :
230 1407 : for (auto & fet : fe_types)
231 714 : _fe_type_map[fet] = _fe_type_map.size();
232 :
233 : // Cache reference shape data
234 :
235 693 : auto num_subdomains = _mesh.meshSubdomains().size();
236 693 : auto num_elem_types = kokkosMesh().getElementTypeMap().size();
237 :
238 693 : _phi.create(num_subdomains, num_elem_types, _fe_type_map.size());
239 693 : _phi_face.create(num_subdomains, num_elem_types, _fe_type_map.size());
240 693 : _grad_phi.create(num_subdomains, num_elem_types, _fe_type_map.size());
241 693 : _grad_phi_face.create(num_subdomains, num_elem_types, _fe_type_map.size());
242 :
243 693 : _map_phi.create(num_subdomains, num_elem_types);
244 693 : _map_phi_face.create(num_subdomains, num_elem_types);
245 693 : _map_psi_face.create(num_subdomains, num_elem_types);
246 693 : _map_grad_phi.create(num_subdomains, num_elem_types);
247 693 : _map_grad_phi_face.create(num_subdomains, num_elem_types);
248 693 : _map_grad_psi_face.create(num_subdomains, num_elem_types);
249 :
250 693 : _n_dofs.create(num_elem_types, _fe_type_map.size());
251 693 : _n_dofs = 0;
252 :
253 1436 : for (auto subdomain : _mesh.meshSubdomains())
254 : {
255 743 : auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
256 :
257 743 : auto & assembly = _problem.assembly(0, 0);
258 743 : auto qrule = assembly.writeableQRule(_dimension, subdomain, {});
259 743 : auto qrule_face = assembly.writeableQRuleFace(_dimension, subdomain, {});
260 :
261 1507 : for (auto & [fe_type, fe_type_id] : _fe_type_map)
262 : {
263 764 : std::unique_ptr<FEBase> fe(FEBase::build(_dimension, fe_type));
264 764 : std::unique_ptr<FEBase> fe_face(FEBase::build(_dimension, fe_type));
265 :
266 764 : fe->attach_quadrature_rule(qrule);
267 764 : fe_face->attach_quadrature_rule(qrule_face);
268 :
269 1528 : for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
270 : {
271 764 : auto elem = &libMesh::ReferenceElem::get(elem_type);
272 :
273 764 : auto & phi = fe->get_phi();
274 764 : auto & grad_phi = fe->get_dphi();
275 :
276 764 : fe->reinit(elem);
277 :
278 764 : _n_dofs(elem_type_id, fe_type_id) = phi.size();
279 :
280 764 : _phi(sid, elem_type_id, fe_type_id).create(phi.size(), qrule->n_points());
281 764 : _grad_phi(sid, elem_type_id, fe_type_id).create(phi.size(), qrule->n_points());
282 :
283 3601 : for (unsigned int i = 0; i < phi.size(); ++i)
284 14695 : for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
285 : {
286 11858 : _phi(sid, elem_type_id, fe_type_id)(i, qp) = phi[i][qp];
287 11858 : _grad_phi(sid, elem_type_id, fe_type_id)(i, qp) = grad_phi[i][qp];
288 : }
289 :
290 764 : _phi_face(sid, elem_type_id, fe_type_id).create(elem->n_sides());
291 764 : _grad_phi_face(sid, elem_type_id, fe_type_id).create(elem->n_sides());
292 :
293 3550 : for (unsigned int side = 0; side < elem->n_sides(); ++side)
294 : {
295 2786 : auto & phi = fe_face->get_phi();
296 2786 : auto & grad_phi = fe_face->get_dphi();
297 :
298 2786 : fe_face->reinit(elem, side);
299 :
300 2786 : _phi_face(sid, elem_type_id, fe_type_id)(side).create(phi.size(), qrule_face->n_points());
301 4902 : _grad_phi_face(sid, elem_type_id, fe_type_id)(side).create(phi.size(),
302 2116 : qrule_face->n_points());
303 :
304 14068 : for (unsigned int i = 0; i < phi.size(); ++i)
305 36660 : for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
306 : {
307 25378 : _phi_face(sid, elem_type_id, fe_type_id)(side)(i, qp) = phi[i][qp];
308 25378 : _grad_phi_face(sid, elem_type_id, fe_type_id)(side)(i, qp) = grad_phi[i][qp];
309 : }
310 : }
311 : }
312 764 : }
313 :
314 1486 : for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
315 : {
316 743 : auto elem = &libMesh::ReferenceElem::get(elem_type);
317 :
318 178 : std::unique_ptr<FEBase> fe(
319 565 : FEBase::build(_dimension, FEType(elem->default_order(), LAGRANGE)));
320 178 : std::unique_ptr<FEBase> fe_face(
321 565 : FEBase::build(_dimension, FEType(elem->default_order(), LAGRANGE)));
322 :
323 743 : fe->attach_quadrature_rule(qrule);
324 743 : fe_face->attach_quadrature_rule(qrule_face);
325 :
326 743 : auto & phi = fe->get_phi();
327 743 : auto & grad_phi = fe->get_dphi();
328 :
329 743 : fe->reinit(elem);
330 :
331 743 : _map_phi(sid, elem_type_id).create(phi.size(), qrule->n_points());
332 743 : _map_grad_phi(sid, elem_type_id).create(phi.size(), qrule->n_points());
333 :
334 3559 : for (unsigned int i = 0; i < phi.size(); ++i)
335 14632 : for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
336 : {
337 11816 : _map_phi(sid, elem_type_id)(i, qp) = phi[i][qp];
338 11816 : _map_grad_phi(sid, elem_type_id)(i, qp) = grad_phi[i][qp];
339 : }
340 :
341 743 : _map_phi_face(sid, elem_type_id).create(elem->n_sides());
342 743 : _map_grad_phi_face(sid, elem_type_id).create(elem->n_sides());
343 743 : _map_psi_face(sid, elem_type_id).create(elem->n_sides());
344 743 : _map_grad_psi_face(sid, elem_type_id).create(elem->n_sides());
345 :
346 3487 : for (unsigned int side = 0; side < elem->n_sides(); ++side)
347 : {
348 2744 : auto & phi = fe_face->get_phi();
349 2744 : auto & grad_phi = fe_face->get_dphi();
350 :
351 2744 : fe_face->reinit(elem, side);
352 :
353 2744 : _map_phi_face(sid, elem_type_id)(side).create(phi.size(), qrule_face->n_points());
354 2744 : _map_grad_phi_face(sid, elem_type_id)(side).create(phi.size(), qrule_face->n_points());
355 :
356 13984 : for (unsigned int i = 0; i < phi.size(); ++i)
357 36576 : for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
358 : {
359 25336 : _map_phi_face(sid, elem_type_id)(side)(i, qp) = phi[i][qp];
360 25336 : _map_grad_phi_face(sid, elem_type_id)(side)(i, qp) = grad_phi[i][qp];
361 : }
362 :
363 2744 : auto & psi = fe_face->get_fe_map().get_psi();
364 2744 : auto & dpsidxi = fe_face->get_fe_map().get_dpsidxi();
365 2744 : auto & dpsideta = fe_face->get_fe_map().get_dpsideta();
366 :
367 2744 : _map_psi_face(sid, elem_type_id)(side).create(psi.size(), qrule_face->n_points());
368 2744 : _map_grad_psi_face(sid, elem_type_id)(side).create(psi.size(), qrule_face->n_points());
369 :
370 8364 : for (unsigned int i = 0; i < psi.size(); ++i)
371 18288 : for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
372 : {
373 12668 : _map_psi_face(sid, elem_type_id)(side)(i, qp) = psi[i][qp];
374 12668 : if (elem->dim() > 1)
375 12368 : _map_grad_psi_face(sid, elem_type_id)(side)(i, qp)(0) = dpsidxi[i][qp];
376 12668 : if (elem->dim() > 2)
377 3456 : _map_grad_psi_face(sid, elem_type_id)(side)(i, qp)(1) = dpsideta[i][qp];
378 : }
379 : }
380 743 : }
381 : }
382 :
383 693 : _phi.copyToDeviceNested();
384 693 : _phi_face.copyToDeviceNested();
385 693 : _grad_phi.copyToDeviceNested();
386 693 : _grad_phi_face.copyToDeviceNested();
387 :
388 693 : _map_phi.copyToDeviceNested();
389 693 : _map_phi_face.copyToDeviceNested();
390 693 : _map_psi_face.copyToDeviceNested();
391 693 : _map_grad_phi.copyToDeviceNested();
392 693 : _map_grad_phi_face.copyToDeviceNested();
393 693 : _map_grad_psi_face.copyToDeviceNested();
394 :
395 693 : _n_dofs.copyToDevice();
396 693 : }
397 :
398 : void
399 693 : Assembly::cachePhysicalMap()
400 : {
401 693 : auto num_subdomains = _mesh.nSubdomains();
402 693 : auto num_elems = _mesh.nActiveLocalElem();
403 :
404 693 : _jacobian.create(num_subdomains);
405 693 : _jxw.create(num_subdomains);
406 693 : _xyz.create(num_subdomains);
407 :
408 1436 : for (auto subdomain : _mesh.meshSubdomains())
409 : {
410 743 : auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
411 :
412 743 : _jacobian[sid].createDevice(_n_subdomain_qps[sid]);
413 743 : _jxw[sid].createDevice(_n_subdomain_qps[sid]);
414 743 : _xyz[sid].createDevice(_n_subdomain_qps[sid]);
415 : }
416 :
417 693 : _jacobian.copyToDeviceNested();
418 693 : _jxw.copyToDeviceNested();
419 693 : _xyz.copyToDeviceNested();
420 :
421 693 : ::Kokkos::RangePolicy<ExecSpace, ::Kokkos::IndexType<ThreadID>> policy(0, num_elems);
422 693 : ::Kokkos::parallel_for(policy, *this);
423 693 : ::Kokkos::fence();
424 693 : }
425 :
426 : KOKKOS_FUNCTION void
427 23695 : Assembly::operator()(const ThreadID tid) const
428 : {
429 23695 : auto info = kokkosMesh().getElementInfo(tid);
430 23695 : auto offset = getQpOffset(info);
431 :
432 23695 : auto jacobian = &_jacobian[info.subdomain][offset];
433 23695 : auto jxw = &_jxw[info.subdomain][offset];
434 23695 : auto xyz = &_xyz[info.subdomain][offset];
435 :
436 117983 : for (unsigned int qp = 0; qp < getNumQps(info); ++qp)
437 94288 : computePhysicalMap(info, qp, &jacobian[qp], &jxw[qp], &xyz[qp]);
438 23695 : }
439 :
440 : } // namespace Kokkos
441 : } // namespace Moose
|