LCOV - code coverage report
Current view: top level - src/kokkos/base - KokkosAssembly.K (source / functions) Hit Total Coverage
Test: idaholab/moose framework: #31653 (1b668c) with base bb0a08 Lines: 245 251 97.6 %
Date: 2025-11-03 17:02:13 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //* This file is part of the MOOSE framework
       2             : //* https://www.mooseframework.org
       3             : //*
       4             : //* All rights reserved, see COPYRIGHT for full restrictions
       5             : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
       6             : //*
       7             : //* Licensed under LGPL 2.1, please see LICENSE for details
       8             : //* https://www.gnu.org/licenses/lgpl-2.1.html
       9             : 
      10             : #include "KokkosAssembly.h"
      11             : 
      12             : #include "MooseMesh.h"
      13             : #include "FEProblemBase.h"
      14             : #include "NonlinearSystemBase.h"
      15             : #include "AuxiliarySystem.h"
      16             : #include "Assembly.h"
      17             : #include "BoundaryRestrictable.h"
      18             : 
      19             : #include "libmesh/fe_interface.h"
      20             : #include "libmesh/reference_elem.h"
      21             : 
      22             : namespace Moose
      23             : {
      24             : namespace Kokkos
      25             : {
      26             : 
      27       43206 : Assembly::Assembly(FEProblemBase & problem)
      28       42713 :   : MeshHolder(*problem.mesh().getKokkosMesh()),
      29       42713 :     _problem(problem),
      30       42713 :     _mesh(problem.mesh()),
      31      128139 :     _dimension(_mesh.dimension())
      32             : {
      33       43206 : }
      34             : 
      35             : void
      36         807 : Assembly::init()
      37             : {
      38             :   // Cache mesh information
      39             : 
      40         807 :   auto num_subdomains = _mesh.nSubdomains();
      41             : 
      42         807 :   _coord_type.create(num_subdomains);
      43             : 
      44        1664 :   for (auto subdomain : _mesh.meshSubdomains())
      45         857 :     _coord_type[kokkosMesh().getContiguousSubdomainID(subdomain)] = _mesh.getCoordSystem(subdomain);
      46             : 
      47         807 :   _coord_type.copyToDevice();
      48             : 
      49         807 :   if (_mesh.usingGeneralAxisymmetricCoordAxes())
      50             :   {
      51           0 :     _rz_axis.create(num_subdomains);
      52             : 
      53           0 :     for (auto subdomain : _mesh.meshSubdomains())
      54           0 :       _rz_axis[kokkosMesh().getContiguousSubdomainID(subdomain)] =
      55           0 :           _mesh.getGeneralAxisymmetricCoordAxis(subdomain);
      56             : 
      57           0 :     _rz_axis.copyToDevice();
      58             :   }
      59             :   else
      60         807 :     _rz_radial_coord = _mesh.getAxisymmetricRadialCoord();
      61             : 
      62             :   // Initialize quadrature and shape data
      63             : 
      64         807 :   initQuadrature();
      65         807 :   initShape();
      66         807 :   cachePhysicalMap();
      67         807 : }
      68             : 
      69             : void
      70         807 : Assembly::initQuadrature()
      71             : {
      72         807 :   auto num_subdomains = _mesh.nSubdomains();
      73         807 :   auto num_elem_types = kokkosMesh().getElementTypeMap().size();
      74             : 
      75         807 :   _q_points.create(num_subdomains, num_elem_types);
      76         807 :   _q_points_face.create(num_subdomains, num_elem_types);
      77         807 :   _weights.create(num_subdomains, num_elem_types);
      78         807 :   _weights_face.create(num_subdomains, num_elem_types);
      79             : 
      80             :   // Find boundaries where material properties should be computed
      81             : 
      82         181 :   auto & boundary_objects =
      83         626 :       _problem.getKokkosMaterialPropertyStorageConsumers(Moose::BOUNDARY_MATERIAL_DATA);
      84             : 
      85        1099 :   for (auto object : boundary_objects)
      86             :   {
      87         292 :     auto boundary_restriction = dynamic_cast<const BoundaryRestrictable *>(object);
      88             : 
      89         292 :     if (boundary_restriction)
      90         584 :       for (auto boundary : boundary_restriction->boundaryIDs())
      91         292 :         _material_boundaries.insert(boundary);
      92             :     else
      93           0 :       mooseError("Kokkos assembly error: ", object->name(), " is not boundary-restricted.");
      94             :   }
      95             : 
      96             :   // Cache quadrature data
      97             : 
      98         807 :   std::map<SubdomainID, std::map<ElemType, unsigned int>> n_qps;
      99         807 :   std::map<SubdomainID, std::map<ElemType, std::vector<unsigned int>>> n_qps_face;
     100             : 
     101         807 :   _max_qps_per_elem = 0;
     102             : 
     103        1664 :   for (auto subdomain : _mesh.meshSubdomains())
     104             :   {
     105         857 :     auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
     106             : 
     107         857 :     auto & assembly = _problem.assembly(0, 0);
     108         857 :     auto qrule = assembly.writeableQRule(_dimension, subdomain, {});
     109         857 :     auto qrule_face = assembly.writeableQRuleFace(_dimension, subdomain, {});
     110             : 
     111        1712 :     for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
     112             :     {
     113         855 :       auto elem = &libMesh::ReferenceElem::get(elem_type);
     114             : 
     115         855 :       _q_points_face(sid, elem_type_id).create(elem->n_sides());
     116         855 :       _weights_face(sid, elem_type_id).create(elem->n_sides());
     117             : 
     118             :       // Cache volume quadrature of each reference element
     119             : 
     120         855 :       qrule->init(*elem, /* p-level */ 0);
     121         855 :       n_qps[subdomain][elem_type] = qrule->n_points();
     122             : 
     123         855 :       _q_points(sid, elem_type_id).create(qrule->n_points());
     124         855 :       _weights(sid, elem_type_id).create(qrule->n_points());
     125             : 
     126        4429 :       for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
     127             :       {
     128        3574 :         _q_points(sid, elem_type_id)[qp] = qrule->qp(qp);
     129        3574 :         _weights(sid, elem_type_id)[qp] = qrule->w(qp);
     130             :       }
     131             : 
     132             :       // Cache face quadrature of each reference element
     133             : 
     134        4047 :       for (unsigned int side = 0; side < elem->n_sides(); ++side)
     135             :       {
     136        3192 :         qrule_face->init(*elem->side_ptr(side), /* p-level */ 0);
     137        3192 :         n_qps_face[subdomain][elem_type].push_back(qrule_face->n_points());
     138             : 
     139        3192 :         _q_points_face(sid, elem_type_id)[side].create(qrule_face->n_points());
     140        3192 :         _weights_face(sid, elem_type_id)[side].create(qrule_face->n_points());
     141             : 
     142        9956 :         for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     143             :         {
     144        6764 :           _q_points_face(sid, elem_type_id)[side][qp] = qrule_face->qp(qp);
     145        6764 :           _weights_face(sid, elem_type_id)[side][qp] = qrule_face->w(qp);
     146             :         }
     147             :       }
     148             : 
     149         855 :       _max_qps_per_elem = std::max(_max_qps_per_elem, n_qps[subdomain][elem_type]);
     150             :     }
     151             :   }
     152             : 
     153         807 :   auto num_elems = _mesh.nActiveLocalElem();
     154             : 
     155         807 :   _n_subdomain_qps.create(num_subdomains);
     156         807 :   _n_subdomain_qps_face.create(num_subdomains);
     157         807 :   _n_subdomain_qps = 0;
     158         807 :   _n_subdomain_qps_face = 0;
     159             : 
     160         807 :   _n_qps.create(num_elems);
     161         807 :   _n_qps_face.create(_mesh.getMaxSidesPerElem(), num_elems);
     162         807 :   _n_qps_face = 0;
     163             : 
     164         807 :   _qp_offset.create(num_elems);
     165         807 :   _qp_offset_face.create(_mesh.getMaxSidesPerElem(), num_elems);
     166         807 :   _qp_offset_face = libMesh::DofObject::invalid_id;
     167             : 
     168       42086 :   for (auto elem : *_mesh.getActiveLocalElementRange())
     169             :   {
     170       41279 :     auto eid = kokkosMesh().getContiguousElementID(elem);
     171       41279 :     auto sid = kokkosMesh().getContiguousSubdomainID(elem->subdomain_id());
     172             : 
     173       41279 :     _n_qps[eid] = n_qps[elem->subdomain_id()][elem->type()];
     174       41279 :     _qp_offset[eid] = _n_subdomain_qps[sid];
     175       41279 :     _n_subdomain_qps[sid] += _n_qps[eid];
     176             : 
     177      202095 :     for (unsigned int side = 0; side < elem->n_sides(); ++side)
     178      160816 :       _n_qps_face(side, eid) = n_qps_face[elem->subdomain_id()][elem->type()][side];
     179             :   }
     180             : 
     181        1074 :   for (auto boundary : _material_boundaries)
     182        2176 :     for (auto elem_id : _mesh.getBoundaryActiveSemiLocalElemIds(boundary))
     183             :     {
     184        1909 :       auto elem = _mesh.elemPtr(elem_id);
     185             : 
     186        1909 :       if (elem->processor_id() == _problem.processor_id())
     187             :       {
     188        1715 :         auto sid = kokkosMesh().getContiguousSubdomainID(elem->subdomain_id());
     189        1715 :         auto eid = kokkosMesh().getContiguousElementID(elem);
     190        1715 :         auto side = _mesh.sideWithBoundaryID(elem, boundary);
     191             : 
     192        1715 :         _qp_offset_face(side, eid) = _n_subdomain_qps_face[sid];
     193        1715 :         _n_subdomain_qps_face[sid] += _n_qps_face(side, eid);
     194             :       }
     195         267 :     }
     196             : 
     197         807 :   _q_points.copyToDeviceNested();
     198         807 :   _q_points_face.copyToDeviceNested();
     199         807 :   _weights.copyToDeviceNested();
     200         807 :   _weights_face.copyToDeviceNested();
     201             : 
     202         807 :   _n_qps.copyToDevice();
     203         807 :   _n_qps_face.copyToDevice();
     204         807 :   _n_subdomain_qps.copyToDevice();
     205         807 :   _n_subdomain_qps_face.copyToDevice();
     206         807 :   _qp_offset.copyToDevice();
     207         807 :   _qp_offset_face.copyToDevice();
     208         807 : }
     209             : 
     210             : void
     211         807 : Assembly::initShape()
     212             : {
     213             :   // Generate the list of unique FE types
     214             : 
     215         807 :   std::set<FEType> fe_types;
     216             : 
     217        1614 :   auto getFETypes = [&](::System & system)
     218             :   {
     219        3140 :     for (unsigned int var = 0; var < system.n_vars(); var++)
     220        1526 :       fe_types.insert(system.variable_type(var));
     221        2421 :   };
     222             : 
     223        1614 :   for (unsigned int nl = 0; nl < _problem.numNonlinearSystems(); ++nl)
     224         807 :     getFETypes(_problem.getNonlinearSystemBase(nl).system());
     225             : 
     226         807 :   getFETypes(_problem.getAuxiliarySystem().system());
     227             : 
     228         807 :   _fe_type_map.clear();
     229             : 
     230        1745 :   for (auto & fet : fe_types)
     231         938 :     _fe_type_map[fet] = _fe_type_map.size();
     232             : 
     233             :   // Cache reference shape data
     234             : 
     235         807 :   auto num_subdomains = _mesh.meshSubdomains().size();
     236         807 :   auto num_elem_types = kokkosMesh().getElementTypeMap().size();
     237             : 
     238         807 :   _phi.create(num_subdomains, num_elem_types, _fe_type_map.size());
     239         807 :   _phi_face.create(num_subdomains, num_elem_types, _fe_type_map.size());
     240         807 :   _grad_phi.create(num_subdomains, num_elem_types, _fe_type_map.size());
     241         807 :   _grad_phi_face.create(num_subdomains, num_elem_types, _fe_type_map.size());
     242             : 
     243         807 :   _map_phi.create(num_subdomains, num_elem_types);
     244         807 :   _map_phi_face.create(num_subdomains, num_elem_types);
     245         807 :   _map_psi_face.create(num_subdomains, num_elem_types);
     246         807 :   _map_grad_phi.create(num_subdomains, num_elem_types);
     247         807 :   _map_grad_phi_face.create(num_subdomains, num_elem_types);
     248         807 :   _map_grad_psi_face.create(num_subdomains, num_elem_types);
     249             : 
     250         807 :   _n_dofs.create(num_elem_types, _fe_type_map.size());
     251         807 :   _n_dofs = 0;
     252             : 
     253        1664 :   for (auto subdomain : _mesh.meshSubdomains())
     254             :   {
     255         857 :     auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
     256             : 
     257         857 :     auto & assembly = _problem.assembly(0, 0);
     258         857 :     auto qrule = assembly.writeableQRule(_dimension, subdomain, {});
     259         857 :     auto qrule_face = assembly.writeableQRuleFace(_dimension, subdomain, {});
     260             : 
     261        1845 :     for (auto & [fe_type, fe_type_id] : _fe_type_map)
     262             :     {
     263         988 :       std::unique_ptr<FEBase> fe(FEBase::build(_dimension, fe_type));
     264         988 :       std::unique_ptr<FEBase> fe_face(FEBase::build(_dimension, fe_type));
     265             : 
     266         988 :       fe->attach_quadrature_rule(qrule);
     267         988 :       fe_face->attach_quadrature_rule(qrule_face);
     268             : 
     269        1974 :       for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
     270             :       {
     271         986 :         auto elem = &libMesh::ReferenceElem::get(elem_type);
     272             : 
     273         986 :         auto & phi = fe->get_phi();
     274         986 :         auto & grad_phi = fe->get_dphi();
     275             : 
     276         986 :         fe->reinit(elem);
     277             : 
     278         986 :         _n_dofs(elem_type_id, fe_type_id) = phi.size();
     279             : 
     280         986 :         _phi(sid, elem_type_id, fe_type_id).create(phi.size(), qrule->n_points());
     281         986 :         _grad_phi(sid, elem_type_id, fe_type_id).create(phi.size(), qrule->n_points());
     282             : 
     283        4727 :         for (unsigned int i = 0; i < phi.size(); ++i)
     284       22639 :           for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
     285             :           {
     286       18898 :             _phi(sid, elem_type_id, fe_type_id)(i, qp) = phi[i][qp];
     287       18898 :             _grad_phi(sid, elem_type_id, fe_type_id)(i, qp) = grad_phi[i][qp];
     288             :           }
     289             : 
     290         986 :         _phi_face(sid, elem_type_id, fe_type_id).create(elem->n_sides());
     291         986 :         _grad_phi_face(sid, elem_type_id, fe_type_id).create(elem->n_sides());
     292             : 
     293        4732 :         for (unsigned int side = 0; side < elem->n_sides(); ++side)
     294             :         {
     295        3746 :           auto & phi = fe_face->get_phi();
     296        3746 :           auto & grad_phi = fe_face->get_dphi();
     297             : 
     298        3746 :           fe_face->reinit(elem, side);
     299             : 
     300        3746 :           _phi_face(sid, elem_type_id, fe_type_id)(side).create(phi.size(), qrule_face->n_points());
     301        6644 :           _grad_phi_face(sid, elem_type_id, fe_type_id)(side).create(phi.size(),
     302        2898 :                                                                      qrule_face->n_points());
     303             : 
     304       18716 :           for (unsigned int i = 0; i < phi.size(); ++i)
     305       50780 :             for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     306             :             {
     307       35810 :               _phi_face(sid, elem_type_id, fe_type_id)(side)(i, qp) = phi[i][qp];
     308       35810 :               _grad_phi_face(sid, elem_type_id, fe_type_id)(side)(i, qp) = grad_phi[i][qp];
     309             :             }
     310             :         }
     311             :       }
     312         988 :     }
     313             : 
     314        1712 :     for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
     315             :     {
     316         855 :       auto elem = &libMesh::ReferenceElem::get(elem_type);
     317             : 
     318         193 :       std::unique_ptr<FEBase> fe(
     319         662 :           FEBase::build(_dimension, FEType(elem->default_order(), LAGRANGE)));
     320         193 :       std::unique_ptr<FEBase> fe_face(
     321         662 :           FEBase::build(_dimension, FEType(elem->default_order(), LAGRANGE)));
     322             : 
     323         855 :       fe->attach_quadrature_rule(qrule);
     324         855 :       fe_face->attach_quadrature_rule(qrule_face);
     325             : 
     326         855 :       auto & phi = fe->get_phi();
     327         855 :       auto & grad_phi = fe->get_dphi();
     328             : 
     329         855 :       fe->reinit(elem);
     330             : 
     331         855 :       _map_phi(sid, elem_type_id).create(phi.size(), qrule->n_points());
     332         855 :       _map_grad_phi(sid, elem_type_id).create(phi.size(), qrule->n_points());
     333             : 
     334        4179 :       for (unsigned int i = 0; i < phi.size(); ++i)
     335       18712 :         for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
     336             :         {
     337       15388 :           _map_phi(sid, elem_type_id)(i, qp) = phi[i][qp];
     338       15388 :           _map_grad_phi(sid, elem_type_id)(i, qp) = grad_phi[i][qp];
     339             :         }
     340             : 
     341         855 :       _map_phi_face(sid, elem_type_id).create(elem->n_sides());
     342         855 :       _map_grad_phi_face(sid, elem_type_id).create(elem->n_sides());
     343         855 :       _map_psi_face(sid, elem_type_id).create(elem->n_sides());
     344         855 :       _map_grad_psi_face(sid, elem_type_id).create(elem->n_sides());
     345             : 
     346        4047 :       for (unsigned int side = 0; side < elem->n_sides(); ++side)
     347             :       {
     348        3192 :         auto & phi = fe_face->get_phi();
     349        3192 :         auto & grad_phi = fe_face->get_dphi();
     350             : 
     351        3192 :         fe_face->reinit(elem, side);
     352             : 
     353        3192 :         _map_phi_face(sid, elem_type_id)(side).create(phi.size(), qrule_face->n_points());
     354        3192 :         _map_grad_phi_face(sid, elem_type_id)(side).create(phi.size(), qrule_face->n_points());
     355             : 
     356       16464 :         for (unsigned int i = 0; i < phi.size(); ++i)
     357       43904 :           for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     358             :           {
     359       30632 :             _map_phi_face(sid, elem_type_id)(side)(i, qp) = phi[i][qp];
     360       30632 :             _map_grad_phi_face(sid, elem_type_id)(side)(i, qp) = grad_phi[i][qp];
     361             :           }
     362             : 
     363        3192 :         auto & psi = fe_face->get_fe_map().get_psi();
     364        3192 :         auto & dpsidxi = fe_face->get_fe_map().get_dpsidxi();
     365        3192 :         auto & dpsideta = fe_face->get_fe_map().get_dpsideta();
     366             : 
     367        3192 :         _map_psi_face(sid, elem_type_id)(side).create(psi.size(), qrule_face->n_points());
     368        3192 :         _map_grad_psi_face(sid, elem_type_id)(side).create(psi.size(), qrule_face->n_points());
     369             : 
     370        9756 :         for (unsigned int i = 0; i < psi.size(); ++i)
     371       21664 :           for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     372             :           {
     373       15100 :             _map_psi_face(sid, elem_type_id)(side)(i, qp) = psi[i][qp];
     374       15100 :             if (elem->dim() > 1)
     375       14800 :               _map_grad_psi_face(sid, elem_type_id)(side)(i, qp)(0) = dpsidxi[i][qp];
     376       15100 :             if (elem->dim() > 2)
     377        3456 :               _map_grad_psi_face(sid, elem_type_id)(side)(i, qp)(1) = dpsideta[i][qp];
     378             :           }
     379             :       }
     380         855 :     }
     381             :   }
     382             : 
     383         807 :   _phi.copyToDeviceNested();
     384         807 :   _phi_face.copyToDeviceNested();
     385         807 :   _grad_phi.copyToDeviceNested();
     386         807 :   _grad_phi_face.copyToDeviceNested();
     387             : 
     388         807 :   _map_phi.copyToDeviceNested();
     389         807 :   _map_phi_face.copyToDeviceNested();
     390         807 :   _map_psi_face.copyToDeviceNested();
     391         807 :   _map_grad_phi.copyToDeviceNested();
     392         807 :   _map_grad_phi_face.copyToDeviceNested();
     393         807 :   _map_grad_psi_face.copyToDeviceNested();
     394             : 
     395         807 :   _n_dofs.copyToDevice();
     396         807 : }
     397             : 
     398             : void
     399         807 : Assembly::cachePhysicalMap()
     400             : {
     401         807 :   auto num_subdomains = _mesh.nSubdomains();
     402         807 :   auto num_elems = _mesh.nActiveLocalElem();
     403             : 
     404         807 :   _jacobian.create(num_subdomains);
     405         807 :   _jxw.create(num_subdomains);
     406         807 :   _xyz.create(num_subdomains);
     407             : 
     408        1664 :   for (auto subdomain : _mesh.meshSubdomains())
     409             :   {
     410         857 :     auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
     411             : 
     412         857 :     _jacobian[sid].createDevice(_n_subdomain_qps[sid]);
     413         857 :     _jxw[sid].createDevice(_n_subdomain_qps[sid]);
     414         857 :     _xyz[sid].createDevice(_n_subdomain_qps[sid]);
     415             :   }
     416             : 
     417         807 :   _jacobian.copyToDeviceNested();
     418         807 :   _jxw.copyToDeviceNested();
     419         807 :   _xyz.copyToDeviceNested();
     420             : 
     421         807 :   ::Kokkos::RangePolicy<ExecSpace, ::Kokkos::IndexType<ThreadID>> policy(0, num_elems);
     422         807 :   ::Kokkos::parallel_for(policy, *this);
     423         807 :   ::Kokkos::fence();
     424         807 : }
     425             : 
     426             : KOKKOS_FUNCTION void
     427       30702 : Assembly::operator()(const ThreadID tid) const
     428             : {
     429       30702 :   auto info = kokkosMesh().getElementInfo(tid);
     430       30702 :   auto offset = getQpOffset(info);
     431             : 
     432       30702 :   auto jacobian = &_jacobian[info.subdomain][offset];
     433       30702 :   auto jxw = &_jxw[info.subdomain][offset];
     434       30702 :   auto xyz = &_xyz[info.subdomain][offset];
     435             : 
     436      172518 :   for (unsigned int qp = 0; qp < getNumQps(info); ++qp)
     437      141816 :     computePhysicalMap(info, qp, &jacobian[qp], &jxw[qp], &xyz[qp]);
     438       30702 : }
     439             : 
     440             : } // namespace Kokkos
     441             : } // namespace Moose

Generated by: LCOV version 1.14