LCOV - code coverage report
Current view: top level - src/kokkos/base - KokkosAssembly.K (source / functions) Hit Total Coverage
Test: idaholab/moose framework: #31730 (e8b711) with base e0c998 Lines: 245 251 97.6 %
Date: 2025-10-29 16:49:47 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //* This file is part of the MOOSE framework
       2             : //* https://www.mooseframework.org
       3             : //*
       4             : //* All rights reserved, see COPYRIGHT for full restrictions
       5             : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
       6             : //*
       7             : //* Licensed under LGPL 2.1, please see LICENSE for details
       8             : //* https://www.gnu.org/licenses/lgpl-2.1.html
       9             : 
      10             : #include "KokkosAssembly.h"
      11             : 
      12             : #include "MooseMesh.h"
      13             : #include "FEProblemBase.h"
      14             : #include "NonlinearSystemBase.h"
      15             : #include "AuxiliarySystem.h"
      16             : #include "Assembly.h"
      17             : #include "BoundaryRestrictable.h"
      18             : 
      19             : #include "libmesh/fe_interface.h"
      20             : #include "libmesh/reference_elem.h"
      21             : 
      22             : namespace Moose
      23             : {
      24             : namespace Kokkos
      25             : {
      26             : 
      27       43070 : Assembly::Assembly(FEProblemBase & problem)
      28       42580 :   : MeshHolder(*problem.mesh().getKokkosMesh()),
      29       42580 :     _problem(problem),
      30       42580 :     _mesh(problem.mesh()),
      31      127740 :     _dimension(_mesh.dimension())
      32             : {
      33       43070 : }
      34             : 
      35             : void
      36         741 : Assembly::init()
      37             : {
      38             :   // Cache mesh information
      39             : 
      40         741 :   auto num_subdomains = _mesh.nSubdomains();
      41             : 
      42         741 :   _coord_type.create(num_subdomains);
      43             : 
      44        1532 :   for (auto subdomain : _mesh.meshSubdomains())
      45         791 :     _coord_type[kokkosMesh().getContiguousSubdomainID(subdomain)] = _mesh.getCoordSystem(subdomain);
      46             : 
      47         741 :   _coord_type.copyToDevice();
      48             : 
      49         741 :   if (_mesh.usingGeneralAxisymmetricCoordAxes())
      50             :   {
      51           0 :     _rz_axis.create(num_subdomains);
      52             : 
      53           0 :     for (auto subdomain : _mesh.meshSubdomains())
      54           0 :       _rz_axis[kokkosMesh().getContiguousSubdomainID(subdomain)] =
      55           0 :           _mesh.getGeneralAxisymmetricCoordAxis(subdomain);
      56             : 
      57           0 :     _rz_axis.copyToDevice();
      58             :   }
      59             :   else
      60         741 :     _rz_radial_coord = _mesh.getAxisymmetricRadialCoord();
      61             : 
      62             :   // Initialize quadrature and shape data
      63             : 
      64         741 :   initQuadrature();
      65         741 :   initShape();
      66         741 :   cachePhysicalMap();
      67         741 : }
      68             : 
      69             : void
      70         741 : Assembly::initQuadrature()
      71             : {
      72         741 :   auto num_subdomains = _mesh.nSubdomains();
      73         741 :   auto num_elem_types = kokkosMesh().getElementTypeMap().size();
      74             : 
      75         741 :   _q_points.create(num_subdomains, num_elem_types);
      76         741 :   _q_points_face.create(num_subdomains, num_elem_types);
      77         741 :   _weights.create(num_subdomains, num_elem_types);
      78         741 :   _weights_face.create(num_subdomains, num_elem_types);
      79             : 
      80             :   // Find boundaries where material properties should be computed
      81             : 
      82         178 :   auto & boundary_objects =
      83         563 :       _problem.getKokkosMaterialPropertyStorageConsumers(Moose::BOUNDARY_MATERIAL_DATA);
      84             : 
      85         967 :   for (auto object : boundary_objects)
      86             :   {
      87         226 :     auto boundary_restriction = dynamic_cast<const BoundaryRestrictable *>(object);
      88             : 
      89         226 :     if (boundary_restriction)
      90         452 :       for (auto boundary : boundary_restriction->boundaryIDs())
      91         226 :         _material_boundaries.insert(boundary);
      92             :     else
      93           0 :       mooseError("Kokkos assembly error: ", object->name(), " is not boundary-restricted.");
      94             :   }
      95             : 
      96             :   // Cache quadrature data
      97             : 
      98         741 :   std::map<SubdomainID, std::map<ElemType, unsigned int>> n_qps;
      99         741 :   std::map<SubdomainID, std::map<ElemType, std::vector<unsigned int>>> n_qps_face;
     100             : 
     101         741 :   _max_qps_per_elem = 0;
     102             : 
     103        1532 :   for (auto subdomain : _mesh.meshSubdomains())
     104             :   {
     105         791 :     auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
     106             : 
     107         791 :     auto & assembly = _problem.assembly(0, 0);
     108         791 :     auto qrule = assembly.writeableQRule(_dimension, subdomain, {});
     109         791 :     auto qrule_face = assembly.writeableQRuleFace(_dimension, subdomain, {});
     110             : 
     111        1580 :     for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
     112             :     {
     113         789 :       auto elem = &libMesh::ReferenceElem::get(elem_type);
     114             : 
     115         789 :       _q_points_face(sid, elem_type_id).create(elem->n_sides());
     116         789 :       _weights_face(sid, elem_type_id).create(elem->n_sides());
     117             : 
     118             :       // Cache volume quadrature of each reference element
     119             : 
     120         789 :       qrule->init(*elem, /* p-level */ 0);
     121         789 :       n_qps[subdomain][elem_type] = qrule->n_points();
     122             : 
     123         789 :       _q_points(sid, elem_type_id).create(qrule->n_points());
     124         789 :       _weights(sid, elem_type_id).create(qrule->n_points());
     125             : 
     126        4099 :       for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
     127             :       {
     128        3310 :         _q_points(sid, elem_type_id)[qp] = qrule->qp(qp);
     129        3310 :         _weights(sid, elem_type_id)[qp] = qrule->w(qp);
     130             :       }
     131             : 
     132             :       // Cache face quadrature of each reference element
     133             : 
     134        3717 :       for (unsigned int side = 0; side < elem->n_sides(); ++side)
     135             :       {
     136        2928 :         qrule_face->init(*elem->side_ptr(side), /* p-level */ 0);
     137        2928 :         n_qps_face[subdomain][elem_type].push_back(qrule_face->n_points());
     138             : 
     139        2928 :         _q_points_face(sid, elem_type_id)[side].create(qrule_face->n_points());
     140        2928 :         _weights_face(sid, elem_type_id)[side].create(qrule_face->n_points());
     141             : 
     142        9164 :         for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     143             :         {
     144        6236 :           _q_points_face(sid, elem_type_id)[side][qp] = qrule_face->qp(qp);
     145        6236 :           _weights_face(sid, elem_type_id)[side][qp] = qrule_face->w(qp);
     146             :         }
     147             :       }
     148             : 
     149         789 :       _max_qps_per_elem = std::max(_max_qps_per_elem, n_qps[subdomain][elem_type]);
     150             :     }
     151             :   }
     152             : 
     153         741 :   auto num_elems = _mesh.nActiveLocalElem();
     154             : 
     155         741 :   _n_subdomain_qps.create(num_subdomains);
     156         741 :   _n_subdomain_qps_face.create(num_subdomains);
     157         741 :   _n_subdomain_qps = 0;
     158         741 :   _n_subdomain_qps_face = 0;
     159             : 
     160         741 :   _n_qps.create(num_elems);
     161         741 :   _n_qps_face.create(_mesh.getMaxSidesPerElem(), num_elems);
     162         741 :   _n_qps_face = 0;
     163             : 
     164         741 :   _qp_offset.create(num_elems);
     165         741 :   _qp_offset_face.create(_mesh.getMaxSidesPerElem(), num_elems);
     166         741 :   _qp_offset_face = libMesh::DofObject::invalid_id;
     167             : 
     168       36820 :   for (auto elem : *_mesh.getActiveLocalElementRange())
     169             :   {
     170       36079 :     auto eid = kokkosMesh().getContiguousElementID(elem);
     171       36079 :     auto sid = kokkosMesh().getContiguousSubdomainID(elem->subdomain_id());
     172             : 
     173       36079 :     _n_qps[eid] = n_qps[elem->subdomain_id()][elem->type()];
     174       36079 :     _qp_offset[eid] = _n_subdomain_qps[sid];
     175       36079 :     _n_subdomain_qps[sid] += _n_qps[eid];
     176             : 
     177      176095 :     for (unsigned int side = 0; side < elem->n_sides(); ++side)
     178      140016 :       _n_qps_face(side, eid) = n_qps_face[elem->subdomain_id()][elem->type()][side];
     179             :   }
     180             : 
     181         942 :   for (auto boundary : _material_boundaries)
     182        1520 :     for (auto elem_id : _mesh.getBoundaryActiveSemiLocalElemIds(boundary))
     183             :     {
     184        1319 :       auto elem = _mesh.elemPtr(elem_id);
     185             : 
     186        1319 :       if (elem->processor_id() == _problem.processor_id())
     187             :       {
     188        1195 :         auto sid = kokkosMesh().getContiguousSubdomainID(elem->subdomain_id());
     189        1195 :         auto eid = kokkosMesh().getContiguousElementID(elem);
     190        1195 :         auto side = _mesh.sideWithBoundaryID(elem, boundary);
     191             : 
     192        1195 :         _qp_offset_face(side, eid) = _n_subdomain_qps_face[sid];
     193        1195 :         _n_subdomain_qps_face[sid] += _n_qps_face(side, eid);
     194             :       }
     195         201 :     }
     196             : 
     197         741 :   _q_points.copyToDeviceNested();
     198         741 :   _q_points_face.copyToDeviceNested();
     199         741 :   _weights.copyToDeviceNested();
     200         741 :   _weights_face.copyToDeviceNested();
     201             : 
     202         741 :   _n_qps.copyToDevice();
     203         741 :   _n_qps_face.copyToDevice();
     204         741 :   _n_subdomain_qps.copyToDevice();
     205         741 :   _n_subdomain_qps_face.copyToDevice();
     206         741 :   _qp_offset.copyToDevice();
     207         741 :   _qp_offset_face.copyToDevice();
     208         741 : }
     209             : 
     210             : void
     211         741 : Assembly::initShape()
     212             : {
     213             :   // Generate the list of unique FE types
     214             : 
     215         741 :   std::set<FEType> fe_types;
     216             : 
     217        1482 :   auto getFETypes = [&](::System & system)
     218             :   {
     219        2906 :     for (unsigned int var = 0; var < system.n_vars(); var++)
     220        1424 :       fe_types.insert(system.variable_type(var));
     221        2223 :   };
     222             : 
     223        1482 :   for (unsigned int nl = 0; nl < _problem.numNonlinearSystems(); ++nl)
     224         741 :     getFETypes(_problem.getNonlinearSystemBase(nl).system());
     225             : 
     226         741 :   getFETypes(_problem.getAuxiliarySystem().system());
     227             : 
     228         741 :   _fe_type_map.clear();
     229             : 
     230        1613 :   for (auto & fet : fe_types)
     231         872 :     _fe_type_map[fet] = _fe_type_map.size();
     232             : 
     233             :   // Cache reference shape data
     234             : 
     235         741 :   auto num_subdomains = _mesh.meshSubdomains().size();
     236         741 :   auto num_elem_types = kokkosMesh().getElementTypeMap().size();
     237             : 
     238         741 :   _phi.create(num_subdomains, num_elem_types, _fe_type_map.size());
     239         741 :   _phi_face.create(num_subdomains, num_elem_types, _fe_type_map.size());
     240         741 :   _grad_phi.create(num_subdomains, num_elem_types, _fe_type_map.size());
     241         741 :   _grad_phi_face.create(num_subdomains, num_elem_types, _fe_type_map.size());
     242             : 
     243         741 :   _map_phi.create(num_subdomains, num_elem_types);
     244         741 :   _map_phi_face.create(num_subdomains, num_elem_types);
     245         741 :   _map_psi_face.create(num_subdomains, num_elem_types);
     246         741 :   _map_grad_phi.create(num_subdomains, num_elem_types);
     247         741 :   _map_grad_phi_face.create(num_subdomains, num_elem_types);
     248         741 :   _map_grad_psi_face.create(num_subdomains, num_elem_types);
     249             : 
     250         741 :   _n_dofs.create(num_elem_types, _fe_type_map.size());
     251         741 :   _n_dofs = 0;
     252             : 
     253        1532 :   for (auto subdomain : _mesh.meshSubdomains())
     254             :   {
     255         791 :     auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
     256             : 
     257         791 :     auto & assembly = _problem.assembly(0, 0);
     258         791 :     auto qrule = assembly.writeableQRule(_dimension, subdomain, {});
     259         791 :     auto qrule_face = assembly.writeableQRuleFace(_dimension, subdomain, {});
     260             : 
     261        1713 :     for (auto & [fe_type, fe_type_id] : _fe_type_map)
     262             :     {
     263         922 :       std::unique_ptr<FEBase> fe(FEBase::build(_dimension, fe_type));
     264         922 :       std::unique_ptr<FEBase> fe_face(FEBase::build(_dimension, fe_type));
     265             : 
     266         922 :       fe->attach_quadrature_rule(qrule);
     267         922 :       fe_face->attach_quadrature_rule(qrule_face);
     268             : 
     269        1842 :       for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
     270             :       {
     271         920 :         auto elem = &libMesh::ReferenceElem::get(elem_type);
     272             : 
     273         920 :         auto & phi = fe->get_phi();
     274         920 :         auto & grad_phi = fe->get_dphi();
     275             : 
     276         920 :         fe->reinit(elem);
     277             : 
     278         920 :         _n_dofs(elem_type_id, fe_type_id) = phi.size();
     279             : 
     280         920 :         _phi(sid, elem_type_id, fe_type_id).create(phi.size(), qrule->n_points());
     281         920 :         _grad_phi(sid, elem_type_id, fe_type_id).create(phi.size(), qrule->n_points());
     282             : 
     283        4397 :         for (unsigned int i = 0; i < phi.size(); ++i)
     284       21319 :           for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
     285             :           {
     286       17842 :             _phi(sid, elem_type_id, fe_type_id)(i, qp) = phi[i][qp];
     287       17842 :             _grad_phi(sid, elem_type_id, fe_type_id)(i, qp) = grad_phi[i][qp];
     288             :           }
     289             : 
     290         920 :         _phi_face(sid, elem_type_id, fe_type_id).create(elem->n_sides());
     291         920 :         _grad_phi_face(sid, elem_type_id, fe_type_id).create(elem->n_sides());
     292             : 
     293        4402 :         for (unsigned int side = 0; side < elem->n_sides(); ++side)
     294             :         {
     295        3482 :           auto & phi = fe_face->get_phi();
     296        3482 :           auto & grad_phi = fe_face->get_dphi();
     297             : 
     298        3482 :           fe_face->reinit(elem, side);
     299             : 
     300        3482 :           _phi_face(sid, elem_type_id, fe_type_id)(side).create(phi.size(), qrule_face->n_points());
     301        6128 :           _grad_phi_face(sid, elem_type_id, fe_type_id)(side).create(phi.size(),
     302        2646 :                                                                      qrule_face->n_points());
     303             : 
     304       17396 :           for (unsigned int i = 0; i < phi.size(); ++i)
     305       47612 :             for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     306             :             {
     307       33698 :               _phi_face(sid, elem_type_id, fe_type_id)(side)(i, qp) = phi[i][qp];
     308       33698 :               _grad_phi_face(sid, elem_type_id, fe_type_id)(side)(i, qp) = grad_phi[i][qp];
     309             :             }
     310             :         }
     311             :       }
     312         922 :     }
     313             : 
     314        1580 :     for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
     315             :     {
     316         789 :       auto elem = &libMesh::ReferenceElem::get(elem_type);
     317             : 
     318         190 :       std::unique_ptr<FEBase> fe(
     319         599 :           FEBase::build(_dimension, FEType(elem->default_order(), LAGRANGE)));
     320         190 :       std::unique_ptr<FEBase> fe_face(
     321         599 :           FEBase::build(_dimension, FEType(elem->default_order(), LAGRANGE)));
     322             : 
     323         789 :       fe->attach_quadrature_rule(qrule);
     324         789 :       fe_face->attach_quadrature_rule(qrule_face);
     325             : 
     326         789 :       auto & phi = fe->get_phi();
     327         789 :       auto & grad_phi = fe->get_dphi();
     328             : 
     329         789 :       fe->reinit(elem);
     330             : 
     331         789 :       _map_phi(sid, elem_type_id).create(phi.size(), qrule->n_points());
     332         789 :       _map_grad_phi(sid, elem_type_id).create(phi.size(), qrule->n_points());
     333             : 
     334        3849 :       for (unsigned int i = 0; i < phi.size(); ++i)
     335       17392 :         for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
     336             :         {
     337       14332 :           _map_phi(sid, elem_type_id)(i, qp) = phi[i][qp];
     338       14332 :           _map_grad_phi(sid, elem_type_id)(i, qp) = grad_phi[i][qp];
     339             :         }
     340             : 
     341         789 :       _map_phi_face(sid, elem_type_id).create(elem->n_sides());
     342         789 :       _map_grad_phi_face(sid, elem_type_id).create(elem->n_sides());
     343         789 :       _map_psi_face(sid, elem_type_id).create(elem->n_sides());
     344         789 :       _map_grad_psi_face(sid, elem_type_id).create(elem->n_sides());
     345             : 
     346        3717 :       for (unsigned int side = 0; side < elem->n_sides(); ++side)
     347             :       {
     348        2928 :         auto & phi = fe_face->get_phi();
     349        2928 :         auto & grad_phi = fe_face->get_dphi();
     350             : 
     351        2928 :         fe_face->reinit(elem, side);
     352             : 
     353        2928 :         _map_phi_face(sid, elem_type_id)(side).create(phi.size(), qrule_face->n_points());
     354        2928 :         _map_grad_phi_face(sid, elem_type_id)(side).create(phi.size(), qrule_face->n_points());
     355             : 
     356       15144 :         for (unsigned int i = 0; i < phi.size(); ++i)
     357       40736 :           for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     358             :           {
     359       28520 :             _map_phi_face(sid, elem_type_id)(side)(i, qp) = phi[i][qp];
     360       28520 :             _map_grad_phi_face(sid, elem_type_id)(side)(i, qp) = grad_phi[i][qp];
     361             :           }
     362             : 
     363        2928 :         auto & psi = fe_face->get_fe_map().get_psi();
     364        2928 :         auto & dpsidxi = fe_face->get_fe_map().get_dpsidxi();
     365        2928 :         auto & dpsideta = fe_face->get_fe_map().get_dpsideta();
     366             : 
     367        2928 :         _map_psi_face(sid, elem_type_id)(side).create(psi.size(), qrule_face->n_points());
     368        2928 :         _map_grad_psi_face(sid, elem_type_id)(side).create(psi.size(), qrule_face->n_points());
     369             : 
     370        8964 :         for (unsigned int i = 0; i < psi.size(); ++i)
     371       20080 :           for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     372             :           {
     373       14044 :             _map_psi_face(sid, elem_type_id)(side)(i, qp) = psi[i][qp];
     374       14044 :             if (elem->dim() > 1)
     375       13744 :               _map_grad_psi_face(sid, elem_type_id)(side)(i, qp)(0) = dpsidxi[i][qp];
     376       14044 :             if (elem->dim() > 2)
     377        3456 :               _map_grad_psi_face(sid, elem_type_id)(side)(i, qp)(1) = dpsideta[i][qp];
     378             :           }
     379             :       }
     380         789 :     }
     381             :   }
     382             : 
     383         741 :   _phi.copyToDeviceNested();
     384         741 :   _phi_face.copyToDeviceNested();
     385         741 :   _grad_phi.copyToDeviceNested();
     386         741 :   _grad_phi_face.copyToDeviceNested();
     387             : 
     388         741 :   _map_phi.copyToDeviceNested();
     389         741 :   _map_phi_face.copyToDeviceNested();
     390         741 :   _map_psi_face.copyToDeviceNested();
     391         741 :   _map_grad_phi.copyToDeviceNested();
     392         741 :   _map_grad_phi_face.copyToDeviceNested();
     393         741 :   _map_grad_psi_face.copyToDeviceNested();
     394             : 
     395         741 :   _n_dofs.copyToDevice();
     396         741 : }
     397             : 
     398             : void
     399         741 : Assembly::cachePhysicalMap()
     400             : {
     401         741 :   auto num_subdomains = _mesh.nSubdomains();
     402         741 :   auto num_elems = _mesh.nActiveLocalElem();
     403             : 
     404         741 :   _jacobian.create(num_subdomains);
     405         741 :   _jxw.create(num_subdomains);
     406         741 :   _xyz.create(num_subdomains);
     407             : 
     408        1532 :   for (auto subdomain : _mesh.meshSubdomains())
     409             :   {
     410         791 :     auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
     411             : 
     412         791 :     _jacobian[sid].createDevice(_n_subdomain_qps[sid]);
     413         791 :     _jxw[sid].createDevice(_n_subdomain_qps[sid]);
     414         791 :     _xyz[sid].createDevice(_n_subdomain_qps[sid]);
     415             :   }
     416             : 
     417         741 :   _jacobian.copyToDeviceNested();
     418         741 :   _jxw.copyToDeviceNested();
     419         741 :   _xyz.copyToDeviceNested();
     420             : 
     421         741 :   ::Kokkos::RangePolicy<ExecSpace, ::Kokkos::IndexType<ThreadID>> policy(0, num_elems);
     422         741 :   ::Kokkos::parallel_for(policy, *this);
     423         741 :   ::Kokkos::fence();
     424         741 : }
     425             : 
     426             : KOKKOS_FUNCTION void
     427       25802 : Assembly::operator()(const ThreadID tid) const
     428             : {
     429       25802 :   auto info = kokkosMesh().getElementInfo(tid);
     430       25802 :   auto offset = getQpOffset(info);
     431             : 
     432       25802 :   auto jacobian = &_jacobian[info.subdomain][offset];
     433       25802 :   auto jxw = &_jxw[info.subdomain][offset];
     434       25802 :   auto xyz = &_xyz[info.subdomain][offset];
     435             : 
     436      148018 :   for (unsigned int qp = 0; qp < getNumQps(info); ++qp)
     437      122216 :     computePhysicalMap(info, qp, &jacobian[qp], &jxw[qp], &xyz[qp]);
     438       25802 : }
     439             : 
     440             : } // namespace Kokkos
     441             : } // namespace Moose

Generated by: LCOV version 1.14