LCOV - code coverage report
Current view: top level - src/kokkos/base - KokkosAssembly.K (source / functions) Hit Total Coverage
Test: idaholab/moose framework: 6f668f Lines: 245 251 97.6 %
Date: 2025-09-22 20:01:15 Functions: 7 7 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //* This file is part of the MOOSE framework
       2             : //* https://www.mooseframework.org
       3             : //*
       4             : //* All rights reserved, see COPYRIGHT for full restrictions
       5             : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
       6             : //*
       7             : //* Licensed under LGPL 2.1, please see LICENSE for details
       8             : //* https://www.gnu.org/licenses/lgpl-2.1.html
       9             : 
      10             : #include "KokkosAssembly.h"
      11             : 
      12             : #include "MooseMesh.h"
      13             : #include "FEProblemBase.h"
      14             : #include "NonlinearSystemBase.h"
      15             : #include "AuxiliarySystem.h"
      16             : #include "Assembly.h"
      17             : #include "BoundaryRestrictable.h"
      18             : 
      19             : #include "libmesh/fe_interface.h"
      20             : #include "libmesh/reference_elem.h"
      21             : 
      22             : namespace Moose
      23             : {
      24             : namespace Kokkos
      25             : {
      26             : 
      27       42670 : Assembly::Assembly(FEProblemBase & problem)
      28       42360 :   : MeshHolder(*problem.mesh().getKokkosMesh()),
      29       42360 :     _problem(problem),
      30       42360 :     _mesh(problem.mesh()),
      31      127080 :     _dimension(_mesh.dimension())
      32             : {
      33       42670 : }
      34             : 
      35             : void
      36         693 : Assembly::init()
      37             : {
      38             :   // Cache mesh information
      39             : 
      40         693 :   auto num_subdomains = _mesh.nSubdomains();
      41             : 
      42         693 :   _coord_type.create(num_subdomains);
      43             : 
      44        1436 :   for (auto subdomain : _mesh.meshSubdomains())
      45         743 :     _coord_type[kokkosMesh().getContiguousSubdomainID(subdomain)] = _mesh.getCoordSystem(subdomain);
      46             : 
      47         693 :   _coord_type.copyToDevice();
      48             : 
      49         693 :   if (_mesh.usingGeneralAxisymmetricCoordAxes())
      50             :   {
      51           0 :     _rz_axis.create(num_subdomains);
      52             : 
      53           0 :     for (auto subdomain : _mesh.meshSubdomains())
      54           0 :       _rz_axis[kokkosMesh().getContiguousSubdomainID(subdomain)] =
      55           0 :           _mesh.getGeneralAxisymmetricCoordAxis(subdomain);
      56             : 
      57           0 :     _rz_axis.copyToDevice();
      58             :   }
      59             :   else
      60         693 :     _rz_radial_coord = _mesh.getAxisymmetricRadialCoord();
      61             : 
      62             :   // Initialize quadrature and shape data
      63             : 
      64         693 :   initQuadrature();
      65         693 :   initShape();
      66         693 :   cachePhysicalMap();
      67         693 : }
      68             : 
      69             : void
      70         693 : Assembly::initQuadrature()
      71             : {
      72         693 :   auto num_subdomains = _mesh.nSubdomains();
      73         693 :   auto num_elem_types = kokkosMesh().getElementTypeMap().size();
      74             : 
      75         693 :   _q_points.create(num_subdomains, num_elem_types);
      76         693 :   _q_points_face.create(num_subdomains, num_elem_types);
      77         693 :   _weights.create(num_subdomains, num_elem_types);
      78         693 :   _weights_face.create(num_subdomains, num_elem_types);
      79             : 
      80             :   // Find boundaries where material properties should be computed
      81             : 
      82         166 :   auto & boundary_objects =
      83         527 :       _problem.getKokkosMaterialPropertyStorageConsumers(Moose::BOUNDARY_MATERIAL_DATA);
      84             : 
      85         907 :   for (auto object : boundary_objects)
      86             :   {
      87         214 :     auto boundary_restriction = dynamic_cast<const BoundaryRestrictable *>(object);
      88             : 
      89         214 :     if (boundary_restriction)
      90         428 :       for (auto boundary : boundary_restriction->boundaryIDs())
      91         214 :         _material_boundaries.insert(boundary);
      92             :     else
      93           0 :       mooseError("Kokkos assembly error: ", object->name(), " is not boundary-restricted.");
      94             :   }
      95             : 
      96             :   // Cache quadrature data
      97             : 
      98         693 :   std::map<SubdomainID, std::map<ElemType, unsigned int>> n_qps;
      99         693 :   std::map<SubdomainID, std::map<ElemType, std::vector<unsigned int>>> n_qps_face;
     100             : 
     101         693 :   _max_qps_per_elem = 0;
     102             : 
     103        1436 :   for (auto subdomain : _mesh.meshSubdomains())
     104             :   {
     105         743 :     auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
     106             : 
     107         743 :     auto & assembly = _problem.assembly(0, 0);
     108         743 :     auto qrule = assembly.writeableQRule(_dimension, subdomain, {});
     109         743 :     auto qrule_face = assembly.writeableQRuleFace(_dimension, subdomain, {});
     110             : 
     111        1486 :     for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
     112             :     {
     113         743 :       auto elem = &libMesh::ReferenceElem::get(elem_type);
     114             : 
     115         743 :       _q_points_face(sid, elem_type_id).create(elem->n_sides());
     116         743 :       _weights_face(sid, elem_type_id).create(elem->n_sides());
     117             : 
     118             :       // Cache volume quadrature of each reference element
     119             : 
     120         743 :       qrule->init(*elem, /* p-level */ 0);
     121         743 :       n_qps[subdomain][elem_type] = qrule->n_points();
     122             : 
     123         743 :       _q_points(sid, elem_type_id).create(qrule->n_points());
     124         743 :       _weights(sid, elem_type_id).create(qrule->n_points());
     125             : 
     126        3559 :       for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
     127             :       {
     128        2816 :         _q_points(sid, elem_type_id)[qp] = qrule->qp(qp);
     129        2816 :         _weights(sid, elem_type_id)[qp] = qrule->w(qp);
     130             :       }
     131             : 
     132             :       // Cache face quadrature of each reference element
     133             : 
     134        3487 :       for (unsigned int side = 0; side < elem->n_sides(); ++side)
     135             :       {
     136        2744 :         qrule_face->init(*elem->side_ptr(side), /* p-level */ 0);
     137        2744 :         n_qps_face[subdomain][elem_type].push_back(qrule_face->n_points());
     138             : 
     139        2744 :         _q_points_face(sid, elem_type_id)[side].create(qrule_face->n_points());
     140        2744 :         _weights_face(sid, elem_type_id)[side].create(qrule_face->n_points());
     141             : 
     142        8364 :         for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     143             :         {
     144        5620 :           _q_points_face(sid, elem_type_id)[side][qp] = qrule_face->qp(qp);
     145        5620 :           _weights_face(sid, elem_type_id)[side][qp] = qrule_face->w(qp);
     146             :         }
     147             :       }
     148             : 
     149         743 :       _max_qps_per_elem = std::max(_max_qps_per_elem, n_qps[subdomain][elem_type]);
     150             :     }
     151             :   }
     152             : 
     153         693 :   auto num_elems = _mesh.nActiveLocalElem();
     154             : 
     155         693 :   _n_subdomain_qps.create(num_subdomains);
     156         693 :   _n_subdomain_qps_face.create(num_subdomains);
     157         693 :   _n_subdomain_qps = 0;
     158         693 :   _n_subdomain_qps_face = 0;
     159             : 
     160         693 :   _n_qps.create(num_elems);
     161         693 :   _n_qps_face.create(_mesh.getMaxSidesPerElem(), num_elems);
     162         693 :   _n_qps_face = 0;
     163             : 
     164         693 :   _qp_offset.create(num_elems);
     165         693 :   _qp_offset_face.create(_mesh.getMaxSidesPerElem(), num_elems);
     166         693 :   _qp_offset_face = libMesh::DofObject::invalid_id;
     167             : 
     168       33762 :   for (auto elem : *_mesh.getActiveLocalElementRange())
     169             :   {
     170       33069 :     auto eid = kokkosMesh().getContiguousElementID(elem);
     171       33069 :     auto sid = kokkosMesh().getContiguousSubdomainID(elem->subdomain_id());
     172             : 
     173       33069 :     _n_qps[eid] = n_qps[elem->subdomain_id()][elem->type()];
     174       33069 :     _qp_offset[eid] = _n_subdomain_qps[sid];
     175       33069 :     _n_subdomain_qps[sid] += _n_qps[eid];
     176             : 
     177      161045 :     for (unsigned int side = 0; side < elem->n_sides(); ++side)
     178      127976 :       _n_qps_face(side, eid) = n_qps_face[elem->subdomain_id()][elem->type()][side];
     179             :   }
     180             : 
     181         894 :   for (auto boundary : _material_boundaries)
     182        1520 :     for (auto elem_id : _mesh.getBoundaryActiveSemiLocalElemIds(boundary))
     183             :     {
     184        1319 :       auto elem = _mesh.elemPtr(elem_id);
     185             : 
     186        1319 :       if (elem->processor_id() == _problem.processor_id())
     187             :       {
     188        1195 :         auto sid = kokkosMesh().getContiguousSubdomainID(elem->subdomain_id());
     189        1195 :         auto eid = kokkosMesh().getContiguousElementID(elem);
     190        1195 :         auto side = _mesh.sideWithBoundaryID(elem, boundary);
     191             : 
     192        1195 :         _qp_offset_face(side, eid) = _n_subdomain_qps_face[sid];
     193        1195 :         _n_subdomain_qps_face[sid] += _n_qps_face(side, eid);
     194             :       }
     195         201 :     }
     196             : 
     197         693 :   _q_points.copyToDeviceNested();
     198         693 :   _q_points_face.copyToDeviceNested();
     199         693 :   _weights.copyToDeviceNested();
     200         693 :   _weights_face.copyToDeviceNested();
     201             : 
     202         693 :   _n_qps.copyToDevice();
     203         693 :   _n_qps_face.copyToDevice();
     204         693 :   _n_subdomain_qps.copyToDevice();
     205         693 :   _n_subdomain_qps_face.copyToDevice();
     206         693 :   _qp_offset.copyToDevice();
     207         693 :   _qp_offset_face.copyToDevice();
     208         693 : }
     209             : 
     210             : void
     211         693 : Assembly::initShape()
     212             : {
     213             :   // Generate the list of unique FE types
     214             : 
     215         693 :   std::set<FEType> fe_types;
     216             : 
     217        1386 :   auto getFETypes = [&](::System & system)
     218             :   {
     219        2568 :     for (unsigned int var = 0; var < system.n_vars(); var++)
     220        1182 :       fe_types.insert(system.variable_type(var));
     221        2079 :   };
     222             : 
     223        1386 :   for (unsigned int nl = 0; nl < _problem.numNonlinearSystems(); ++nl)
     224         693 :     getFETypes(_problem.getNonlinearSystemBase(nl).system());
     225             : 
     226         693 :   getFETypes(_problem.getAuxiliarySystem().system());
     227             : 
     228         693 :   _fe_type_map.clear();
     229             : 
     230        1407 :   for (auto & fet : fe_types)
     231         714 :     _fe_type_map[fet] = _fe_type_map.size();
     232             : 
     233             :   // Cache reference shape data
     234             : 
     235         693 :   auto num_subdomains = _mesh.meshSubdomains().size();
     236         693 :   auto num_elem_types = kokkosMesh().getElementTypeMap().size();
     237             : 
     238         693 :   _phi.create(num_subdomains, num_elem_types, _fe_type_map.size());
     239         693 :   _phi_face.create(num_subdomains, num_elem_types, _fe_type_map.size());
     240         693 :   _grad_phi.create(num_subdomains, num_elem_types, _fe_type_map.size());
     241         693 :   _grad_phi_face.create(num_subdomains, num_elem_types, _fe_type_map.size());
     242             : 
     243         693 :   _map_phi.create(num_subdomains, num_elem_types);
     244         693 :   _map_phi_face.create(num_subdomains, num_elem_types);
     245         693 :   _map_psi_face.create(num_subdomains, num_elem_types);
     246         693 :   _map_grad_phi.create(num_subdomains, num_elem_types);
     247         693 :   _map_grad_phi_face.create(num_subdomains, num_elem_types);
     248         693 :   _map_grad_psi_face.create(num_subdomains, num_elem_types);
     249             : 
     250         693 :   _n_dofs.create(num_elem_types, _fe_type_map.size());
     251         693 :   _n_dofs = 0;
     252             : 
     253        1436 :   for (auto subdomain : _mesh.meshSubdomains())
     254             :   {
     255         743 :     auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
     256             : 
     257         743 :     auto & assembly = _problem.assembly(0, 0);
     258         743 :     auto qrule = assembly.writeableQRule(_dimension, subdomain, {});
     259         743 :     auto qrule_face = assembly.writeableQRuleFace(_dimension, subdomain, {});
     260             : 
     261        1507 :     for (auto & [fe_type, fe_type_id] : _fe_type_map)
     262             :     {
     263         764 :       std::unique_ptr<FEBase> fe(FEBase::build(_dimension, fe_type));
     264         764 :       std::unique_ptr<FEBase> fe_face(FEBase::build(_dimension, fe_type));
     265             : 
     266         764 :       fe->attach_quadrature_rule(qrule);
     267         764 :       fe_face->attach_quadrature_rule(qrule_face);
     268             : 
     269        1528 :       for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
     270             :       {
     271         764 :         auto elem = &libMesh::ReferenceElem::get(elem_type);
     272             : 
     273         764 :         auto & phi = fe->get_phi();
     274         764 :         auto & grad_phi = fe->get_dphi();
     275             : 
     276         764 :         fe->reinit(elem);
     277             : 
     278         764 :         _n_dofs(elem_type_id, fe_type_id) = phi.size();
     279             : 
     280         764 :         _phi(sid, elem_type_id, fe_type_id).create(phi.size(), qrule->n_points());
     281         764 :         _grad_phi(sid, elem_type_id, fe_type_id).create(phi.size(), qrule->n_points());
     282             : 
     283        3601 :         for (unsigned int i = 0; i < phi.size(); ++i)
     284       14695 :           for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
     285             :           {
     286       11858 :             _phi(sid, elem_type_id, fe_type_id)(i, qp) = phi[i][qp];
     287       11858 :             _grad_phi(sid, elem_type_id, fe_type_id)(i, qp) = grad_phi[i][qp];
     288             :           }
     289             : 
     290         764 :         _phi_face(sid, elem_type_id, fe_type_id).create(elem->n_sides());
     291         764 :         _grad_phi_face(sid, elem_type_id, fe_type_id).create(elem->n_sides());
     292             : 
     293        3550 :         for (unsigned int side = 0; side < elem->n_sides(); ++side)
     294             :         {
     295        2786 :           auto & phi = fe_face->get_phi();
     296        2786 :           auto & grad_phi = fe_face->get_dphi();
     297             : 
     298        2786 :           fe_face->reinit(elem, side);
     299             : 
     300        2786 :           _phi_face(sid, elem_type_id, fe_type_id)(side).create(phi.size(), qrule_face->n_points());
     301        4902 :           _grad_phi_face(sid, elem_type_id, fe_type_id)(side).create(phi.size(),
     302        2116 :                                                                      qrule_face->n_points());
     303             : 
     304       14068 :           for (unsigned int i = 0; i < phi.size(); ++i)
     305       36660 :             for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     306             :             {
     307       25378 :               _phi_face(sid, elem_type_id, fe_type_id)(side)(i, qp) = phi[i][qp];
     308       25378 :               _grad_phi_face(sid, elem_type_id, fe_type_id)(side)(i, qp) = grad_phi[i][qp];
     309             :             }
     310             :         }
     311             :       }
     312         764 :     }
     313             : 
     314        1486 :     for (auto & [elem_type, elem_type_id] : kokkosMesh().getElementTypeMap())
     315             :     {
     316         743 :       auto elem = &libMesh::ReferenceElem::get(elem_type);
     317             : 
     318         178 :       std::unique_ptr<FEBase> fe(
     319         565 :           FEBase::build(_dimension, FEType(elem->default_order(), LAGRANGE)));
     320         178 :       std::unique_ptr<FEBase> fe_face(
     321         565 :           FEBase::build(_dimension, FEType(elem->default_order(), LAGRANGE)));
     322             : 
     323         743 :       fe->attach_quadrature_rule(qrule);
     324         743 :       fe_face->attach_quadrature_rule(qrule_face);
     325             : 
     326         743 :       auto & phi = fe->get_phi();
     327         743 :       auto & grad_phi = fe->get_dphi();
     328             : 
     329         743 :       fe->reinit(elem);
     330             : 
     331         743 :       _map_phi(sid, elem_type_id).create(phi.size(), qrule->n_points());
     332         743 :       _map_grad_phi(sid, elem_type_id).create(phi.size(), qrule->n_points());
     333             : 
     334        3559 :       for (unsigned int i = 0; i < phi.size(); ++i)
     335       14632 :         for (unsigned int qp = 0; qp < qrule->n_points(); ++qp)
     336             :         {
     337       11816 :           _map_phi(sid, elem_type_id)(i, qp) = phi[i][qp];
     338       11816 :           _map_grad_phi(sid, elem_type_id)(i, qp) = grad_phi[i][qp];
     339             :         }
     340             : 
     341         743 :       _map_phi_face(sid, elem_type_id).create(elem->n_sides());
     342         743 :       _map_grad_phi_face(sid, elem_type_id).create(elem->n_sides());
     343         743 :       _map_psi_face(sid, elem_type_id).create(elem->n_sides());
     344         743 :       _map_grad_psi_face(sid, elem_type_id).create(elem->n_sides());
     345             : 
     346        3487 :       for (unsigned int side = 0; side < elem->n_sides(); ++side)
     347             :       {
     348        2744 :         auto & phi = fe_face->get_phi();
     349        2744 :         auto & grad_phi = fe_face->get_dphi();
     350             : 
     351        2744 :         fe_face->reinit(elem, side);
     352             : 
     353        2744 :         _map_phi_face(sid, elem_type_id)(side).create(phi.size(), qrule_face->n_points());
     354        2744 :         _map_grad_phi_face(sid, elem_type_id)(side).create(phi.size(), qrule_face->n_points());
     355             : 
     356       13984 :         for (unsigned int i = 0; i < phi.size(); ++i)
     357       36576 :           for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     358             :           {
     359       25336 :             _map_phi_face(sid, elem_type_id)(side)(i, qp) = phi[i][qp];
     360       25336 :             _map_grad_phi_face(sid, elem_type_id)(side)(i, qp) = grad_phi[i][qp];
     361             :           }
     362             : 
     363        2744 :         auto & psi = fe_face->get_fe_map().get_psi();
     364        2744 :         auto & dpsidxi = fe_face->get_fe_map().get_dpsidxi();
     365        2744 :         auto & dpsideta = fe_face->get_fe_map().get_dpsideta();
     366             : 
     367        2744 :         _map_psi_face(sid, elem_type_id)(side).create(psi.size(), qrule_face->n_points());
     368        2744 :         _map_grad_psi_face(sid, elem_type_id)(side).create(psi.size(), qrule_face->n_points());
     369             : 
     370        8364 :         for (unsigned int i = 0; i < psi.size(); ++i)
     371       18288 :           for (unsigned int qp = 0; qp < qrule_face->n_points(); ++qp)
     372             :           {
     373       12668 :             _map_psi_face(sid, elem_type_id)(side)(i, qp) = psi[i][qp];
     374       12668 :             if (elem->dim() > 1)
     375       12368 :               _map_grad_psi_face(sid, elem_type_id)(side)(i, qp)(0) = dpsidxi[i][qp];
     376       12668 :             if (elem->dim() > 2)
     377        3456 :               _map_grad_psi_face(sid, elem_type_id)(side)(i, qp)(1) = dpsideta[i][qp];
     378             :           }
     379             :       }
     380         743 :     }
     381             :   }
     382             : 
     383         693 :   _phi.copyToDeviceNested();
     384         693 :   _phi_face.copyToDeviceNested();
     385         693 :   _grad_phi.copyToDeviceNested();
     386         693 :   _grad_phi_face.copyToDeviceNested();
     387             : 
     388         693 :   _map_phi.copyToDeviceNested();
     389         693 :   _map_phi_face.copyToDeviceNested();
     390         693 :   _map_psi_face.copyToDeviceNested();
     391         693 :   _map_grad_phi.copyToDeviceNested();
     392         693 :   _map_grad_phi_face.copyToDeviceNested();
     393         693 :   _map_grad_psi_face.copyToDeviceNested();
     394             : 
     395         693 :   _n_dofs.copyToDevice();
     396         693 : }
     397             : 
     398             : void
     399         693 : Assembly::cachePhysicalMap()
     400             : {
     401         693 :   auto num_subdomains = _mesh.nSubdomains();
     402         693 :   auto num_elems = _mesh.nActiveLocalElem();
     403             : 
     404         693 :   _jacobian.create(num_subdomains);
     405         693 :   _jxw.create(num_subdomains);
     406         693 :   _xyz.create(num_subdomains);
     407             : 
     408        1436 :   for (auto subdomain : _mesh.meshSubdomains())
     409             :   {
     410         743 :     auto sid = kokkosMesh().getContiguousSubdomainID(subdomain);
     411             : 
     412         743 :     _jacobian[sid].createDevice(_n_subdomain_qps[sid]);
     413         743 :     _jxw[sid].createDevice(_n_subdomain_qps[sid]);
     414         743 :     _xyz[sid].createDevice(_n_subdomain_qps[sid]);
     415             :   }
     416             : 
     417         693 :   _jacobian.copyToDeviceNested();
     418         693 :   _jxw.copyToDeviceNested();
     419         693 :   _xyz.copyToDeviceNested();
     420             : 
     421         693 :   ::Kokkos::RangePolicy<ExecSpace, ::Kokkos::IndexType<ThreadID>> policy(0, num_elems);
     422         693 :   ::Kokkos::parallel_for(policy, *this);
     423         693 :   ::Kokkos::fence();
     424         693 : }
     425             : 
     426             : KOKKOS_FUNCTION void
     427       23695 : Assembly::operator()(const ThreadID tid) const
     428             : {
     429       23695 :   auto info = kokkosMesh().getElementInfo(tid);
     430       23695 :   auto offset = getQpOffset(info);
     431             : 
     432       23695 :   auto jacobian = &_jacobian[info.subdomain][offset];
     433       23695 :   auto jxw = &_jxw[info.subdomain][offset];
     434       23695 :   auto xyz = &_xyz[info.subdomain][offset];
     435             : 
     436      117983 :   for (unsigned int qp = 0; qp < getNumQps(info); ++qp)
     437       94288 :     computePhysicalMap(info, qp, &jacobian[qp], &jxw[qp], &xyz[qp]);
     438       23695 : }
     439             : 
     440             : } // namespace Kokkos
     441             : } // namespace Moose

Generated by: LCOV version 1.14