LCOV - code coverage report
Current view: top level - src/vectorpostprocessors - WorkBalance.C (source / functions) Hit Total Coverage
Test: idaholab/moose framework: 2bf808 Lines: 211 217 97.2 %
Date: 2025-07-17 01:28:37 Functions: 19 19 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //* This file is part of the MOOSE framework
       2             : //* https://mooseframework.inl.gov
       3             : //*
       4             : //* All rights reserved, see COPYRIGHT for full restrictions
       5             : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
       6             : //*
       7             : //* Licensed under LGPL 2.1, please see LICENSE for details
       8             : //* https://www.gnu.org/licenses/lgpl-2.1.html
       9             : 
      10             : #include "WorkBalance.h"
      11             : 
      12             : // MOOSE includes
      13             : #include "MooseVariable.h"
      14             : #include "ThreadedElementLoopBase.h"
      15             : #include "ThreadedNodeLoop.h"
      16             : #include "PetscExternalPartitioner.h"
      17             : #include "CastUniquePointer.h"
      18             : 
      19             : #include "libmesh/quadrature.h"
      20             : #include "libmesh/elem_side_builder.h"
      21             : 
      22             : #include <numeric>
      23             : 
      24             : registerMooseObject("MooseApp", WorkBalance);
      25             : 
      26             : InputParameters
      27       14939 : WorkBalance::validParams()
      28             : {
      29       14939 :   InputParameters params = GeneralVectorPostprocessor::validParams();
      30       14939 :   params.addClassDescription("Computes several metrics for workload balance per processor");
      31             : 
      32             :   // These are numbered this way because NL is always system 0 and Aux is system 1
      33       14939 :   MooseEnum system_enum("ALL=-1 NL AUX", "ALL");
      34       14939 :   params.addParam<MooseEnum>(
      35             :       "system",
      36             :       system_enum,
      37             :       "The system(s) to retrieve the number of DOFs from (NL, AUX, ALL). Default == ALL");
      38             : 
      39       44817 :   params.addParam<bool>("sync_to_all_procs",
      40       29878 :                         false,
      41             :                         "Whether or not to sync the vectors to all processors.  By default we only "
      42             :                         "sync them to processor 0 so they can be written out.  Setting this to "
      43             :                         "true will use more communication, but is necessary if you expect these "
      44             :                         "vectors to be available on all processors");
      45             : 
      46             :   MultiMooseEnum balances(
      47             :       "num_elems=0 num_nodes=1 num_dofs=2 num_partition_sides=3 partition_surface_area=4 "
      48             :       "num_partition_hardware_id_sides=5 partition_hardware_id_surface_area=6",
      49             :       "num_elems num_nodes num_dofs num_partition_sides partition_surface_area "
      50       14939 :       "num_partition_hardware_id_sides partition_hardware_id_surface_area");
      51       14939 :   params.addParam<MultiMooseEnum>(
      52             :       "balances", balances, "Which metrics do you want to use to represent word balance");
      53       29878 :   return params;
      54       14939 : }
      55             : 
      56         330 : WorkBalance::WorkBalance(const InputParameters & parameters)
      57             :   : GeneralVectorPostprocessor(parameters),
      58         330 :     _system(getParam<MooseEnum>("system")),
      59         330 :     _rank_map(_app.rankMap()),
      60         330 :     _my_hardware_id(_rank_map.hardwareID(processor_id())),
      61         330 :     _sync_to_all_procs(getParam<bool>("sync_to_all_procs")),
      62         330 :     _local_num_elems(0),
      63         330 :     _local_num_nodes(0),
      64         330 :     _local_num_dofs(0),
      65         330 :     _local_num_partition_sides(0),
      66         330 :     _local_partition_surface_area(0),
      67         330 :     _local_num_partition_hardware_id_sides(0),
      68         330 :     _local_partition_hardware_id_surface_area(0),
      69         330 :     _pid(declareVector("pid")),
      70         660 :     _balances(getParam<MultiMooseEnum>("balances"))
      71             : {
      72        1380 :   for (auto & balance : _balances)
      73        1050 :     _balance_vectors[balance] = &declareVector(MooseUtils::toLower(
      74             :         balance)); // Use 'toLower' to make names consistent with the original interface
      75         330 : }
      76             : 
      77             : void
      78         321 : WorkBalance::initialize()
      79             : {
      80         321 :   _local_num_elems = 0;
      81         321 :   _local_num_nodes = 0;
      82         321 :   _local_num_dofs = 0;
      83         321 :   _local_num_partition_sides = 0;
      84         321 :   _local_partition_surface_area = 0;
      85         321 :   _local_num_partition_hardware_id_sides = 0;
      86         321 :   _local_partition_hardware_id_surface_area = 0;
      87         321 : }
      88             : 
      89             : namespace
      90             : {
      91             : 
      92             : // Helper Threaded Loop for Elements
// Helper Threaded Loop for Elements
//
// Accumulates, over the active local elements of one processor:
//   - the (optionally partitioner-weighted) element count
//   - the number of dofs attached to those elements
//   - the number and surface area of sides that face an element owned by a
//     different processor, and separately those owned by different hardware
// Instances are split per thread via the (x, Threads::split) constructor and
// combined afterwards with join().
class WBElementLoop : public ThreadedElementLoopBase<ConstElemRange>
{
public:
  WBElementLoop(MooseMesh & mesh, int system, const RankMap & rank_map)
    : ThreadedElementLoopBase(mesh),
      _system(system),
      _rank_map(rank_map),
      _my_hardware_id(rank_map.hardwareID(mesh.processor_id())),
      _local_num_elems(0),
      _local_num_dofs(0),
      _local_num_partition_sides(0),
      _local_partition_surface_area(0),
      _local_num_partition_hardware_id_sides(0),
      _local_partition_hardware_id_surface_area(0),
      _this_pid(_mesh.processor_id()) // Get this once because it is expensive
  {
    // This is required because dynamic_pointer_cast() requires an l-value.
    // If the mesh's partitioner is not a PetscExternalPartitioner the cast
    // yields null and unit weights are used below.
    auto partitioner = mesh.getMesh().partitioner()->clone();
    _petsc_partitioner = dynamic_pointer_cast<PetscExternalPartitioner>(partitioner);
  }

  // Thread-split copy: all counters restart at zero; each thread clones the
  // partitioner (when present) so weight evaluation needs no synchronization.
  WBElementLoop(WBElementLoop & x, Threads::split split)
    : ThreadedElementLoopBase(x, split),
      _system(x._system),
      _rank_map(x._rank_map),
      _my_hardware_id(x._my_hardware_id),
      _local_num_elems(0),
      _local_num_dofs(0),
      _local_num_partition_sides(0),
      _local_partition_surface_area(0),
      _local_num_partition_hardware_id_sides(0),
      _local_partition_hardware_id_surface_area(0),
      _this_pid(x._this_pid)
  {
    if (x._petsc_partitioner)
    {
      // This is required because dynamic_pointer_cast() requires an l-value
      auto partitioner = x._petsc_partitioner->clone();
      _petsc_partitioner = dynamic_pointer_cast<PetscExternalPartitioner>(partitioner);
    }
  }

  virtual ~WBElementLoop() {}

  // Zero the counters before each traversal (pre() may run more than once per object)
  virtual void pre() override
  {
    _local_num_elems = 0;
    _local_num_dofs = 0;
    _local_num_partition_sides = 0;
    _local_partition_surface_area = 0;
    _local_num_partition_hardware_id_sides = 0;
    _local_partition_hardware_id_surface_area = 0;
  }

  // Count this element (weighted if the partitioner supplies element weights)
  // and the dofs on it for the requested system(s).
  virtual void onElement(const Elem * elem) override
  {
    // NOTE(review): 'applyElementEeight' appears to be a misspelling of
    // 'applyElementWeight' in the PetscExternalPartitioner API; renaming it
    // requires a coordinated change in that class, not just this call site.
    if (_petsc_partitioner && _petsc_partitioner->applyElementEeight())
    {
      // We should change partitioner interface to take const
      // But at this point let us keep API intact
      _local_num_elems += _petsc_partitioner->computeElementWeight(const_cast<Elem &>(*elem));
    }
    else
      _local_num_elems++;

    // Find out how many dofs there are on this element
    if (_system == WorkBalance::ALL) // All systems
    {
      auto n_sys = elem->n_systems();
      for (decltype(n_sys) sys = 0; sys < n_sys; sys++)
      {
        auto n_vars = elem->n_vars(sys);

        for (decltype(n_vars) var = 0; var < n_vars; var++)
          _local_num_dofs += elem->n_dofs(sys, var);
      }
    }
    else // Particular system
    {
      auto n_vars = elem->n_vars(static_cast<unsigned int>(_system));

      for (decltype(n_vars) var = 0; var < n_vars; var++)
        _local_num_dofs += elem->n_dofs(static_cast<unsigned int>(_system), var);
    }
  }

  // Tally sides whose neighbor is owned by another processor; such sides form
  // the partition boundary whose count/area we are measuring.
  virtual void onInternalSide(const Elem * elem, unsigned int side) override
  {
    if (elem->neighbor_ptr(side)->processor_id() != _this_pid)
    {
      if (_petsc_partitioner && _petsc_partitioner->applySideWeight())
      {
        // We should change partitioner interface to take const
        // But at this point let us keep API intact
        _local_num_partition_sides +=
            _petsc_partitioner->computeSideWeight(const_cast<Elem &>(*elem), side);
      }
      else
        _local_num_partition_sides++;

      // NOTE: we do not want to account for different coordinate systems here, so
      // using volume from libmesh elem is fine here
      auto volume = _elem_side_builder(*elem, side).volume();
      _local_partition_surface_area += volume;

      // Additionally track sides crossing onto different hardware (different
      // compute node), where communication is more expensive
      if (_my_hardware_id != _rank_map.hardwareID(elem->neighbor_ptr(side)->processor_id()))
      {
        _local_num_partition_hardware_id_sides++;
        _local_partition_hardware_id_surface_area += volume;
      }
    }
  }

  // Fold another thread's tallies into this one (used by Threads::parallel_reduce)
  void join(const WBElementLoop & y)
  {
    _local_num_elems += y._local_num_elems;
    _local_num_dofs += y._local_num_dofs;
    _local_num_partition_sides += y._local_num_partition_sides;
    _local_partition_surface_area += y._local_partition_surface_area;
    _local_num_partition_hardware_id_sides += y._local_num_partition_hardware_id_sides;
    _local_partition_hardware_id_surface_area += y._local_partition_hardware_id_surface_area;
  }

  // WorkBalance::ALL (-1) or a specific system number
  int _system;

  const RankMap & _rank_map;

  unsigned int _my_hardware_id;

  // Per-thread tallies; read by WorkBalance::execute() after the reduce
  dof_id_type _local_num_elems;
  dof_id_type _local_num_dofs;
  dof_id_type _local_num_partition_sides;
  Real _local_partition_surface_area;
  dof_id_type _local_num_partition_hardware_id_sides;
  Real _local_partition_hardware_id_surface_area;

  processor_id_type _this_pid;

  libMesh::ElemSideBuilder _elem_side_builder;

  // Null unless the mesh uses a PetscExternalPartitioner with custom weights
  std::unique_ptr<PetscExternalPartitioner> _petsc_partitioner;

private:
  // Visit every internal side so all partition-boundary sides are seen
  bool shouldComputeInternalSide(const Elem & /*elem*/, const Elem & /*neighbor*/) const override
  {
    return true;
  }
};
     241             : 
     242             : class WBNodeLoop : public ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>
     243             : {
     244             : public:
     245         321 :   WBNodeLoop(FEProblemBase & fe_problem, int system)
     246         321 :     : ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>(fe_problem),
     247         321 :       _system(system),
     248         321 :       _local_num_nodes(0),
     249         321 :       _local_num_dofs(0)
     250             :   {
     251         321 :   }
     252             : 
     253          45 :   WBNodeLoop(WBNodeLoop & x, Threads::split split)
     254          45 :     : ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>(x, split),
     255          45 :       _system(x._system),
     256          45 :       _local_num_nodes(0),
     257          45 :       _local_num_dofs(0)
     258             :   {
     259          45 :   }
     260             : 
     261       75655 :   virtual void onNode(ConstNodeRange::const_iterator & node_it)
     262             :   {
     263       75655 :     auto & node = *(*node_it);
     264             : 
     265       75655 :     _local_num_nodes++;
     266             : 
     267             :     // Find out how many dofs there are on this node
     268       75655 :     if (_system == WorkBalance::ALL) // All systems
     269             :     {
     270        1815 :       auto n_sys = node.n_systems();
     271        5445 :       for (decltype(n_sys) sys = 0; sys < n_sys; sys++)
     272             :       {
     273        3630 :         auto n_vars = node.n_vars(sys);
     274             : 
     275        9075 :         for (decltype(n_vars) var = 0; var < n_vars; var++)
     276        5445 :           _local_num_dofs += node.n_dofs(sys, var);
     277             :       }
     278             :     }
     279             :     else // Particular system
     280             :     {
     281       73840 :       auto n_vars = node.n_vars(static_cast<unsigned int>(_system));
     282             : 
     283      148648 :       for (decltype(n_vars) var = 0; var < n_vars; var++)
     284       74808 :         _local_num_dofs += node.n_dofs(static_cast<unsigned int>(_system), var);
     285             :     }
     286       75655 :   }
     287             : 
     288          45 :   void join(WBNodeLoop & y)
     289             :   {
     290          45 :     _local_num_nodes += y._local_num_nodes;
     291          45 :     _local_num_dofs += y._local_num_dofs;
     292          45 :   }
     293             : 
     294             :   int _system;
     295             : 
     296             :   dof_id_type _local_num_nodes;
     297             :   dof_id_type _local_num_dofs;
     298             : };
     299             : 
     300             : } // End of blank namespace
     301             : 
     302             : void
     303         321 : WorkBalance::execute()
     304             : {
     305         321 :   auto & mesh = _fe_problem.mesh();
     306             : 
     307             :   // Get all of the Elem info first
     308         321 :   WBElementLoop wb_el(mesh, _system, _rank_map);
     309             : 
     310         321 :   Threads::parallel_reduce(*mesh.getActiveLocalElementRange(), wb_el);
     311             : 
     312         321 :   _local_num_elems = wb_el._local_num_elems;
     313         321 :   _local_num_dofs = wb_el._local_num_dofs;
     314         321 :   _local_num_partition_sides = wb_el._local_num_partition_sides;
     315         321 :   _local_partition_surface_area = wb_el._local_partition_surface_area;
     316         321 :   _local_num_partition_hardware_id_sides = wb_el._local_num_partition_hardware_id_sides;
     317         321 :   _local_partition_hardware_id_surface_area = wb_el._local_partition_hardware_id_surface_area;
     318             : 
     319             :   // Now Node info
     320         321 :   WBNodeLoop wb_nl(_fe_problem, _system);
     321             : 
     322         321 :   Threads::parallel_reduce(*mesh.getLocalNodeRange(), wb_nl);
     323             : 
     324         321 :   _local_num_nodes = wb_nl._local_num_nodes;
     325         321 :   _local_num_dofs += wb_nl._local_num_dofs;
     326         321 : }
     327             : 
     328             : void
     329         987 : WorkBalance::gather(int balance_id, VectorPostprocessorValue & vppv)
     330             : {
     331         987 :   if (!_sync_to_all_procs)
     332             :   {
     333         840 :     switch (balance_id)
     334             :     {
     335         300 :       case 0: // num_elems
     336         300 :         _communicator.gather(0, static_cast<Real>(_local_num_elems), vppv);
     337         300 :         break;
     338          48 :       case 1: // num_nodes
     339          48 :         _communicator.gather(0, static_cast<Real>(_local_num_nodes), vppv);
     340          48 :         break;
     341          48 :       case 2: // num_dofs
     342          48 :         _communicator.gather(0, static_cast<Real>(_local_num_dofs), vppv);
     343          48 :         break;
     344         300 :       case 3: // num_partition_sides
     345         300 :         _communicator.gather(0, static_cast<Real>(_local_num_partition_sides), vppv);
     346         300 :         break;
     347          48 :       case 4: // partition_surface_area
     348          48 :         _communicator.gather(0, _local_partition_surface_area, vppv);
     349          48 :         break;
     350          48 :       case 5: // num_partition_hardware_id_sides
     351          48 :         _communicator.gather(0, static_cast<Real>(_local_num_partition_hardware_id_sides), vppv);
     352          48 :         break;
     353          48 :       case 6: // partition_hardware_id_surface_area
     354          48 :         _communicator.gather(0, _local_partition_hardware_id_surface_area, vppv);
     355          48 :         break;
     356           0 :       default:
     357           0 :         mooseError("Unknown balance type: ", balance_id);
     358             :     }
     359             :   }
     360             :   else
     361             :   {
     362         147 :     switch (balance_id)
     363             :     {
     364          21 :       case 0: // num_elems
     365          21 :         _communicator.allgather(static_cast<Real>(_local_num_elems), vppv);
     366          21 :         break;
     367          21 :       case 1: // num_nodes
     368          21 :         _communicator.allgather(static_cast<Real>(_local_num_nodes), vppv);
     369          21 :         break;
     370          21 :       case 2: // num_dofs
     371          21 :         _communicator.allgather(static_cast<Real>(_local_num_dofs), vppv);
     372          21 :         break;
     373          21 :       case 3: // num_partition_sides
     374          21 :         _communicator.allgather(static_cast<Real>(_local_num_partition_sides), vppv);
     375          21 :         break;
     376          21 :       case 4: // partition_surface_area
     377          21 :         _communicator.allgather(_local_partition_surface_area, vppv);
     378          21 :         break;
     379          21 :       case 5: // num_partition_hardware_id_sides
     380          21 :         _communicator.allgather(static_cast<Real>(_local_num_partition_hardware_id_sides), vppv);
     381          21 :         break;
     382          21 :       case 6: // partition_hardware_id_surface_area
     383          21 :         _communicator.allgather(_local_partition_hardware_id_surface_area, vppv);
     384          21 :         break;
     385           0 :       default:
     386           0 :         mooseError("Unknown balance type: ", balance_id);
     387             :     }
     388             :   }
     389         987 : }
     390             : 
     391             : void
     392         321 : WorkBalance::finalize()
     393             : {
     394        1308 :   for (auto & balance : _balances)
     395             :   {
     396         987 :     auto balance_id = balance.id();
     397             : 
     398         987 :     auto & balance_vector = *_balance_vectors.at(balance);
     399             : 
     400         987 :     gather(balance_id, balance_vector);
     401             :   }
     402             :   // Fill in the PID column - this just makes plotting easier
     403         321 :   _pid.resize(_communicator.size());
     404         321 :   std::iota(_pid.begin(), _pid.end(), 0);
     405         321 : }

Generated by: LCOV version 1.14