LCOV - code coverage report
Current view: top level - src/partitioning - parmetis_partitioner.C (source / functions) Hit Total Coverage
Test: libMesh/libmesh: #4475 (55045b) with base a68cc6 Lines: 153 158 96.8 %
Date: 2026-06-03 14:29:06 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // The libMesh Finite Element Library.
       2             : // Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
       3             : 
       4             : // This library is free software; you can redistribute it and/or
       5             : // modify it under the terms of the GNU Lesser General Public
       6             : // License as published by the Free Software Foundation; either
       7             : // version 2.1 of the License, or (at your option) any later version.
       8             : 
       9             : // This library is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12             : // Lesser General Public License for more details.
      13             : 
      14             : // You should have received a copy of the GNU Lesser General Public
      15             : // License along with this library; if not, write to the Free Software
      16             : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      17             : 
      18             : 
      19             : 
      20             : // Local includes
      21             : #include "libmesh/parmetis_partitioner.h"
      22             : 
      23             : // libMesh includes
      24             : #include "libmesh/libmesh_config.h"
      25             : #include "libmesh/elem.h"
      26             : #include "libmesh/elem_range.h"
      27             : #include "libmesh/enum_partitioner_type.h"
      28             : #include "libmesh/libmesh_logging.h"
      29             : #include "libmesh/mesh_base.h"
      30             : #include "libmesh/mesh_serializer.h"
      31             : #include "libmesh/mesh_tools.h"
      32             : #include "libmesh/mesh_communication.h"
      33             : #include "libmesh/metis_partitioner.h"
      34             : #include "libmesh/parallel_only.h"
      35             : #include "libmesh/parmetis_helper.h"
      36             : #include "libmesh/utility.h"
      37             : 
      38             : // TIMPI includes
      39             : #include "timpi/communicator.h"    // also includes mpi.h
      40             : #include "timpi/parallel_implementation.h"    // for min()
      41             : 
      42             : // Include the ParMETIS header file.
      43             : #ifdef LIBMESH_HAVE_PARMETIS
      44             : 
      45             : // Before we include a header wrapped in a namespace, we'd better make
      46             : // sure none of its dependencies end up in that namespace
      47             : #include <mpi.h>
      48             : 
      49             : namespace Parmetis {
      50             : extern "C" {
      51             : #     include "libmesh/ignore_warnings.h"
      52             : #     include "parmetis.h"
      53             : #     include "libmesh/restore_warnings.h"
      54             : }
      55             : }
      56             : 
      57             : #endif
      58             : 
      59             : 
      60             : // C++ includes
      61             : #include <unordered_map>
      62             : 
      63             : 
      64             : namespace libMesh
      65             : {
      66             : 
      67             : // Minimum elements on each processor required for us to choose
      68             : // Parmetis over Metis.
      69             : #ifdef LIBMESH_HAVE_PARMETIS
      70             : const unsigned int MIN_ELEM_PER_PROC = 4;
      71             : #endif
      72             : 
      73             : // ------------------------------------------------------------
      74             : // ParmetisPartitioner implementation
      75      277558 : ParmetisPartitioner::ParmetisPartitioner()
      76             : #ifdef LIBMESH_HAVE_PARMETIS
      77      277826 :   :  _pmetis(std::make_unique<ParmetisHelper>())
      78             : #endif
      79      277558 : {}
      80             : 
      81             : 
      82             : 
      83       19326 : ParmetisPartitioner::ParmetisPartitioner (const ParmetisPartitioner & other)
      84             :   : Partitioner(other)
      85             : #ifdef LIBMESH_HAVE_PARMETIS
      86       19326 :   , _pmetis(std::make_unique<ParmetisHelper>(*(other._pmetis)))
      87             : #endif
      88             : {
      89       19326 : }
      90             : 
      91             : 
      92             : 
      93      591882 : ParmetisPartitioner::~ParmetisPartitioner() = default;
      94             : 
      95             : 
      96             : 
      97         284 : PartitionerType ParmetisPartitioner::type() const
      98             : {
      99         284 :   return PARMETIS_PARTITIONER;
     100             : }
     101             : 
     102             : 
     103             : 
     104      259136 : void ParmetisPartitioner::_do_partition (MeshBase & mesh,
     105             :                                          const unsigned int n_sbdmns)
     106             : {
     107      259136 :   this->_do_repartition (mesh, n_sbdmns);
     108      259136 : }
     109             : 
     110             : 
     111             : 
     112      259136 : void ParmetisPartitioner::_do_repartition (MeshBase & mesh,
     113             :                                            const unsigned int n_sbdmns)
     114             : {
     115             :   // This function must be run on all processors at once
     116         376 :   libmesh_parallel_only(mesh.comm());
     117             : 
     118             :   // Check for easy returns
     119      259136 :   if (!mesh.n_elem())
     120      210155 :     return;
     121             : 
     122      258994 :   if (n_sbdmns == 1)
     123             :     {
     124           0 :       this->single_partition(mesh);
     125           0 :       return;
     126             :     }
     127             : 
     128         372 :   libmesh_assert_greater (n_sbdmns, 0);
     129             : 
     130             :   // What to do if the Parmetis library IS NOT present
     131             : #ifndef LIBMESH_HAVE_PARMETIS
     132             : 
     133             :   libmesh_do_once(
     134             :   libMesh::out << "ERROR: The library has been built without" << std::endl
     135             :                << "Parmetis support.  Using a Metis"          << std::endl
     136             :                << "partitioner instead!"                      << std::endl;);
     137             : 
     138             :   MetisPartitioner mp;
     139             : 
     140             :   // Metis and other fallbacks only work in serial, and need to get
     141             :   // handed element ranges from an already-serialized mesh.
     142             :   mesh.allgather();
     143             : 
     144             :   // Don't just call partition() here; that would end up calling
     145             :   // post-element-partitioning work redundantly (and at the moment
     146             :   // incorrectly)
     147             :   mp.partition_range (mesh, mesh.active_elements_begin(),
     148             :                       mesh.active_elements_end(), n_sbdmns);
     149             : 
     150             :   // What to do if the Parmetis library IS present
     151             : #else
     152             : 
     153             :   // Revert to METIS on one processor.
     154      259366 :   if (mesh.n_processors() == 1)
     155             :     {
     156             :       // Make sure the mesh knows it's serial
     157           4 :       mesh.allgather();
     158             : 
     159           0 :       MetisPartitioner mp;
     160             :       // Don't just call partition() here; that would end up calling
     161             :       // post-element-partitioning work redundantly (and at the moment
     162             :       // incorrectly)
     163           8 :       mp.partition_range (mesh, mesh.active_elements_begin(),
     164           8 :                           mesh.active_elements_end(), n_sbdmns);
     165           0 :       return;
     166             :     }
     167             : 
     168         372 :   LOG_SCOPE("repartition()", "ParmetisPartitioner");
     169             : 
     170             :   // Initialize the data structures required by ParMETIS
     171      258990 :   this->initialize (mesh, n_sbdmns);
     172             : 
     173             :   // build the graph corresponding to the mesh
     174      258990 :   this->build_graph (mesh);
     175             : 
     176             :   // Make sure all processors have enough active local elements and
     177             :   // enough connectivity among them.
     178             :   // Parmetis tends to die when it's given only a couple elements
     179             :   // per partition or when it can't reach elements from each other.
     180             :   {
     181         372 :     bool ready_for_parmetis = true;
     182     3040452 :     for (const auto & nelem : _n_active_elem_on_proc)
     183     2781462 :       if (nelem < MIN_ELEM_PER_PROC)
     184         154 :         ready_for_parmetis = false;
     185             : 
     186      258990 :     std::size_t my_adjacency = _pmetis->adjncy.size();
     187      258990 :     mesh.comm().min(my_adjacency);
     188      258990 :     if (!my_adjacency)
     189          14 :       ready_for_parmetis = false;
     190             : 
     191             :     // Parmetis will not work unless each processor has some
     192             :     // elements. Specifically, it will abort when passed a nullptr
     193             :     // partition or adjacency array on *any* of the processors.
     194      107285 :     if (!ready_for_parmetis)
     195             :       {
     196             :         // FIXME: revert to METIS, although this requires a serial mesh
     197      210181 :         MeshSerializer serialize(mesh);
     198          86 :         MetisPartitioner mp;
     199      210009 :         mp.partition (mesh, n_sbdmns);
     200          86 :         return;
     201      209837 :       }
     202             :   }
     203             : 
     204             : 
     205             :   // Partition the graph
     206       49839 :   std::vector<Parmetis::idx_t> vsize(_pmetis->vwgt.size(), 1);
     207       48981 :   Parmetis::real_t itr = 1000000.0;
     208       48981 :   MPI_Comm mpi_comm = mesh.comm().get();
     209             : 
     210             :   // Call the ParMETIS adaptive repartitioning method.  This respects the
     211             :   // original partitioning when computing the new partitioning so as to
     212             :   // minimize the required data redistribution.
     213      441401 :   Parmetis::ParMETIS_V3_AdaptiveRepart(_pmetis->vtxdist.empty() ? nullptr : _pmetis->vtxdist.data(),
     214         572 :                                        _pmetis->xadj.empty()    ? nullptr : _pmetis->xadj.data(),
     215         572 :                                        _pmetis->adjncy.empty()  ? nullptr : _pmetis->adjncy.data(),
     216         572 :                                        _pmetis->vwgt.empty()    ? nullptr : _pmetis->vwgt.data(),
     217         572 :                                        vsize.empty()            ? nullptr : vsize.data(),
     218             :                                        nullptr,
     219         286 :                                        &_pmetis->wgtflag,
     220         286 :                                        &_pmetis->numflag,
     221         286 :                                        &_pmetis->ncon,
     222         286 :                                        &_pmetis->nparts,
     223         572 :                                        _pmetis->tpwgts.empty()  ? nullptr : _pmetis->tpwgts.data(),
     224         572 :                                        _pmetis->ubvec.empty()   ? nullptr : _pmetis->ubvec.data(),
     225             :                                        &itr,
     226         286 :                                        _pmetis->options.data(),
     227         286 :                                        &_pmetis->edgecut,
     228         572 :                                        _pmetis->part.empty()    ? nullptr : reinterpret_cast<Parmetis::idx_t *>(_pmetis->part.data()),
     229             :                                        &mpi_comm);
     230             : 
     231             :   // Assign the returned processor ids
     232       49267 :   this->assign_partitioning (mesh, _pmetis->part);
     233             : 
     234             : #endif // #ifndef LIBMESH_HAVE_PARMETIS ... else ...
     235             : 
     236             : }
     237             : 
     238             : 
     239             : 
     240             : // Only need to compile these methods if ParMETIS is present
     241             : #ifdef LIBMESH_HAVE_PARMETIS
     242             : 
     243      258990 : void ParmetisPartitioner::initialize (const MeshBase & mesh,
     244             :                                       const unsigned int n_sbdmns)
     245             : {
     246         744 :   LOG_SCOPE("initialize()", "ParmetisPartitioner");
     247             : 
     248         372 :   const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
     249             :   // Set parameters.
     250      258990 :   _pmetis->wgtflag = 2;                                      // weights on vertices only
     251      258990 :   _pmetis->ncon    = 1;                                      // one weight per vertex
     252      258990 :   _pmetis->numflag = 0;                                      // C-style 0-based numbering
     253      258990 :   _pmetis->nparts  = static_cast<Parmetis::idx_t>(n_sbdmns); // number of subdomains to create
     254      258990 :   _pmetis->edgecut = 0;                                      // the numbers of edges cut by the
     255             :                                                              // partition
     256             : 
     257             :   // Initialize data structures for ParMETIS
     258      259362 :   _pmetis->vtxdist.assign (mesh.n_processors()+1, 0);
     259      258990 :   _pmetis->tpwgts.assign  (_pmetis->nparts, 1./_pmetis->nparts);
     260      258990 :   _pmetis->ubvec.assign   (_pmetis->ncon, 1.05);
     261      258990 :   _pmetis->part.assign    (n_active_local_elem, 0);
     262      258990 :   _pmetis->options.resize (5);
     263      258990 :   _pmetis->vwgt.resize    (n_active_local_elem);
     264             : 
     265             :   // Set the options
     266      258990 :   _pmetis->options[0] = 1;  // don't use default options
     267      258990 :   _pmetis->options[1] = 0;  // default (level of timing)
     268      258990 :   _pmetis->options[2] = 15; // random seed (default)
     269      258990 :   _pmetis->options[3] = 2;  // processor distribution and subdomain distribution are decoupled
     270             : 
     271             :   // ParMetis expects the elements to be numbered in contiguous blocks
     272             :   // by processor, i.e. [0, ne0), [ne0, ne0+ne1), ...
     273             :   // Since we only partition active elements we should have no expectation
     274             :   // that we currently have such a distribution.  So we need to create it.
     275             :   // Also, at the same time we are going to map all the active elements into a globally
     276             :   // unique range [0,n_active_elem) which is *independent* of the current partitioning.
     277             :   // This can be fed to ParMetis as the initial partitioning of the subdomains (decoupled
     278             :   // from the partitioning of the objects themselves).  This allows us to get the same
     279             :   // resultant partitioning independent of the input partitioning.
     280             :   libMesh::BoundingBox bbox =
     281      258990 :     MeshTools::create_bounding_box(mesh);
     282             : 
     283      258990 :   _find_global_index_by_pid_map(mesh);
     284             : 
     285             : 
     286             :   // count the total number of active elements in the mesh.  Note we cannot
     287             :   // use mesh.n_active_elem() in general since this only returns the number
     288             :   // of active elements which are stored on the calling processor.
     289             :   // We should not use n_active_elem for any allocation because that will
     290             :   // be inherently unscalable, but it can be useful for libmesh_assertions.
     291         372 :   dof_id_type n_active_elem=0;
     292             : 
     293             :   // Set up the vtxdist array.  This will be the same on each processor.
     294             :   // ***** Consult the Parmetis documentation. *****
     295         372 :   libmesh_assert_equal_to (_pmetis->vtxdist.size(),
     296             :                            cast_int<std::size_t>(mesh.n_processors()+1));
     297         372 :   libmesh_assert_equal_to (_pmetis->vtxdist[0], 0);
     298             : 
     299     3040452 :   for (auto pid : make_range(mesh.n_processors()))
     300             :     {
     301     2782950 :       _pmetis->vtxdist[pid+1] = _pmetis->vtxdist[pid] + _n_active_elem_on_proc[pid];
     302     2781462 :       n_active_elem += _n_active_elem_on_proc[pid];
     303             :     }
     304         372 :   libmesh_assert_equal_to (_pmetis->vtxdist.back(), static_cast<Parmetis::idx_t>(n_active_elem));
     305             : 
     306             : 
     307             :   // Maps active element ids into a contiguous range independent of partitioning.
     308             :   // (only needs local scope)
     309         744 :   std::unordered_map<dof_id_type, dof_id_type> global_index_map;
     310             : 
     311             :   {
     312         744 :     std::vector<dof_id_type> global_index;
     313             : 
     314             :     // create the unique mapping for all active elements independent of partitioning
     315             :     {
     316      259362 :       MeshBase::const_element_iterator       it  = mesh.active_elements_begin();
     317      259362 :       const MeshBase::const_element_iterator end = mesh.active_elements_end();
     318             : 
     319             :       // Calling this on all processors a unique range in [0,n_active_elem) is constructed.
     320             :       // Only the indices for the elements we pass in are returned in the array.
     321      258990 :       MeshCommunication().find_global_indices (mesh.comm(),
     322             :                                                bbox, it, end,
     323             :                                                global_index);
     324             : 
     325    16809518 :       for (dof_id_type cnt=0; it != end; ++it)
     326             :         {
     327    16292282 :           const Elem * elem = *it;
     328             :           // vectormap::count forces a sort, which is too expensive
     329             :           // in a loop
     330             :           // libmesh_assert (!global_index_map.count(elem->id()));
     331       19684 :           libmesh_assert_less (cnt, global_index.size());
     332       19684 :           libmesh_assert_less (global_index[cnt], n_active_elem);
     333             : 
     334    16311978 :           global_index_map.emplace(elem->id(), global_index[cnt++]);
     335             :         }
     336             :     }
     337             :     // really, shouldn't be close!
     338         372 :     libmesh_assert_less_equal (global_index_map.size(), n_active_elem);
     339         372 :     libmesh_assert_less_equal (_global_index_by_pid_map.size(), n_active_elem);
     340             : 
     341             :     // At this point the two maps should be the same size.  If they are not
     342             :     // then the number of active elements is not the same as the sum over all
     343             :     // processors of the number of active elements per processor, which means
     344             :     // there must be some unpartitioned objects out there.
     345      258990 :     libmesh_error_msg_if(global_index_map.size() != _global_index_by_pid_map.size(),
     346             :                          "ERROR:  ParmetisPartitioner cannot handle unpartitioned objects!");
     347             :   }
     348             : 
     349             :   // Finally, we need to initialize the vertex (partition) weights and the initial subdomain
     350             :   // mapping.  The subdomain mapping will be independent of the processor mapping, and is
     351             :   // defined by a simple mapping of the global indices we just found.
     352             :   {
     353      259362 :     std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
     354             : 
     355      259734 :     const dof_id_type first_local_elem = _pmetis->vtxdist[mesh.processor_id()];
     356             : 
     357     3040452 :     for (auto pid : make_range(mesh.n_processors()))
     358             :       {
     359         744 :         dof_id_type tgt_subdomain_size = 0;
     360             : 
     361             :         // watch out for the case that n_subdomains < n_processors
     362     2781462 :         if (pid < static_cast<unsigned int>(_pmetis->nparts))
     363             :           {
     364     1792140 :             tgt_subdomain_size = n_active_elem/std::min
     365     1792140 :               (cast_int<Parmetis::idx_t>(mesh.n_processors()), _pmetis->nparts);
     366             : 
     367     1791396 :             if (pid < n_active_elem%_pmetis->nparts)
     368      619849 :               tgt_subdomain_size++;
     369             :           }
     370     2781462 :         if (pid == 0)
     371      258990 :           subdomain_bounds[0] = tgt_subdomain_size;
     372             :         else
     373     2523216 :           subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
     374             :       }
     375             : 
     376         372 :     libmesh_assert_equal_to (subdomain_bounds.back(), n_active_elem);
     377             : 
     378             :     Threads::parallel_for
     379      258990 :       (mesh.active_local_element_stored_range(),
     380      775090 :        [first_local_elem, n_active_elem, n_active_local_elem, this,
     381      167370 :         &global_index_map, &subdomain_bounds](const ConstElemRange & range)
     382             :        {
     383     2940350 :          for (const Elem * elem : range)
     384             :            {
     385             :              const dof_id_type global_index_by_pid =
     386     2680304 :                libmesh_map_find(_global_index_by_pid_map, elem->id());
     387       11779 :              libmesh_assert_less (global_index_by_pid, n_active_elem);
     388             : 
     389     2680304 :              const dof_id_type local_index =
     390       23558 :                global_index_by_pid - first_local_elem;
     391             : 
     392       11779 :              libmesh_assert_less (local_index, n_active_local_elem);
     393       11779 :              libmesh_assert_less (local_index, _pmetis->vwgt.size());
     394             : 
     395       11779 :              libmesh_ignore(n_active_elem); // unused outside dbg/devel
     396       11779 :              libmesh_ignore(n_active_local_elem); // unused outside dbg/devel
     397             : 
     398             :              // Spline nodes are a special case (storing all the
     399             :              // unconstrained DoFs in an IGA simulation), but in general
     400             :              // we'll try to distribute work by expecting it to be roughly
     401             :              // proportional to DoFs, which are roughly proportional to
     402             :              // nodes.
     403     2680304 :              if (elem->type() == NODEELEM &&
     404           0 :                  elem->mapping_type() == RATIONAL_BERNSTEIN_MAP)
     405       12789 :                _pmetis->vwgt[local_index] = 50;
     406             :              else
     407     2667515 :                _pmetis->vwgt[local_index] = elem->n_nodes();
     408             : 
     409             :              // find the subdomain this element belongs in
     410             :              const dof_id_type global_index =
     411     2680304 :                libmesh_map_find(global_index_map, elem->id());
     412             : 
     413       11779 :              libmesh_assert_less (global_index, subdomain_bounds.back());
     414             : 
     415             :              const unsigned int subdomain_id =
     416             :                cast_int<unsigned int>
     417     2668519 :                (std::distance(subdomain_bounds.begin(),
     418             :                               std::lower_bound(subdomain_bounds.begin(),
     419             :                                                subdomain_bounds.end(),
     420       11779 :                                                global_index)));
     421       11779 :              libmesh_assert_less (subdomain_id, _pmetis->nparts);
     422       11779 :              libmesh_assert_less (local_index, _pmetis->part.size());
     423             : 
     424     2692089 :              _pmetis->part[local_index] = subdomain_id;
     425             :            }
     426      260046 :        });
     427             :   }
     428      258990 : }
     429             : 
     430             : 
     431             : 
     432      258990 : void ParmetisPartitioner::build_graph (const MeshBase & mesh)
     433             : {
     434         744 :   LOG_SCOPE("build_graph()", "ParmetisPartitioner");
     435             : 
     436             :   // build the graph in distributed CSR format.  Note that
     437             :   // the edges in the graph will correspond to
     438             :   // face neighbors
     439         372 :   const dof_id_type n_active_local_elem  = mesh.n_active_local_elem();
     440             : 
     441      258990 :   Partitioner::build_graph(mesh);
     442             : 
     443         372 :   dof_id_type graph_size=0;
     444             : 
     445     2939294 :   for (auto & row: _dual_graph)
     446     2692089 :    graph_size += cast_int<dof_id_type>(row.size());
     447             : 
     448             :   // Reserve space in the adjacency array
     449      258990 :   _pmetis->xadj.clear();
     450      258990 :   _pmetis->xadj.reserve (n_active_local_elem + 1);
     451      258990 :   _pmetis->adjncy.clear();
     452      258990 :   _pmetis->adjncy.reserve (graph_size);
     453             : 
     454     2939294 :   for (auto & graph_row : _dual_graph)
     455             :     {
     456     2692089 :       _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
     457     2680298 :       _pmetis->adjncy.insert(_pmetis->adjncy.end(),
     458             :                              graph_row.begin(),
     459       47128 :                              graph_row.end());
     460             :     }
     461             : 
     462             :   // The end of the adjacency array for the last elem
     463      259362 :   _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
     464             : 
     465         372 :   libmesh_assert_equal_to (_pmetis->xadj.size(), n_active_local_elem+1);
     466         372 :   libmesh_assert_equal_to (_pmetis->adjncy.size(), graph_size);
     467      258990 : }
     468             : 
     469             : #endif // #ifdef LIBMESH_HAVE_PARMETIS
     470             : 
     471             : } // namespace libMesh

Generated by: LCOV version 1.14