LCOV - code coverage report
Current view: top level - src/partitioning - parmetis_partitioner.C (source / functions) Hit Total Coverage
Test: libMesh/libmesh: #4232 (290bfc) with base 82cc40 Lines: 151 155 97.4 %
Date: 2025-08-27 15:46:53 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // The libMesh Finite Element Library.
       2             : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
       3             : 
       4             : // This library is free software; you can redistribute it and/or
       5             : // modify it under the terms of the GNU Lesser General Public
       6             : // License as published by the Free Software Foundation; either
       7             : // version 2.1 of the License, or (at your option) any later version.
       8             : 
       9             : // This library is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12             : // Lesser General Public License for more details.
      13             : 
      14             : // You should have received a copy of the GNU Lesser General Public
      15             : // License along with this library; if not, write to the Free Software
      16             : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      17             : 
      18             : 
      19             : 
      20             : // Local includes
      21             : #include "libmesh/parmetis_partitioner.h"
      22             : 
      23             : // libMesh includes
      24             : #include "libmesh/libmesh_config.h"
      25             : #include "libmesh/elem.h"
      26             : #include "libmesh/enum_partitioner_type.h"
      27             : #include "libmesh/libmesh_logging.h"
      28             : #include "libmesh/mesh_base.h"
      29             : #include "libmesh/mesh_serializer.h"
      30             : #include "libmesh/mesh_tools.h"
      31             : #include "libmesh/mesh_communication.h"
      32             : #include "libmesh/metis_partitioner.h"
      33             : #include "libmesh/parallel_only.h"
      34             : #include "libmesh/parmetis_helper.h"
      35             : 
      36             : // TIMPI includes
      37             : #include "timpi/communicator.h"    // also includes mpi.h
      38             : #include "timpi/parallel_implementation.h"    // for min()
      39             : 
      40             : // Include the ParMETIS header file.
      41             : #ifdef LIBMESH_HAVE_PARMETIS
      42             : 
      43             : // Before we include a header wrapped in a namespace, we'd better make
      44             : // sure none of its dependencies end up in that namespace
      45             : #include <mpi.h>
      46             : 
      47             : namespace Parmetis {
      48             : extern "C" {
      49             : #     include "libmesh/ignore_warnings.h"
      50             : #     include "parmetis.h"
      51             : #     include "libmesh/restore_warnings.h"
      52             : }
      53             : }
      54             : 
      55             : #endif
      56             : 
      57             : 
      58             : // C++ includes
      59             : #include <unordered_map>
      60             : 
      61             : 
      62             : namespace libMesh
      63             : {
      64             : 
      65             : // Minimum elements on each processor required for us to choose
      66             : // Parmetis over Metis.
      67             : #ifdef LIBMESH_HAVE_PARMETIS
      68             : const unsigned int MIN_ELEM_PER_PROC = 4;
      69             : #endif
      70             : 
      71             : // ------------------------------------------------------------
      72             : // ParmetisPartitioner implementation
      73      254277 : ParmetisPartitioner::ParmetisPartitioner()
      74             : #ifdef LIBMESH_HAVE_PARMETIS
      75      254531 :   :  _pmetis(std::make_unique<ParmetisHelper>())
      76             : #endif
      77      254277 : {}
      78             : 
      79             : 
      80             : 
      81       16674 : ParmetisPartitioner::ParmetisPartitioner (const ParmetisPartitioner & other)
      82             :   : Partitioner(other)
      83             : #ifdef LIBMESH_HAVE_PARMETIS
      84       16674 :   , _pmetis(std::make_unique<ParmetisHelper>(*(other._pmetis)))
      85             : #endif
      86             : {
      87       16674 : }
      88             : 
      89             : 
      90             : 
      91      540104 : ParmetisPartitioner::~ParmetisPartitioner() = default;
      92             : 
      93             : 
      94             : 
      95         284 : PartitionerType ParmetisPartitioner::type() const
      96             : {
      97         284 :   return PARMETIS_PARTITIONER;
      98             : }
      99             : 
     100             : 
     101             : 
     102      230450 : void ParmetisPartitioner::_do_partition (MeshBase & mesh,
     103             :                                          const unsigned int n_sbdmns)
     104             : {
     105      230450 :   this->_do_repartition (mesh, n_sbdmns);
     106      230450 : }
     107             : 
     108             : 
     109             : 
     110      230450 : void ParmetisPartitioner::_do_repartition (MeshBase & mesh,
     111             :                                            const unsigned int n_sbdmns)
     112             : {
     113             :   // This function must be run on all processors at once
     114         348 :   libmesh_parallel_only(mesh.comm());
     115             : 
     116             :   // Check for easy returns
     117      230450 :   if (!mesh.n_elem())
     118      187634 :     return;
     119             : 
     120      230308 :   if (n_sbdmns == 1)
     121             :     {
     122           0 :       this->single_partition(mesh);
     123           0 :       return;
     124             :     }
     125             : 
     126         344 :   libmesh_assert_greater (n_sbdmns, 0);
     127             : 
     128             :   // What to do if the Parmetis library IS NOT present
     129             : #ifndef LIBMESH_HAVE_PARMETIS
     130             : 
     131             :   libmesh_do_once(
     132             :   libMesh::out << "ERROR: The library has been built without" << std::endl
     133             :                << "Parmetis support.  Using a Metis"          << std::endl
     134             :                << "partitioner instead!"                      << std::endl;);
     135             : 
     136             :   MetisPartitioner mp;
     137             : 
     138             :   // Metis and other fallbacks only work in serial, and need to get
     139             :   // handed element ranges from an already-serialized mesh.
     140             :   mesh.allgather();
     141             : 
     142             :   // Don't just call partition() here; that would end up calling
     143             :   // post-element-partitioning work redundantly (and at the moment
     144             :   // incorrectly)
     145             :   mp.partition_range (mesh, mesh.active_elements_begin(),
     146             :                       mesh.active_elements_end(), n_sbdmns);
     147             : 
     148             :   // What to do if the Parmetis library IS present
     149             : #else
     150             : 
     151             :   // Revert to METIS on one processor.
     152      230652 :   if (mesh.n_processors() == 1)
     153             :     {
     154             :       // Make sure the mesh knows it's serial
     155           4 :       mesh.allgather();
     156             : 
     157           0 :       MetisPartitioner mp;
     158             :       // Don't just call partition() here; that would end up calling
     159             :       // post-element-partitioning work redundantly (and at the moment
     160             :       // incorrectly)
     161           8 :       mp.partition_range (mesh, mesh.active_elements_begin(),
     162           8 :                           mesh.active_elements_end(), n_sbdmns);
     163           0 :       return;
     164             :     }
     165             : 
     166         344 :   LOG_SCOPE("repartition()", "ParmetisPartitioner");
     167             : 
     168             :   // Initialize the data structures required by ParMETIS
     169      230304 :   this->initialize (mesh, n_sbdmns);
     170             : 
     171             :   // build the graph corresponding to the mesh
     172      230304 :   this->build_graph (mesh);
     173             : 
     174             :   // Make sure all processors have enough active local elements and
     175             :   // enough connectivity among them.
     176             :   // Parmetis tends to die when it's given only a couple elements
     177             :   // per partition or when it can't reach elements from each other.
     178             :   {
     179         344 :     bool ready_for_parmetis = true;
     180     2703332 :     for (const auto & nelem : _n_active_elem_on_proc)
     181     2473028 :       if (nelem < MIN_ELEM_PER_PROC)
     182         138 :         ready_for_parmetis = false;
     183             : 
     184      230304 :     std::size_t my_adjacency = _pmetis->adjncy.size();
     185      230304 :     mesh.comm().min(my_adjacency);
     186      230304 :     if (!my_adjacency)
     187          12 :       ready_for_parmetis = false;
     188             : 
     189             :     // Parmetis will not work unless each processor has some
     190             :     // elements. Specifically, it will abort when passed a nullptr
     191             :     // partition or adjacency array on *any* of the processors.
     192       96362 :     if (!ready_for_parmetis)
     193             :       {
     194             :         // FIXME: revert to METIS, although this requires a serial mesh
     195      187644 :         MeshSerializer serialize(mesh);
     196          78 :         MetisPartitioner mp;
     197      187488 :         mp.partition (mesh, n_sbdmns);
     198          78 :         return;
     199      187332 :       }
     200             :   }
     201             : 
     202             : 
     203             :   // Partition the graph
     204       43614 :   std::vector<Parmetis::idx_t> vsize(_pmetis->vwgt.size(), 1);
     205       42816 :   Parmetis::real_t itr = 1000000.0;
     206       42816 :   MPI_Comm mpi_comm = mesh.comm().get();
     207             : 
     208             :   // Call the ParMETIS adaptive repartitioning method.  This respects the
     209             :   // original partitioning when computing the new partitioning so as to
     210             :   // minimize the required data redistribution.
     211      385876 :   Parmetis::ParMETIS_V3_AdaptiveRepart(_pmetis->vtxdist.empty() ? nullptr : _pmetis->vtxdist.data(),
     212         532 :                                        _pmetis->xadj.empty()    ? nullptr : _pmetis->xadj.data(),
     213         532 :                                        _pmetis->adjncy.empty()  ? nullptr : _pmetis->adjncy.data(),
     214         532 :                                        _pmetis->vwgt.empty()    ? nullptr : _pmetis->vwgt.data(),
     215         532 :                                        vsize.empty()            ? nullptr : vsize.data(),
     216             :                                        nullptr,
     217         266 :                                        &_pmetis->wgtflag,
     218         266 :                                        &_pmetis->numflag,
     219         266 :                                        &_pmetis->ncon,
     220         266 :                                        &_pmetis->nparts,
     221         532 :                                        _pmetis->tpwgts.empty()  ? nullptr : _pmetis->tpwgts.data(),
     222         532 :                                        _pmetis->ubvec.empty()   ? nullptr : _pmetis->ubvec.data(),
     223             :                                        &itr,
     224         266 :                                        _pmetis->options.data(),
     225         266 :                                        &_pmetis->edgecut,
     226         532 :                                        _pmetis->part.empty()    ? nullptr : reinterpret_cast<Parmetis::idx_t *>(_pmetis->part.data()),
     227             :                                        &mpi_comm);
     228             : 
     229             :   // Assign the returned processor ids
     230       43082 :   this->assign_partitioning (mesh, _pmetis->part);
     231             : 
     232             : #endif // #ifndef LIBMESH_HAVE_PARMETIS ... else ...
     233             : 
     234             : }
     235             : 
     236             : 
     237             : 
     238             : // Only need to compile these methods if ParMETIS is present
     239             : #ifdef LIBMESH_HAVE_PARMETIS
     240             : 
     241      230304 : void ParmetisPartitioner::initialize (const MeshBase & mesh,
     242             :                                       const unsigned int n_sbdmns)
     243             : {
     244         688 :   LOG_SCOPE("initialize()", "ParmetisPartitioner");
     245             : 
     246         344 :   const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
     247             :   // Set parameters.
     248      230304 :   _pmetis->wgtflag = 2;                                      // weights on vertices only
     249      230304 :   _pmetis->ncon    = 1;                                      // one weight per vertex
     250      230304 :   _pmetis->numflag = 0;                                      // C-style 0-based numbering
     251      230304 :   _pmetis->nparts  = static_cast<Parmetis::idx_t>(n_sbdmns); // number of subdomains to create
     252      230304 :   _pmetis->edgecut = 0;                                      // the numbers of edges cut by the
     253             :                                                              // partition
     254             : 
     255             :   // Initialize data structures for ParMETIS
     256      230648 :   _pmetis->vtxdist.assign (mesh.n_processors()+1, 0);
     257      230304 :   _pmetis->tpwgts.assign  (_pmetis->nparts, 1./_pmetis->nparts);
     258      230304 :   _pmetis->ubvec.assign   (_pmetis->ncon, 1.05);
     259      230304 :   _pmetis->part.assign    (n_active_local_elem, 0);
     260      230304 :   _pmetis->options.resize (5);
     261      230304 :   _pmetis->vwgt.resize    (n_active_local_elem);
     262             : 
     263             :   // Set the options
     264      230304 :   _pmetis->options[0] = 1;  // don't use default options
     265      230304 :   _pmetis->options[1] = 0;  // default (level of timing)
     266      230304 :   _pmetis->options[2] = 15; // random seed (default)
     267      230304 :   _pmetis->options[3] = 2;  // processor distribution and subdomain distribution are decoupled
     268             : 
     269             :   // ParMetis expects the elements to be numbered in contiguous blocks
     270             :   // by processor, i.e. [0, ne0), [ne0, ne0+ne1), ...
     271             :   // Since we only partition active elements we should have no expectation
     272             :   // that we currently have such a distribution.  So we need to create it.
     273             :   // Also, at the same time we are going to map all the active elements into a globally
     274             :   // unique range [0,n_active_elem) which is *independent* of the current partitioning.
     275             :   // This can be fed to ParMetis as the initial partitioning of the subdomains (decoupled
     276             :   // from the partitioning of the objects themselves).  This allows us to get the same
     277             :   // resultant partitioning independent of the input partitioning.
     278             :   libMesh::BoundingBox bbox =
     279      230304 :     MeshTools::create_bounding_box(mesh);
     280             : 
     281      230304 :   _find_global_index_by_pid_map(mesh);
     282             : 
     283             : 
     284             :   // count the total number of active elements in the mesh.  Note we cannot
     285             :   // use mesh.n_active_elem() in general since this only returns the number
     286             :   // of active elements which are stored on the calling processor.
     287             :   // We should not use n_active_elem for any allocation because that will
     288             :   // be inherently unscalable, but it can be useful for libmesh_assertions.
     289         344 :   dof_id_type n_active_elem=0;
     290             : 
     291             :   // Set up the vtxdist array.  This will be the same on each processor.
     292             :   // ***** Consult the Parmetis documentation. *****
     293         344 :   libmesh_assert_equal_to (_pmetis->vtxdist.size(),
     294             :                            cast_int<std::size_t>(mesh.n_processors()+1));
     295         344 :   libmesh_assert_equal_to (_pmetis->vtxdist[0], 0);
     296             : 
     297     2703332 :   for (auto pid : make_range(mesh.n_processors()))
     298             :     {
     299     2474404 :       _pmetis->vtxdist[pid+1] = _pmetis->vtxdist[pid] + _n_active_elem_on_proc[pid];
     300     2473028 :       n_active_elem += _n_active_elem_on_proc[pid];
     301             :     }
     302         344 :   libmesh_assert_equal_to (_pmetis->vtxdist.back(), static_cast<Parmetis::idx_t>(n_active_elem));
     303             : 
     304             : 
     305             :   // Maps active element ids into a contiguous range independent of partitioning.
     306             :   // (only needs local scope)
     307         688 :   std::unordered_map<dof_id_type, dof_id_type> global_index_map;
     308             : 
     309             :   {
     310         688 :     std::vector<dof_id_type> global_index;
     311             : 
     312             :     // create the unique mapping for all active elements independent of partitioning
     313             :     {
     314      230648 :       MeshBase::const_element_iterator       it  = mesh.active_elements_begin();
     315      230648 :       const MeshBase::const_element_iterator end = mesh.active_elements_end();
     316             : 
     317             :       // Calling this on all processors a unique range in [0,n_active_elem) is constructed.
     318             :       // Only the indices for the elements we pass in are returned in the array.
     319      230304 :       MeshCommunication().find_global_indices (mesh.comm(),
     320             :                                                bbox, it, end,
     321             :                                                global_index);
     322             : 
     323    15583203 :       for (dof_id_type cnt=0; it != end; ++it)
     324             :         {
     325    15123283 :           const Elem * elem = *it;
     326             :           // vectormap::count forces a sort, which is too expensive
     327             :           // in a loop
     328             :           // libmesh_assert (!global_index_map.count(elem->id()));
     329       17840 :           libmesh_assert_less (cnt, global_index.size());
     330       17840 :           libmesh_assert_less (global_index[cnt], n_active_elem);
     331             : 
     332    15141135 :           global_index_map.emplace(elem->id(), global_index[cnt++]);
     333             :         }
     334             :     }
     335             :     // really, shouldn't be close!
     336         344 :     libmesh_assert_less_equal (global_index_map.size(), n_active_elem);
     337         344 :     libmesh_assert_less_equal (_global_index_by_pid_map.size(), n_active_elem);
     338             : 
     339             :     // At this point the two maps should be the same size.  If they are not
     340             :     // then the number of active elements is not the same as the sum over all
     341             :     // processors of the number of active elements per processor, which means
     342             :     // there must be some unpartitioned objects out there.
     343      230304 :     libmesh_error_msg_if(global_index_map.size() != _global_index_by_pid_map.size(),
     344             :                          "ERROR:  ParmetisPartitioner cannot handle unpartitioned objects!");
     345             :   }
     346             : 
     347             :   // Finally, we need to initialize the vertex (partition) weights and the initial subdomain
     348             :   // mapping.  The subdomain mapping will be independent of the processor mapping, and is
     349             :   // defined by a simple mapping of the global indices we just found.
     350             :   {
     351      230992 :     std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
     352             : 
     353      230992 :     const dof_id_type first_local_elem = _pmetis->vtxdist[mesh.processor_id()];
     354             : 
     355     2703332 :     for (auto pid : make_range(mesh.n_processors()))
     356             :       {
     357         688 :         dof_id_type tgt_subdomain_size = 0;
     358             : 
     359             :         // watch out for the case that n_subdomains < n_processors
     360     2473028 :         if (pid < static_cast<unsigned int>(_pmetis->nparts))
     361             :           {
     362     1567389 :             tgt_subdomain_size = n_active_elem/std::min
     363     1567389 :               (cast_int<Parmetis::idx_t>(mesh.n_processors()), _pmetis->nparts);
     364             : 
     365     1566701 :             if (pid < n_active_elem%_pmetis->nparts)
     366      548290 :               tgt_subdomain_size++;
     367             :           }
     368     2473028 :         if (pid == 0)
     369      230304 :           subdomain_bounds[0] = tgt_subdomain_size;
     370             :         else
     371     2243412 :           subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
     372             :       }
     373             : 
     374         344 :     libmesh_assert_equal_to (subdomain_bounds.back(), n_active_elem);
     375             : 
     376     5428224 :     for (const auto & elem : mesh.active_local_element_ptr_range())
     377             :       {
     378       10503 :         libmesh_assert (_global_index_by_pid_map.count(elem->id()));
     379             :         const dof_id_type global_index_by_pid =
     380     2494486 :           _global_index_by_pid_map[elem->id()];
     381       10503 :         libmesh_assert_less (global_index_by_pid, n_active_elem);
     382             : 
     383     2494486 :         const dof_id_type local_index =
     384       10503 :           global_index_by_pid - first_local_elem;
     385             : 
     386       10503 :         libmesh_assert_less (local_index, n_active_local_elem);
     387       10503 :         libmesh_assert_less (local_index, _pmetis->vwgt.size());
     388             : 
     389             :         // Spline nodes are a special case (storing all the
     390             :         // unconstrained DoFs in an IGA simulation), but in general
     391             :         // we'll try to distribute work by expecting it to be roughly
     392             :         // proportional to DoFs, which are roughly proportional to
     393             :         // nodes.
     394     2494486 :         if (elem->type() == NODEELEM &&
     395       13881 :             elem->mapping_type() == RATIONAL_BERNSTEIN_MAP)
     396       12789 :           _pmetis->vwgt[local_index] = 50;
     397             :         else
     398     2481697 :           _pmetis->vwgt[local_index] = elem->n_nodes();
     399             : 
     400             :         // find the subdomain this element belongs in
     401       10503 :         libmesh_assert (global_index_map.count(elem->id()));
     402             :         const dof_id_type global_index =
     403     2494486 :           global_index_map[elem->id()];
     404             : 
     405       10503 :         libmesh_assert_less (global_index, subdomain_bounds.back());
     406             : 
     407             :         const unsigned int subdomain_id =
     408             :           cast_int<unsigned int>
     409     2483977 :           (std::distance(subdomain_bounds.begin(),
     410             :                          std::lower_bound(subdomain_bounds.begin(),
     411             :                                           subdomain_bounds.end(),
     412       10503 :                                           global_index)));
     413       10503 :         libmesh_assert_less (subdomain_id, _pmetis->nparts);
     414       10503 :         libmesh_assert_less (local_index, _pmetis->part.size());
     415             : 
     416     2504995 :         _pmetis->part[local_index] = subdomain_id;
     417      229616 :       }
     418             :   }
     419      230304 : }
     420             : 
     421             : 
     422             : 
     423      230304 : void ParmetisPartitioner::build_graph (const MeshBase & mesh)
     424             : {
     425         688 :   LOG_SCOPE("build_graph()", "ParmetisPartitioner");
     426             : 
     427             :   // build the graph in distributed CSR format.  Note that
     428             :   // the edges in the graph will correspond to
     429             :   // face neighbors
     430         344 :   const dof_id_type n_active_local_elem  = mesh.n_active_local_elem();
     431             : 
     432      230304 :   Partitioner::build_graph(mesh);
     433             : 
     434         344 :   dof_id_type graph_size=0;
     435             : 
     436     2724790 :   for (auto & row: _dual_graph)
     437     2504995 :    graph_size += cast_int<dof_id_type>(row.size());
     438             : 
     439             :   // Reserve space in the adjacency array
     440      230304 :   _pmetis->xadj.clear();
     441      230304 :   _pmetis->xadj.reserve (n_active_local_elem + 1);
     442      230304 :   _pmetis->adjncy.clear();
     443      230304 :   _pmetis->adjncy.reserve (graph_size);
     444             : 
     445     2724790 :   for (auto & graph_row : _dual_graph)
     446             :     {
     447     2504995 :       _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
     448     2494480 :       _pmetis->adjncy.insert(_pmetis->adjncy.end(),
     449             :                              graph_row.begin(),
     450       42024 :                              graph_row.end());
     451             :     }
     452             : 
     453             :   // The end of the adjacency array for the last elem
     454      230648 :   _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
     455             : 
     456         344 :   libmesh_assert_equal_to (_pmetis->xadj.size(), n_active_local_elem+1);
     457         344 :   libmesh_assert_equal_to (_pmetis->adjncy.size(), graph_size);
     458      230304 : }
     459             : 
     460             : #endif // #ifdef LIBMESH_HAVE_PARMETIS
     461             : 
     462             : } // namespace libMesh

Generated by: LCOV version 1.14