libMesh
parmetis_partitioner.C
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 
20 // Local includes
21 #include "libmesh/parmetis_partitioner.h"
22 
23 // libMesh includes
24 #include "libmesh/libmesh_config.h"
25 #include "libmesh/elem.h"
26 #include "libmesh/elem_range.h"
27 #include "libmesh/enum_partitioner_type.h"
28 #include "libmesh/libmesh_logging.h"
29 #include "libmesh/mesh_base.h"
30 #include "libmesh/mesh_serializer.h"
31 #include "libmesh/mesh_tools.h"
32 #include "libmesh/mesh_communication.h"
33 #include "libmesh/metis_partitioner.h"
34 #include "libmesh/parallel_only.h"
35 #include "libmesh/parmetis_helper.h"
36 #include "libmesh/utility.h"
37 
38 // TIMPI includes
39 #include "timpi/communicator.h" // also includes mpi.h
40 #include "timpi/parallel_implementation.h" // for min()
41 
42 // Include the ParMETIS header file.
43 #ifdef LIBMESH_HAVE_PARMETIS
44 
45 // Before we include a header wrapped in a namespace, we'd better make
46 // sure none of its dependencies end up in that namespace
47 #include <mpi.h>
48 
49 namespace Parmetis {
50 extern "C" {
51 # include "libmesh/ignore_warnings.h"
52 # include "parmetis.h"
53 # include "libmesh/restore_warnings.h"
54 }
55 }
56 
57 #endif
58 
59 
60 // C++ includes
61 #include <unordered_map>
62 
63 
64 namespace libMesh
65 {
66 
67 // Minimum elements on each processor required for us to choose
68 // Parmetis over Metis.
69 #ifdef LIBMESH_HAVE_PARMETIS
70 const unsigned int MIN_ELEM_PER_PROC = 4;
71 #endif
72 
73 // ------------------------------------------------------------
74 // ParmetisPartitioner implementation
76 #ifdef LIBMESH_HAVE_PARMETIS
77  : _pmetis(std::make_unique<ParmetisHelper>())
78 #endif
79 {}
80 
81 
82 
83 ParmetisPartitioner::ParmetisPartitioner (const ParmetisPartitioner & other)
84  : Partitioner(other)
85 #ifdef LIBMESH_HAVE_PARMETIS
86  , _pmetis(std::make_unique<ParmetisHelper>(*(other._pmetis)))
87 #endif
88 {
89 }
90 
91 
92 
93 ParmetisPartitioner::~ParmetisPartitioner() = default;
94 
95 
96 
97 PartitionerType ParmetisPartitioner::type() const
98 {
99  return PARMETIS_PARTITIONER;
100 }
101 
102 
103 
104 void ParmetisPartitioner::_do_partition (MeshBase & mesh,
105  const unsigned int n_sbdmns)
106 {
107  this->_do_repartition (mesh, n_sbdmns);
108 }
109 
110 
111 
112 void ParmetisPartitioner::_do_repartition (MeshBase & mesh,
113  const unsigned int n_sbdmns)
114 {
115  // This function must be run on all processors at once
116  libmesh_parallel_only(mesh.comm());
117 
118  // Check for easy returns
119  if (!mesh.n_elem())
120  return;
121 
122  if (n_sbdmns == 1)
123  {
124  this->single_partition(mesh);
125  return;
126  }
127 
128  libmesh_assert_greater (n_sbdmns, 0);
129 
130  // What to do if the Parmetis library IS NOT present
131 #ifndef LIBMESH_HAVE_PARMETIS
132 
133  libmesh_do_once(
134  libMesh::out << "ERROR: The library has been built without" << std::endl
135  << "Parmetis support. Using a Metis" << std::endl
136  << "partitioner instead!" << std::endl;);
137 
138  MetisPartitioner mp;
139 
140  // Metis and other fallbacks only work in serial, and need to get
141  // handed element ranges from an already-serialized mesh.
142  mesh.allgather();
143 
144  // Don't just call partition() here; that would end up calling
145  // post-element-partitioning work redundantly (and at the moment
146  // incorrectly)
147  mp.partition_range (mesh, mesh.active_elements_begin(),
148  mesh.active_elements_end(), n_sbdmns);
149 
150  // What to do if the Parmetis library IS present
151 #else
152 
153  // Revert to METIS on one processor.
154  if (mesh.n_processors() == 1)
155  {
156  // Make sure the mesh knows it's serial
157  mesh.allgather();
158 
159  MetisPartitioner mp;
160  // Don't just call partition() here; that would end up calling
161  // post-element-partitioning work redundantly (and at the moment
162  // incorrectly)
163  mp.partition_range (mesh, mesh.active_elements_begin(),
164  mesh.active_elements_end(), n_sbdmns);
165  return;
166  }
167 
168  LOG_SCOPE("repartition()", "ParmetisPartitioner");
169 
170  // Initialize the data structures required by ParMETIS
171  this->initialize (mesh, n_sbdmns);
172 
173  // build the graph corresponding to the mesh
174  this->build_graph (mesh);
175 
176  // Make sure all processors have enough active local elements and
177  // enough connectivity among them.
178  // Parmetis tends to die when it's given only a couple elements
179  // per partition or when it can't reach elements from each other.
180  {
181  bool ready_for_parmetis = true;
182  for (const auto & nelem : _n_active_elem_on_proc)
183  if (nelem < MIN_ELEM_PER_PROC)
184  ready_for_parmetis = false;
185 
186  std::size_t my_adjacency = _pmetis->adjncy.size();
187  mesh.comm().min(my_adjacency);
188  if (!my_adjacency)
189  ready_for_parmetis = false;
190 
191  // Parmetis will not work unless each processor has some
192  // elements. Specifically, it will abort when passed a nullptr
193  // partition or adjacency array on *any* of the processors.
194  if (!ready_for_parmetis)
195  {
196  // FIXME: revert to METIS, although this requires a serial mesh
197  MeshSerializer serialize(mesh);
198  MetisPartitioner mp;
199  mp.partition (mesh, n_sbdmns);
200  return;
201  }
202  }
203 
204 
205  // Partition the graph
206  std::vector<Parmetis::idx_t> vsize(_pmetis->vwgt.size(), 1);
207  Parmetis::real_t itr = 1000000.0;
208  MPI_Comm mpi_comm = mesh.comm().get();
209 
210  // Call the ParMETIS adaptive repartitioning method. This respects the
211  // original partitioning when computing the new partitioning so as to
212  // minimize the required data redistribution.
213  Parmetis::ParMETIS_V3_AdaptiveRepart(_pmetis->vtxdist.empty() ? nullptr : _pmetis->vtxdist.data(),
214  _pmetis->xadj.empty() ? nullptr : _pmetis->xadj.data(),
215  _pmetis->adjncy.empty() ? nullptr : _pmetis->adjncy.data(),
216  _pmetis->vwgt.empty() ? nullptr : _pmetis->vwgt.data(),
217  vsize.empty() ? nullptr : vsize.data(),
218  nullptr,
219  &_pmetis->wgtflag,
220  &_pmetis->numflag,
221  &_pmetis->ncon,
222  &_pmetis->nparts,
223  _pmetis->tpwgts.empty() ? nullptr : _pmetis->tpwgts.data(),
224  _pmetis->ubvec.empty() ? nullptr : _pmetis->ubvec.data(),
225  &itr,
226  _pmetis->options.data(),
227  &_pmetis->edgecut,
228  _pmetis->part.empty() ? nullptr : reinterpret_cast<Parmetis::idx_t *>(_pmetis->part.data()),
229  &mpi_comm);
230 
231  // Assign the returned processor ids
232  this->assign_partitioning (mesh, _pmetis->part);
233 
234 #endif // #ifndef LIBMESH_HAVE_PARMETIS ... else ...
235 
236 }
237 
238 
239 
240 // Only need to compile these methods if ParMETIS is present
241 #ifdef LIBMESH_HAVE_PARMETIS
242 
244  const unsigned int n_sbdmns)
245 {
246  LOG_SCOPE("initialize()", "ParmetisPartitioner");
247 
248  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
249  // Set parameters.
250  _pmetis->wgtflag = 2; // weights on vertices only
251  _pmetis->ncon = 1; // one weight per vertex
252  _pmetis->numflag = 0; // C-style 0-based numbering
253  _pmetis->nparts = static_cast<Parmetis::idx_t>(n_sbdmns); // number of subdomains to create
254  _pmetis->edgecut = 0; // the numbers of edges cut by the
255  // partition
256 
257  // Initialize data structures for ParMETIS
258  _pmetis->vtxdist.assign (mesh.n_processors()+1, 0);
259  _pmetis->tpwgts.assign (_pmetis->nparts, 1./_pmetis->nparts);
260  _pmetis->ubvec.assign (_pmetis->ncon, 1.05);
261  _pmetis->part.assign (n_active_local_elem, 0);
262  _pmetis->options.resize (5);
263  _pmetis->vwgt.resize (n_active_local_elem);
264 
265  // Set the options
266  _pmetis->options[0] = 1; // don't use default options
267  _pmetis->options[1] = 0; // default (level of timing)
268  _pmetis->options[2] = 15; // random seed (default)
269  _pmetis->options[3] = 2; // processor distribution and subdomain distribution are decoupled
270 
271  // ParMetis expects the elements to be numbered in contiguous blocks
272  // by processor, i.e. [0, ne0), [ne0, ne0+ne1), ...
273  // Since we only partition active elements we should have no expectation
274  // that we currently have such a distribution. So we need to create it.
275  // Also, at the same time we are going to map all the active elements into a globally
276  // unique range [0,n_active_elem) which is *independent* of the current partitioning.
277  // This can be fed to ParMetis as the initial partitioning of the subdomains (decoupled
278  // from the partitioning of the objects themselves). This allows us to get the same
279  // resultant partitioning independent of the input partitioning.
280  libMesh::BoundingBox bbox =
282 
283  _find_global_index_by_pid_map(mesh);
284 
285 
286  // count the total number of active elements in the mesh. Note we cannot
287  // use mesh.n_active_elem() in general since this only returns the number
288  // of active elements which are stored on the calling processor.
289  // We should not use n_active_elem for any allocation because that will
290  // be inherently unscalable, but it can be useful for libmesh_assertions.
291  dof_id_type n_active_elem=0;
292 
293  // Set up the vtxdist array. This will be the same on each processor.
294  // ***** Consult the Parmetis documentation. *****
295  libmesh_assert_equal_to (_pmetis->vtxdist.size(),
296  cast_int<std::size_t>(mesh.n_processors()+1));
297  libmesh_assert_equal_to (_pmetis->vtxdist[0], 0);
298 
299  for (auto pid : make_range(mesh.n_processors()))
300  {
301  _pmetis->vtxdist[pid+1] = _pmetis->vtxdist[pid] + _n_active_elem_on_proc[pid];
302  n_active_elem += _n_active_elem_on_proc[pid];
303  }
304  libmesh_assert_equal_to (_pmetis->vtxdist.back(), static_cast<Parmetis::idx_t>(n_active_elem));
305 
306 
307  // Maps active element ids into a contiguous range independent of partitioning.
308  // (only needs local scope)
309  std::unordered_map<dof_id_type, dof_id_type> global_index_map;
310 
311  {
312  std::vector<dof_id_type> global_index;
313 
314  // create the unique mapping for all active elements independent of partitioning
315  {
316  MeshBase::const_element_iterator it = mesh.active_elements_begin();
317  const MeshBase::const_element_iterator end = mesh.active_elements_end();
318 
319  // Calling this on all processors a unique range in [0,n_active_elem) is constructed.
320  // Only the indices for the elements we pass in are returned in the array.
322  bbox, it, end,
323  global_index);
324 
325  for (dof_id_type cnt=0; it != end; ++it)
326  {
327  const Elem * elem = *it;
328  // vectormap::count forces a sort, which is too expensive
329  // in a loop
330  // libmesh_assert (!global_index_map.count(elem->id()));
331  libmesh_assert_less (cnt, global_index.size());
332  libmesh_assert_less (global_index[cnt], n_active_elem);
333 
334  global_index_map.emplace(elem->id(), global_index[cnt++]);
335  }
336  }
337  // really, shouldn't be close!
338  libmesh_assert_less_equal (global_index_map.size(), n_active_elem);
339  libmesh_assert_less_equal (_global_index_by_pid_map.size(), n_active_elem);
340 
341  // At this point the two maps should be the same size. If they are not
342  // then the number of active elements is not the same as the sum over all
343  // processors of the number of active elements per processor, which means
344  // there must be some unpartitioned objects out there.
345  libmesh_error_msg_if(global_index_map.size() != _global_index_by_pid_map.size(),
346  "ERROR: ParmetisPartitioner cannot handle unpartitioned objects!");
347  }
348 
349  // Finally, we need to initialize the vertex (partition) weights and the initial subdomain
350  // mapping. The subdomain mapping will be independent of the processor mapping, and is
351  // defined by a simple mapping of the global indices we just found.
352  {
353  std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
354 
355  const dof_id_type first_local_elem = _pmetis->vtxdist[mesh.processor_id()];
356 
357  for (auto pid : make_range(mesh.n_processors()))
358  {
359  dof_id_type tgt_subdomain_size = 0;
360 
361  // watch out for the case that n_subdomains < n_processors
362  if (pid < static_cast<unsigned int>(_pmetis->nparts))
363  {
364  tgt_subdomain_size = n_active_elem/std::min
365  (cast_int<Parmetis::idx_t>(mesh.n_processors()), _pmetis->nparts);
366 
367  if (pid < n_active_elem%_pmetis->nparts)
368  tgt_subdomain_size++;
369  }
370  if (pid == 0)
371  subdomain_bounds[0] = tgt_subdomain_size;
372  else
373  subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
374  }
375 
376  libmesh_assert_equal_to (subdomain_bounds.back(), n_active_elem);
377 
379  (mesh.active_local_element_stored_range(),
380  [first_local_elem, n_active_elem, n_active_local_elem, this,
381  &global_index_map, &subdomain_bounds](const ConstElemRange & range)
382  {
383  for (const Elem * elem : range)
384  {
385  const dof_id_type global_index_by_pid =
386  libmesh_map_find(_global_index_by_pid_map, elem->id());
387  libmesh_assert_less (global_index_by_pid, n_active_elem);
388 
389  const dof_id_type local_index =
390  global_index_by_pid - first_local_elem;
391 
392  libmesh_assert_less (local_index, n_active_local_elem);
393  libmesh_assert_less (local_index, _pmetis->vwgt.size());
394 
395  libmesh_ignore(n_active_elem); // unused outside dbg/devel
396  libmesh_ignore(n_active_local_elem); // unused outside dbg/devel
397 
398  // Spline nodes are a special case (storing all the
399  // unconstrained DoFs in an IGA simulation), but in general
400  // we'll try to distribute work by expecting it to be roughly
401  // proportional to DoFs, which are roughly proportional to
402  // nodes.
403  if (elem->type() == NODEELEM &&
404  elem->mapping_type() == RATIONAL_BERNSTEIN_MAP)
405  _pmetis->vwgt[local_index] = 50;
406  else
407  _pmetis->vwgt[local_index] = elem->n_nodes();
408 
409  // find the subdomain this element belongs in
410  const dof_id_type global_index =
411  libmesh_map_find(global_index_map, elem->id());
412 
413  libmesh_assert_less (global_index, subdomain_bounds.back());
414 
415  const unsigned int subdomain_id =
416  cast_int<unsigned int>
417  (std::distance(subdomain_bounds.begin(),
418  std::lower_bound(subdomain_bounds.begin(),
419  subdomain_bounds.end(),
420  global_index)));
421  libmesh_assert_less (subdomain_id, _pmetis->nparts);
422  libmesh_assert_less (local_index, _pmetis->part.size());
423 
424  _pmetis->part[local_index] = subdomain_id;
425  }
426  });
427  }
428 }
429 
430 
431 
432 void ParmetisPartitioner::build_graph (const MeshBase & mesh)
433 {
434  LOG_SCOPE("build_graph()", "ParmetisPartitioner");
435 
436  // build the graph in distributed CSR format. Note that
437  // the edges in the graph will correspond to
438  // face neighbors
439  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
440 
441  Partitioner::build_graph(mesh);
442 
443  dof_id_type graph_size=0;
444 
445  for (auto & row: _dual_graph)
446  graph_size += cast_int<dof_id_type>(row.size());
447 
448  // Reserve space in the adjacency array
449  _pmetis->xadj.clear();
450  _pmetis->xadj.reserve (n_active_local_elem + 1);
451  _pmetis->adjncy.clear();
452  _pmetis->adjncy.reserve (graph_size);
453 
454  for (auto & graph_row : _dual_graph)
455  {
456  _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
457  _pmetis->adjncy.insert(_pmetis->adjncy.end(),
458  graph_row.begin(),
459  graph_row.end());
460  }
461 
462  // The end of the adjacency array for the last elem
463  _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
464 
465  libmesh_assert_equal_to (_pmetis->xadj.size(), n_active_local_elem+1);
466  libmesh_assert_equal_to (_pmetis->adjncy.size(), graph_size);
467 }
468 
469 #endif // #ifdef LIBMESH_HAVE_PARMETIS
470 
471 } // namespace libMesh
void parallel_for(const Range &range, const Body &body, unsigned int n_threads=libMesh::n_threads())
Execute the provided function object in parallel on the specified range.
Definition: threads_none.h:73
void find_global_indices(const Parallel::Communicator &communicator, const libMesh::BoundingBox &, const ForwardIterator &, const ForwardIterator &, std::vector< dof_id_type > &) const
This method determines a globally unique, partition-agnostic index for each object in the input range...
libMesh::BoundingBox create_bounding_box(const MeshBase &mesh)
Definition: mesh_tools.C:566
The definition of the const_element_iterator struct.
Definition: mesh_base.h:2521
virtual void partition_range(MeshBase &mesh, MeshBase::element_iterator it, MeshBase::element_iterator end, const unsigned int n) override
Called by the SubdomainPartitioner to partition elements in the range (it, end).
This is the base class from which all geometric element types are derived.
Definition: elem.h:94
MeshBase & mesh
ParmetisPartitioner()
Default and copy ctors.
The StoredRange class defines a contiguous, divisible set of objects.
Definition: stored_range.h:54
The MetisPartitioner uses the Metis graph partitioner to partition the elements.
The libMesh namespace provides an interface to certain functionality in the library.
void initialize(EquationSystems &es, const std::string &system_name)
Definition: main.C:52
Real distance(const Point &p)
This is the MeshBase class.
Definition: mesh_base.h:80
PartitionerType
Defines an enum for mesh partitioner types.
void libmesh_ignore(const Args &...)
This is the MeshCommunication class.
dof_id_type id() const
Definition: dof_object.h:819
Defines a Cartesian bounding box by the two corner extremum.
Definition: bounding_box.h:40
const unsigned int MIN_ELEM_PER_PROC
Temporarily serialize a DistributedMesh for non-distributed-mesh capable code paths.
virtual void partition(MeshBase &mesh, const unsigned int n)
Partitions the MeshBase into n parts by setting processor_id() on Nodes and Elems.
Definition: partitioner.C:196
OStreamProxy out
IntRange< T > make_range(T beg, T end)
The 2-parameter make_range() helper function returns an IntRange<T> when both input parameters are of...
Definition: int_range.h:176
uint8_t dof_id_type
Definition: id_types.h:67