parmetis_partitioner.C
// The libMesh Finite Element Library.
// Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA



// Local includes
#include "libmesh/parmetis_partitioner.h"

// libMesh includes
#include "libmesh/libmesh_config.h"
#include "libmesh/elem.h"
#include "libmesh/enum_partitioner_type.h"
#include "libmesh/libmesh_logging.h"
#include "libmesh/mesh_base.h"
#include "libmesh/mesh_serializer.h"
#include "libmesh/mesh_tools.h"
#include "libmesh/mesh_communication.h"
#include "libmesh/metis_partitioner.h"
#include "libmesh/parallel_only.h"
#include "libmesh/parmetis_helper.h"

// TIMPI includes
#include "timpi/communicator.h" // also includes mpi.h
#include "timpi/parallel_implementation.h" // for min()

// Include the ParMETIS header file.
#ifdef LIBMESH_HAVE_PARMETIS

// Before we include a header wrapped in a namespace, we'd better make
// sure none of its dependencies end up in that namespace
#include <mpi.h>

namespace Parmetis {
extern "C" {
# include "libmesh/ignore_warnings.h"
# include "parmetis.h"
# include "libmesh/restore_warnings.h"
}
}

#endif


// C++ includes
#include <unordered_map>


namespace libMesh
{

// Minimum elements on each processor required for us to choose
// Parmetis over Metis.
#ifdef LIBMESH_HAVE_PARMETIS
const unsigned int MIN_ELEM_PER_PROC = 4;
#endif

// ------------------------------------------------------------
// ParmetisPartitioner implementation
ParmetisPartitioner::ParmetisPartitioner ()
#ifdef LIBMESH_HAVE_PARMETIS
  : _pmetis(std::make_unique<ParmetisHelper>())
#endif
{}



ParmetisPartitioner::ParmetisPartitioner (const ParmetisPartitioner & other)
  : Partitioner(other)
#ifdef LIBMESH_HAVE_PARMETIS
  , _pmetis(std::make_unique<ParmetisHelper>(*(other._pmetis)))
#endif
{
}



ParmetisPartitioner::~ParmetisPartitioner() = default;



PartitionerType ParmetisPartitioner::type() const
{
  return PARMETIS_PARTITIONER;
}



void ParmetisPartitioner::_do_partition (MeshBase & mesh,
                                         const unsigned int n_sbdmns)
{
  this->_do_repartition (mesh, n_sbdmns);
}
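
// Illustrative usage (a sketch, assuming an already-distributed MeshBase
// named "mesh"): callers normally reach _do_partition()/_do_repartition()
// through the public Partitioner::partition() interface, e.g.
//
//   ParmetisPartitioner pp;
//   pp.partition (mesh, mesh.n_processors());
//
// which assigns processor_id() values to the mesh's elements and nodes.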



void ParmetisPartitioner::_do_repartition (MeshBase & mesh,
                                           const unsigned int n_sbdmns)
{
  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  // Check for easy returns
  if (!mesh.n_elem())
    return;

  if (n_sbdmns == 1)
    {
      this->single_partition(mesh);
      return;
    }

  libmesh_assert_greater (n_sbdmns, 0);

  // What to do if the Parmetis library IS NOT present
#ifndef LIBMESH_HAVE_PARMETIS

  libmesh_do_once(
    libMesh::out << "ERROR: The library has been built without" << std::endl
                 << "Parmetis support. Using a Metis" << std::endl
                 << "partitioner instead!" << std::endl;);

  MetisPartitioner mp;

  // Metis and other fallbacks only work in serial, and need to get
  // handed element ranges from an already-serialized mesh.
  mesh.allgather();

  // Don't just call partition() here; that would end up calling
  // post-element-partitioning work redundantly (and at the moment
  // incorrectly)
  mp.partition_range (mesh, mesh.active_elements_begin(),
                      mesh.active_elements_end(), n_sbdmns);

  // What to do if the Parmetis library IS present
#else

  // Revert to METIS on one processor.
  if (mesh.n_processors() == 1)
    {
      // Make sure the mesh knows it's serial
      mesh.allgather();

      MetisPartitioner mp;
      // Don't just call partition() here; that would end up calling
      // post-element-partitioning work redundantly (and at the moment
      // incorrectly)
      mp.partition_range (mesh, mesh.active_elements_begin(),
                          mesh.active_elements_end(), n_sbdmns);
      return;
    }

  LOG_SCOPE("repartition()", "ParmetisPartitioner");

  // Initialize the data structures required by ParMETIS
  this->initialize (mesh, n_sbdmns);

  // build the graph corresponding to the mesh
  this->build_graph (mesh);

  // Make sure all processors have enough active local elements and
  // enough connectivity among them.
  // Parmetis tends to die when it's given only a couple elements
  // per partition or when it can't reach elements from each other.
  {
    bool ready_for_parmetis = true;
    for (const auto & nelem : _n_active_elem_on_proc)
      if (nelem < MIN_ELEM_PER_PROC)
        ready_for_parmetis = false;

    std::size_t my_adjacency = _pmetis->adjncy.size();
    mesh.comm().min(my_adjacency);
    if (!my_adjacency)
      ready_for_parmetis = false;

    // Parmetis will not work unless each processor has some
    // elements. Specifically, it will abort when passed a nullptr
    // partition or adjacency array on *any* of the processors.
    if (!ready_for_parmetis)
      {
        // FIXME: revert to METIS, although this requires a serial mesh
        MeshSerializer serialize(mesh);
        MetisPartitioner mp;
        mp.partition (mesh, n_sbdmns);
        return;
      }
  }


  // Partition the graph
  std::vector<Parmetis::idx_t> vsize(_pmetis->vwgt.size(), 1);
  Parmetis::real_t itr = 1000000.0;
  MPI_Comm mpi_comm = mesh.comm().get();

  // Call the ParMETIS adaptive repartitioning method. This respects the
  // original partitioning when computing the new partitioning so as to
  // minimize the required data redistribution.
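  // (In the ParMETIS documentation, itr is described as the ratio of
  // inter-processor communication time to data redistribution time; the
  // large value chosen above effectively prioritizes a good edge cut over
  // minimizing element migration.)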
  Parmetis::ParMETIS_V3_AdaptiveRepart(_pmetis->vtxdist.empty() ? nullptr : _pmetis->vtxdist.data(),
                                       _pmetis->xadj.empty() ? nullptr : _pmetis->xadj.data(),
                                       _pmetis->adjncy.empty() ? nullptr : _pmetis->adjncy.data(),
                                       _pmetis->vwgt.empty() ? nullptr : _pmetis->vwgt.data(),
                                       vsize.empty() ? nullptr : vsize.data(),
                                       nullptr,
                                       &_pmetis->wgtflag,
                                       &_pmetis->numflag,
                                       &_pmetis->ncon,
                                       &_pmetis->nparts,
                                       _pmetis->tpwgts.empty() ? nullptr : _pmetis->tpwgts.data(),
                                       _pmetis->ubvec.empty() ? nullptr : _pmetis->ubvec.data(),
                                       &itr,
                                       _pmetis->options.data(),
                                       &_pmetis->edgecut,
                                       _pmetis->part.empty() ? nullptr : reinterpret_cast<Parmetis::idx_t *>(_pmetis->part.data()),
                                       &mpi_comm);

  // Assign the returned processor ids
  this->assign_partitioning (mesh, _pmetis->part);

#endif // #ifndef LIBMESH_HAVE_PARMETIS ... else ...

}



// Only need to compile these methods if ParMETIS is present
#ifdef LIBMESH_HAVE_PARMETIS

void ParmetisPartitioner::initialize (const MeshBase & mesh,
                                      const unsigned int n_sbdmns)
{
  LOG_SCOPE("initialize()", "ParmetisPartitioner");

  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
  // Set parameters.
  _pmetis->wgtflag = 2; // weights on vertices only
  _pmetis->ncon = 1; // one weight per vertex
  _pmetis->numflag = 0; // C-style 0-based numbering
  _pmetis->nparts = static_cast<Parmetis::idx_t>(n_sbdmns); // number of subdomains to create
  _pmetis->edgecut = 0; // the number of edges cut by the partition

  // Initialize data structures for ParMETIS
  _pmetis->vtxdist.assign (mesh.n_processors()+1, 0);
  _pmetis->tpwgts.assign (_pmetis->nparts, 1./_pmetis->nparts);
  _pmetis->ubvec.assign (_pmetis->ncon, 1.05);
  _pmetis->part.assign (n_active_local_elem, 0);
  _pmetis->options.resize (5);
  _pmetis->vwgt.resize (n_active_local_elem);

  // Set the options
  _pmetis->options[0] = 1;  // don't use default options
  _pmetis->options[1] = 0;  // default (level of timing)
  _pmetis->options[2] = 15; // random seed (default)
  _pmetis->options[3] = 2;  // processor distribution and subdomain distribution are decoupled

  // ParMetis expects the elements to be numbered in contiguous blocks
  // by processor, i.e. [0, ne0), [ne0, ne0+ne1), ...
  // Since we only partition active elements we should have no expectation
  // that we currently have such a distribution. So we need to create it.
  // Also, at the same time we are going to map all the active elements into a globally
  // unique range [0,n_active_elem) which is *independent* of the current partitioning.
  // This can be fed to ParMetis as the initial partitioning of the subdomains (decoupled
  // from the partitioning of the objects themselves). This allows us to get the same
  // resultant partitioning independent of the input partitioning.
  libMesh::BoundingBox bbox =
    MeshTools::create_bounding_box(mesh);

  _find_global_index_by_pid_map(mesh);


  // count the total number of active elements in the mesh. Note we cannot
  // use mesh.n_active_elem() in general since this only returns the number
  // of active elements which are stored on the calling processor.
  // We should not use n_active_elem for any allocation because that will
  // be inherently unscalable, but it can be useful for libmesh_assertions.
  dof_id_type n_active_elem=0;

  // Set up the vtxdist array. This will be the same on each processor.
  // ***** Consult the Parmetis documentation. *****
  libmesh_assert_equal_to (_pmetis->vtxdist.size(),
                           cast_int<std::size_t>(mesh.n_processors()+1));
  libmesh_assert_equal_to (_pmetis->vtxdist[0], 0);

  for (auto pid : make_range(mesh.n_processors()))
    {
      _pmetis->vtxdist[pid+1] = _pmetis->vtxdist[pid] + _n_active_elem_on_proc[pid];
      n_active_elem += _n_active_elem_on_proc[pid];
    }
  libmesh_assert_equal_to (_pmetis->vtxdist.back(), static_cast<Parmetis::idx_t>(n_active_elem));
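  // For example (hypothetical element counts, for illustration only): with
  // three processors owning 4, 3, and 5 active elements respectively,
  // vtxdist becomes [0, 4, 7, 12], so processor 1's elements occupy the
  // contiguous global range [4, 7).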


  // Maps active element ids into a contiguous range independent of partitioning.
  // (only needs local scope)
  std::unordered_map<dof_id_type, dof_id_type> global_index_map;

  {
    std::vector<dof_id_type> global_index;

    // create the unique mapping for all active elements independent of partitioning
    {
      MeshBase::const_element_iterator it = mesh.active_elements_begin();
      const MeshBase::const_element_iterator end = mesh.active_elements_end();

      // By calling this on all processors, a unique range in
      // [0,n_active_elem) is constructed. Only the indices for the
      // elements we pass in are returned in the array.
      MeshCommunication().find_global_indices (mesh.comm(),
                                               bbox, it, end,
                                               global_index);

      for (dof_id_type cnt=0; it != end; ++it)
        {
          const Elem * elem = *it;
          // vectormap::count forces a sort, which is too expensive
          // in a loop
          // libmesh_assert (!global_index_map.count(elem->id()));
          libmesh_assert_less (cnt, global_index.size());
          libmesh_assert_less (global_index[cnt], n_active_elem);

          global_index_map.emplace(elem->id(), global_index[cnt++]);
        }
    }
    // really, shouldn't be close!
    libmesh_assert_less_equal (global_index_map.size(), n_active_elem);
    libmesh_assert_less_equal (_global_index_by_pid_map.size(), n_active_elem);

    // At this point the two maps should be the same size. If they are not
    // then the number of active elements is not the same as the sum over all
    // processors of the number of active elements per processor, which means
    // there must be some unpartitioned objects out there.
    libmesh_error_msg_if(global_index_map.size() != _global_index_by_pid_map.size(),
                         "ERROR: ParmetisPartitioner cannot handle unpartitioned objects!");
  }

  // Finally, we need to initialize the vertex (partition) weights and the initial subdomain
  // mapping. The subdomain mapping will be independent of the processor mapping, and is
  // defined by a simple mapping of the global indices we just found.
  {
    std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());

    const dof_id_type first_local_elem = _pmetis->vtxdist[mesh.processor_id()];

    for (auto pid : make_range(mesh.n_processors()))
      {
        dof_id_type tgt_subdomain_size = 0;

        // watch out for the case that n_subdomains < n_processors
        if (pid < static_cast<unsigned int>(_pmetis->nparts))
          {
            tgt_subdomain_size = n_active_elem/std::min
              (cast_int<Parmetis::idx_t>(mesh.n_processors()), _pmetis->nparts);

            if (pid < n_active_elem%_pmetis->nparts)
              tgt_subdomain_size++;
          }
        if (pid == 0)
          subdomain_bounds[0] = tgt_subdomain_size;
        else
          subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
      }

    libmesh_assert_equal_to (subdomain_bounds.back(), n_active_elem);
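    // For instance (hypothetical sizes, for illustration only): with 3
    // processors, 3 target subdomains, and 10 active elements, the target
    // sizes are 4, 3, 3 and subdomain_bounds = [4, 7, 10]; a global index
    // of 5 then falls in subdomain 1 via the std::lower_bound search below.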

    for (const auto & elem : mesh.active_local_element_ptr_range())
      {
        libmesh_assert (_global_index_by_pid_map.count(elem->id()));
        const dof_id_type global_index_by_pid =
          _global_index_by_pid_map[elem->id()];
        libmesh_assert_less (global_index_by_pid, n_active_elem);

        const dof_id_type local_index =
          global_index_by_pid - first_local_elem;

        libmesh_assert_less (local_index, n_active_local_elem);
        libmesh_assert_less (local_index, _pmetis->vwgt.size());

        // Spline nodes are a special case (storing all the
        // unconstrained DoFs in an IGA simulation), but in general
        // we'll try to distribute work by expecting it to be roughly
        // proportional to DoFs, which are roughly proportional to
        // nodes.
        if (elem->type() == NODEELEM &&
            elem->mapping_type() == RATIONAL_BERNSTEIN_MAP)
          _pmetis->vwgt[local_index] = 50;
        else
          _pmetis->vwgt[local_index] = elem->n_nodes();

        // find the subdomain this element belongs in
        libmesh_assert (global_index_map.count(elem->id()));
        const dof_id_type global_index =
          global_index_map[elem->id()];

        libmesh_assert_less (global_index, subdomain_bounds.back());

        const unsigned int subdomain_id =
          cast_int<unsigned int>
          (std::distance(subdomain_bounds.begin(),
                         std::lower_bound(subdomain_bounds.begin(),
                                          subdomain_bounds.end(),
                                          global_index)));
        libmesh_assert_less (subdomain_id, _pmetis->nparts);
        libmesh_assert_less (local_index, _pmetis->part.size());

        _pmetis->part[local_index] = subdomain_id;
      }
  }
}



void ParmetisPartitioner::build_graph (const MeshBase & mesh)
{
  LOG_SCOPE("build_graph()", "ParmetisPartitioner");

  // build the graph in distributed CSR format. Note that
  // the edges in the graph will correspond to
  // face neighbors
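  // Illustration (a hypothetical 3-element local graph, for exposition
  // only): if element 0 neighbors {1}, element 1 neighbors {0, 2}, and
  // element 2 neighbors {1}, then adjncy = [1, 0, 2, 1] and
  // xadj = [0, 1, 3, 4]; row i's neighbors are adjncy[xadj[i] .. xadj[i+1]).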
  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();

  Partitioner::build_graph(mesh);

  dof_id_type graph_size=0;

  for (auto & row: _dual_graph)
    graph_size += cast_int<dof_id_type>(row.size());

  // Reserve space in the adjacency array
  _pmetis->xadj.clear();
  _pmetis->xadj.reserve (n_active_local_elem + 1);
  _pmetis->adjncy.clear();
  _pmetis->adjncy.reserve (graph_size);

  for (auto & graph_row : _dual_graph)
    {
      _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
      _pmetis->adjncy.insert(_pmetis->adjncy.end(),
                             graph_row.begin(),
                             graph_row.end());
    }

  // The end of the adjacency array for the last elem
  _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));

  libmesh_assert_equal_to (_pmetis->xadj.size(), n_active_local_elem+1);
  libmesh_assert_equal_to (_pmetis->adjncy.size(), graph_size);
}

#endif // #ifdef LIBMESH_HAVE_PARMETIS

} // namespace libMesh