Line data Source code
1 : // The libMesh Finite Element Library.
2 : // Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 :
4 : // This library is free software; you can redistribute it and/or
5 : // modify it under the terms of the GNU Lesser General Public
6 : // License as published by the Free Software Foundation; either
7 : // version 2.1 of the License, or (at your option) any later version.
8 :
9 : // This library is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 : // Lesser General Public License for more details.
13 :
14 : // You should have received a copy of the GNU Lesser General Public
15 : // License along with this library; if not, write to the Free Software
16 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :
18 :
19 :
20 : // Local includes
21 : #include "libmesh/parmetis_partitioner.h"
22 :
23 : // libMesh includes
24 : #include "libmesh/libmesh_config.h"
25 : #include "libmesh/elem.h"
26 : #include "libmesh/elem_range.h"
27 : #include "libmesh/enum_partitioner_type.h"
28 : #include "libmesh/libmesh_logging.h"
29 : #include "libmesh/mesh_base.h"
30 : #include "libmesh/mesh_serializer.h"
31 : #include "libmesh/mesh_tools.h"
32 : #include "libmesh/mesh_communication.h"
33 : #include "libmesh/metis_partitioner.h"
34 : #include "libmesh/parallel_only.h"
35 : #include "libmesh/parmetis_helper.h"
36 : #include "libmesh/utility.h"
37 :
38 : // TIMPI includes
39 : #include "timpi/communicator.h" // also includes mpi.h
40 : #include "timpi/parallel_implementation.h" // for min()
41 :
42 : // Include the ParMETIS header file.
43 : #ifdef LIBMESH_HAVE_PARMETIS
44 :
45 : // Before we include a header wrapped in a namespace, we'd better make
46 : // sure none of its dependencies end up in that namespace
47 : #include <mpi.h>
48 :
49 : namespace Parmetis {
50 : extern "C" {
51 : # include "libmesh/ignore_warnings.h"
52 : # include "parmetis.h"
53 : # include "libmesh/restore_warnings.h"
54 : }
55 : }
56 :
57 : #endif
58 :
59 :
60 : // C++ includes
61 : #include <unordered_map>
62 :
63 :
64 : namespace libMesh
65 : {
66 :
67 : // Minimum elements on each processor required for us to choose
68 : // Parmetis over Metis.
69 : #ifdef LIBMESH_HAVE_PARMETIS
70 : const unsigned int MIN_ELEM_PER_PROC = 4;
71 : #endif
72 :
73 : // ------------------------------------------------------------
74 : // ParmetisPartitioner implementation
75 277558 : ParmetisPartitioner::ParmetisPartitioner()
76 : #ifdef LIBMESH_HAVE_PARMETIS
77 277826 : : _pmetis(std::make_unique<ParmetisHelper>())
78 : #endif
79 277558 : {}
80 :
81 :
82 :
83 19326 : ParmetisPartitioner::ParmetisPartitioner (const ParmetisPartitioner & other)
84 : : Partitioner(other)
85 : #ifdef LIBMESH_HAVE_PARMETIS
86 19326 : , _pmetis(std::make_unique<ParmetisHelper>(*(other._pmetis)))
87 : #endif
88 : {
89 19326 : }
90 :
91 :
92 :
93 591882 : ParmetisPartitioner::~ParmetisPartitioner() = default;
94 :
95 :
96 :
97 284 : PartitionerType ParmetisPartitioner::type() const
98 : {
99 284 : return PARMETIS_PARTITIONER;
100 : }
101 :
102 :
103 :
104 259136 : void ParmetisPartitioner::_do_partition (MeshBase & mesh,
105 : const unsigned int n_sbdmns)
106 : {
107 259136 : this->_do_repartition (mesh, n_sbdmns);
108 259136 : }
109 :
110 :
111 :
112 259136 : void ParmetisPartitioner::_do_repartition (MeshBase & mesh,
113 : const unsigned int n_sbdmns)
114 : {
115 : // This function must be run on all processors at once
116 376 : libmesh_parallel_only(mesh.comm());
117 :
118 : // Check for easy returns
119 259136 : if (!mesh.n_elem())
120 210155 : return;
121 :
122 258994 : if (n_sbdmns == 1)
123 : {
124 0 : this->single_partition(mesh);
125 0 : return;
126 : }
127 :
128 372 : libmesh_assert_greater (n_sbdmns, 0);
129 :
130 : // What to do if the Parmetis library IS NOT present
131 : #ifndef LIBMESH_HAVE_PARMETIS
132 :
133 : libmesh_do_once(
134 : libMesh::out << "ERROR: The library has been built without" << std::endl
135 : << "Parmetis support. Using a Metis" << std::endl
136 : << "partitioner instead!" << std::endl;);
137 :
138 : MetisPartitioner mp;
139 :
140 : // Metis and other fallbacks only work in serial, and need to get
141 : // handed element ranges from an already-serialized mesh.
142 : mesh.allgather();
143 :
144 : // Don't just call partition() here; that would end up calling
145 : // post-element-partitioning work redundantly (and at the moment
146 : // incorrectly)
147 : mp.partition_range (mesh, mesh.active_elements_begin(),
148 : mesh.active_elements_end(), n_sbdmns);
149 :
150 : // What to do if the Parmetis library IS present
151 : #else
152 :
153 : // Revert to METIS on one processor.
154 259366 : if (mesh.n_processors() == 1)
155 : {
156 : // Make sure the mesh knows it's serial
157 4 : mesh.allgather();
158 :
159 0 : MetisPartitioner mp;
160 : // Don't just call partition() here; that would end up calling
161 : // post-element-partitioning work redundantly (and at the moment
162 : // incorrectly)
163 8 : mp.partition_range (mesh, mesh.active_elements_begin(),
164 8 : mesh.active_elements_end(), n_sbdmns);
165 0 : return;
166 : }
167 :
168 372 : LOG_SCOPE("repartition()", "ParmetisPartitioner");
169 :
170 : // Initialize the data structures required by ParMETIS
171 258990 : this->initialize (mesh, n_sbdmns);
172 :
173 : // build the graph corresponding to the mesh
174 258990 : this->build_graph (mesh);
175 :
176 : // Make sure all processors have enough active local elements and
177 : // enough connectivity among them.
178 : // Parmetis tends to die when it's given only a couple elements
179 : // per partition or when it can't reach elements from each other.
180 : {
181 372 : bool ready_for_parmetis = true;
182 3040452 : for (const auto & nelem : _n_active_elem_on_proc)
183 2781462 : if (nelem < MIN_ELEM_PER_PROC)
184 154 : ready_for_parmetis = false;
185 :
186 258990 : std::size_t my_adjacency = _pmetis->adjncy.size();
187 258990 : mesh.comm().min(my_adjacency);
188 258990 : if (!my_adjacency)
189 14 : ready_for_parmetis = false;
190 :
191 : // Parmetis will not work unless each processor has some
192 : // elements. Specifically, it will abort when passed a nullptr
193 : // partition or adjacency array on *any* of the processors.
194 107285 : if (!ready_for_parmetis)
195 : {
196 : // FIXME: revert to METIS, although this requires a serial mesh
197 210181 : MeshSerializer serialize(mesh);
198 86 : MetisPartitioner mp;
199 210009 : mp.partition (mesh, n_sbdmns);
200 86 : return;
201 209837 : }
202 : }
203 :
204 :
205 : // Partition the graph
206 49839 : std::vector<Parmetis::idx_t> vsize(_pmetis->vwgt.size(), 1);
207 48981 : Parmetis::real_t itr = 1000000.0;
208 48981 : MPI_Comm mpi_comm = mesh.comm().get();
209 :
210 : // Call the ParMETIS adaptive repartitioning method. This respects the
211 : // original partitioning when computing the new partitioning so as to
212 : // minimize the required data redistribution.
213 441401 : Parmetis::ParMETIS_V3_AdaptiveRepart(_pmetis->vtxdist.empty() ? nullptr : _pmetis->vtxdist.data(),
214 572 : _pmetis->xadj.empty() ? nullptr : _pmetis->xadj.data(),
215 572 : _pmetis->adjncy.empty() ? nullptr : _pmetis->adjncy.data(),
216 572 : _pmetis->vwgt.empty() ? nullptr : _pmetis->vwgt.data(),
217 572 : vsize.empty() ? nullptr : vsize.data(),
218 : nullptr,
219 286 : &_pmetis->wgtflag,
220 286 : &_pmetis->numflag,
221 286 : &_pmetis->ncon,
222 286 : &_pmetis->nparts,
223 572 : _pmetis->tpwgts.empty() ? nullptr : _pmetis->tpwgts.data(),
224 572 : _pmetis->ubvec.empty() ? nullptr : _pmetis->ubvec.data(),
225 : &itr,
226 286 : _pmetis->options.data(),
227 286 : &_pmetis->edgecut,
228 572 : _pmetis->part.empty() ? nullptr : reinterpret_cast<Parmetis::idx_t *>(_pmetis->part.data()),
229 : &mpi_comm);
230 :
231 : // Assign the returned processor ids
232 49267 : this->assign_partitioning (mesh, _pmetis->part);
233 :
234 : #endif // #ifndef LIBMESH_HAVE_PARMETIS ... else ...
235 :
236 : }
237 :
238 :
239 :
240 : // Only need to compile these methods if ParMETIS is present
241 : #ifdef LIBMESH_HAVE_PARMETIS
242 :
243 258990 : void ParmetisPartitioner::initialize (const MeshBase & mesh,
244 : const unsigned int n_sbdmns)
245 : {
246 744 : LOG_SCOPE("initialize()", "ParmetisPartitioner");
247 :
248 372 : const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
249 : // Set parameters.
250 258990 : _pmetis->wgtflag = 2; // weights on vertices only
251 258990 : _pmetis->ncon = 1; // one weight per vertex
252 258990 : _pmetis->numflag = 0; // C-style 0-based numbering
253 258990 : _pmetis->nparts = static_cast<Parmetis::idx_t>(n_sbdmns); // number of subdomains to create
254 258990 : _pmetis->edgecut = 0; // the numbers of edges cut by the
255 : // partition
256 :
257 : // Initialize data structures for ParMETIS
258 259362 : _pmetis->vtxdist.assign (mesh.n_processors()+1, 0);
259 258990 : _pmetis->tpwgts.assign (_pmetis->nparts, 1./_pmetis->nparts);
260 258990 : _pmetis->ubvec.assign (_pmetis->ncon, 1.05);
261 258990 : _pmetis->part.assign (n_active_local_elem, 0);
262 258990 : _pmetis->options.resize (5);
263 258990 : _pmetis->vwgt.resize (n_active_local_elem);
264 :
265 : // Set the options
266 258990 : _pmetis->options[0] = 1; // don't use default options
267 258990 : _pmetis->options[1] = 0; // default (level of timing)
268 258990 : _pmetis->options[2] = 15; // random seed (default)
269 258990 : _pmetis->options[3] = 2; // processor distribution and subdomain distribution are decoupled
270 :
271 : // ParMetis expects the elements to be numbered in contiguous blocks
272 : // by processor, i.e. [0, ne0), [ne0, ne0+ne1), ...
273 : // Since we only partition active elements we should have no expectation
274 : // that we currently have such a distribution. So we need to create it.
275 : // Also, at the same time we are going to map all the active elements into a globally
276 : // unique range [0,n_active_elem) which is *independent* of the current partitioning.
277 : // This can be fed to ParMetis as the initial partitioning of the subdomains (decoupled
278 : // from the partitioning of the objects themselves). This allows us to get the same
279 : // resultant partitioning independent of the input partitioning.
280 : libMesh::BoundingBox bbox =
281 258990 : MeshTools::create_bounding_box(mesh);
282 :
283 258990 : _find_global_index_by_pid_map(mesh);
284 :
285 :
286 : // count the total number of active elements in the mesh. Note we cannot
287 : // use mesh.n_active_elem() in general since this only returns the number
288 : // of active elements which are stored on the calling processor.
289 : // We should not use n_active_elem for any allocation because that will
290 : // be inherently unscalable, but it can be useful for libmesh_assertions.
291 372 : dof_id_type n_active_elem=0;
292 :
293 : // Set up the vtxdist array. This will be the same on each processor.
294 : // ***** Consult the Parmetis documentation. *****
295 372 : libmesh_assert_equal_to (_pmetis->vtxdist.size(),
296 : cast_int<std::size_t>(mesh.n_processors()+1));
297 372 : libmesh_assert_equal_to (_pmetis->vtxdist[0], 0);
298 :
299 3040452 : for (auto pid : make_range(mesh.n_processors()))
300 : {
301 2782950 : _pmetis->vtxdist[pid+1] = _pmetis->vtxdist[pid] + _n_active_elem_on_proc[pid];
302 2781462 : n_active_elem += _n_active_elem_on_proc[pid];
303 : }
304 372 : libmesh_assert_equal_to (_pmetis->vtxdist.back(), static_cast<Parmetis::idx_t>(n_active_elem));
305 :
306 :
307 : // Maps active element ids into a contiguous range independent of partitioning.
308 : // (only needs local scope)
309 744 : std::unordered_map<dof_id_type, dof_id_type> global_index_map;
310 :
311 : {
312 744 : std::vector<dof_id_type> global_index;
313 :
314 : // create the unique mapping for all active elements independent of partitioning
315 : {
316 259362 : MeshBase::const_element_iterator it = mesh.active_elements_begin();
317 259362 : const MeshBase::const_element_iterator end = mesh.active_elements_end();
318 :
319 : // Calling this on all processors a unique range in [0,n_active_elem) is constructed.
320 : // Only the indices for the elements we pass in are returned in the array.
321 258990 : MeshCommunication().find_global_indices (mesh.comm(),
322 : bbox, it, end,
323 : global_index);
324 :
325 16809518 : for (dof_id_type cnt=0; it != end; ++it)
326 : {
327 16292282 : const Elem * elem = *it;
328 : // vectormap::count forces a sort, which is too expensive
329 : // in a loop
330 : // libmesh_assert (!global_index_map.count(elem->id()));
331 19684 : libmesh_assert_less (cnt, global_index.size());
332 19684 : libmesh_assert_less (global_index[cnt], n_active_elem);
333 :
334 16311978 : global_index_map.emplace(elem->id(), global_index[cnt++]);
335 : }
336 : }
337 : // really, shouldn't be close!
338 372 : libmesh_assert_less_equal (global_index_map.size(), n_active_elem);
339 372 : libmesh_assert_less_equal (_global_index_by_pid_map.size(), n_active_elem);
340 :
341 : // At this point the two maps should be the same size. If they are not
342 : // then the number of active elements is not the same as the sum over all
343 : // processors of the number of active elements per processor, which means
344 : // there must be some unpartitioned objects out there.
345 258990 : libmesh_error_msg_if(global_index_map.size() != _global_index_by_pid_map.size(),
346 : "ERROR: ParmetisPartitioner cannot handle unpartitioned objects!");
347 : }
348 :
349 : // Finally, we need to initialize the vertex (partition) weights and the initial subdomain
350 : // mapping. The subdomain mapping will be independent of the processor mapping, and is
351 : // defined by a simple mapping of the global indices we just found.
352 : {
353 259362 : std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
354 :
355 259734 : const dof_id_type first_local_elem = _pmetis->vtxdist[mesh.processor_id()];
356 :
357 3040452 : for (auto pid : make_range(mesh.n_processors()))
358 : {
359 744 : dof_id_type tgt_subdomain_size = 0;
360 :
361 : // watch out for the case that n_subdomains < n_processors
362 2781462 : if (pid < static_cast<unsigned int>(_pmetis->nparts))
363 : {
364 1792140 : tgt_subdomain_size = n_active_elem/std::min
365 1792140 : (cast_int<Parmetis::idx_t>(mesh.n_processors()), _pmetis->nparts);
366 :
367 1791396 : if (pid < n_active_elem%_pmetis->nparts)
368 619849 : tgt_subdomain_size++;
369 : }
370 2781462 : if (pid == 0)
371 258990 : subdomain_bounds[0] = tgt_subdomain_size;
372 : else
373 2523216 : subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
374 : }
375 :
376 372 : libmesh_assert_equal_to (subdomain_bounds.back(), n_active_elem);
377 :
378 : Threads::parallel_for
379 258990 : (mesh.active_local_element_stored_range(),
380 775090 : [first_local_elem, n_active_elem, n_active_local_elem, this,
381 167370 : &global_index_map, &subdomain_bounds](const ConstElemRange & range)
382 : {
383 2940350 : for (const Elem * elem : range)
384 : {
385 : const dof_id_type global_index_by_pid =
386 2680304 : libmesh_map_find(_global_index_by_pid_map, elem->id());
387 11779 : libmesh_assert_less (global_index_by_pid, n_active_elem);
388 :
389 2680304 : const dof_id_type local_index =
390 23558 : global_index_by_pid - first_local_elem;
391 :
392 11779 : libmesh_assert_less (local_index, n_active_local_elem);
393 11779 : libmesh_assert_less (local_index, _pmetis->vwgt.size());
394 :
395 11779 : libmesh_ignore(n_active_elem); // unused outside dbg/devel
396 11779 : libmesh_ignore(n_active_local_elem); // unused outside dbg/devel
397 :
398 : // Spline nodes are a special case (storing all the
399 : // unconstrained DoFs in an IGA simulation), but in general
400 : // we'll try to distribute work by expecting it to be roughly
401 : // proportional to DoFs, which are roughly proportional to
402 : // nodes.
403 2680304 : if (elem->type() == NODEELEM &&
404 0 : elem->mapping_type() == RATIONAL_BERNSTEIN_MAP)
405 12789 : _pmetis->vwgt[local_index] = 50;
406 : else
407 2667515 : _pmetis->vwgt[local_index] = elem->n_nodes();
408 :
409 : // find the subdomain this element belongs in
410 : const dof_id_type global_index =
411 2680304 : libmesh_map_find(global_index_map, elem->id());
412 :
413 11779 : libmesh_assert_less (global_index, subdomain_bounds.back());
414 :
415 : const unsigned int subdomain_id =
416 : cast_int<unsigned int>
417 2668519 : (std::distance(subdomain_bounds.begin(),
418 : std::lower_bound(subdomain_bounds.begin(),
419 : subdomain_bounds.end(),
420 11779 : global_index)));
421 11779 : libmesh_assert_less (subdomain_id, _pmetis->nparts);
422 11779 : libmesh_assert_less (local_index, _pmetis->part.size());
423 :
424 2692089 : _pmetis->part[local_index] = subdomain_id;
425 : }
426 260046 : });
427 : }
428 258990 : }
429 :
430 :
431 :
432 258990 : void ParmetisPartitioner::build_graph (const MeshBase & mesh)
433 : {
434 744 : LOG_SCOPE("build_graph()", "ParmetisPartitioner");
435 :
436 : // build the graph in distributed CSR format. Note that
437 : // the edges in the graph will correspond to
438 : // face neighbors
439 372 : const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
440 :
441 258990 : Partitioner::build_graph(mesh);
442 :
443 372 : dof_id_type graph_size=0;
444 :
445 2939294 : for (auto & row: _dual_graph)
446 2692089 : graph_size += cast_int<dof_id_type>(row.size());
447 :
448 : // Reserve space in the adjacency array
449 258990 : _pmetis->xadj.clear();
450 258990 : _pmetis->xadj.reserve (n_active_local_elem + 1);
451 258990 : _pmetis->adjncy.clear();
452 258990 : _pmetis->adjncy.reserve (graph_size);
453 :
454 2939294 : for (auto & graph_row : _dual_graph)
455 : {
456 2692089 : _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
457 2680298 : _pmetis->adjncy.insert(_pmetis->adjncy.end(),
458 : graph_row.begin(),
459 47128 : graph_row.end());
460 : }
461 :
462 : // The end of the adjacency array for the last elem
463 259362 : _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
464 :
465 372 : libmesh_assert_equal_to (_pmetis->xadj.size(), n_active_local_elem+1);
466 372 : libmesh_assert_equal_to (_pmetis->adjncy.size(), graph_size);
467 258990 : }
468 :
469 : #endif // #ifdef LIBMESH_HAVE_PARMETIS
470 :
471 : } // namespace libMesh
|