Line data Source code
1 : // The libMesh Finite Element Library.
2 : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 :
4 : // This library is free software; you can redistribute it and/or
5 : // modify it under the terms of the GNU Lesser General Public
6 : // License as published by the Free Software Foundation; either
7 : // version 2.1 of the License, or (at your option) any later version.
8 :
9 : // This library is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 : // Lesser General Public License for more details.
13 :
14 : // You should have received a copy of the GNU Lesser General Public
15 : // License along with this library; if not, write to the Free Software
16 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :
18 :
19 :
20 : // Local includes
21 : #include "libmesh/parmetis_partitioner.h"
22 :
23 : // libMesh includes
24 : #include "libmesh/libmesh_config.h"
25 : #include "libmesh/elem.h"
26 : #include "libmesh/enum_partitioner_type.h"
27 : #include "libmesh/libmesh_logging.h"
28 : #include "libmesh/mesh_base.h"
29 : #include "libmesh/mesh_serializer.h"
30 : #include "libmesh/mesh_tools.h"
31 : #include "libmesh/mesh_communication.h"
32 : #include "libmesh/metis_partitioner.h"
33 : #include "libmesh/parallel_only.h"
34 : #include "libmesh/parmetis_helper.h"
35 :
36 : // TIMPI includes
37 : #include "timpi/communicator.h" // also includes mpi.h
38 : #include "timpi/parallel_implementation.h" // for min()
39 :
40 : // Include the ParMETIS header file.
41 : #ifdef LIBMESH_HAVE_PARMETIS
42 :
43 : // Before we include a header wrapped in a namespace, we'd better make
44 : // sure none of its dependencies end up in that namespace
45 : #include <mpi.h>
46 :
47 : namespace Parmetis {
48 : extern "C" {
49 : # include "libmesh/ignore_warnings.h"
50 : # include "parmetis.h"
51 : # include "libmesh/restore_warnings.h"
52 : }
53 : }
54 :
55 : #endif
56 :
57 :
58 : // C++ includes
59 : #include <unordered_map>
60 :
61 :
62 : namespace libMesh
63 : {
64 :
65 : // Minimum elements on each processor required for us to choose
66 : // Parmetis over Metis.
67 : #ifdef LIBMESH_HAVE_PARMETIS
68 : const unsigned int MIN_ELEM_PER_PROC = 4;
69 : #endif
70 :
71 : // ------------------------------------------------------------
72 : // ParmetisPartitioner implementation
73 254277 : ParmetisPartitioner::ParmetisPartitioner()
74 : #ifdef LIBMESH_HAVE_PARMETIS
75 254531 : : _pmetis(std::make_unique<ParmetisHelper>())
76 : #endif
77 254277 : {}
78 :
79 :
80 :
81 16674 : ParmetisPartitioner::ParmetisPartitioner (const ParmetisPartitioner & other)
82 : : Partitioner(other)
83 : #ifdef LIBMESH_HAVE_PARMETIS
84 16674 : , _pmetis(std::make_unique<ParmetisHelper>(*(other._pmetis)))
85 : #endif
86 : {
87 16674 : }
88 :
89 :
90 :
91 540104 : ParmetisPartitioner::~ParmetisPartitioner() = default;
92 :
93 :
94 :
95 284 : PartitionerType ParmetisPartitioner::type() const
96 : {
97 284 : return PARMETIS_PARTITIONER;
98 : }
99 :
100 :
101 :
102 230450 : void ParmetisPartitioner::_do_partition (MeshBase & mesh,
103 : const unsigned int n_sbdmns)
104 : {
105 230450 : this->_do_repartition (mesh, n_sbdmns);
106 230450 : }
107 :
108 :
109 :
110 230450 : void ParmetisPartitioner::_do_repartition (MeshBase & mesh,
111 : const unsigned int n_sbdmns)
112 : {
113 : // This function must be run on all processors at once
114 348 : libmesh_parallel_only(mesh.comm());
115 :
116 : // Check for easy returns
117 230450 : if (!mesh.n_elem())
118 187634 : return;
119 :
120 230308 : if (n_sbdmns == 1)
121 : {
122 0 : this->single_partition(mesh);
123 0 : return;
124 : }
125 :
126 344 : libmesh_assert_greater (n_sbdmns, 0);
127 :
128 : // What to do if the Parmetis library IS NOT present
129 : #ifndef LIBMESH_HAVE_PARMETIS
130 :
131 : libmesh_do_once(
132 : libMesh::out << "ERROR: The library has been built without" << std::endl
133 : << "Parmetis support. Using a Metis" << std::endl
134 : << "partitioner instead!" << std::endl;);
135 :
136 : MetisPartitioner mp;
137 :
138 : // Metis and other fallbacks only work in serial, and need to get
139 : // handed element ranges from an already-serialized mesh.
140 : mesh.allgather();
141 :
142 : // Don't just call partition() here; that would end up calling
143 : // post-element-partitioning work redundantly (and at the moment
144 : // incorrectly)
145 : mp.partition_range (mesh, mesh.active_elements_begin(),
146 : mesh.active_elements_end(), n_sbdmns);
147 :
148 : // What to do if the Parmetis library IS present
149 : #else
150 :
151 : // Revert to METIS on one processor.
152 230652 : if (mesh.n_processors() == 1)
153 : {
154 : // Make sure the mesh knows it's serial
155 4 : mesh.allgather();
156 :
157 0 : MetisPartitioner mp;
158 : // Don't just call partition() here; that would end up calling
159 : // post-element-partitioning work redundantly (and at the moment
160 : // incorrectly)
161 8 : mp.partition_range (mesh, mesh.active_elements_begin(),
162 8 : mesh.active_elements_end(), n_sbdmns);
163 0 : return;
164 : }
165 :
166 344 : LOG_SCOPE("repartition()", "ParmetisPartitioner");
167 :
168 : // Initialize the data structures required by ParMETIS
169 230304 : this->initialize (mesh, n_sbdmns);
170 :
171 : // build the graph corresponding to the mesh
172 230304 : this->build_graph (mesh);
173 :
174 : // Make sure all processors have enough active local elements and
175 : // enough connectivity among them.
176 : // Parmetis tends to die when it's given only a couple elements
177 : // per partition or when it can't reach elements from each other.
178 : {
179 344 : bool ready_for_parmetis = true;
180 2703332 : for (const auto & nelem : _n_active_elem_on_proc)
181 2473028 : if (nelem < MIN_ELEM_PER_PROC)
182 138 : ready_for_parmetis = false;
183 :
184 230304 : std::size_t my_adjacency = _pmetis->adjncy.size();
185 230304 : mesh.comm().min(my_adjacency);
186 230304 : if (!my_adjacency)
187 12 : ready_for_parmetis = false;
188 :
189 : // Parmetis will not work unless each processor has some
190 : // elements. Specifically, it will abort when passed a nullptr
191 : // partition or adjacency array on *any* of the processors.
192 96362 : if (!ready_for_parmetis)
193 : {
194 : // FIXME: revert to METIS, although this requires a serial mesh
195 187644 : MeshSerializer serialize(mesh);
196 78 : MetisPartitioner mp;
197 187488 : mp.partition (mesh, n_sbdmns);
198 78 : return;
199 187332 : }
200 : }
201 :
202 :
203 : // Partition the graph
204 43614 : std::vector<Parmetis::idx_t> vsize(_pmetis->vwgt.size(), 1);
205 42816 : Parmetis::real_t itr = 1000000.0;
206 42816 : MPI_Comm mpi_comm = mesh.comm().get();
207 :
208 : // Call the ParMETIS adaptive repartitioning method. This respects the
209 : // original partitioning when computing the new partitioning so as to
210 : // minimize the required data redistribution.
211 385876 : Parmetis::ParMETIS_V3_AdaptiveRepart(_pmetis->vtxdist.empty() ? nullptr : _pmetis->vtxdist.data(),
212 532 : _pmetis->xadj.empty() ? nullptr : _pmetis->xadj.data(),
213 532 : _pmetis->adjncy.empty() ? nullptr : _pmetis->adjncy.data(),
214 532 : _pmetis->vwgt.empty() ? nullptr : _pmetis->vwgt.data(),
215 532 : vsize.empty() ? nullptr : vsize.data(),
216 : nullptr,
217 266 : &_pmetis->wgtflag,
218 266 : &_pmetis->numflag,
219 266 : &_pmetis->ncon,
220 266 : &_pmetis->nparts,
221 532 : _pmetis->tpwgts.empty() ? nullptr : _pmetis->tpwgts.data(),
222 532 : _pmetis->ubvec.empty() ? nullptr : _pmetis->ubvec.data(),
223 : &itr,
224 266 : _pmetis->options.data(),
225 266 : &_pmetis->edgecut,
226 532 : _pmetis->part.empty() ? nullptr : reinterpret_cast<Parmetis::idx_t *>(_pmetis->part.data()),
227 : &mpi_comm);
228 :
229 : // Assign the returned processor ids
230 43082 : this->assign_partitioning (mesh, _pmetis->part);
231 :
232 : #endif // #ifndef LIBMESH_HAVE_PARMETIS ... else ...
233 :
234 : }
235 :
236 :
237 :
238 : // Only need to compile these methods if ParMETIS is present
239 : #ifdef LIBMESH_HAVE_PARMETIS
240 :
241 230304 : void ParmetisPartitioner::initialize (const MeshBase & mesh,
242 : const unsigned int n_sbdmns)
243 : {
244 688 : LOG_SCOPE("initialize()", "ParmetisPartitioner");
245 :
246 344 : const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
247 : // Set parameters.
248 230304 : _pmetis->wgtflag = 2; // weights on vertices only
249 230304 : _pmetis->ncon = 1; // one weight per vertex
250 230304 : _pmetis->numflag = 0; // C-style 0-based numbering
251 230304 : _pmetis->nparts = static_cast<Parmetis::idx_t>(n_sbdmns); // number of subdomains to create
252 230304 : _pmetis->edgecut = 0; // the numbers of edges cut by the
253 : // partition
254 :
255 : // Initialize data structures for ParMETIS
256 230648 : _pmetis->vtxdist.assign (mesh.n_processors()+1, 0);
257 230304 : _pmetis->tpwgts.assign (_pmetis->nparts, 1./_pmetis->nparts);
258 230304 : _pmetis->ubvec.assign (_pmetis->ncon, 1.05);
259 230304 : _pmetis->part.assign (n_active_local_elem, 0);
260 230304 : _pmetis->options.resize (5);
261 230304 : _pmetis->vwgt.resize (n_active_local_elem);
262 :
263 : // Set the options
264 230304 : _pmetis->options[0] = 1; // don't use default options
265 230304 : _pmetis->options[1] = 0; // default (level of timing)
266 230304 : _pmetis->options[2] = 15; // random seed (default)
267 230304 : _pmetis->options[3] = 2; // processor distribution and subdomain distribution are decoupled
268 :
269 : // ParMetis expects the elements to be numbered in contiguous blocks
270 : // by processor, i.e. [0, ne0), [ne0, ne0+ne1), ...
271 : // Since we only partition active elements we should have no expectation
272 : // that we currently have such a distribution. So we need to create it.
273 : // Also, at the same time we are going to map all the active elements into a globally
274 : // unique range [0,n_active_elem) which is *independent* of the current partitioning.
275 : // This can be fed to ParMetis as the initial partitioning of the subdomains (decoupled
276 : // from the partitioning of the objects themselves). This allows us to get the same
277 : // resultant partitioning independent of the input partitioning.
278 : libMesh::BoundingBox bbox =
279 230304 : MeshTools::create_bounding_box(mesh);
280 :
281 230304 : _find_global_index_by_pid_map(mesh);
282 :
283 :
284 : // count the total number of active elements in the mesh. Note we cannot
285 : // use mesh.n_active_elem() in general since this only returns the number
286 : // of active elements which are stored on the calling processor.
287 : // We should not use n_active_elem for any allocation because that will
288 : // be inherently unscalable, but it can be useful for libmesh_assertions.
289 344 : dof_id_type n_active_elem=0;
290 :
291 : // Set up the vtxdist array. This will be the same on each processor.
292 : // ***** Consult the Parmetis documentation. *****
293 344 : libmesh_assert_equal_to (_pmetis->vtxdist.size(),
294 : cast_int<std::size_t>(mesh.n_processors()+1));
295 344 : libmesh_assert_equal_to (_pmetis->vtxdist[0], 0);
296 :
297 2703332 : for (auto pid : make_range(mesh.n_processors()))
298 : {
299 2474404 : _pmetis->vtxdist[pid+1] = _pmetis->vtxdist[pid] + _n_active_elem_on_proc[pid];
300 2473028 : n_active_elem += _n_active_elem_on_proc[pid];
301 : }
302 344 : libmesh_assert_equal_to (_pmetis->vtxdist.back(), static_cast<Parmetis::idx_t>(n_active_elem));
303 :
304 :
305 : // Maps active element ids into a contiguous range independent of partitioning.
306 : // (only needs local scope)
307 688 : std::unordered_map<dof_id_type, dof_id_type> global_index_map;
308 :
309 : {
310 688 : std::vector<dof_id_type> global_index;
311 :
312 : // create the unique mapping for all active elements independent of partitioning
313 : {
314 230648 : MeshBase::const_element_iterator it = mesh.active_elements_begin();
315 230648 : const MeshBase::const_element_iterator end = mesh.active_elements_end();
316 :
317 : // Calling this on all processors a unique range in [0,n_active_elem) is constructed.
318 : // Only the indices for the elements we pass in are returned in the array.
319 230304 : MeshCommunication().find_global_indices (mesh.comm(),
320 : bbox, it, end,
321 : global_index);
322 :
323 15583203 : for (dof_id_type cnt=0; it != end; ++it)
324 : {
325 15123283 : const Elem * elem = *it;
326 : // vectormap::count forces a sort, which is too expensive
327 : // in a loop
328 : // libmesh_assert (!global_index_map.count(elem->id()));
329 17840 : libmesh_assert_less (cnt, global_index.size());
330 17840 : libmesh_assert_less (global_index[cnt], n_active_elem);
331 :
332 15141135 : global_index_map.emplace(elem->id(), global_index[cnt++]);
333 : }
334 : }
335 : // really, shouldn't be close!
336 344 : libmesh_assert_less_equal (global_index_map.size(), n_active_elem);
337 344 : libmesh_assert_less_equal (_global_index_by_pid_map.size(), n_active_elem);
338 :
339 : // At this point the two maps should be the same size. If they are not
340 : // then the number of active elements is not the same as the sum over all
341 : // processors of the number of active elements per processor, which means
342 : // there must be some unpartitioned objects out there.
343 230304 : libmesh_error_msg_if(global_index_map.size() != _global_index_by_pid_map.size(),
344 : "ERROR: ParmetisPartitioner cannot handle unpartitioned objects!");
345 : }
346 :
347 : // Finally, we need to initialize the vertex (partition) weights and the initial subdomain
348 : // mapping. The subdomain mapping will be independent of the processor mapping, and is
349 : // defined by a simple mapping of the global indices we just found.
350 : {
351 230992 : std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
352 :
353 230992 : const dof_id_type first_local_elem = _pmetis->vtxdist[mesh.processor_id()];
354 :
355 2703332 : for (auto pid : make_range(mesh.n_processors()))
356 : {
357 688 : dof_id_type tgt_subdomain_size = 0;
358 :
359 : // watch out for the case that n_subdomains < n_processors
360 2473028 : if (pid < static_cast<unsigned int>(_pmetis->nparts))
361 : {
362 1567389 : tgt_subdomain_size = n_active_elem/std::min
363 1567389 : (cast_int<Parmetis::idx_t>(mesh.n_processors()), _pmetis->nparts);
364 :
365 1566701 : if (pid < n_active_elem%_pmetis->nparts)
366 548290 : tgt_subdomain_size++;
367 : }
368 2473028 : if (pid == 0)
369 230304 : subdomain_bounds[0] = tgt_subdomain_size;
370 : else
371 2243412 : subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
372 : }
373 :
374 344 : libmesh_assert_equal_to (subdomain_bounds.back(), n_active_elem);
375 :
376 5428224 : for (const auto & elem : mesh.active_local_element_ptr_range())
377 : {
378 10503 : libmesh_assert (_global_index_by_pid_map.count(elem->id()));
379 : const dof_id_type global_index_by_pid =
380 2494486 : _global_index_by_pid_map[elem->id()];
381 10503 : libmesh_assert_less (global_index_by_pid, n_active_elem);
382 :
383 2494486 : const dof_id_type local_index =
384 10503 : global_index_by_pid - first_local_elem;
385 :
386 10503 : libmesh_assert_less (local_index, n_active_local_elem);
387 10503 : libmesh_assert_less (local_index, _pmetis->vwgt.size());
388 :
389 : // Spline nodes are a special case (storing all the
390 : // unconstrained DoFs in an IGA simulation), but in general
391 : // we'll try to distribute work by expecting it to be roughly
392 : // proportional to DoFs, which are roughly proportional to
393 : // nodes.
394 2494486 : if (elem->type() == NODEELEM &&
395 13881 : elem->mapping_type() == RATIONAL_BERNSTEIN_MAP)
396 12789 : _pmetis->vwgt[local_index] = 50;
397 : else
398 2481697 : _pmetis->vwgt[local_index] = elem->n_nodes();
399 :
400 : // find the subdomain this element belongs in
401 10503 : libmesh_assert (global_index_map.count(elem->id()));
402 : const dof_id_type global_index =
403 2494486 : global_index_map[elem->id()];
404 :
405 10503 : libmesh_assert_less (global_index, subdomain_bounds.back());
406 :
407 : const unsigned int subdomain_id =
408 : cast_int<unsigned int>
409 2483977 : (std::distance(subdomain_bounds.begin(),
410 : std::lower_bound(subdomain_bounds.begin(),
411 : subdomain_bounds.end(),
412 10503 : global_index)));
413 10503 : libmesh_assert_less (subdomain_id, _pmetis->nparts);
414 10503 : libmesh_assert_less (local_index, _pmetis->part.size());
415 :
416 2504995 : _pmetis->part[local_index] = subdomain_id;
417 229616 : }
418 : }
419 230304 : }
420 :
421 :
422 :
423 230304 : void ParmetisPartitioner::build_graph (const MeshBase & mesh)
424 : {
425 688 : LOG_SCOPE("build_graph()", "ParmetisPartitioner");
426 :
427 : // build the graph in distributed CSR format. Note that
428 : // the edges in the graph will correspond to
429 : // face neighbors
430 344 : const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
431 :
432 230304 : Partitioner::build_graph(mesh);
433 :
434 344 : dof_id_type graph_size=0;
435 :
436 2724790 : for (auto & row: _dual_graph)
437 2504995 : graph_size += cast_int<dof_id_type>(row.size());
438 :
439 : // Reserve space in the adjacency array
440 230304 : _pmetis->xadj.clear();
441 230304 : _pmetis->xadj.reserve (n_active_local_elem + 1);
442 230304 : _pmetis->adjncy.clear();
443 230304 : _pmetis->adjncy.reserve (graph_size);
444 :
445 2724790 : for (auto & graph_row : _dual_graph)
446 : {
447 2504995 : _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
448 2494480 : _pmetis->adjncy.insert(_pmetis->adjncy.end(),
449 : graph_row.begin(),
450 42024 : graph_row.end());
451 : }
452 :
453 : // The end of the adjacency array for the last elem
454 230648 : _pmetis->xadj.push_back(cast_int<int>(_pmetis->adjncy.size()));
455 :
456 344 : libmesh_assert_equal_to (_pmetis->xadj.size(), n_active_local_elem+1);
457 344 : libmesh_assert_equal_to (_pmetis->adjncy.size(), graph_size);
458 230304 : }
459 :
460 : #endif // #ifdef LIBMESH_HAVE_PARMETIS
461 :
462 : } // namespace libMesh
|