Line data Source code
1 : // The libMesh Finite Element Library.
2 : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 :
4 : // This library is free software; you can redistribute it and/or
5 : // modify it under the terms of the GNU Lesser General Public
6 : // License as published by the Free Software Foundation; either
7 : // version 2.1 of the License, or (at your option) any later version.
8 :
9 : // This library is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 : // Lesser General Public License for more details.
13 :
14 : // You should have received a copy of the GNU Lesser General Public
15 : // License along with this library; if not, write to the Free Software
16 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :
18 :
19 :
20 : #include "libmesh/partitioner.h"
21 :
22 : // libMesh includes
23 : #include "libmesh/compare_elems_by_level.h"
24 : #include "libmesh/elem.h"
25 : #include "libmesh/enum_to_string.h"
26 : #include "libmesh/int_range.h"
27 : #include "libmesh/libmesh_logging.h"
28 : #include "libmesh/mesh_base.h"
29 : #include "libmesh/mesh_tools.h"
30 : #include "libmesh/mesh_communication.h"
31 : #include "libmesh/parallel_ghost_sync.h"
32 : #include "libmesh/wrapped_petsc.h"
33 : #include "libmesh/boundary_info.h"
34 :
35 : // Subclasses to build()
36 : #include "libmesh/enum_partitioner_type.h"
37 : #include "libmesh/centroid_partitioner.h"
38 : #include "libmesh/hilbert_sfc_partitioner.h"
39 : #include "libmesh/linear_partitioner.h"
40 : #include "libmesh/mapped_subdomain_partitioner.h"
41 : #include "libmesh/metis_partitioner.h"
42 : #include "libmesh/morton_sfc_partitioner.h"
43 : #include "libmesh/parmetis_partitioner.h"
44 : #include "libmesh/sfc_partitioner.h"
45 : #include "libmesh/subdomain_partitioner.h"
46 :
47 : // TIMPI includes
48 : #include "timpi/parallel_implementation.h"
49 : #include "timpi/parallel_sync.h"
50 :
51 : // C/C++ includes
52 : #ifdef LIBMESH_HAVE_PETSC
53 : #include "libmesh/ignore_warnings.h"
54 : #include "petscmat.h"
55 : #include "libmesh/restore_warnings.h"
56 : #include "libmesh/petsc_solver_exception.h"
57 : #endif
58 :
59 :
60 : namespace {
61 :
62 : using namespace libMesh;
63 :
64 : struct CorrectProcIds
65 : {
66 : typedef processor_id_type datum;
67 :
68 130 : CorrectProcIds(MeshBase & _mesh,
69 70944 : std::unordered_set<dof_id_type> & _bad_pids) :
70 70944 : mesh(_mesh), bad_pids(_bad_pids) {}
71 :
72 : MeshBase & mesh;
73 : std::unordered_set<dof_id_type> & bad_pids;
74 :
75 : // ------------------------------------------------------------
76 1208638 : void gather_data (const std::vector<dof_id_type> & ids,
77 : std::vector<datum> & data)
78 : {
79 : // Find the processor id of each requested node
80 1208638 : data.resize(ids.size());
81 :
82 117284566 : for (auto i : index_range(ids))
83 : {
84 : // Look for this point in the mesh and make sure we have a
85 : // good pid for it before sending that on
86 116150026 : if (ids[i] != DofObject::invalid_id &&
87 116060653 : !bad_pids.count(ids[i]))
88 : {
89 116040863 : Node & node = mesh.node_ref(ids[i]);
90 :
91 : // Return the node's correct processor id,
92 116040863 : data[i] = node.processor_id();
93 : }
94 : else
95 35065 : data[i] = DofObject::invalid_processor_id;
96 : }
97 1208638 : }
98 :
99 : // ------------------------------------------------------------
100 1208638 : bool act_on_data (const std::vector<dof_id_type> & ids,
101 : const std::vector<datum> proc_ids)
102 : {
103 384 : bool data_changed = false;
104 :
105 : // Set the ghost node processor ids we've now been informed of
106 117284566 : for (auto i : index_range(ids))
107 : {
108 116075928 : Node & node = mesh.node_ref(ids[i]);
109 :
110 74098 : processor_id_type & proc_id = node.processor_id();
111 116075928 : const processor_id_type new_proc_id = proc_ids[i];
112 :
113 116075928 : if (const auto it = bad_pids.find(ids[i]);
114 116075928 : (new_proc_id != proc_id || it != bad_pids.end()) &&
115 : new_proc_id != DofObject::invalid_processor_id)
116 : {
117 514923 : if (it != bad_pids.end())
118 : {
119 253468 : proc_id = new_proc_id;
120 0 : bad_pids.erase(it);
121 : }
122 : else
123 261455 : proc_id = node.choose_processor_id(proc_id, new_proc_id);
124 :
125 514923 : if (proc_id == new_proc_id)
126 0 : data_changed = true;
127 : }
128 : }
129 :
130 1208638 : return data_changed;
131 : }
132 : };
133 :
134 : }
135 :
136 : namespace libMesh
137 : {
138 :
139 :
140 :
141 : // ------------------------------------------------------------
142 : // Partitioner static data
143 : const dof_id_type Partitioner::communication_blocksize =
144 : dof_id_type(1000000);
145 :
146 :
147 :
148 : // ------------------------------------------------------------
149 : // Partitioner implementation
150 :
151 :
152 0 : PartitionerType Partitioner::type() const
153 : {
154 0 : return INVALID_PARTITIONER;
155 : }
156 :
157 :
158 : std::unique_ptr<Partitioner>
159 302994 : Partitioner::build (const PartitionerType partitioner_type)
160 : {
161 302994 : switch (partitioner_type)
162 : {
163 71 : case CENTROID_PARTITIONER:
164 71 : return std::make_unique<CentroidPartitioner>();
165 213 : case LINEAR_PARTITIONER:
166 213 : return std::make_unique<LinearPartitioner>();
167 0 : case MAPPED_SUBDOMAIN_PARTITIONER:
168 0 : return std::make_unique<MappedSubdomainPartitioner>();
169 49065 : case METIS_PARTITIONER:
170 49065 : return std::make_unique<MetisPartitioner>();
171 253432 : case PARMETIS_PARTITIONER:
172 253432 : return std::make_unique<ParmetisPartitioner>();
173 71 : case HILBERT_SFC_PARTITIONER:
174 71 : return std::make_unique<HilbertSFCPartitioner>();
175 71 : case MORTON_SFC_PARTITIONER:
176 71 : return std::make_unique<MortonSFCPartitioner>();
177 71 : case SFC_PARTITIONER:
178 71 : return std::make_unique<SFCPartitioner>();
179 0 : case SUBDOMAIN_PARTITIONER:
180 0 : return std::make_unique<SubdomainPartitioner>();
181 0 : default:
182 0 : libmesh_error_msg("Invalid partitioner type: " <<
183 : Utility::enum_to_string(partitioner_type));
184 : }
185 : }
186 :
187 :
188 :
189 0 : void Partitioner::partition (MeshBase & mesh)
190 : {
191 0 : this->partition(mesh,mesh.n_processors());
192 0 : }
193 :
194 :
195 :
196 574482 : void Partitioner::partition (MeshBase & mesh,
197 : const unsigned int n)
198 : {
199 11836 : libmesh_parallel_only(mesh.comm());
200 :
201 : // BSK - temporary fix while redistribution is integrated 6/26/2008
202 : // Uncomment this to not repartition in parallel
203 : // if (!mesh.is_serial())
204 : // return;
205 :
206 : // we cannot partition into more pieces than we have
207 : // active elements!
208 : const unsigned int n_parts =
209 : static_cast<unsigned int>
210 574482 : (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));
211 :
212 : // Set the number of partitions in the mesh
213 574482 : mesh.set_n_partitions()=n_parts;
214 :
215 574482 : if (n_parts == 1)
216 : {
217 106486 : this->single_partition (mesh);
218 106486 : return;
219 : }
220 :
221 : // First assign a temporary partitioning to any unpartitioned elements
222 467996 : Partitioner::partition_unpartitioned_elements(mesh, n_parts);
223 :
224 : // Call the partitioning function
225 467996 : this->_do_partition(mesh,n_parts);
226 :
227 : // Set the parent's processor ids
228 467996 : Partitioner::set_parent_processor_ids(mesh);
229 :
230 : // Redistribute elements if necessary, before setting node processor
231 : // ids, to make sure those will be set consistently
232 467996 : mesh.redistribute();
233 :
234 : #ifdef DEBUG
235 8596 : MeshTools::libmesh_assert_valid_remote_elems(mesh);
236 :
237 : // Messed up elem processor_id()s can leave us without the child
238 : // elements we need to restrict vectors on a distributed mesh
239 8596 : MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
240 : #endif
241 :
242 : // Set the node's processor ids
243 467996 : Partitioner::set_node_processor_ids(mesh);
244 :
245 : #ifdef DEBUG
246 8596 : MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
247 : #endif
248 :
249 : // Give derived Mesh classes a chance to update any cached data to
250 : // reflect the new partitioning
251 467996 : mesh.update_post_partitioning();
252 : }
253 :
254 :
255 :
256 0 : void Partitioner::repartition (MeshBase & mesh)
257 : {
258 0 : this->repartition(mesh,mesh.n_processors());
259 0 : }
260 :
261 :
262 :
263 0 : void Partitioner::repartition (MeshBase & mesh,
264 : const unsigned int n)
265 : {
266 : // we cannot partition into more pieces than we have
267 : // active elements!
268 : const unsigned int n_parts =
269 : static_cast<unsigned int>
270 0 : (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));
271 :
272 : // Set the number of partitions in the mesh
273 0 : mesh.set_n_partitions()=n_parts;
274 :
275 0 : if (n_parts == 1)
276 : {
277 0 : this->single_partition (mesh);
278 0 : return;
279 : }
280 :
281 : // First assign a temporary partitioning to any unpartitioned elements
282 0 : Partitioner::partition_unpartitioned_elements(mesh, n_parts);
283 :
284 : // Call the partitioning function
285 0 : this->_do_repartition(mesh,n_parts);
286 :
287 : // Set the parent's processor ids
288 0 : Partitioner::set_parent_processor_ids(mesh);
289 :
290 : // Set the node's processor ids
291 0 : Partitioner::set_node_processor_ids(mesh);
292 : }
293 :
294 :
295 :
296 :
297 :
298 106486 : bool Partitioner::single_partition (MeshBase & mesh)
299 : {
300 : bool changed_pid =
301 209732 : this->single_partition_range(mesh.elements_begin(),
302 312978 : mesh.elements_end());
303 :
304 : // If we have a distributed mesh with an empty rank (or where rank
305 : // 0 has only its own component of a disconnected mesh, I guess),
306 : // that rank might need to be informed of a change.
307 106486 : mesh.comm().max(changed_pid);
308 :
309 : // We may need to redistribute, in case someone (like our unit
310 : // tests) is doing something silly (like moving a whole
311 : // already-distributed mesh back onto rank 0).
312 106486 : if (changed_pid)
313 99532 : mesh.redistribute();
314 :
315 106486 : return changed_pid;
316 : }
317 :
318 :
319 :
320 106486 : bool Partitioner::single_partition_range (MeshBase::element_iterator it,
321 : MeshBase::element_iterator end)
322 : {
323 3240 : LOG_SCOPE("single_partition_range()", "Partitioner");
324 :
325 3240 : bool changed_pid = false;
326 :
327 2713442 : for (auto & elem : as_range(it, end))
328 : {
329 1258252 : if (elem->processor_id())
330 5080 : changed_pid = true;
331 1258252 : elem->processor_id() = 0;
332 :
333 : // Assign all this element's nodes to processor 0 as well.
334 10950055 : for (Node & node : elem->node_ref_range())
335 : {
336 9691803 : if (node.processor_id())
337 28898 : changed_pid = true;
338 9691803 : node.processor_id() = 0;
339 : }
340 100006 : }
341 :
342 109726 : return changed_pid;
343 : }
344 :
345 0 : void Partitioner::partition_unpartitioned_elements (MeshBase & mesh)
346 : {
347 0 : Partitioner::partition_unpartitioned_elements(mesh, mesh.n_processors());
348 0 : }
349 :
350 :
351 :
352 467996 : void Partitioner::partition_unpartitioned_elements (MeshBase & mesh,
353 : const unsigned int n_subdomains)
354 : {
355 467996 : MeshBase::element_iterator it = mesh.unpartitioned_elements_begin();
356 467996 : const MeshBase::element_iterator end = mesh.unpartitioned_elements_end();
357 :
358 927396 : const dof_id_type n_unpartitioned_elements = MeshTools::n_elem (it, end);
359 :
360 : // the unpartitioned elements must exist on all processors. If the range is empty on one
361 : // it is empty on all, and we can quit right here.
362 467996 : if (!n_unpartitioned_elements)
363 2832 : return;
364 :
365 : // find the target subdomain sizes
366 208014 : std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
367 :
368 2162796 : for (auto pid : make_range(mesh.n_processors()))
369 : {
370 11528 : dof_id_type tgt_subdomain_size = 0;
371 :
372 : // watch out for the case that n_subdomains < n_processors
373 1966310 : if (pid < n_subdomains)
374 : {
375 1113685 : tgt_subdomain_size = n_unpartitioned_elements/n_subdomains;
376 :
377 1113685 : if (pid < n_unpartitioned_elements%n_subdomains)
378 327145 : tgt_subdomain_size++;
379 :
380 : }
381 :
382 : //libMesh::out << "pid, #= " << pid << ", " << tgt_subdomain_size << std::endl;
383 1966310 : if (pid == 0)
384 196486 : subdomain_bounds[0] = tgt_subdomain_size;
385 : else
386 1781352 : subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
387 : }
388 :
389 5764 : libmesh_assert_equal_to (subdomain_bounds.back(), n_unpartitioned_elements);
390 :
391 : // create the unique mapping for all unpartitioned elements independent of partitioning
392 : // determine the global indexing for all the unpartitioned elements
393 11528 : std::vector<dof_id_type> global_indices;
394 :
395 : // Calling this on all processors a unique range in [0,n_unpartitioned_elements) is constructed.
396 : // Only the indices for the elements we pass in are returned in the array.
397 196486 : MeshCommunication().find_global_indices (mesh.comm(),
398 196486 : MeshTools::create_bounding_box(mesh), it, end,
399 : global_indices);
400 :
401 5764 : dof_id_type cnt=0;
402 12430784 : for (auto & elem : as_range(it, end))
403 : {
404 357974 : libmesh_assert_less (cnt, global_indices.size());
405 : const dof_id_type global_index =
406 12401550 : global_indices[cnt++];
407 :
408 357974 : libmesh_assert_less (global_index, subdomain_bounds.back());
409 357974 : libmesh_assert_less (global_index, n_unpartitioned_elements);
410 :
411 : const processor_id_type subdomain_id =
412 : cast_int<processor_id_type>
413 11685602 : (std::distance(subdomain_bounds.begin(),
414 : std::upper_bound(subdomain_bounds.begin(),
415 : subdomain_bounds.end(),
416 357974 : global_index)));
417 357974 : libmesh_assert_less (subdomain_id, n_subdomains);
418 :
419 12043576 : elem->processor_id() = subdomain_id;
420 : //libMesh::out << "assigning " << global_index << " to " << subdomain_id << std::endl;
421 184958 : }
422 : }
423 :
424 :
425 :
426 467996 : void Partitioner::set_parent_processor_ids(MeshBase & mesh)
427 : {
428 : // Ignore the parameter when !LIBMESH_ENABLE_AMR
429 8596 : libmesh_ignore(mesh);
430 :
431 17192 : LOG_SCOPE("set_parent_processor_ids()", "Partitioner");
432 :
433 : #ifdef LIBMESH_ENABLE_AMR
434 :
435 : // If the mesh is serial we have access to all the elements,
436 : // in particular all the active ones. We can therefore set
437 : // the parent processor ids indirectly through their children, and
438 : // set the subactive processor ids while examining their active
439 : // ancestors.
440 : // By convention a parent is assigned to the minimum processor
441 : // of all its children, and a subactive is assigned to the processor
442 : // of its active ancestor.
443 467996 : if (mesh.is_serial())
444 : {
445 44989100 : for (auto & elem : mesh.active_element_ptr_range())
446 : {
447 : // First set descendents
448 2132096 : std::vector<Elem *> subactive_family;
449 23156474 : elem->total_family_tree(subactive_family);
450 47371564 : for (const auto & f : subactive_family)
451 24215090 : f->processor_id() = elem->processor_id();
452 :
453 : // Then set ancestors
454 23156474 : Elem * parent = elem->parent();
455 :
456 52458616 : while (parent)
457 : {
458 : // invalidate the parent id, otherwise the min below
459 : // will not work if the current parent id is less
460 : // than all the children!
461 1892666 : parent->invalidate_processor_id();
462 :
463 153124364 : for (auto & child : parent->child_ref_range())
464 : {
465 9133660 : libmesh_assert(!child.is_remote());
466 9133660 : libmesh_assert_not_equal_to (child.processor_id(), DofObject::invalid_processor_id);
467 123822222 : parent->processor_id() = std::min(parent->processor_id(),
468 9133660 : child.processor_id());
469 : }
470 3785356 : parent = parent->parent();
471 : }
472 391405 : }
473 : }
474 :
475 : // When the mesh is parallel we cannot guarantee that parents have access to
476 : // all their children.
477 : else
478 : {
479 : // Setting subactive processor ids is easy: we can guarantee
480 : // that children have access to all their parents.
481 :
482 : // Loop over all the active elements in the mesh
483 9573948 : for (auto & child : mesh.active_element_ptr_range())
484 : {
485 19772 : std::vector<Elem *> subactive_family;
486 4737287 : child->total_family_tree(subactive_family);
487 9477883 : for (const auto & f : subactive_family)
488 4740596 : f->processor_id() = child->processor_id();
489 59399 : }
490 :
491 : // When the mesh is parallel we cannot guarantee that parents have access to
492 : // all their children.
493 :
494 : // We will use a brute-force approach here. Each processor finds its parent
495 : // elements and sets the parent pid to the minimum of its
496 : // semilocal descendants.
497 : // A global reduction is then performed to make sure the true minimum is found.
498 : // As noted, this is required because we cannot guarantee that a parent has
499 : // access to all its children on any single processor.
500 116 : libmesh_parallel_only(mesh.comm());
501 116 : libmesh_assert(MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
502 : mesh.unpartitioned_elements_end()) == 0);
503 :
504 59631 : const dof_id_type max_elem_id = mesh.max_elem_id();
505 :
506 : std::vector<processor_id_type>
507 232 : parent_processor_ids (std::min(communication_blocksize,
508 59747 : max_elem_id));
509 :
510 119262 : for (dof_id_type blk=0, last_elem_id=0; last_elem_id<max_elem_id; blk++)
511 : {
512 59631 : last_elem_id =
513 59747 : std::min(static_cast<dof_id_type>((blk+1)*communication_blocksize),
514 116 : max_elem_id);
515 59631 : const dof_id_type first_elem_id = blk*communication_blocksize;
516 :
517 116 : std::fill (parent_processor_ids.begin(),
518 : parent_processor_ids.end(),
519 : DofObject::invalid_processor_id);
520 :
521 : // first build up local contributions to parent_processor_ids
522 116 : bool have_parent_in_block = false;
523 :
524 119818 : for (auto & parent : as_range(mesh.ancestor_elements_begin(),
525 1552226 : mesh.ancestor_elements_end()))
526 : {
527 657303 : const dof_id_type parent_idx = parent->id();
528 672 : libmesh_assert_less (parent_idx, max_elem_id);
529 :
530 657975 : if ((parent_idx >= first_elem_id) &&
531 657303 : (parent_idx < last_elem_id))
532 : {
533 672 : have_parent_in_block = true;
534 657303 : processor_id_type parent_pid = DofObject::invalid_processor_id;
535 :
536 672 : std::vector<const Elem *> active_family;
537 657303 : parent->active_family_tree(active_family);
538 7656314 : for (const auto & f : active_family)
539 7748505 : parent_pid = std::min (parent_pid, f->processor_id());
540 :
541 657303 : const dof_id_type packed_idx = parent_idx - first_elem_id;
542 672 : libmesh_assert_less (packed_idx, parent_processor_ids.size());
543 :
544 657975 : parent_processor_ids[packed_idx] = parent_pid;
545 : }
546 59399 : }
547 :
548 : // then find the global minimum
549 59631 : mesh.comm().min (parent_processor_ids);
550 :
551 : // and assign the ids, if we have a parent in this block.
552 59631 : if (have_parent_in_block)
553 43744 : for (auto & parent : as_range(mesh.ancestor_elements_begin(),
554 1400078 : mesh.ancestor_elements_end()))
555 : {
556 657303 : const dof_id_type parent_idx = parent->id();
557 :
558 657975 : if ((parent_idx >= first_elem_id) &&
559 656631 : (parent_idx < last_elem_id))
560 : {
561 657303 : const dof_id_type packed_idx = parent_idx - first_elem_id;
562 672 : libmesh_assert_less (packed_idx, parent_processor_ids.size());
563 :
564 : const processor_id_type parent_pid =
565 657303 : parent_processor_ids[packed_idx];
566 :
567 672 : libmesh_assert_not_equal_to (parent_pid, DofObject::invalid_processor_id);
568 :
569 657303 : parent->processor_id() = parent_pid;
570 : }
571 21476 : }
572 : }
573 : }
574 :
575 : #endif // LIBMESH_ENABLE_AMR
576 467996 : }
577 :
578 : void
579 0 : Partitioner::processor_pairs_to_interface_nodes(MeshBase & mesh,
580 : std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> & processor_pair_to_nodes)
581 : {
582 : // This function must be run on all processors at once
583 0 : libmesh_parallel_only(mesh.comm());
584 :
585 0 : processor_pair_to_nodes.clear();
586 :
587 0 : std::set<dof_id_type> mynodes;
588 0 : std::set<dof_id_type> neighbor_nodes;
589 0 : std::vector<dof_id_type> common_nodes;
590 :
591 : // Loop over all the active elements
592 0 : for (auto & elem : mesh.active_element_ptr_range())
593 : {
594 0 : libmesh_assert(elem);
595 :
596 0 : libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
597 :
598 0 : auto n_nodes = elem->n_nodes();
599 :
600 : // prepare data for this element
601 0 : mynodes.clear();
602 0 : neighbor_nodes.clear();
603 0 : common_nodes.clear();
604 :
605 0 : for (unsigned int inode = 0; inode < n_nodes; inode++)
606 0 : mynodes.insert(elem->node_id(inode));
607 :
608 0 : for (auto i : elem->side_index_range())
609 : {
610 0 : auto neigh = elem->neighbor_ptr(i);
611 0 : if (neigh && !neigh->is_remote() && neigh->processor_id() != elem->processor_id())
612 : {
613 0 : neighbor_nodes.clear();
614 0 : common_nodes.clear();
615 0 : auto neigh_n_nodes = neigh->n_nodes();
616 0 : for (unsigned int inode = 0; inode < neigh_n_nodes; inode++)
617 0 : neighbor_nodes.insert(neigh->node_id(inode));
618 :
619 0 : std::set_intersection(mynodes.begin(), mynodes.end(),
620 : neighbor_nodes.begin(), neighbor_nodes.end(),
621 0 : std::back_inserter(common_nodes));
622 :
623 0 : auto & map_set = processor_pair_to_nodes[std::make_pair(std::min(elem->processor_id(), neigh->processor_id()),
624 0 : std::max(elem->processor_id(), neigh->processor_id()))];
625 0 : for (auto global_node_id : common_nodes)
626 0 : map_set.insert(global_node_id);
627 : }
628 : }
629 0 : }
630 0 : }
631 :
632 0 : void Partitioner::set_interface_node_processor_ids_linear(MeshBase & mesh)
633 : {
634 : // This function must be run on all processors at once
635 0 : libmesh_parallel_only(mesh.comm());
636 :
637 0 : std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
638 :
639 0 : processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
640 :
641 0 : for (auto & pmap : processor_pair_to_nodes)
642 : {
643 0 : std::size_t n_own_nodes = pmap.second.size()/2, i = 0;
644 :
645 0 : for (dof_id_type id : pmap.second)
646 : {
647 0 : auto & node = mesh.node_ref(id);
648 0 : if (i <= n_own_nodes)
649 0 : node.processor_id() = pmap.first.first;
650 : else
651 0 : node.processor_id() = pmap.first.second;
652 0 : i++;
653 : }
654 : }
655 0 : }
656 :
657 0 : void Partitioner::set_interface_node_processor_ids_BFS(MeshBase & mesh)
658 : {
659 : // This function must be run on all processors at once
660 0 : libmesh_parallel_only(mesh.comm());
661 :
662 : // I see occasional consistency failures when using this on a
663 : // distributed mesh
664 : libmesh_experimental();
665 :
666 0 : std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
667 :
668 0 : processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
669 :
670 0 : std::unordered_map<dof_id_type, std::vector<const Elem *>> nodes_to_elem_map;
671 :
672 0 : MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);
673 :
674 0 : std::vector<const Node *> neighbors;
675 0 : std::set<dof_id_type> neighbors_order;
676 0 : std::vector<dof_id_type> common_nodes;
677 0 : std::queue<dof_id_type> nodes_queue;
678 0 : std::set<dof_id_type> visted_nodes;
679 :
680 0 : for (auto & pmap : processor_pair_to_nodes)
681 : {
682 0 : std::size_t n_own_nodes = pmap.second.size()/2;
683 :
684 : // Initialize node assignment
685 0 : for (dof_id_type id : pmap.second)
686 0 : mesh.node_ref(id).processor_id() = pmap.first.second;
687 :
688 0 : visted_nodes.clear();
689 0 : for (dof_id_type id : pmap.second)
690 : {
691 0 : mesh.node_ref(id).processor_id() = pmap.first.second;
692 :
693 0 : if (visted_nodes.count(id))
694 0 : continue;
695 : else
696 : {
697 0 : nodes_queue.push(id);
698 0 : visted_nodes.insert(id);
699 0 : if (visted_nodes.size() >= n_own_nodes)
700 0 : break;
701 : }
702 :
703 0 : while (!nodes_queue.empty())
704 : {
705 0 : auto & node = mesh.node_ref(nodes_queue.front());
706 0 : nodes_queue.pop();
707 :
708 0 : neighbors.clear();
709 0 : MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
710 0 : neighbors_order.clear();
711 0 : for (auto & neighbor : neighbors)
712 0 : neighbors_order.insert(neighbor->id());
713 :
714 0 : common_nodes.clear();
715 0 : std::set_intersection(pmap.second.begin(), pmap.second.end(),
716 : neighbors_order.begin(), neighbors_order.end(),
717 0 : std::back_inserter(common_nodes));
718 :
719 0 : for (auto c_node : common_nodes)
720 0 : if (!visted_nodes.count(c_node))
721 : {
722 0 : nodes_queue.push(c_node);
723 0 : visted_nodes.insert(c_node);
724 0 : if (visted_nodes.size() >= n_own_nodes)
725 0 : goto queue_done;
726 : }
727 :
728 0 : if (visted_nodes.size() >= n_own_nodes)
729 0 : goto queue_done;
730 : }
731 : }
732 0 : queue_done:
733 0 : for (auto node : visted_nodes)
734 0 : mesh.node_ref(node).processor_id() = pmap.first.first;
735 : }
736 0 : }
737 :
738 0 : void Partitioner::set_interface_node_processor_ids_petscpartitioner(MeshBase & mesh)
739 : {
740 0 : libmesh_ignore(mesh); // Only used if LIBMESH_HAVE_PETSC
741 :
742 : // This function must be run on all processors at once
743 0 : libmesh_parallel_only(mesh.comm());
744 :
745 : #if LIBMESH_HAVE_PETSC
746 0 : std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
747 :
748 0 : processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
749 :
750 0 : std::vector<std::vector<const Elem *>> nodes_to_elem_map;
751 :
752 0 : MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);
753 :
754 0 : std::vector<const Node *> neighbors;
755 0 : std::set<dof_id_type> neighbors_order;
756 0 : std::vector<dof_id_type> common_nodes;
757 :
758 0 : std::vector<dof_id_type> rows;
759 0 : std::vector<dof_id_type> cols;
760 :
761 0 : std::map<dof_id_type, dof_id_type> global_to_local;
762 :
763 0 : for (auto & pmap : processor_pair_to_nodes)
764 : {
765 0 : unsigned int i = 0;
766 :
767 0 : rows.clear();
768 0 : rows.resize(pmap.second.size()+1);
769 0 : cols.clear();
770 0 : for (dof_id_type id : pmap.second)
771 0 : global_to_local[id] = i++;
772 :
773 0 : i = 0;
774 0 : for (auto id : pmap.second)
775 : {
776 0 : auto & node = mesh.node_ref(id);
777 0 : neighbors.clear();
778 0 : MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
779 0 : neighbors_order.clear();
780 0 : for (auto & neighbor : neighbors)
781 0 : neighbors_order.insert(neighbor->id());
782 :
783 0 : common_nodes.clear();
784 0 : std::set_intersection(pmap.second.begin(), pmap.second.end(),
785 : neighbors_order.begin(), neighbors_order.end(),
786 0 : std::back_inserter(common_nodes));
787 :
788 0 : rows[i+1] = rows[i] + cast_int<dof_id_type>(common_nodes.size());
789 :
790 0 : for (auto c_node : common_nodes)
791 0 : cols.push_back(global_to_local[c_node]);
792 :
793 0 : i++;
794 : }
795 :
796 : // Next we construct an IS from a MatPartitioning
797 0 : WrappedPetsc<IS> is;
798 : {
799 : PetscInt *adj_i, *adj_j;
800 0 : LibmeshPetscCall2(mesh.comm(), PetscCalloc1(rows.size(), &adj_i));
801 0 : LibmeshPetscCall2(mesh.comm(), PetscCalloc1(cols.size(), &adj_j));
802 0 : PetscInt rows_size = cast_int<PetscInt>(rows.size());
803 0 : for (PetscInt ii=0; ii<rows_size; ii++)
804 0 : adj_i[ii] = rows[ii];
805 :
806 0 : PetscInt cols_size = cast_int<PetscInt>(cols.size());
807 0 : for (PetscInt ii=0; ii<cols_size; ii++)
808 0 : adj_j[ii] = cols[ii];
809 :
810 0 : const PetscInt sz = cast_int<PetscInt>(pmap.second.size());
811 :
812 : // Create sparse matrix representing an adjacency list
813 0 : WrappedPetsc<Mat> adj;
814 0 : LibmeshPetscCall2(mesh.comm(), MatCreateMPIAdj(PETSC_COMM_SELF, sz, sz, adj_i, adj_j, nullptr, adj.get()));
815 :
816 : // Create MatPartitioning object
817 0 : WrappedPetsc<MatPartitioning> part;
818 0 : LibmeshPetscCall2(mesh.comm(), MatPartitioningCreate(PETSC_COMM_SELF, part.get()));
819 :
820 : // Apply MatPartitioning, storing results in "is"
821 0 : LibmeshPetscCall2(mesh.comm(), MatPartitioningSetAdjacency(part, adj));
822 0 : LibmeshPetscCall2(mesh.comm(), MatPartitioningSetNParts(part, 2));
823 0 : LibmeshPetscCall2(mesh.comm(), PetscObjectSetOptionsPrefix((PetscObject)(*part), "balance_"));
824 0 : LibmeshPetscCall2(mesh.comm(), MatPartitioningSetFromOptions(part));
825 0 : LibmeshPetscCall2(mesh.comm(), MatPartitioningApply(part, is.get()));
826 : }
827 :
828 : PetscInt local_size;
829 : const PetscInt *indices;
830 0 : LibmeshPetscCall2(mesh.comm(), ISGetLocalSize(is, &local_size));
831 0 : LibmeshPetscCall2(mesh.comm(), ISGetIndices(is, &indices));
832 :
833 0 : i = 0;
834 0 : for (auto id : pmap.second)
835 : {
836 0 : auto & node = mesh.node_ref(id);
837 0 : if (indices[i])
838 0 : node.processor_id() = pmap.first.second;
839 : else
840 0 : node.processor_id() = pmap.first.first;
841 :
842 0 : i++;
843 : }
844 0 : LibmeshPetscCall2(mesh.comm(), ISRestoreIndices(is, &indices));
845 : }
846 : #else
847 0 : libmesh_error_msg("PETSc is required");
848 : #endif
849 0 : }
850 :
851 :
852 493936 : void Partitioner::set_node_processor_ids(MeshBase & mesh)
853 : {
854 20392 : LOG_SCOPE("set_node_processor_ids()","Partitioner");
855 :
856 : // This function must be run on all processors at once
857 10196 : libmesh_parallel_only(mesh.comm());
858 :
859 : // If we have any unpartitioned elements at this
860 : // stage there is a problem
861 10196 : libmesh_assert (MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
862 : mesh.unpartitioned_elements_end()) == 0);
863 :
864 : // Start from scratch here: nodes we used to own may not be
865 : // eligible for us to own any more.
866 77490756 : for (auto & node : mesh.node_ptr_range())
867 : {
868 76513080 : node->processor_id() = DofObject::invalid_processor_id;
869 473544 : }
870 :
871 : // Loop over all the active elements
872 67318690 : for (auto & elem : mesh.active_element_ptr_range())
873 : {
874 1669900 : libmesh_assert(elem);
875 :
876 1669900 : libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
877 :
878 : // Consider updating the processor id on this element's nodes
879 295315318 : for (Node & node : elem->node_ref_range())
880 : {
881 11830293 : processor_id_type & pid = node.processor_id();
882 258805005 : pid = node.choose_processor_id(pid, elem->processor_id());
883 : }
884 473544 : }
885 :
886 : // How we finish off the node partitioning depends on our command
887 : // line options.
888 :
889 : const bool load_balanced_nodes_linear =
890 493936 : libMesh::on_command_line ("--load-balanced-nodes-linear");
891 :
892 : const bool load_balanced_nodes_bfs =
893 493936 : libMesh::on_command_line ("--load-balanced-nodes-bfs");
894 :
895 : const bool load_balanced_nodes_petscpartition =
896 493936 : libMesh::on_command_line ("--load-balanced-nodes-petscpartitioner");
897 :
898 493936 : unsigned int n_load_balance_options = load_balanced_nodes_linear;
899 493936 : n_load_balance_options += load_balanced_nodes_bfs;
900 493936 : n_load_balance_options += load_balanced_nodes_petscpartition;
901 493936 : libmesh_error_msg_if(n_load_balance_options > 1,
902 : "Cannot perform more than one load balancing type at a time");
903 :
904 493936 : if (load_balanced_nodes_linear)
905 0 : set_interface_node_processor_ids_linear(mesh);
906 493936 : else if (load_balanced_nodes_bfs)
907 0 : set_interface_node_processor_ids_BFS(mesh);
908 493936 : else if (load_balanced_nodes_petscpartition)
909 0 : set_interface_node_processor_ids_petscpartitioner(mesh);
910 :
 911 : // The node-balancing algorithm is responsible for assigning owned nodes.
 912 : // We still need to sync PIDs.
913 : {
914 : // For inactive elements, we will have already gotten most of
915 : // these nodes, *except* for the case of a parent with a subset
916 : // of active descendants which are remote elements. In that
917 : // case some of the parent nodes will not have been properly
918 : // handled yet on our processor.
919 : //
920 : // We don't want to inadvertently give one of them an incorrect
921 : // processor id, but if we're not in serial then we have to
922 : // assign them temporary pids to make querying work, so we'll
923 : // save our *valid* pids before assigning temporaries.
924 : //
925 : // Even in serial we'll want to check and make sure we're not
926 : // overwriting valid active node pids with pids from subactive
927 : // elements.
928 20392 : std::unordered_set<dof_id_type> bad_pids;
929 :
930 147608618 : for (auto & node : mesh.node_ptr_range())
931 76513080 : if (node->processor_id() == DofObject::invalid_processor_id)
932 1562221 : bad_pids.insert(node->id());
933 :
934 : // If we assign our temporary ids by looping from finer elements
935 : // to coarser elements, we'll always get an id from the finest
936 : // ghost element we can see, which will usually be "closer" to
937 : // the true processor we want to query and so will reduce query
938 : // cycles that don't reach that processor.
939 :
940 : // But we can still end up with a query cycle that dead-ends, so
941 : // we need to prepare a "push" communication step here.
942 :
943 493936 : const bool is_serial = mesh.is_serial();
944 : std::unordered_map
945 : <processor_id_type,
946 : std::unordered_map<dof_id_type, processor_id_type>>
947 20392 : potential_pids;
948 :
949 493936 : const unsigned int n_levels = MeshTools::n_levels(mesh);
950 1172111 : for (unsigned int level = n_levels; level > 0; --level)
951 : {
952 3367484 : for (auto & elem : as_range(mesh.level_elements_begin(level-1),
953 83218324 : mesh.level_elements_end(level-1)))
954 : {
955 2028952 : libmesh_assert_not_equal_to (elem->processor_id(),
956 : DofObject::invalid_processor_id);
957 :
958 41285110 : const processor_id_type elem_pid = elem->processor_id();
959 :
960 : // Consider updating the processor id on this element's nodes
961 327134044 : for (Node & node : elem->node_ref_range())
962 : {
963 13548801 : processor_id_type & pid = node.processor_id();
964 285848934 : if (bad_pids.count(node.id()))
965 3945556 : pid = node.choose_processor_id(pid, elem_pid);
966 281903378 : else if (!is_serial)
967 52937350 : potential_pids[elem_pid][node.id()] = pid;
968 : }
969 642539 : }
970 : }
971 :
972 493936 : if (!is_serial)
973 : {
974 : std::unordered_map
975 : <processor_id_type,
976 : std::vector<std::pair<dof_id_type, processor_id_type>>>
977 130 : potential_pids_vecs;
978 :
979 499649 : for (auto & pair : potential_pids)
980 856930 : potential_pids_vecs[pair.first].assign(pair.second.begin(), pair.second.end());
981 :
982 : auto pids_action_functor =
983 428225 : [& mesh, & bad_pids]
984 : (processor_id_type /* src_pid */,
985 19177097 : const std::vector<std::pair<dof_id_type, processor_id_type>> & data)
986 : {
987 19507327 : for (auto pair : data)
988 : {
989 19078622 : Node & node = mesh.node_ref(pair.first);
990 32745 : processor_id_type & pid = node.processor_id();
991 19078622 : if (const auto it = bad_pids.find(pair.first);
992 32745 : it != bad_pids.end())
993 : {
994 14298 : pid = pair.second;
995 0 : bad_pids.erase(it);
996 : }
997 : else
998 19064324 : pid = node.choose_processor_id(pid, pair.second);
999 : }
1000 71294 : };
1001 :
1002 : Parallel::push_parallel_vector_data
1003 70944 : (mesh.comm(), potential_pids_vecs, pids_action_functor);
1004 :
1005 : // Using default libMesh options, we'll just need to sync
1006 : // between processors now. The catch here is that we can't
1007 : // initially trust Node::choose_processor_id() because some
1008 : // of those node processor ids are the temporary ones.
1009 130 : CorrectProcIds correct_pids(mesh, bad_pids);
1010 : Parallel::sync_node_data_by_element_id
1011 212572 : (mesh, mesh.elements_begin(), mesh.elements_end(),
1012 70814 : Parallel::SyncEverything(), Parallel::SyncEverything(),
1013 : correct_pids);
1014 :
1015 : // But once we've got all the non-temporary pids synced, we
1016 : // may need to sync again to get any pids on nodes only
1017 : // connected to subactive elements, for which *only*
1018 : // "temporary" pids are possible.
1019 130 : bad_pids.clear();
1020 : Parallel::sync_node_data_by_element_id
1021 141888 : (mesh,
1022 212962 : mesh.elements_begin(), mesh.elements_end(),
1023 70944 : Parallel::SyncEverything(), Parallel::SyncEverything(),
1024 : correct_pids);
1025 : }
1026 : }
1027 :
1028 : // We can't assert that all nodes are connected to elements, because
1029 : // a DistributedMesh with NodeConstraints might have pulled in some
1030 : // remote nodes solely for evaluating those constraints.
1031 : // MeshTools::libmesh_assert_connected_nodes(mesh);
1032 :
1033 : #ifdef DEBUG
1034 10196 : MeshTools::libmesh_assert_valid_procids<Node>(mesh);
1035 : //MeshTools::libmesh_assert_canonical_node_procids(mesh);
1036 : #endif
1037 493936 : }
1038 :
1039 :
1040 :
1041 : struct SyncLocalIDs
1042 : {
1043 : typedef dof_id_type datum;
1044 :
1045 : typedef std::unordered_map<dof_id_type, dof_id_type> map_type;
1046 :
1047 460230 : SyncLocalIDs(map_type & _id_map) : id_map(_id_map) {}
1048 :
1049 : map_type & id_map;
1050 :
1051 2405834 : void gather_data (const std::vector<dof_id_type> & ids,
1052 : std::vector<datum> & local_ids) const
1053 : {
1054 2406510 : local_ids.resize(ids.size());
1055 :
1056 27662472 : for (auto i : index_range(ids))
1057 25256638 : local_ids[i] = id_map[ids[i]];
1058 2405834 : }
1059 :
1060 2405834 : void act_on_data (const std::vector<dof_id_type> & ids,
1061 : const std::vector<datum> & local_ids)
1062 : {
1063 27662472 : for (auto i : index_range(local_ids))
1064 25271312 : id_map[ids[i]] = local_ids[i];
1065 2405834 : }
1066 : };
1067 :
1068 460230 : void Partitioner::_find_global_index_by_pid_map(const MeshBase & mesh)
1069 : {
1070 460230 : const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
1071 :
1072 : // Find the number of active elements on each processor. We cannot use
1073 : // mesh.n_active_elem_on_proc(pid) since that only returns the number of
1074 : // elements assigned to pid which are currently stored on the calling
1075 : // processor. This will not in general be correct for parallel meshes
1076 : // when (pid!=mesh.processor_id()).
1077 1376 : auto n_proc = mesh.n_processors();
1078 460230 : _n_active_elem_on_proc.resize(n_proc);
1079 460230 : mesh.comm().allgather(n_active_local_elem, _n_active_elem_on_proc);
1080 :
1081 461606 : std::vector<dof_id_type> n_active_elem_before_proc(mesh.n_processors());
1082 :
1083 4941982 : for (auto i : make_range(n_proc-1))
1084 4481752 : n_active_elem_before_proc[i+1] =
1085 4483128 : n_active_elem_before_proc[i] + _n_active_elem_on_proc[i];
1086 :
1087 : libMesh::BoundingBox bbox =
1088 460230 : MeshTools::create_bounding_box(mesh);
1089 :
1090 688 : _global_index_by_pid_map.clear();
1091 :
1092 : // create the mapping which is contiguous by processor
1093 460230 : MeshCommunication().find_local_indices (bbox,
1094 920460 : mesh.active_local_elements_begin(),
1095 919772 : mesh.active_local_elements_end(),
1096 460230 : _global_index_by_pid_map);
1097 :
1098 688 : SyncLocalIDs sync(_global_index_by_pid_map);
1099 :
1100 : Parallel::sync_dofobject_data_by_id
1101 919772 : (mesh.comm(), mesh.active_elements_begin(), mesh.active_elements_end(), sync);
1102 :
1103 31165204 : for (const auto & elem : mesh.active_element_ptr_range())
1104 : {
1105 30245432 : const processor_id_type pid = elem->processor_id();
1106 35680 : libmesh_assert_less (_global_index_by_pid_map[elem->id()], _n_active_elem_on_proc[pid]);
1107 :
1108 30281112 : _global_index_by_pid_map[elem->id()] += n_active_elem_before_proc[pid];
1109 458854 : }
1110 460230 : }
1111 :
1112 230115 : void Partitioner::build_graph (const MeshBase & mesh)
1113 : {
1114 688 : LOG_SCOPE("build_graph()", "Partitioner");
1115 :
1116 344 : const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
1117 :
1118 : // If we have boundary elements in this mesh, we want to account for
1119 : // the connectivity between them and interior elements. We can find
1120 : // interior elements from boundary elements, but we need to build up
1121 : // a lookup map to do the reverse.
1122 : typedef std::unordered_multimap<const Elem *, const Elem *> map_type;
1123 688 : map_type interior_to_boundary_map;
1124 :
1125 : // If we have spline nodes in this mesh, we want to account for the
1126 : // connectivity between them and integration elements. We can find
1127 : // spline nodes from integration elements, but need a reverse map
1128 : // {integration_elements} = elems_constrained_by[spline_nodeelem]
1129 688 : map_type elems_constrained_by;
1130 :
1131 344 : const auto & mesh_constrained_nodes = mesh.get_constraint_rows();
1132 :
1133 45774514 : for (const Elem * elem : mesh.active_element_ptr_range())
1134 : {
1135 15122716 : if (!mesh_constrained_nodes.empty()) // quick test for non-IGA cases
1136 : {
1137 0 : const auto end_it = mesh_constrained_nodes.end();
1138 :
1139 : // Use a set to avoid duplicates. Use a well-defined method
1140 : // of ordering that set to make debugging easier.
1141 0 : std::set<const Elem *, CompareElemIdsByLevel> constraining_elems;
1142 1209187 : for (const Node & node : elem->node_ref_range())
1143 : {
1144 980466 : if (const auto row_it = mesh_constrained_nodes.find(&node);
1145 0 : row_it != end_it)
1146 3946461 : for (const auto & [pr, coef] : row_it->second)
1147 : {
1148 0 : libmesh_ignore(coef); // avoid gcc 7 warning
1149 3092771 : constraining_elems.insert(pr.first);
1150 : }
1151 : }
1152 1583959 : for (const Elem * constraining_elem : constraining_elems)
1153 0 : elems_constrained_by.emplace(constraining_elem, elem);
1154 : }
1155 :
1156 : // If we don't have an interior_parent and we don't have any
1157 : // constrained nodes then there's nothing else to look up.
1158 15122716 : if (elem->interior_parent())
1159 : {
1160 : // get all relevant interior elements
1161 0 : std::set<const Elem *> neighbor_set;
1162 1094 : elem->find_interior_neighbors(neighbor_set);
1163 :
1164 2188 : for (const auto & neighbor : neighbor_set)
1165 0 : interior_to_boundary_map.emplace(neighbor, elem);
1166 : }
1167 229427 : }
1168 :
1169 : #ifdef LIBMESH_ENABLE_AMR
1170 688 : std::vector<const Elem *> neighbors_offspring;
1171 : #endif
1172 :
1173 : // This is costly, and we only need to do it if the mesh has
1174 : // changed since we last partitioned... but the mesh probably has
1175 : // changed since we last partitioned, and if it hasn't we don't
1176 : // have a reliable way to be sure of that.
1177 230115 : _find_global_index_by_pid_map(mesh);
1178 :
1179 344 : dof_id_type first_local_elem = 0;
1180 1350553 : for (auto pid : make_range(mesh.processor_id()))
1181 1120610 : first_local_elem += _n_active_elem_on_proc[pid];
1182 :
1183 229771 : _dual_graph.clear();
1184 230115 : _dual_graph.resize(n_active_local_elem);
1185 230115 : _local_id_to_elem.resize(n_active_local_elem);
1186 :
1187 : // We may need to communicate constraint-row-based connections
1188 : // between processors
1189 : std::unordered_map<processor_id_type,
1190 688 : std::vector<std::pair<dof_id_type, dof_id_type>>> connections_to_push;
1191 :
1192 5427674 : for (const auto & elem : mesh.active_local_element_ptr_range())
1193 : {
1194 10503 : libmesh_assert (_global_index_by_pid_map.count(elem->id()));
1195 : const dof_id_type global_index_by_pid =
1196 2494397 : _global_index_by_pid_map[elem->id()];
1197 :
1198 2494397 : const dof_id_type local_index =
1199 10503 : global_index_by_pid - first_local_elem;
1200 10503 : libmesh_assert_less (local_index, n_active_local_elem);
1201 :
1202 21006 : std::vector<dof_id_type> & graph_row = _dual_graph[local_index];
1203 :
1204 : // Save this off to make it easy to index later
1205 2494397 : _local_id_to_elem[local_index] = const_cast<Elem*>(elem);
1206 :
1207 : // Loop over the element's neighbors. An element
1208 : // adjacency corresponds to a face neighbor
1209 13088368 : for (auto neighbor : elem->neighbor_ptr_range())
1210 : {
1211 10583468 : if (neighbor != nullptr)
1212 : {
1213 : // If the neighbor is active treat it
1214 : // as a connection
1215 44176 : if (neighbor->active())
1216 : {
1217 44016 : libmesh_assert(_global_index_by_pid_map.count(neighbor->id()));
1218 : const dof_id_type neighbor_global_index_by_pid =
1219 9640091 : _global_index_by_pid_map[neighbor->id()];
1220 :
1221 9640091 : graph_row.push_back(neighbor_global_index_by_pid);
1222 : }
1223 :
1224 : #ifdef LIBMESH_ENABLE_AMR
1225 :
1226 : // Otherwise we need to find all of the
1227 : // neighbor's children that are connected to
1228 : // us and add them
1229 : else
1230 : {
1231 : // The side of the neighbor to which
1232 : // we are connected
1233 : const unsigned int ns =
1234 44297 : neighbor->which_neighbor_am_i (elem);
1235 160 : libmesh_assert_less (ns, neighbor->n_neighbors());
1236 :
1237 : // Get all the active children (& grandchildren, etc...)
1238 : // of the neighbor
1239 :
1240 : // FIXME - this is the wrong thing, since we
1241 : // should be getting the active family tree on
1242 : // our side only. But adding too many graph
1243 : // links may cause hanging nodes to tend to be
1244 : // on partition interiors, which would reduce
1245 : // communication overhead for constraint
1246 : // equations, so we'll leave it.
1247 :
1248 44297 : neighbor->active_family_tree (neighbors_offspring);
1249 :
1250 : // Get all the neighbor's children that
1251 : // live on that side and are thus connected
1252 : // to us
1253 293888 : for (const auto & child : neighbors_offspring)
1254 : {
1255 : // This does not assume a level-1 mesh.
1256 : // Note that since children have sides numbered
1257 : // coincident with the parent then this is a sufficient test.
1258 250345 : if (child->neighbor_ptr(ns) == elem)
1259 : {
1260 320 : libmesh_assert (child->active());
1261 320 : libmesh_assert (_global_index_by_pid_map.count(child->id()));
1262 : const dof_id_type child_global_index_by_pid =
1263 118243 : _global_index_by_pid_map[child->id()];
1264 :
1265 118243 : graph_row.push_back(child_global_index_by_pid);
1266 : }
1267 : }
1268 : }
1269 :
1270 : #endif /* ifdef LIBMESH_ENABLE_AMR */
1271 :
1272 :
1273 : }
1274 : }
1275 :
1276 3351142 : if ((elem->dim() < LIBMESH_DIM) &&
1277 856745 : elem->interior_parent())
1278 : {
1279 : // get all relevant interior elements
1280 0 : std::set<const Elem *> neighbor_set;
1281 896 : elem->find_interior_neighbors(neighbor_set);
1282 :
1283 1792 : for (const auto & neighbor : neighbor_set)
1284 : {
1285 : const dof_id_type neighbor_global_index_by_pid =
1286 896 : _global_index_by_pid_map[neighbor->id()];
1287 :
1288 896 : graph_row.push_back(neighbor_global_index_by_pid);
1289 : }
1290 : }
1291 :
1292 : // Check for any boundary neighbors
1293 2495293 : for (const auto & pr : as_range(interior_to_boundary_map.equal_range(elem)))
1294 : {
1295 896 : const Elem * neighbor = pr.second;
1296 :
1297 : const dof_id_type neighbor_global_index_by_pid =
1298 896 : _global_index_by_pid_map[neighbor->id()];
1299 :
1300 896 : graph_row.push_back(neighbor_global_index_by_pid);
1301 : }
1302 :
1303 : // Check for any constraining elements
1304 2494397 : if (!mesh_constrained_nodes.empty()) // quick test for non-IGA cases
1305 : {
1306 0 : const auto end_it = mesh_constrained_nodes.end();
1307 :
1308 : // Use a set to avoid duplicates. Use a well-defined method
1309 : // of ordering that set to make debugging easier.
1310 0 : std::set<const Elem *, CompareElemIdsByLevel> constraining_elems;
1311 146202 : for (const Node & node : elem->node_ref_range())
1312 : {
1313 118363 : if (const auto row_it = mesh_constrained_nodes.find(&node);
1314 0 : row_it != end_it)
1315 555100 : for (const auto & [pr, coef] : row_it->second)
1316 : {
1317 0 : libmesh_ignore(coef); // avoid gcc 7 warning
1318 451304 : constraining_elems.insert(pr.first);
1319 : }
1320 : }
1321 240695 : for (const Elem * constraining_elem : constraining_elems)
1322 : {
1323 : const dof_id_type constraining_global_index_by_pid =
1324 212856 : _global_index_by_pid_map[constraining_elem->id()];
1325 :
1326 212856 : graph_row.push_back(constraining_global_index_by_pid);
1327 :
1328 : // We can't be sure if the constraining element's owner sees
1329 : // the assembly element, so to get a symmetric connectivity
1330 : // graph we'll need to tell them about us to be safe.
1331 212856 : if (constraining_elem->processor_id() != mesh.processor_id())
1332 237014 : connections_to_push[constraining_elem->processor_id()].emplace_back
1333 118507 : (global_index_by_pid, constraining_global_index_by_pid);
1334 : }
1335 : }
1336 :
1337 : // Check for any constrained elements
1338 2670770 : for (const auto & pr : as_range(elems_constrained_by.equal_range(elem)))
1339 : {
1340 176373 : const Elem * constrained = pr.second;
1341 : const dof_id_type constrained_global_index_by_pid =
1342 176373 : _global_index_by_pid_map[constrained->id()];
1343 :
1344 176373 : graph_row.push_back(constrained_global_index_by_pid);
1345 :
1346 : // We can't be sure if the constrained element's owner sees
1347 : // the assembly element, so to get a symmetric connectivity
1348 : // graph we'll need to tell them about us to be safe.
1349 176373 : if (constrained->processor_id() != mesh.processor_id())
1350 164048 : connections_to_push[constrained->processor_id()].emplace_back
1351 82024 : (global_index_by_pid, constrained_global_index_by_pid);
1352 : }
1353 229427 : }
1354 :
1355 : // Partitioners like Parmetis require a symmetric adjacency matrix,
1356 : // but if we have an assembly element constrained by a spline
1357 : // NodeElem owned by another processor, it's possible that that
1358 : // processor doesn't see our assembly element. Let's push those
1359 : // entries, to ensure they're counted on both sides.
1360 : auto symmetrize_entries =
1361 4059 : [this, first_local_elem]
1362 : (processor_id_type /*src_pid*/,
1363 200531 : const std::vector<std::pair<dof_id_type, dof_id_type>> & incoming_entries)
1364 : {
1365 204590 : for (auto [i, j] : incoming_entries)
1366 : {
1367 0 : libmesh_assert_greater_equal(j, first_local_elem);
1368 200531 : const std::size_t jl = j - first_local_elem;
1369 0 : libmesh_assert_less(jl, _dual_graph.size());
1370 0 : std::vector<dof_id_type> & graph_row = _dual_graph[jl];
1371 200531 : if (std::find(graph_row.begin(), graph_row.end(), i) == graph_row.end())
1372 : {
1373 : // std::cerr << "Pushing back (" << j << ", " << i << ") from " << src_pid << std::endl;
1374 36483 : graph_row.push_back(i);
1375 : }
1376 : }
1377 229771 : };
1378 :
1379 230115 : Parallel::push_parallel_vector_data(mesh.comm(),
1380 : connections_to_push,
1381 : symmetrize_entries);
1382 :
1383 : // *Now* we should have a symmetric adjacency matrix. Let's check
1384 : // that, so any failures get caught before they e.g. confuse
1385 : // Parmetis in hard-to-debug ways. That's a global communication so
1386 : // we'll only do it in debug mode.
1387 : #ifdef DEBUG
1388 344 : auto n_proc = mesh.n_processors();
1389 688 : std::vector<dof_id_type> first_local_index_on_proc(n_proc, 0);
1390 688 : for (auto pid : make_range(1u,n_proc))
1391 344 : first_local_index_on_proc[pid] = first_local_index_on_proc[pid-1] + _n_active_elem_on_proc[pid-1];
1392 :
1393 344 : libmesh_assert_equal_to(first_local_index_on_proc[mesh.processor_id()],
1394 : first_local_elem);
1395 :
1396 688 : std::unordered_map<processor_id_type, std::vector<std::pair<dof_id_type, dof_id_type>>> entries_to_send;
1397 10847 : for (auto il : index_range(_dual_graph))
1398 : {
1399 10503 : const std::vector<dof_id_type> & graph_row = _dual_graph[il];
1400 :
1401 10503 : const auto i = il + first_local_elem;
1402 :
1403 54839 : for (auto j : graph_row)
1404 : {
1405 : // Stupid graph rows aren't sorted yet...
1406 44336 : processor_id_type target_pid = 0;
1407 110637 : while (target_pid+1 < n_proc &&
1408 44336 : j >= first_local_index_on_proc[target_pid+1])
1409 21965 : ++target_pid;
1410 :
1411 44336 : entries_to_send[target_pid].emplace_back(i,j);
1412 : }
1413 : }
1414 :
1415 688 : std::vector<std::tuple<processor_id_type, dof_id_type, dof_id_type>> bad_entries;
1416 :
1417 : auto check_incoming_entries =
1418 : [this, first_local_elem, &bad_entries]
1419 : (processor_id_type src_pid,
1420 178001 : const std::vector<std::pair<dof_id_type, dof_id_type>> & incoming_entries)
1421 : {
1422 44993 : for (auto [i, j] : incoming_entries)
1423 : {
1424 44336 : if (j < first_local_elem)
1425 : {
1426 0 : bad_entries.emplace_back(src_pid,i,j);
1427 0 : continue;
1428 : }
1429 88672 : const std::size_t jl = j - first_local_elem;
1430 44336 : if (jl >= _dual_graph.size())
1431 : {
1432 0 : bad_entries.emplace_back(src_pid,i,j);
1433 0 : continue;
1434 : }
1435 44336 : const std::vector<dof_id_type> & graph_row = _dual_graph[jl];
1436 44336 : if (std::find(graph_row.begin(), graph_row.end(), i) ==
1437 88672 : graph_row.end())
1438 : {
1439 0 : bad_entries.emplace_back(src_pid,i,j);
1440 0 : continue;
1441 : }
1442 : }
1443 657 : };
1444 :
1445 : // Keep any failures in sync in parallel, for easier debugging in
1446 : // unit tests where the failure exception will be caught.
1447 : Parallel::push_parallel_vector_data
1448 344 : (mesh.comm(), entries_to_send, check_incoming_entries);
1449 344 : bool bad_entries_exist = !bad_entries.empty();
1450 344 : mesh.comm().max(bad_entries_exist);
1451 344 : if (bad_entries_exist)
1452 : {
1453 : #if 0 // Optional verbosity for if this breaks again...
1454 : if (!bad_entries.empty())
1455 : {
1456 : std::cerr << "Bad entries on processor " << mesh.processor_id() << ": ";
1457 : for (auto [p, i, j] : bad_entries)
1458 : std::cerr << '(' << p << ", " << i << ", " << j << "), ";
1459 : std::cerr << std::endl;
1460 : }
1461 : #endif
1462 0 : libmesh_error_msg("Asymmetric partitioner graph detected");
1463 : }
1464 : #endif
1465 230115 : }
1466 :
1467 42816 : void Partitioner::assign_partitioning (MeshBase & mesh, const std::vector<dof_id_type> & parts)
1468 : {
1469 532 : LOG_SCOPE("assign_partitioning()", "Partitioner");
1470 :
1471 : // This function must be run on all processors at once
1472 266 : libmesh_parallel_only(mesh.comm());
1473 :
1474 266 : dof_id_type first_local_elem = 0;
1475 216895 : for (auto pid : make_range(mesh.processor_id()))
1476 174212 : first_local_elem += _n_active_elem_on_proc[pid];
1477 :
1478 : #ifndef NDEBUG
1479 266 : const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
1480 : #endif
1481 :
1482 : std::map<processor_id_type, std::vector<dof_id_type>>
1483 532 : requested_ids;
1484 :
1485 : // Results to gather from each processor - kept in a map so we
1486 : // do only one loop over elements after all receives are done.
1487 : std::map<processor_id_type, std::vector<processor_id_type>>
1488 532 : filled_request;
1489 :
1490 13006978 : for (auto & elem : mesh.active_element_ptr_range())
1491 : {
1492 : // we need to get the index from the owning processor
1493 : // (note we cannot assign it now -- we are iterating
1494 : // over elements again and this will be bad!)
1495 12938898 : requested_ids[elem->processor_id()].push_back(elem->id());
1496 42284 : }
1497 :
1498 : auto gather_functor =
1499 344737 : [this,
1500 : & parts,
1501 : #ifndef NDEBUG
1502 : & mesh,
1503 : n_active_local_elem,
1504 : #endif
1505 : first_local_elem]
1506 : (processor_id_type, const std::vector<dof_id_type> & ids,
1507 25930718 : std::vector<processor_id_type> & data)
1508 : {
1509 1064 : const std::size_t ids_size = ids.size();
1510 345801 : data.resize(ids.size());
1511 :
1512 13267413 : for (std::size_t i=0; i != ids_size; i++)
1513 : {
1514 12938898 : const dof_id_type requested_elem_index = ids[i];
1515 :
1516 17286 : libmesh_assert(_global_index_by_pid_map.count(requested_elem_index));
1517 :
1518 : const dof_id_type global_index_by_pid =
1519 12921612 : _global_index_by_pid_map[requested_elem_index];
1520 :
1521 12921612 : const dof_id_type local_index =
1522 17286 : global_index_by_pid - first_local_elem;
1523 :
1524 17286 : libmesh_assert_less (local_index, parts.size());
1525 17286 : libmesh_assert_less (local_index, n_active_local_elem);
1526 :
1527 : const processor_id_type elem_procid =
1528 12921612 : cast_int<processor_id_type>(parts[local_index]);
1529 :
1530 17286 : libmesh_assert_less (elem_procid, mesh.n_partitions());
1531 :
1532 12938898 : data[i] = elem_procid;
1533 : }
1534 388351 : };
1535 :
1536 : auto action_functor =
1537 344737 : [&filled_request]
1538 : (processor_id_type pid,
1539 : const std::vector<dof_id_type> &,
1540 345269 : const std::vector<processor_id_type> & new_procids)
1541 : {
1542 345801 : filled_request[pid] = new_procids;
1543 43348 : };
1544 :
1545 : // Trade requests with other processors
1546 266 : const processor_id_type * ex = nullptr;
1547 : Parallel::pull_parallel_vector_data
1548 42816 : (mesh.comm(), requested_ids, gather_functor, action_functor, ex);
1549 :
1550 : // and finally assign the partitioning.
1551 : // note we are iterating in exactly the same order
1552 : // used to build up the request, so we can expect the
1553 : // required entries to be in the proper sequence.
1554 43348 : std::vector<unsigned int> counters(mesh.n_processors(), 0);
1555 13006978 : for (auto & elem : mesh.active_element_ptr_range())
1556 : {
1557 12921612 : const processor_id_type current_pid = elem->processor_id();
1558 :
1559 17286 : libmesh_assert_less (counters[current_pid], requested_ids[current_pid].size());
1560 :
1561 : const processor_id_type elem_procid =
1562 12921612 : filled_request[current_pid][counters[current_pid]++];
1563 :
1564 17286 : libmesh_assert_less (elem_procid, mesh.n_partitions());
1565 12921612 : elem->processor_id() = elem_procid;
1566 42284 : }
1567 42816 : }
1568 :
1569 :
1570 : } // namespace libMesh
|