libMesh
partitioner.C
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 
20 #include "libmesh/partitioner.h"
21 
22 // libMesh includes
23 #include "libmesh/compare_elems_by_level.h"
24 #include "libmesh/elem.h"
25 #include "libmesh/enum_to_string.h"
26 #include "libmesh/int_range.h"
27 #include "libmesh/libmesh_logging.h"
28 #include "libmesh/mesh_base.h"
29 #include "libmesh/mesh_tools.h"
30 #include "libmesh/mesh_communication.h"
31 #include "libmesh/parallel_ghost_sync.h"
32 #include "libmesh/wrapped_petsc.h"
33 #include "libmesh/boundary_info.h"
34 
35 // Subclasses to build()
36 #include "libmesh/enum_partitioner_type.h"
37 #include "libmesh/centroid_partitioner.h"
38 #include "libmesh/hilbert_sfc_partitioner.h"
39 #include "libmesh/linear_partitioner.h"
40 #include "libmesh/mapped_subdomain_partitioner.h"
41 #include "libmesh/metis_partitioner.h"
42 #include "libmesh/morton_sfc_partitioner.h"
43 #include "libmesh/parmetis_partitioner.h"
44 #include "libmesh/sfc_partitioner.h"
45 #include "libmesh/subdomain_partitioner.h"
46 
47 // TIMPI includes
49 #include "timpi/parallel_sync.h"
50 
51 // C/C++ includes
52 #ifdef LIBMESH_HAVE_PETSC
53 #include "libmesh/ignore_warnings.h"
54 #include "petscmat.h"
55 #include "libmesh/restore_warnings.h"
56 #include "libmesh/petsc_solver_exception.h"
57 #endif
58 
59 
60 namespace {
61 
62 using namespace libMesh;
63 
64 struct CorrectProcIds
65 {
66  typedef processor_id_type datum;
67 
68  CorrectProcIds(MeshBase & _mesh,
69  std::unordered_set<dof_id_type> & _bad_pids) :
70  mesh(_mesh), bad_pids(_bad_pids) {}
71 
72  MeshBase & mesh;
73  std::unordered_set<dof_id_type> & bad_pids;
74 
75  // ------------------------------------------------------------
76  void gather_data (const std::vector<dof_id_type> & ids,
77  std::vector<datum> & data)
78  {
79  // Find the processor id of each requested node
80  data.resize(ids.size());
81 
82  for (auto i : index_range(ids))
83  {
84  // Look for this point in the mesh and make sure we have a
85  // good pid for it before sending that on
86  if (ids[i] != DofObject::invalid_id &&
87  !bad_pids.count(ids[i]))
88  {
89  Node & node = mesh.node_ref(ids[i]);
90 
91  // Return the node's correct processor id,
92  data[i] = node.processor_id();
93  }
94  else
96  }
97  }
98 
99  // ------------------------------------------------------------
100  bool act_on_data (const std::vector<dof_id_type> & ids,
101  const std::vector<datum> proc_ids)
102  {
103  bool data_changed = false;
104 
105  // Set the ghost node processor ids we've now been informed of
106  for (auto i : index_range(ids))
107  {
108  Node & node = mesh.node_ref(ids[i]);
109 
110  processor_id_type & proc_id = node.processor_id();
111  const processor_id_type new_proc_id = proc_ids[i];
112 
113  if (const auto it = bad_pids.find(ids[i]);
114  (new_proc_id != proc_id || it != bad_pids.end()) &&
115  new_proc_id != DofObject::invalid_processor_id)
116  {
117  if (it != bad_pids.end())
118  {
119  proc_id = new_proc_id;
120  bad_pids.erase(it);
121  }
122  else
123  proc_id = node.choose_processor_id(proc_id, new_proc_id);
124 
125  if (proc_id == new_proc_id)
126  data_changed = true;
127  }
128  }
129 
130  return data_changed;
131  }
132 };
133 
134 }
135 
136 namespace libMesh
137 {
138 
139 
140 
141 // ------------------------------------------------------------
142 // Partitioner static data
144  dof_id_type(1000000);
145 
146 
147 
148 // ------------------------------------------------------------
149 // Partitioner implementation
150 
151 
153 {
154  return INVALID_PARTITIONER;
155 }
156 
157 
158 std::unique_ptr<Partitioner>
159 Partitioner::build (const PartitionerType partitioner_type)
160 {
161  switch (partitioner_type)
162  {
164  return std::make_unique<CentroidPartitioner>();
165  case LINEAR_PARTITIONER:
166  return std::make_unique<LinearPartitioner>();
168  return std::make_unique<MappedSubdomainPartitioner>();
169  case METIS_PARTITIONER:
170  return std::make_unique<MetisPartitioner>();
172  return std::make_unique<ParmetisPartitioner>();
174  return std::make_unique<HilbertSFCPartitioner>();
176  return std::make_unique<MortonSFCPartitioner>();
177  case SFC_PARTITIONER:
178  return std::make_unique<SFCPartitioner>();
180  return std::make_unique<SubdomainPartitioner>();
181  default:
182  libmesh_error_msg("Invalid partitioner type: " <<
183  Utility::enum_to_string(partitioner_type));
184  }
185 }
186 
187 
188 
190 {
191  this->partition(mesh,mesh.n_processors());
192 }
193 
194 
195 
197  const unsigned int n)
198 {
199  libmesh_parallel_only(mesh.comm());
200 
201  // BSK - temporary fix while redistribution is integrated 6/26/2008
202  // Uncomment this to not repartition in parallel
203  // if (!mesh.is_serial())
204  // return;
205 
206  // we cannot partition into more pieces than we have
207  // active elements!
208  const unsigned int n_parts =
209  static_cast<unsigned int>
210  (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));
211 
212  // Set the number of partitions in the mesh
213  mesh.set_n_partitions()=n_parts;
214 
215  if (n_parts == 1)
216  {
217  this->single_partition (mesh);
218  return;
219  }
220 
221  // First assign a temporary partitioning to any unpartitioned elements
223 
224  // Call the partitioning function
225  this->_do_partition(mesh,n_parts);
226 
227  // Set the parent's processor ids
229 
230  // Redistribute elements if necessary, before setting node processor
231  // ids, to make sure those will be set consistently
232  mesh.redistribute();
233 
234 #ifdef DEBUG
236 
237  // Messed up elem processor_id()s can leave us without the child
238  // elements we need to restrict vectors on a distributed mesh
239  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
240 #endif
241 
242  // Set the node's processor ids
244 
245 #ifdef DEBUG
246  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
247 #endif
248 
249  // Give derived Mesh classes a chance to update any cached data to
250  // reflect the new partitioning
252 }
253 
254 
255 
257 {
258  this->repartition(mesh,mesh.n_processors());
259 }
260 
261 
262 
264  const unsigned int n)
265 {
266  // we cannot partition into more pieces than we have
267  // active elements!
268  const unsigned int n_parts =
269  static_cast<unsigned int>
270  (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));
271 
272  // Set the number of partitions in the mesh
273  mesh.set_n_partitions()=n_parts;
274 
275  if (n_parts == 1)
276  {
277  this->single_partition (mesh);
278  return;
279  }
280 
281  // First assign a temporary partitioning to any unpartitioned elements
283 
284  // Call the partitioning function
285  this->_do_repartition(mesh,n_parts);
286 
287  // Set the parent's processor ids
289 
290  // Set the node's processor ids
292 }
293 
294 
295 
296 
297 
299 {
300  bool changed_pid =
301  this->single_partition_range(mesh.elements_begin(),
302  mesh.elements_end());
303 
304  // If we have a distributed mesh with an empty rank (or where rank
305  // 0 has only its own component of a disconnected mesh, I guess),
306  // that rank might need to be informed of a change.
307  mesh.comm().max(changed_pid);
308 
309  // We may need to redistribute, in case someone (like our unit
310  // tests) is doing something silly (like moving a whole
311  // already-distributed mesh back onto rank 0).
312  if (changed_pid)
313  mesh.redistribute();
314 
315  return changed_pid;
316 }
317 
318 
319 
322 {
323  LOG_SCOPE("single_partition_range()", "Partitioner");
324 
325  bool changed_pid = false;
326 
327  for (auto & elem : as_range(it, end))
328  {
329  if (elem->processor_id())
330  changed_pid = true;
331  elem->processor_id() = 0;
332 
333  // Assign all this element's nodes to processor 0 as well.
334  for (Node & node : elem->node_ref_range())
335  {
336  if (node.processor_id())
337  changed_pid = true;
338  node.processor_id() = 0;
339  }
340  }
341 
342  return changed_pid;
343 }
344 
346 {
348 }
349 
350 
351 
353  const unsigned int n_subdomains)
354 {
355  MeshBase::element_iterator it = mesh.unpartitioned_elements_begin();
356  const MeshBase::element_iterator end = mesh.unpartitioned_elements_end();
357 
358  const dof_id_type n_unpartitioned_elements = MeshTools::n_elem (it, end);
359 
360  // the unpartitioned elements must exist on all processors. If the range is empty on one
361  // it is empty on all, and we can quit right here.
362  if (!n_unpartitioned_elements)
363  return;
364 
365  // find the target subdomain sizes
366  std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
367 
368  for (auto pid : make_range(mesh.n_processors()))
369  {
370  dof_id_type tgt_subdomain_size = 0;
371 
372  // watch out for the case that n_subdomains < n_processors
373  if (pid < n_subdomains)
374  {
375  tgt_subdomain_size = n_unpartitioned_elements/n_subdomains;
376 
377  if (pid < n_unpartitioned_elements%n_subdomains)
378  tgt_subdomain_size++;
379 
380  }
381 
382  //libMesh::out << "pid, #= " << pid << ", " << tgt_subdomain_size << std::endl;
383  if (pid == 0)
384  subdomain_bounds[0] = tgt_subdomain_size;
385  else
386  subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
387  }
388 
389  libmesh_assert_equal_to (subdomain_bounds.back(), n_unpartitioned_elements);
390 
391  // create the unique mapping for all unpartitioned elements independent of partitioning
392  // determine the global indexing for all the unpartitioned elements
393  std::vector<dof_id_type> global_indices;
394 
395  // Calling this on all processors a unique range in [0,n_unpartitioned_elements) is constructed.
396  // Only the indices for the elements we pass in are returned in the array.
399  global_indices);
400 
401  dof_id_type cnt=0;
402  for (auto & elem : as_range(it, end))
403  {
404  libmesh_assert_less (cnt, global_indices.size());
405  const dof_id_type global_index =
406  global_indices[cnt++];
407 
408  libmesh_assert_less (global_index, subdomain_bounds.back());
409  libmesh_assert_less (global_index, n_unpartitioned_elements);
410 
411  const processor_id_type subdomain_id =
412  cast_int<processor_id_type>
413  (std::distance(subdomain_bounds.begin(),
414  std::upper_bound(subdomain_bounds.begin(),
415  subdomain_bounds.end(),
416  global_index)));
417  libmesh_assert_less (subdomain_id, n_subdomains);
418 
419  elem->processor_id() = subdomain_id;
420  //libMesh::out << "assigning " << global_index << " to " << subdomain_id << std::endl;
421  }
422 }
423 
424 
425 
427 {
428  // Ignore the parameter when !LIBMESH_ENABLE_AMR
430 
431  LOG_SCOPE("set_parent_processor_ids()", "Partitioner");
432 
433 #ifdef LIBMESH_ENABLE_AMR
434 
435  // If the mesh is serial we have access to all the elements,
436  // in particular all the active ones. We can therefore set
437  // the parent processor ids indirectly through their children, and
438  // set the subactive processor ids while examining their active
439  // ancestors.
440  // By convention a parent is assigned to the minimum processor
441  // of all its children, and a subactive is assigned to the processor
442  // of its active ancestor.
443  if (mesh.is_serial())
444  {
445  for (auto & elem : mesh.active_element_ptr_range())
446  {
447  // First set descendents
448  std::vector<Elem *> subactive_family;
449  elem->total_family_tree(subactive_family);
450  for (const auto & f : subactive_family)
451  f->processor_id() = elem->processor_id();
452 
453  // Then set ancestors
454  Elem * parent = elem->parent();
455 
456  while (parent)
457  {
458  // invalidate the parent id, otherwise the min below
459  // will not work if the current parent id is less
460  // than all the children!
461  parent->invalidate_processor_id();
462 
463  for (auto & child : parent->child_ref_range())
464  {
465  libmesh_assert(!child.is_remote());
466  libmesh_assert_not_equal_to (child.processor_id(), DofObject::invalid_processor_id);
467  parent->processor_id() = std::min(parent->processor_id(),
468  child.processor_id());
469  }
470  parent = parent->parent();
471  }
472  }
473  }
474 
475  // When the mesh is parallel we cannot guarantee that parents have access to
476  // all their children.
477  else
478  {
479  // Setting subactive processor ids is easy: we can guarantee
480  // that children have access to all their parents.
481 
482  // Loop over all the active elements in the mesh
483  for (auto & child : mesh.active_element_ptr_range())
484  {
485  std::vector<Elem *> subactive_family;
486  child->total_family_tree(subactive_family);
487  for (const auto & f : subactive_family)
488  f->processor_id() = child->processor_id();
489  }
490 
491  // When the mesh is parallel we cannot guarantee that parents have access to
492  // all their children.
493 
494  // We will use a brute-force approach here. Each processor finds its parent
495  // elements and sets the parent pid to the minimum of its
496  // semilocal descendants.
497  // A global reduction is then performed to make sure the true minimum is found.
498  // As noted, this is required because we cannot guarantee that a parent has
499  // access to all its children on any single processor.
500  libmesh_parallel_only(mesh.comm());
501  libmesh_assert(MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
502  mesh.unpartitioned_elements_end()) == 0);
503 
504  const dof_id_type max_elem_id = mesh.max_elem_id();
505 
506  std::vector<processor_id_type>
507  parent_processor_ids (std::min(communication_blocksize,
508  max_elem_id));
509 
510  for (dof_id_type blk=0, last_elem_id=0; last_elem_id<max_elem_id; blk++)
511  {
512  last_elem_id =
513  std::min(static_cast<dof_id_type>((blk+1)*communication_blocksize),
514  max_elem_id);
515  const dof_id_type first_elem_id = blk*communication_blocksize;
516 
517  std::fill (parent_processor_ids.begin(),
518  parent_processor_ids.end(),
520 
521  // first build up local contributions to parent_processor_ids
522  bool have_parent_in_block = false;
523 
524  for (auto & parent : as_range(mesh.ancestor_elements_begin(),
525  mesh.ancestor_elements_end()))
526  {
527  const dof_id_type parent_idx = parent->id();
528  libmesh_assert_less (parent_idx, max_elem_id);
529 
530  if ((parent_idx >= first_elem_id) &&
531  (parent_idx < last_elem_id))
532  {
533  have_parent_in_block = true;
535 
536  std::vector<const Elem *> active_family;
537  parent->active_family_tree(active_family);
538  for (const auto & f : active_family)
539  parent_pid = std::min (parent_pid, f->processor_id());
540 
541  const dof_id_type packed_idx = parent_idx - first_elem_id;
542  libmesh_assert_less (packed_idx, parent_processor_ids.size());
543 
544  parent_processor_ids[packed_idx] = parent_pid;
545  }
546  }
547 
548  // then find the global minimum
549  mesh.comm().min (parent_processor_ids);
550 
551  // and assign the ids, if we have a parent in this block.
552  if (have_parent_in_block)
553  for (auto & parent : as_range(mesh.ancestor_elements_begin(),
554  mesh.ancestor_elements_end()))
555  {
556  const dof_id_type parent_idx = parent->id();
557 
558  if ((parent_idx >= first_elem_id) &&
559  (parent_idx < last_elem_id))
560  {
561  const dof_id_type packed_idx = parent_idx - first_elem_id;
562  libmesh_assert_less (packed_idx, parent_processor_ids.size());
563 
564  const processor_id_type parent_pid =
565  parent_processor_ids[packed_idx];
566 
567  libmesh_assert_not_equal_to (parent_pid, DofObject::invalid_processor_id);
568 
569  parent->processor_id() = parent_pid;
570  }
571  }
572  }
573  }
574 
575 #endif // LIBMESH_ENABLE_AMR
576 }
577 
578 void
580  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> & processor_pair_to_nodes)
581 {
582  // This function must be run on all processors at once
583  libmesh_parallel_only(mesh.comm());
584 
585  processor_pair_to_nodes.clear();
586 
587  std::set<dof_id_type> mynodes;
588  std::set<dof_id_type> neighbor_nodes;
589  std::vector<dof_id_type> common_nodes;
590 
591  // Loop over all the active elements
592  for (auto & elem : mesh.active_element_ptr_range())
593  {
594  libmesh_assert(elem);
595 
596  libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
597 
598  auto n_nodes = elem->n_nodes();
599 
600  // prepare data for this element
601  mynodes.clear();
602  neighbor_nodes.clear();
603  common_nodes.clear();
604 
605  for (unsigned int inode = 0; inode < n_nodes; inode++)
606  mynodes.insert(elem->node_id(inode));
607 
608  for (auto i : elem->side_index_range())
609  {
610  auto neigh = elem->neighbor_ptr(i);
611  if (neigh && !neigh->is_remote() && neigh->processor_id() != elem->processor_id())
612  {
613  neighbor_nodes.clear();
614  common_nodes.clear();
615  auto neigh_n_nodes = neigh->n_nodes();
616  for (unsigned int inode = 0; inode < neigh_n_nodes; inode++)
617  neighbor_nodes.insert(neigh->node_id(inode));
618 
619  std::set_intersection(mynodes.begin(), mynodes.end(),
620  neighbor_nodes.begin(), neighbor_nodes.end(),
621  std::back_inserter(common_nodes));
622 
623  auto & map_set = processor_pair_to_nodes[std::make_pair(std::min(elem->processor_id(), neigh->processor_id()),
624  std::max(elem->processor_id(), neigh->processor_id()))];
625  for (auto global_node_id : common_nodes)
626  map_set.insert(global_node_id);
627  }
628  }
629  }
630 }
631 
633 {
634  // This function must be run on all processors at once
635  libmesh_parallel_only(mesh.comm());
636 
637  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
638 
639  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
640 
641  for (auto & pmap : processor_pair_to_nodes)
642  {
643  std::size_t n_own_nodes = pmap.second.size()/2, i = 0;
644 
645  for (dof_id_type id : pmap.second)
646  {
647  auto & node = mesh.node_ref(id);
648  if (i <= n_own_nodes)
649  node.processor_id() = pmap.first.first;
650  else
651  node.processor_id() = pmap.first.second;
652  i++;
653  }
654  }
655 }
656 
658 {
659  // This function must be run on all processors at once
660  libmesh_parallel_only(mesh.comm());
661 
662  // I see occasional consistency failures when using this on a
663  // distributed mesh
664  libmesh_experimental();
665 
666  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
667 
668  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
669 
670  std::unordered_map<dof_id_type, std::vector<const Elem *>> nodes_to_elem_map;
671 
672  MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);
673 
674  std::vector<const Node *> neighbors;
675  std::set<dof_id_type> neighbors_order;
676  std::vector<dof_id_type> common_nodes;
677  std::queue<dof_id_type> nodes_queue;
678  std::set<dof_id_type> visted_nodes;
679 
680  for (auto & pmap : processor_pair_to_nodes)
681  {
682  std::size_t n_own_nodes = pmap.second.size()/2;
683 
684  // Initialize node assignment
685  for (dof_id_type id : pmap.second)
686  mesh.node_ref(id).processor_id() = pmap.first.second;
687 
688  visted_nodes.clear();
689  for (dof_id_type id : pmap.second)
690  {
691  mesh.node_ref(id).processor_id() = pmap.first.second;
692 
693  if (visted_nodes.count(id))
694  continue;
695  else
696  {
697  nodes_queue.push(id);
698  visted_nodes.insert(id);
699  if (visted_nodes.size() >= n_own_nodes)
700  break;
701  }
702 
703  while (!nodes_queue.empty())
704  {
705  auto & node = mesh.node_ref(nodes_queue.front());
706  nodes_queue.pop();
707 
708  neighbors.clear();
709  MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
710  neighbors_order.clear();
711  for (auto & neighbor : neighbors)
712  neighbors_order.insert(neighbor->id());
713 
714  common_nodes.clear();
715  std::set_intersection(pmap.second.begin(), pmap.second.end(),
716  neighbors_order.begin(), neighbors_order.end(),
717  std::back_inserter(common_nodes));
718 
719  for (auto c_node : common_nodes)
720  if (!visted_nodes.count(c_node))
721  {
722  nodes_queue.push(c_node);
723  visted_nodes.insert(c_node);
724  if (visted_nodes.size() >= n_own_nodes)
725  goto queue_done;
726  }
727 
728  if (visted_nodes.size() >= n_own_nodes)
729  goto queue_done;
730  }
731  }
732  queue_done:
733  for (auto node : visted_nodes)
734  mesh.node_ref(node).processor_id() = pmap.first.first;
735  }
736 }
737 
739 {
740  libmesh_ignore(mesh); // Only used if LIBMESH_HAVE_PETSC
741 
742  // This function must be run on all processors at once
743  libmesh_parallel_only(mesh.comm());
744 
745 #if LIBMESH_HAVE_PETSC
746  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
747 
748  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
749 
750  std::vector<std::vector<const Elem *>> nodes_to_elem_map;
751 
752  MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);
753 
754  std::vector<const Node *> neighbors;
755  std::set<dof_id_type> neighbors_order;
756  std::vector<dof_id_type> common_nodes;
757 
758  std::vector<dof_id_type> rows;
759  std::vector<dof_id_type> cols;
760 
761  std::map<dof_id_type, dof_id_type> global_to_local;
762 
763  for (auto & pmap : processor_pair_to_nodes)
764  {
765  unsigned int i = 0;
766 
767  rows.clear();
768  rows.resize(pmap.second.size()+1);
769  cols.clear();
770  for (dof_id_type id : pmap.second)
771  global_to_local[id] = i++;
772 
773  i = 0;
774  for (auto id : pmap.second)
775  {
776  auto & node = mesh.node_ref(id);
777  neighbors.clear();
778  MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
779  neighbors_order.clear();
780  for (auto & neighbor : neighbors)
781  neighbors_order.insert(neighbor->id());
782 
783  common_nodes.clear();
784  std::set_intersection(pmap.second.begin(), pmap.second.end(),
785  neighbors_order.begin(), neighbors_order.end(),
786  std::back_inserter(common_nodes));
787 
788  rows[i+1] = rows[i] + cast_int<dof_id_type>(common_nodes.size());
789 
790  for (auto c_node : common_nodes)
791  cols.push_back(global_to_local[c_node]);
792 
793  i++;
794  }
795 
796  // Next we construct an IS from a MatPartitioning
798  {
799  PetscInt *adj_i, *adj_j;
800  LibmeshPetscCall2(mesh.comm(), PetscCalloc1(rows.size(), &adj_i));
801  LibmeshPetscCall2(mesh.comm(), PetscCalloc1(cols.size(), &adj_j));
802  PetscInt rows_size = cast_int<PetscInt>(rows.size());
803  for (PetscInt ii=0; ii<rows_size; ii++)
804  adj_i[ii] = rows[ii];
805 
806  PetscInt cols_size = cast_int<PetscInt>(cols.size());
807  for (PetscInt ii=0; ii<cols_size; ii++)
808  adj_j[ii] = cols[ii];
809 
810  const PetscInt sz = cast_int<PetscInt>(pmap.second.size());
811 
812  // Create sparse matrix representing an adjacency list
813  WrappedPetsc<Mat> adj;
814  LibmeshPetscCall2(mesh.comm(), MatCreateMPIAdj(PETSC_COMM_SELF, sz, sz, adj_i, adj_j, nullptr, adj.get()));
815 
816  // Create MatPartitioning object
818  LibmeshPetscCall2(mesh.comm(), MatPartitioningCreate(PETSC_COMM_SELF, part.get()));
819 
820  // Apply MatPartitioning, storing results in "is"
821  LibmeshPetscCall2(mesh.comm(), MatPartitioningSetAdjacency(part, adj));
822  LibmeshPetscCall2(mesh.comm(), MatPartitioningSetNParts(part, 2));
823  LibmeshPetscCall2(mesh.comm(), PetscObjectSetOptionsPrefix((PetscObject)(*part), "balance_"));
824  LibmeshPetscCall2(mesh.comm(), MatPartitioningSetFromOptions(part));
825  LibmeshPetscCall2(mesh.comm(), MatPartitioningApply(part, is.get()));
826  }
827 
828  PetscInt local_size;
829  const PetscInt *indices;
830  LibmeshPetscCall2(mesh.comm(), ISGetLocalSize(is, &local_size));
831  LibmeshPetscCall2(mesh.comm(), ISGetIndices(is, &indices));
832 
833  i = 0;
834  for (auto id : pmap.second)
835  {
836  auto & node = mesh.node_ref(id);
837  if (indices[i])
838  node.processor_id() = pmap.first.second;
839  else
840  node.processor_id() = pmap.first.first;
841 
842  i++;
843  }
844  LibmeshPetscCall2(mesh.comm(), ISRestoreIndices(is, &indices));
845  }
846 #else
847  libmesh_error_msg("PETSc is required");
848 #endif
849 }
850 
851 
853 {
854  LOG_SCOPE("set_node_processor_ids()","Partitioner");
855 
856  // This function must be run on all processors at once
857  libmesh_parallel_only(mesh.comm());
858 
859  // If we have any unpartitioned elements at this
860  // stage there is a problem
861  libmesh_assert (MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
862  mesh.unpartitioned_elements_end()) == 0);
863 
864  // Start from scratch here: nodes we used to own may not be
865  // eligible for us to own any more.
866  for (auto & node : mesh.node_ptr_range())
867  {
869  }
870 
871  // Loop over all the active elements
872  for (auto & elem : mesh.active_element_ptr_range())
873  {
874  libmesh_assert(elem);
875 
876  libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
877 
878  // Consider updating the processor id on this element's nodes
879  for (Node & node : elem->node_ref_range())
880  {
881  processor_id_type & pid = node.processor_id();
882  pid = node.choose_processor_id(pid, elem->processor_id());
883  }
884  }
885 
886  // How we finish off the node partitioning depends on our command
887  // line options.
888 
889  const bool load_balanced_nodes_linear =
890  libMesh::on_command_line ("--load-balanced-nodes-linear");
891 
892  const bool load_balanced_nodes_bfs =
893  libMesh::on_command_line ("--load-balanced-nodes-bfs");
894 
895  const bool load_balanced_nodes_petscpartition =
896  libMesh::on_command_line ("--load-balanced-nodes-petscpartitioner");
897 
898  unsigned int n_load_balance_options = load_balanced_nodes_linear;
899  n_load_balance_options += load_balanced_nodes_bfs;
900  n_load_balance_options += load_balanced_nodes_petscpartition;
901  libmesh_error_msg_if(n_load_balance_options > 1,
902  "Cannot perform more than one load balancing type at a time");
903 
904  if (load_balanced_nodes_linear)
906  else if (load_balanced_nodes_bfs)
908  else if (load_balanced_nodes_petscpartition)
910 
911  // Node balancing algorithm will response to assign owned nodes.
912  // We still need to sync PIDs
913  {
914  // For inactive elements, we will have already gotten most of
915  // these nodes, *except* for the case of a parent with a subset
916  // of active descendants which are remote elements. In that
917  // case some of the parent nodes will not have been properly
918  // handled yet on our processor.
919  //
920  // We don't want to inadvertently give one of them an incorrect
921  // processor id, but if we're not in serial then we have to
922  // assign them temporary pids to make querying work, so we'll
923  // save our *valid* pids before assigning temporaries.
924  //
925  // Even in serial we'll want to check and make sure we're not
926  // overwriting valid active node pids with pids from subactive
927  // elements.
928  std::unordered_set<dof_id_type> bad_pids;
929 
930  for (auto & node : mesh.node_ptr_range())
932  bad_pids.insert(node->id());
933 
934  // If we assign our temporary ids by looping from finer elements
935  // to coarser elements, we'll always get an id from the finest
936  // ghost element we can see, which will usually be "closer" to
937  // the true processor we want to query and so will reduce query
938  // cycles that don't reach that processor.
939 
940  // But we can still end up with a query cycle that dead-ends, so
941  // we need to prepare a "push" communication step here.
942 
943  const bool is_serial = mesh.is_serial();
944  std::unordered_map
946  std::unordered_map<dof_id_type, processor_id_type>>
947  potential_pids;
948 
949  const unsigned int n_levels = MeshTools::n_levels(mesh);
950  for (unsigned int level = n_levels; level > 0; --level)
951  {
952  for (auto & elem : as_range(mesh.level_elements_begin(level-1),
953  mesh.level_elements_end(level-1)))
954  {
955  libmesh_assert_not_equal_to (elem->processor_id(),
957 
958  const processor_id_type elem_pid = elem->processor_id();
959 
960  // Consider updating the processor id on this element's nodes
961  for (Node & node : elem->node_ref_range())
962  {
963  processor_id_type & pid = node.processor_id();
964  if (bad_pids.count(node.id()))
965  pid = node.choose_processor_id(pid, elem_pid);
966  else if (!is_serial)
967  potential_pids[elem_pid][node.id()] = pid;
968  }
969  }
970  }
971 
972  if (!is_serial)
973  {
974  std::unordered_map
976  std::vector<std::pair<dof_id_type, processor_id_type>>>
977  potential_pids_vecs;
978 
979  for (auto & pair : potential_pids)
980  potential_pids_vecs[pair.first].assign(pair.second.begin(), pair.second.end());
981 
982  auto pids_action_functor =
983  [& mesh, & bad_pids]
984  (processor_id_type /* src_pid */,
985  const std::vector<std::pair<dof_id_type, processor_id_type>> & data)
986  {
987  for (auto pair : data)
988  {
989  Node & node = mesh.node_ref(pair.first);
990  processor_id_type & pid = node.processor_id();
991  if (const auto it = bad_pids.find(pair.first);
992  it != bad_pids.end())
993  {
994  pid = pair.second;
995  bad_pids.erase(it);
996  }
997  else
998  pid = node.choose_processor_id(pid, pair.second);
999  }
1000  };
1001 
1002  Parallel::push_parallel_vector_data
1003  (mesh.comm(), potential_pids_vecs, pids_action_functor);
1004 
1005  // Using default libMesh options, we'll just need to sync
1006  // between processors now. The catch here is that we can't
1007  // initially trust Node::choose_processor_id() because some
1008  // of those node processor ids are the temporary ones.
1009  CorrectProcIds correct_pids(mesh, bad_pids);
1011  (mesh, mesh.elements_begin(), mesh.elements_end(),
1013  correct_pids);
1014 
1015  // But once we've got all the non-temporary pids synced, we
1016  // may need to sync again to get any pids on nodes only
1017  // connected to subactive elements, for which *only*
1018  // "temporary" pids are possible.
1019  bad_pids.clear();
1021  (mesh,
1022  mesh.elements_begin(), mesh.elements_end(),
1024  correct_pids);
1025  }
1026  }
1027 
1028  // We can't assert that all nodes are connected to elements, because
1029  // a DistributedMesh with NodeConstraints might have pulled in some
1030  // remote nodes solely for evaluating those constraints.
1031  // MeshTools::libmesh_assert_connected_nodes(mesh);
1032 
1033 #ifdef DEBUG
1034  MeshTools::libmesh_assert_valid_procids<Node>(mesh);
1035  //MeshTools::libmesh_assert_canonical_node_procids(mesh);
1036 #endif
1037 }
1038 
1039 
1040 
1042 {
1044 
1045  typedef std::unordered_map<dof_id_type, dof_id_type> map_type;
1046 
1047  SyncLocalIDs(map_type & _id_map) : id_map(_id_map) {}
1048 
1050 
1051  void gather_data (const std::vector<dof_id_type> & ids,
1052  std::vector<datum> & local_ids) const
1053  {
1054  local_ids.resize(ids.size());
1055 
1056  for (auto i : index_range(ids))
1057  local_ids[i] = id_map[ids[i]];
1058  }
1059 
1060  void act_on_data (const std::vector<dof_id_type> & ids,
1061  const std::vector<datum> & local_ids)
1062  {
1063  for (auto i : index_range(local_ids))
1064  id_map[ids[i]] = local_ids[i];
1065  }
1066 };
1067 
1069 {
1070  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
1071 
1072  // Find the number of active elements on each processor. We cannot use
1073  // mesh.n_active_elem_on_proc(pid) since that only returns the number of
1074  // elements assigned to pid which are currently stored on the calling
1075  // processor. This will not in general be correct for parallel meshes
1076  // when (pid!=mesh.processor_id()).
1077  auto n_proc = mesh.n_processors();
1078  _n_active_elem_on_proc.resize(n_proc);
1079  mesh.comm().allgather(n_active_local_elem, _n_active_elem_on_proc);
1080 
1081  std::vector<dof_id_type> n_active_elem_before_proc(mesh.n_processors());
1082 
1083  for (auto i : make_range(n_proc-1))
1084  n_active_elem_before_proc[i+1] =
1085  n_active_elem_before_proc[i] + _n_active_elem_on_proc[i];
1086 
1087  libMesh::BoundingBox bbox =
1089 
1090  _global_index_by_pid_map.clear();
1091 
1092  // create the mapping which is contiguous by processor
1094  mesh.active_local_elements_begin(),
1095  mesh.active_local_elements_end(),
1097 
1099 
1101  (mesh.comm(), mesh.active_elements_begin(), mesh.active_elements_end(), sync);
1102 
1103  for (const auto & elem : mesh.active_element_ptr_range())
1104  {
1105  const processor_id_type pid = elem->processor_id();
1106  libmesh_assert_less (_global_index_by_pid_map[elem->id()], _n_active_elem_on_proc[pid]);
1107 
1108  _global_index_by_pid_map[elem->id()] += n_active_elem_before_proc[pid];
1109  }
1110 }
1111 
1113 {
1114  LOG_SCOPE("build_graph()", "Partitioner");
1115 
1116  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
1117 
1118  // If we have boundary elements in this mesh, we want to account for
1119  // the connectivity between them and interior elements. We can find
1120  // interior elements from boundary elements, but we need to build up
1121  // a lookup map to do the reverse.
1122  typedef std::unordered_multimap<const Elem *, const Elem *> map_type;
1123  map_type interior_to_boundary_map;
1124 
1125  // If we have spline nodes in this mesh, we want to account for the
1126  // connectivity between them and integration elements. We can find
1127  // spline nodes from integration elements, but need a reverse map
1128  // {integration_elements} = elems_constrained_by[spline_nodeelem]
1129  map_type elems_constrained_by;
1130 
1131  const auto & mesh_constrained_nodes = mesh.get_constraint_rows();
1132 
1133  for (const Elem * elem : mesh.active_element_ptr_range())
1134  {
1135  if (!mesh_constrained_nodes.empty()) // quick test for non-IGA cases
1136  {
1137  const auto end_it = mesh_constrained_nodes.end();
1138 
1139  // Use a set to avoid duplicates. Use a well-defined method
1140  // of ordering that set to make debugging easier.
1141  std::set<const Elem *, CompareElemIdsByLevel> constraining_elems;
1142  for (const Node & node : elem->node_ref_range())
1143  {
1144  if (const auto row_it = mesh_constrained_nodes.find(&node);
1145  row_it != end_it)
1146  for (const auto & [pr, coef] : row_it->second)
1147  {
1148  libmesh_ignore(coef); // avoid gcc 7 warning
1149  constraining_elems.insert(pr.first);
1150  }
1151  }
1152  for (const Elem * constraining_elem : constraining_elems)
1153  elems_constrained_by.emplace(constraining_elem, elem);
1154  }
1155 
1156  // If we don't have an interior_parent and we don't have any
1157  // constrained nodes then there's nothing else to look up.
1158  if (elem->interior_parent())
1159  {
1160  // get all relevant interior elements
1161  std::set<const Elem *> neighbor_set;
1162  elem->find_interior_neighbors(neighbor_set);
1163 
1164  for (const auto & neighbor : neighbor_set)
1165  interior_to_boundary_map.emplace(neighbor, elem);
1166  }
1167  }
1168 
1169 #ifdef LIBMESH_ENABLE_AMR
1170  std::vector<const Elem *> neighbors_offspring;
1171 #endif
1172 
1173  // This is costly, and we only need to do it if the mesh has
1174  // changed since we last partitioned... but the mesh probably has
1175  // changed since we last partitioned, and if it hasn't we don't
1176  // have a reliable way to be sure of that.
1178 
1179  dof_id_type first_local_elem = 0;
1180  for (auto pid : make_range(mesh.processor_id()))
1181  first_local_elem += _n_active_elem_on_proc[pid];
1182 
1183  _dual_graph.clear();
1184  _dual_graph.resize(n_active_local_elem);
1185  _local_id_to_elem.resize(n_active_local_elem);
1186 
1187  // We may need to communicate constraint-row-based connections
1188  // between processors
1189  std::unordered_map<processor_id_type,
1190  std::vector<std::pair<dof_id_type, dof_id_type>>> connections_to_push;
1191 
1192  for (const auto & elem : mesh.active_local_element_ptr_range())
1193  {
1194  libmesh_assert (_global_index_by_pid_map.count(elem->id()));
1195  const dof_id_type global_index_by_pid =
1196  _global_index_by_pid_map[elem->id()];
1197 
1198  const dof_id_type local_index =
1199  global_index_by_pid - first_local_elem;
1200  libmesh_assert_less (local_index, n_active_local_elem);
1201 
1202  std::vector<dof_id_type> & graph_row = _dual_graph[local_index];
1203 
1204  // Save this off to make it easy to index later
1205  _local_id_to_elem[local_index] = const_cast<Elem*>(elem);
1206 
1207  // Loop over the element's neighbors. An element
1208  // adjacency corresponds to a face neighbor
1209  for (auto neighbor : elem->neighbor_ptr_range())
1210  {
1211  if (neighbor != nullptr)
1212  {
1213  // If the neighbor is active treat it
1214  // as a connection
1215  if (neighbor->active())
1216  {
1217  libmesh_assert(_global_index_by_pid_map.count(neighbor->id()));
1218  const dof_id_type neighbor_global_index_by_pid =
1219  _global_index_by_pid_map[neighbor->id()];
1220 
1221  graph_row.push_back(neighbor_global_index_by_pid);
1222  }
1223 
1224 #ifdef LIBMESH_ENABLE_AMR
1225 
1226  // Otherwise we need to find all of the
1227  // neighbor's children that are connected to
1228  // us and add them
1229  else
1230  {
1231  // The side of the neighbor to which
1232  // we are connected
1233  const unsigned int ns =
1234  neighbor->which_neighbor_am_i (elem);
1235  libmesh_assert_less (ns, neighbor->n_neighbors());
1236 
1237  // Get all the active children (& grandchildren, etc...)
1238  // of the neighbor
1239 
1240  // FIXME - this is the wrong thing, since we
1241  // should be getting the active family tree on
1242  // our side only. But adding too many graph
1243  // links may cause hanging nodes to tend to be
1244  // on partition interiors, which would reduce
1245  // communication overhead for constraint
1246  // equations, so we'll leave it.
1247 
1248  neighbor->active_family_tree (neighbors_offspring);
1249 
1250  // Get all the neighbor's children that
1251  // live on that side and are thus connected
1252  // to us
1253  for (const auto & child : neighbors_offspring)
1254  {
1255  // This does not assume a level-1 mesh.
1256  // Note that since children have sides numbered
1257  // coincident with the parent then this is a sufficient test.
1258  if (child->neighbor_ptr(ns) == elem)
1259  {
1260  libmesh_assert (child->active());
1261  libmesh_assert (_global_index_by_pid_map.count(child->id()));
1262  const dof_id_type child_global_index_by_pid =
1263  _global_index_by_pid_map[child->id()];
1264 
1265  graph_row.push_back(child_global_index_by_pid);
1266  }
1267  }
1268  }
1269 
1270 #endif /* ifdef LIBMESH_ENABLE_AMR */
1271 
1272 
1273  }
1274  }
1275 
1276  if ((elem->dim() < LIBMESH_DIM) &&
1277  elem->interior_parent())
1278  {
1279  // get all relevant interior elements
1280  std::set<const Elem *> neighbor_set;
1281  elem->find_interior_neighbors(neighbor_set);
1282 
1283  for (const auto & neighbor : neighbor_set)
1284  {
1285  const dof_id_type neighbor_global_index_by_pid =
1286  _global_index_by_pid_map[neighbor->id()];
1287 
1288  graph_row.push_back(neighbor_global_index_by_pid);
1289  }
1290  }
1291 
1292  // Check for any boundary neighbors
1293  for (const auto & pr : as_range(interior_to_boundary_map.equal_range(elem)))
1294  {
1295  const Elem * neighbor = pr.second;
1296 
1297  const dof_id_type neighbor_global_index_by_pid =
1298  _global_index_by_pid_map[neighbor->id()];
1299 
1300  graph_row.push_back(neighbor_global_index_by_pid);
1301  }
1302 
1303  // Check for any constraining elements
1304  if (!mesh_constrained_nodes.empty()) // quick test for non-IGA cases
1305  {
1306  const auto end_it = mesh_constrained_nodes.end();
1307 
1308  // Use a set to avoid duplicates. Use a well-defined method
1309  // of ordering that set to make debugging easier.
1310  std::set<const Elem *, CompareElemIdsByLevel> constraining_elems;
1311  for (const Node & node : elem->node_ref_range())
1312  {
1313  if (const auto row_it = mesh_constrained_nodes.find(&node);
1314  row_it != end_it)
1315  for (const auto & [pr, coef] : row_it->second)
1316  {
1317  libmesh_ignore(coef); // avoid gcc 7 warning
1318  constraining_elems.insert(pr.first);
1319  }
1320  }
1321  for (const Elem * constraining_elem : constraining_elems)
1322  {
1323  const dof_id_type constraining_global_index_by_pid =
1324  _global_index_by_pid_map[constraining_elem->id()];
1325 
1326  graph_row.push_back(constraining_global_index_by_pid);
1327 
1328  // We can't be sure if the constraining element's owner sees
1329  // the assembly element, so to get a symmetric connectivity
1330  // graph we'll need to tell them about us to be safe.
1331  if (constraining_elem->processor_id() != mesh.processor_id())
1332  connections_to_push[constraining_elem->processor_id()].emplace_back
1333  (global_index_by_pid, constraining_global_index_by_pid);
1334  }
1335  }
1336 
1337  // Check for any constrained elements
1338  for (const auto & pr : as_range(elems_constrained_by.equal_range(elem)))
1339  {
1340  const Elem * constrained = pr.second;
1341  const dof_id_type constrained_global_index_by_pid =
1342  _global_index_by_pid_map[constrained->id()];
1343 
1344  graph_row.push_back(constrained_global_index_by_pid);
1345 
1346  // We can't be sure if the constrained element's owner sees
1347  // the assembly element, so to get a symmetric connectivity
1348  // graph we'll need to tell them about us to be safe.
1349  if (constrained->processor_id() != mesh.processor_id())
1350  connections_to_push[constrained->processor_id()].emplace_back
1351  (global_index_by_pid, constrained_global_index_by_pid);
1352  }
1353  }
1354 
1355  // Partitioners like Parmetis require a symmetric adjacency matrix,
1356  // but if we have an assembly element constrained by a spline
1357  // NodeElem owned by another processor, it's possible that that
1358  // processor doesn't see our assembly element. Let's push those
1359  // entries, to ensure they're counted on both sides.
1360  auto symmetrize_entries =
1361  [this, first_local_elem]
1362  (processor_id_type /*src_pid*/,
1363  const std::vector<std::pair<dof_id_type, dof_id_type>> & incoming_entries)
1364  {
1365  for (auto [i, j] : incoming_entries)
1366  {
1367  libmesh_assert_greater_equal(j, first_local_elem);
1368  const std::size_t jl = j - first_local_elem;
1369  libmesh_assert_less(jl, _dual_graph.size());
1370  std::vector<dof_id_type> & graph_row = _dual_graph[jl];
1371  if (std::find(graph_row.begin(), graph_row.end(), i) == graph_row.end())
1372  {
1373 // std::cerr << "Pushing back (" << j << ", " << i << ") from " << src_pid << std::endl;
1374  graph_row.push_back(i);
1375  }
1376  }
1377  };
1378 
1379  Parallel::push_parallel_vector_data(mesh.comm(),
1380  connections_to_push,
1381  symmetrize_entries);
1382 
1383  // *Now* we should have a symmetric adjacency matrix. Let's check
1384  // that, so any failures get caught before they e.g. confuse
1385  // Parmetis in hard-to-debug ways. That's a global communication so
1386  // we'll only do it in debug mode.
1387 #ifdef DEBUG
1388  auto n_proc = mesh.n_processors();
1389  std::vector<dof_id_type> first_local_index_on_proc(n_proc, 0);
1390  for (auto pid : make_range(1u,n_proc))
1391  first_local_index_on_proc[pid] = first_local_index_on_proc[pid-1] + _n_active_elem_on_proc[pid-1];
1392 
1393  libmesh_assert_equal_to(first_local_index_on_proc[mesh.processor_id()],
1394  first_local_elem);
1395 
1396  std::unordered_map<processor_id_type, std::vector<std::pair<dof_id_type, dof_id_type>>> entries_to_send;
1397  for (auto il : index_range(_dual_graph))
1398  {
1399  const std::vector<dof_id_type> & graph_row = _dual_graph[il];
1400 
1401  const auto i = il + first_local_elem;
1402 
1403  for (auto j : graph_row)
1404  {
1405  // Stupid graph rows aren't sorted yet...
1406  processor_id_type target_pid = 0;
1407  while (target_pid+1 < n_proc &&
1408  j >= first_local_index_on_proc[target_pid+1])
1409  ++target_pid;
1410 
1411  entries_to_send[target_pid].emplace_back(i,j);
1412  }
1413  }
1414 
1415  std::vector<std::tuple<processor_id_type, dof_id_type, dof_id_type>> bad_entries;
1416 
1417  auto check_incoming_entries =
1418  [this, first_local_elem, &bad_entries]
1419  (processor_id_type src_pid,
1420  const std::vector<std::pair<dof_id_type, dof_id_type>> & incoming_entries)
1421  {
1422  for (auto [i, j] : incoming_entries)
1423  {
1424  if (j < first_local_elem)
1425  {
1426  bad_entries.emplace_back(src_pid,i,j);
1427  continue;
1428  }
1429  const std::size_t jl = j - first_local_elem;
1430  if (jl >= _dual_graph.size())
1431  {
1432  bad_entries.emplace_back(src_pid,i,j);
1433  continue;
1434  }
1435  const std::vector<dof_id_type> & graph_row = _dual_graph[jl];
1436  if (std::find(graph_row.begin(), graph_row.end(), i) ==
1437  graph_row.end())
1438  {
1439  bad_entries.emplace_back(src_pid,i,j);
1440  continue;
1441  }
1442  }
1443  };
1444 
1445  // Keep any failures in sync in parallel, for easier debugging in
1446  // unit tests where the failure exception will be caught.
1447  Parallel::push_parallel_vector_data
1448  (mesh.comm(), entries_to_send, check_incoming_entries);
1449  bool bad_entries_exist = !bad_entries.empty();
1450  mesh.comm().max(bad_entries_exist);
1451  if (bad_entries_exist)
1452  {
1453 #if 0 // Optional verbosity for if this breaks again...
1454  if (!bad_entries.empty())
1455  {
1456  std::cerr << "Bad entries on processor " << mesh.processor_id() << ": ";
1457  for (auto [p, i, j] : bad_entries)
1458  std::cerr << '(' << p << ", " << i << ", " << j << "), ";
1459  std::cerr << std::endl;
1460  }
1461 #endif
1462  libmesh_error_msg("Asymmetric partitioner graph detected");
1463  }
1464 #endif
1465 }
1466 
1467 void Partitioner::assign_partitioning (MeshBase & mesh, const std::vector<dof_id_type> & parts)
1468 {
1469  LOG_SCOPE("assign_partitioning()", "Partitioner");
1470 
1471  // This function must be run on all processors at once
1472  libmesh_parallel_only(mesh.comm());
1473 
1474  dof_id_type first_local_elem = 0;
1475  for (auto pid : make_range(mesh.processor_id()))
1476  first_local_elem += _n_active_elem_on_proc[pid];
1477 
1478 #ifndef NDEBUG
1479  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
1480 #endif
1481 
1482  std::map<processor_id_type, std::vector<dof_id_type>>
1483  requested_ids;
1484 
1485  // Results to gather from each processor - kept in a map so we
1486  // do only one loop over elements after all receives are done.
1487  std::map<processor_id_type, std::vector<processor_id_type>>
1488  filled_request;
1489 
1490  for (auto & elem : mesh.active_element_ptr_range())
1491  {
1492  // we need to get the index from the owning processor
1493  // (note we cannot assign it now -- we are iterating
1494  // over elements again and this will be bad!)
1495  requested_ids[elem->processor_id()].push_back(elem->id());
1496  }
1497 
1498  auto gather_functor =
1499  [this,
1500  & parts,
1501 #ifndef NDEBUG
1502  & mesh,
1503  n_active_local_elem,
1504 #endif
1505  first_local_elem]
1506  (processor_id_type, const std::vector<dof_id_type> & ids,
1507  std::vector<processor_id_type> & data)
1508  {
1509  const std::size_t ids_size = ids.size();
1510  data.resize(ids.size());
1511 
1512  for (std::size_t i=0; i != ids_size; i++)
1513  {
1514  const dof_id_type requested_elem_index = ids[i];
1515 
1516  libmesh_assert(_global_index_by_pid_map.count(requested_elem_index));
1517 
1518  const dof_id_type global_index_by_pid =
1519  _global_index_by_pid_map[requested_elem_index];
1520 
1521  const dof_id_type local_index =
1522  global_index_by_pid - first_local_elem;
1523 
1524  libmesh_assert_less (local_index, parts.size());
1525  libmesh_assert_less (local_index, n_active_local_elem);
1526 
1527  const processor_id_type elem_procid =
1528  cast_int<processor_id_type>(parts[local_index]);
1529 
1530  libmesh_assert_less (elem_procid, mesh.n_partitions());
1531 
1532  data[i] = elem_procid;
1533  }
1534  };
1535 
1536  auto action_functor =
1537  [&filled_request]
1538  (processor_id_type pid,
1539  const std::vector<dof_id_type> &,
1540  const std::vector<processor_id_type> & new_procids)
1541  {
1542  filled_request[pid] = new_procids;
1543  };
1544 
1545  // Trade requests with other processors
1546  const processor_id_type * ex = nullptr;
1547  Parallel::pull_parallel_vector_data
1548  (mesh.comm(), requested_ids, gather_functor, action_functor, ex);
1549 
1550  // and finally assign the partitioning.
1551  // note we are iterating in exactly the same order
1552  // used to build up the request, so we can expect the
1553  // required entries to be in the proper sequence.
1554  std::vector<unsigned int> counters(mesh.n_processors(), 0);
1555  for (auto & elem : mesh.active_element_ptr_range())
1556  {
1557  const processor_id_type current_pid = elem->processor_id();
1558 
1559  libmesh_assert_less (counters[current_pid], requested_ids[current_pid].size());
1560 
1561  const processor_id_type elem_procid =
1562  filled_request[current_pid][counters[current_pid]++];
1563 
1564  libmesh_assert_less (elem_procid, mesh.n_partitions());
1565  elem->processor_id() = elem_procid;
1566  }
1567 }
1568 
1569 
1570 } // namespace libMesh
The definition of the element_iterator struct.
Definition: mesh_base.h:2198
void allgather(const T &send_data, std::vector< T, A > &recv_data) const
void find_global_indices(const Parallel::Communicator &communicator, const libMesh::BoundingBox &, const ForwardIterator &, const ForwardIterator &, std::vector< dof_id_type > &) const
This method determines a globally unique, partition-agnostic index for each object in the input range...
const Elem * parent() const
Definition: elem.h:3030
constraint_rows_type & get_constraint_rows()
Constraint rows accessors.
Definition: mesh_base.h:1703
static void set_interface_node_processor_ids_petscpartitioner(MeshBase &mesh)
Nodes on the partitioning interface are partitioned into two groups using a PETSc partitioner for each...
Definition: partitioner.C:738
virtual dof_id_type n_active_elem() const =0
A Node is like a Point, but with more information.
Definition: node.h:52
std::unordered_map< dof_id_type, dof_id_type > _global_index_by_pid_map
Maps active element ids into a contiguous range, as needed by parallel partitioner.
Definition: partitioner.h:286
void libmesh_assert_valid_remote_elems(const MeshBase &mesh)
A function for verifying that active local elements' neighbors are never remote elements.
Definition: mesh_tools.C:1287
bool single_partition(MeshBase &mesh)
Trivially "partitions" the mesh for one processor.
Definition: partitioner.C:298
static void set_interface_node_processor_ids_BFS(MeshBase &mesh)
Nodes on the partitioning interface are clustered into two groups using a BFS (Breadth First Search) scheme for...
Definition: partitioner.C:657
dof_id_type n_elem(const MeshBase::const_element_iterator &begin, const MeshBase::const_element_iterator &end)
Count up the number of elements of a specific type (as defined by an iterator range).
Definition: mesh_tools.C:969
bool single_partition_range(MeshBase::element_iterator it, MeshBase::element_iterator end)
Slightly generalized version of single_partition which acts on a range of elements defined by the pai...
Definition: partitioner.C:320
libMesh::BoundingBox create_bounding_box(const MeshBase &mesh)
Definition: mesh_tools.C:559
static void set_node_processor_ids(MeshBase &mesh)
This function is called after partitioning to set the processor IDs for the nodes.
Definition: partitioner.C:852
void act_on_data(const std::vector< dof_id_type > &ids, const std::vector< datum > &local_ids)
Definition: partitioner.C:1060
static void set_interface_node_processor_ids_linear(MeshBase &mesh)
Nodes on the partitioning interface are linearly assigned to each pair of processors.
Definition: partitioner.C:632
This is the base class from which all geometric element types are derived.
Definition: elem.h:94
MeshBase & mesh
void find_nodal_neighbors(const MeshBase &mesh, const Node &n, const std::vector< std::vector< const Elem *>> &nodes_to_elem_map, std::vector< const Node *> &neighbors)
Given a mesh and a node in the mesh, the vector will be filled with every node directly attached to t...
Definition: mesh_tools.C:1036
const Parallel::Communicator & comm() const
void assign_partitioning(MeshBase &mesh, const std::vector< dof_id_type > &parts)
Assign the computed partitioning to the mesh.
Definition: partitioner.C:1467
void build_nodes_to_elem_map(const MeshBase &mesh, std::vector< std::vector< dof_id_type >> &nodes_to_elem_map)
After calling this function the input vector nodes_to_elem_map will contain the node to element conne...
Definition: mesh_tools.C:449
The libMesh namespace provides an interface to certain functionality in the library.
virtual void build_graph(const MeshBase &mesh)
Build a dual graph for partitioner.
Definition: partitioner.C:1112
Real distance(const Point &p)
std::unordered_map< dof_id_type, dof_id_type > map_type
Definition: partitioner.C:1045
processor_id_type choose_processor_id(processor_id_type pid1, processor_id_type pid2) const
Return which of pid1 and pid2 would be preferred by the current load-balancing heuristic applied to t...
Definition: node.C:78
std::vector< Elem * > _local_id_to_elem
Definition: partitioner.h:305
uint8_t processor_id_type
Definition: id_types.h:104
This is the MeshBase class.
Definition: mesh_base.h:75
SimpleRange< ChildRefIter > child_ref_range()
Returns a range with all children of a parent element, usable in range-based for loops.
Definition: elem.h:2346
virtual void _do_repartition(MeshBase &mesh, const unsigned int n)
This is the actual re-partitioning method which can be overridden in derived classes.
Definition: partitioner.h:251
void repartition(MeshBase &mesh, const unsigned int n)
Repartitions the MeshBase into n parts.
Definition: partitioner.C:263
uint8_t processor_id_type
PartitionerType
Defines an enum for mesh partitioner types.
dof_id_type n_active_local_elem() const
Definition: mesh_base.h:565
processor_id_type n_processors() const
virtual bool is_serial() const
Definition: mesh_base.h:211
void libmesh_ignore(const Args &...)
const dof_id_type n_nodes
Definition: tecplot_io.C:67
This is the MeshCommunication class.
static std::unique_ptr< Partitioner > build(const PartitionerType solver_package)
Builds a Partitioner of the type specified by partitioner_type.
Definition: partitioner.C:159
dof_id_type id() const
Definition: dof_object.h:828
void min(const T &r, T &o, Request &req) const
static const processor_id_type invalid_processor_id
An invalid processor_id to distinguish DoFs that have not been assigned to a processor.
Definition: dof_object.h:493
static void processor_pairs_to_interface_nodes(MeshBase &mesh, std::map< std::pair< processor_id_type, processor_id_type >, std::set< dof_id_type >> &processor_pair_to_nodes)
On the partitioning interface, a surface is shared by two and only two processors.
Definition: partitioner.C:579
void sync_node_data_by_element_id(MeshBase &mesh, const MeshBase::const_element_iterator &range_begin, const MeshBase::const_element_iterator &range_end, const ElemCheckFunctor &elem_check, const NodeCheckFunctor &node_check, SyncFunctor &sync)
Synchronize data about a range of ghost nodes uniquely identified by an element id and local node id...
SimpleRange< IndexType > as_range(const std::pair< IndexType, IndexType > &p)
Helper function that allows us to treat a homogenous pair as a range.
Definition: simple_range.h:57
unsigned int n_levels(const MeshBase &mesh)
Definition: mesh_tools.C:802
virtual dof_id_type max_elem_id() const =0
libmesh_assert(ctx)
void find_local_indices(const libMesh::BoundingBox &, const ForwardIterator &, const ForwardIterator &, std::unordered_map< dof_id_type, dof_id_type > &) const
This method determines a locally unique, contiguous index for each object in the input range...
static const dof_id_type invalid_id
An invalid id to distinguish an uninitialized DofObject.
Definition: dof_object.h:482
PetscErrorCode PetscInt const PetscInt IS * is
virtual void _find_global_index_by_pid_map(const MeshBase &mesh)
Construct contiguous global indices for the current partitioning.
Definition: partitioner.C:1068
void sync_dofobject_data_by_id(const Communicator &comm, const Iterator &range_begin, const Iterator &range_end, SyncFunctor &sync)
Request data about a range of ghost dofobjects uniquely identified by their id.
void invalidate_processor_id()
Sets the processor id to invalid_processor_id.
Definition: dof_object.h:780
virtual void _do_partition(MeshBase &mesh, const unsigned int n)=0
This is the actual partitioning method which must be overridden in derived classes.
std::string enum_to_string(const T e)
static void partition_unpartitioned_elements(MeshBase &mesh)
These functions assign processor IDs to newly-created elements (in parallel) which are currently assi...
Definition: partitioner.C:345
unsigned int n_partitions() const
Definition: mesh_base.h:1345
Defines a Cartesian bounding box by the two corner extremum.
Definition: bounding_box.h:40
void max(const T &r, T &o, Request &req) const
static const dof_id_type communication_blocksize
The blocksize to use when doing blocked parallel communication.
Definition: partitioner.h:258
virtual void partition(MeshBase &mesh, const unsigned int n)
Partitions the MeshBase into n parts by setting processor_id() on Nodes and Elems.
Definition: partitioner.C:196
void gather_data(const std::vector< dof_id_type > &ids, std::vector< datum > &local_ids) const
Definition: partitioner.C:1051
IntRange< T > make_range(T beg, T end)
The 2-parameter make_range() helper function returns an IntRange<T> when both input parameters are of...
Definition: int_range.h:140
virtual PartitionerType type() const
Definition: partitioner.C:152
virtual const Node & node_ref(const dof_id_type i) const
Definition: mesh_base.h:596
static void set_parent_processor_ids(MeshBase &mesh)
This function is called after partitioning to set the processor IDs for the inactive parent elements...
Definition: partitioner.C:426
bool on_command_line(std::string arg)
Definition: libmesh.C:987
void total_family_tree(std::vector< const Elem *> &family, bool reset=true) const
Same as the family_tree() member, but also adds any subactive descendants.
Definition: elem.C:2117
virtual void update_post_partitioning()
Recalculate any cached data after elements and nodes have been repartitioned.
Definition: mesh_base.h:1177
processor_id_type processor_id() const
virtual void redistribute()
Redistribute elements between processors.
Definition: mesh_base.C:985
processor_id_type processor_id() const
Definition: dof_object.h:905
std::vector< std::vector< dof_id_type > > _dual_graph
A dual graph corresponds to the mesh, and it is typically used in paritioner.
Definition: partitioner.h:302
unsigned int & set_n_partitions()
Definition: mesh_base.h:1865
auto index_range(const T &sizable)
Helper function that returns an IntRange<std::size_t> representing all the indices of the passed-in v...
Definition: int_range.h:117
std::vector< dof_id_type > _n_active_elem_on_proc
The number of active elements on each processor.
Definition: partitioner.h:295
SyncLocalIDs(map_type &_id_map)
Definition: partitioner.C:1047
uint8_t dof_id_type
Definition: id_types.h:67