libMesh
partitioner.C
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 
20 #include "libmesh/partitioner.h"
21 
22 // libMesh includes
23 #include "libmesh/compare_elems_by_level.h"
24 #include "libmesh/elem.h"
25 #include "libmesh/enum_to_string.h"
26 #include "libmesh/int_range.h"
27 #include "libmesh/libmesh_logging.h"
28 #include "libmesh/mesh_base.h"
29 #include "libmesh/mesh_tools.h"
30 #include "libmesh/mesh_communication.h"
31 #include "libmesh/parallel_ghost_sync.h"
32 #include "libmesh/wrapped_petsc.h"
33 #include "libmesh/boundary_info.h"
34 
35 // Subclasses to build()
36 #include "libmesh/enum_partitioner_type.h"
37 #include "libmesh/centroid_partitioner.h"
38 #include "libmesh/hilbert_sfc_partitioner.h"
39 #include "libmesh/linear_partitioner.h"
40 #include "libmesh/mapped_subdomain_partitioner.h"
41 #include "libmesh/metis_partitioner.h"
42 #include "libmesh/morton_sfc_partitioner.h"
43 #include "libmesh/parmetis_partitioner.h"
44 #include "libmesh/sfc_partitioner.h"
45 #include "libmesh/subdomain_partitioner.h"
46 
47 // TIMPI includes
49 #include "timpi/parallel_sync.h"
50 
51 // C/C++ includes
52 #ifdef LIBMESH_HAVE_PETSC
53 #include "libmesh/ignore_warnings.h"
54 #include "petscmat.h"
55 #include "libmesh/restore_warnings.h"
56 #include "libmesh/petsc_solver_exception.h"
57 #endif
58 
59 
60 namespace {
61 
62 using namespace libMesh;
63 
64 struct CorrectProcIds
65 {
66  typedef processor_id_type datum;
67 
68  CorrectProcIds(MeshBase & _mesh,
69  std::unordered_set<dof_id_type> & _bad_pids) :
70  mesh(_mesh), bad_pids(_bad_pids) {}
71 
72  MeshBase & mesh;
73  std::unordered_set<dof_id_type> & bad_pids;
74 
75  // ------------------------------------------------------------
76  void gather_data (const std::vector<dof_id_type> & ids,
77  std::vector<datum> & data)
78  {
79  // Find the processor id of each requested node
80  data.resize(ids.size());
81 
82  for (auto i : index_range(ids))
83  {
84  // Look for this point in the mesh and make sure we have a
85  // good pid for it before sending that on
86  if (ids[i] != DofObject::invalid_id &&
87  !bad_pids.count(ids[i]))
88  {
89  Node & node = mesh.node_ref(ids[i]);
90 
91  // Return the node's correct processor id,
92  data[i] = node.processor_id();
93  }
94  else
96  }
97  }
98 
99  // ------------------------------------------------------------
100  bool act_on_data (const std::vector<dof_id_type> & ids,
101  const std::vector<datum> proc_ids)
102  {
103  bool data_changed = false;
104 
105  // Set the ghost node processor ids we've now been informed of
106  for (auto i : index_range(ids))
107  {
108  Node & node = mesh.node_ref(ids[i]);
109 
110  processor_id_type & proc_id = node.processor_id();
111  const processor_id_type new_proc_id = proc_ids[i];
112 
113  if (const auto it = bad_pids.find(ids[i]);
114  (new_proc_id != proc_id || it != bad_pids.end()) &&
115  new_proc_id != DofObject::invalid_processor_id)
116  {
117  if (it != bad_pids.end())
118  {
119  proc_id = new_proc_id;
120  bad_pids.erase(it);
121  }
122  else
123  proc_id = node.choose_processor_id(proc_id, new_proc_id);
124 
125  if (proc_id == new_proc_id)
126  data_changed = true;
127  }
128  }
129 
130  return data_changed;
131  }
132 };
133 
134 }
135 
136 namespace libMesh
137 {
138 
139 
140 
141 // ------------------------------------------------------------
142 // Partitioner static data
144  dof_id_type(1000000);
145 
146 
147 
148 // ------------------------------------------------------------
149 // Partitioner implementation
150 
151 
153 {
154  return INVALID_PARTITIONER;
155 }
156 
157 
158 std::unique_ptr<Partitioner>
159 Partitioner::build (const PartitionerType partitioner_type)
160 {
161  switch (partitioner_type)
162  {
164  return std::make_unique<CentroidPartitioner>();
165  case LINEAR_PARTITIONER:
166  return std::make_unique<LinearPartitioner>();
168  return std::make_unique<MappedSubdomainPartitioner>();
169  case METIS_PARTITIONER:
170  return std::make_unique<MetisPartitioner>();
172  return std::make_unique<ParmetisPartitioner>();
174  return std::make_unique<HilbertSFCPartitioner>();
176  return std::make_unique<MortonSFCPartitioner>();
177  case SFC_PARTITIONER:
178  return std::make_unique<SFCPartitioner>();
180  return std::make_unique<SubdomainPartitioner>();
181  default:
182  libmesh_error_msg("Invalid partitioner type: " <<
183  Utility::enum_to_string(partitioner_type));
184  }
185 }
186 
187 
188 
190 {
191  this->partition(mesh,mesh.n_processors());
192 }
193 
194 
195 
197  const unsigned int n)
198 {
199  libmesh_parallel_only(mesh.comm());
200 
201  // BSK - temporary fix while redistribution is integrated 6/26/2008
202  // Uncomment this to not repartition in parallel
203  // if (!mesh.is_serial())
204  // return;
205 
206  // we cannot partition into more pieces than we have
207  // active elements!
208  const unsigned int n_parts =
209  static_cast<unsigned int>
210  (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));
211 
212  // Set the number of partitions in the mesh
213  mesh.set_n_partitions()=n_parts;
214 
215  if (n_parts == 1)
216  {
217  this->single_partition (mesh);
218  return;
219  }
220 
221  // First assign a temporary partitioning to any unpartitioned elements
223 
224  // Call the partitioning function
225  this->_do_partition(mesh,n_parts);
226 
227  // Set the parent's processor ids
229 
230  // Redistribute elements if necessary, before setting node processor
231  // ids, to make sure those will be set consistently
232  mesh.redistribute();
233 
234 #ifdef DEBUG
236 
237  // Messed up elem processor_id()s can leave us without the child
238  // elements we need to restrict vectors on a distributed mesh
239  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
240 #endif
241 
242  // Set the node's processor ids
244 
245 #ifdef DEBUG
246  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
247 #endif
248 
249  // Give derived Mesh classes a chance to update any cached data to
250  // reflect the new partitioning
252 }
253 
254 
255 
257 {
258  this->repartition(mesh,mesh.n_processors());
259 }
260 
261 
262 
264  const unsigned int n)
265 {
266  // we cannot partition into more pieces than we have
267  // active elements!
268  const unsigned int n_parts =
269  static_cast<unsigned int>
270  (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));
271 
272  // Set the number of partitions in the mesh
273  mesh.set_n_partitions()=n_parts;
274 
275  if (n_parts == 1)
276  {
277  this->single_partition (mesh);
278  return;
279  }
280 
281  // First assign a temporary partitioning to any unpartitioned elements
283 
284  // Call the partitioning function
285  this->_do_repartition(mesh,n_parts);
286 
287  // Set the parent's processor ids
289 
290  // Set the node's processor ids
292 }
293 
294 
295 
296 
297 
299 {
300  bool changed_pid =
301  this->single_partition_range(mesh.elements_begin(),
302  mesh.elements_end());
303 
304  // If we have a distributed mesh with an empty rank (or where rank
305  // 0 has only its own component of a disconnected mesh, I guess),
306  // that rank might need to be informed of a change.
307  mesh.comm().max(changed_pid);
308 
309  // We may need to redistribute, in case someone (like our unit
310  // tests) is doing something silly (like moving a whole
311  // already-distributed mesh back onto rank 0).
312  if (changed_pid)
313  mesh.redistribute();
314 
315  return changed_pid;
316 }
317 
318 
319 
322 {
323  LOG_SCOPE("single_partition_range()", "Partitioner");
324 
325  bool changed_pid = false;
326 
327  for (auto & elem : as_range(it, end))
328  {
329  if (elem->processor_id())
330  changed_pid = true;
331  elem->processor_id() = 0;
332 
333  // Assign all this element's nodes to processor 0 as well.
334  for (Node & node : elem->node_ref_range())
335  {
336  if (node.processor_id())
337  changed_pid = true;
338  node.processor_id() = 0;
339  }
340  }
341 
342  return changed_pid;
343 }
344 
346 {
348 }
349 
350 
351 
353  const unsigned int n_subdomains)
354 {
355  MeshBase::element_iterator it = mesh.unpartitioned_elements_begin();
356  const MeshBase::element_iterator end = mesh.unpartitioned_elements_end();
357 
358  const dof_id_type n_unpartitioned_elements = MeshTools::n_elem (it, end);
359 
360  // the unpartitioned elements must exist on all processors. If the range is empty on one
361  // it is empty on all, and we can quit right here.
362  if (!n_unpartitioned_elements)
363  return;
364 
365  // find the target subdomain sizes
366  std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
367 
368  for (auto pid : make_range(mesh.n_processors()))
369  {
370  dof_id_type tgt_subdomain_size = 0;
371 
372  // watch out for the case that n_subdomains < n_processors
373  if (pid < n_subdomains)
374  {
375  tgt_subdomain_size = n_unpartitioned_elements/n_subdomains;
376 
377  if (pid < n_unpartitioned_elements%n_subdomains)
378  tgt_subdomain_size++;
379 
380  }
381 
382  //libMesh::out << "pid, #= " << pid << ", " << tgt_subdomain_size << std::endl;
383  if (pid == 0)
384  subdomain_bounds[0] = tgt_subdomain_size;
385  else
386  subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
387  }
388 
389  libmesh_assert_equal_to (subdomain_bounds.back(), n_unpartitioned_elements);
390 
391  // create the unique mapping for all unpartitioned elements independent of partitioning
392  // determine the global indexing for all the unpartitioned elements
393  std::vector<dof_id_type> global_indices;
394 
395  // Calling this on all processors a unique range in [0,n_unpartitioned_elements) is constructed.
396  // Only the indices for the elements we pass in are returned in the array.
399  global_indices);
400 
401  dof_id_type cnt=0;
402  for (auto & elem : as_range(it, end))
403  {
404  libmesh_assert_less (cnt, global_indices.size());
405  const dof_id_type global_index =
406  global_indices[cnt++];
407 
408  libmesh_assert_less (global_index, subdomain_bounds.back());
409  libmesh_assert_less (global_index, n_unpartitioned_elements);
410 
411  const processor_id_type subdomain_id =
412  cast_int<processor_id_type>
413  (std::distance(subdomain_bounds.begin(),
414  std::upper_bound(subdomain_bounds.begin(),
415  subdomain_bounds.end(),
416  global_index)));
417  libmesh_assert_less (subdomain_id, n_subdomains);
418 
419  elem->processor_id() = subdomain_id;
420  //libMesh::out << "assigning " << global_index << " to " << subdomain_id << std::endl;
421  }
422 }
423 
424 
425 
427 {
428  // Ignore the parameter when !LIBMESH_ENABLE_AMR
430 
431  LOG_SCOPE("set_parent_processor_ids()", "Partitioner");
432 
433 #ifdef LIBMESH_ENABLE_AMR
434 
435  // If the mesh is serial we have access to all the elements,
436  // in particular all the active ones. We can therefore set
437  // the parent processor ids indirectly through their children, and
438  // set the subactive processor ids while examining their active
439  // ancestors.
440  // By convention a parent is assigned to the minimum processor
441  // of all its children, and a subactive is assigned to the processor
442  // of its active ancestor.
443  if (mesh.is_serial())
444  {
445  for (auto & elem : mesh.active_element_ptr_range())
446  {
447  // First set descendents
448  std::vector<Elem *> subactive_family;
449  elem->total_family_tree(subactive_family);
450  for (const auto & f : subactive_family)
451  f->processor_id() = elem->processor_id();
452 
453  // Then set ancestors
454  Elem * parent = elem->parent();
455 
456  while (parent)
457  {
458  // invalidate the parent id, otherwise the min below
459  // will not work if the current parent id is less
460  // than all the children!
461  parent->invalidate_processor_id();
462 
463  for (auto & child : parent->child_ref_range())
464  {
465  libmesh_assert(!child.is_remote());
466  libmesh_assert_not_equal_to (child.processor_id(), DofObject::invalid_processor_id);
467  parent->processor_id() = std::min(parent->processor_id(),
468  child.processor_id());
469  }
470  parent = parent->parent();
471  }
472  }
473  }
474 
475  // When the mesh is parallel we cannot guarantee that parents have access to
476  // all their children.
477  else
478  {
479  // Setting subactive processor ids is easy: we can guarantee
480  // that children have access to all their parents.
481 
482  // Loop over all the active elements in the mesh
483  for (auto & child : mesh.active_element_ptr_range())
484  {
485  std::vector<Elem *> subactive_family;
486  child->total_family_tree(subactive_family);
487  for (const auto & f : subactive_family)
488  f->processor_id() = child->processor_id();
489  }
490 
491  // When the mesh is parallel we cannot guarantee that parents have access to
492  // all their children.
493 
494  // We will use a brute-force approach here. Each processor finds its parent
495  // elements and sets the parent pid to the minimum of its
496  // semilocal descendants.
497  // A global reduction is then performed to make sure the true minimum is found.
498  // As noted, this is required because we cannot guarantee that a parent has
499  // access to all its children on any single processor.
500  libmesh_parallel_only(mesh.comm());
501  libmesh_assert(MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
502  mesh.unpartitioned_elements_end()) == 0);
503 
504  const dof_id_type max_elem_id = mesh.max_elem_id();
505 
506  std::vector<processor_id_type>
507  parent_processor_ids (std::min(communication_blocksize,
508  max_elem_id));
509 
510  for (dof_id_type blk=0, last_elem_id=0; last_elem_id<max_elem_id; blk++)
511  {
512  last_elem_id =
513  std::min(static_cast<dof_id_type>((blk+1)*communication_blocksize),
514  max_elem_id);
515  const dof_id_type first_elem_id = blk*communication_blocksize;
516 
517  std::fill (parent_processor_ids.begin(),
518  parent_processor_ids.end(),
520 
521  // first build up local contributions to parent_processor_ids
522  bool have_parent_in_block = false;
523 
524  for (auto & parent : as_range(mesh.ancestor_elements_begin(),
525  mesh.ancestor_elements_end()))
526  {
527  const dof_id_type parent_idx = parent->id();
528  libmesh_assert_less (parent_idx, max_elem_id);
529 
530  if ((parent_idx >= first_elem_id) &&
531  (parent_idx < last_elem_id))
532  {
533  have_parent_in_block = true;
535 
536  std::vector<const Elem *> active_family;
537  parent->active_family_tree(active_family);
538  for (const auto & f : active_family)
539  parent_pid = std::min (parent_pid, f->processor_id());
540 
541  const dof_id_type packed_idx = parent_idx - first_elem_id;
542  libmesh_assert_less (packed_idx, parent_processor_ids.size());
543 
544  parent_processor_ids[packed_idx] = parent_pid;
545  }
546  }
547 
548  // then find the global minimum
549  mesh.comm().min (parent_processor_ids);
550 
551  // and assign the ids, if we have a parent in this block.
552  if (have_parent_in_block)
553  for (auto & parent : as_range(mesh.ancestor_elements_begin(),
554  mesh.ancestor_elements_end()))
555  {
556  const dof_id_type parent_idx = parent->id();
557 
558  if ((parent_idx >= first_elem_id) &&
559  (parent_idx < last_elem_id))
560  {
561  const dof_id_type packed_idx = parent_idx - first_elem_id;
562  libmesh_assert_less (packed_idx, parent_processor_ids.size());
563 
564  const processor_id_type parent_pid =
565  parent_processor_ids[packed_idx];
566 
567  libmesh_assert_not_equal_to (parent_pid, DofObject::invalid_processor_id);
568 
569  parent->processor_id() = parent_pid;
570  }
571  }
572  }
573  }
574 
575 #endif // LIBMESH_ENABLE_AMR
576 }
577 
578 void
580  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> & processor_pair_to_nodes)
581 {
582  // This function must be run on all processors at once
583  libmesh_parallel_only(mesh.comm());
584 
585  processor_pair_to_nodes.clear();
586 
587  std::set<dof_id_type> mynodes;
588  std::set<dof_id_type> neighbor_nodes;
589  std::vector<dof_id_type> common_nodes;
590 
591  // Loop over all the active elements
592  for (auto & elem : mesh.active_element_ptr_range())
593  {
594  libmesh_assert(elem);
595 
596  libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
597 
598  auto n_nodes = elem->n_nodes();
599 
600  // prepare data for this element
601  mynodes.clear();
602  neighbor_nodes.clear();
603  common_nodes.clear();
604 
605  for (unsigned int inode = 0; inode < n_nodes; inode++)
606  mynodes.insert(elem->node_id(inode));
607 
608  for (auto i : elem->side_index_range())
609  {
610  auto neigh = elem->neighbor_ptr(i);
611  if (neigh && !neigh->is_remote() && neigh->processor_id() != elem->processor_id())
612  {
613  neighbor_nodes.clear();
614  common_nodes.clear();
615  auto neigh_n_nodes = neigh->n_nodes();
616  for (unsigned int inode = 0; inode < neigh_n_nodes; inode++)
617  neighbor_nodes.insert(neigh->node_id(inode));
618 
619  std::set_intersection(mynodes.begin(), mynodes.end(),
620  neighbor_nodes.begin(), neighbor_nodes.end(),
621  std::back_inserter(common_nodes));
622 
623  auto & map_set = processor_pair_to_nodes[std::make_pair(std::min(elem->processor_id(), neigh->processor_id()),
624  std::max(elem->processor_id(), neigh->processor_id()))];
625  for (auto global_node_id : common_nodes)
626  map_set.insert(global_node_id);
627  }
628  }
629  }
630 }
631 
633 {
634  // This function must be run on all processors at once
635  libmesh_parallel_only(mesh.comm());
636 
637  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
638 
639  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
640 
641  for (auto & pmap : processor_pair_to_nodes)
642  {
643  std::size_t n_own_nodes = pmap.second.size()/2, i = 0;
644 
645  for (dof_id_type id : pmap.second)
646  {
647  auto & node = mesh.node_ref(id);
648  if (i <= n_own_nodes)
649  node.processor_id() = pmap.first.first;
650  else
651  node.processor_id() = pmap.first.second;
652  i++;
653  }
654  }
655 }
656 
658 {
659  // This function must be run on all processors at once
660  libmesh_parallel_only(mesh.comm());
661 
662  // I see occasional consistency failures when using this on a
663  // distributed mesh
664  libmesh_experimental();
665 
666  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
667 
668  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
669 
670  std::unordered_map<dof_id_type, std::vector<const Elem *>> nodes_to_elem_map;
671 
672  MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);
673 
674  std::vector<const Node *> neighbors;
675  std::set<dof_id_type> neighbors_order;
676  std::vector<dof_id_type> common_nodes;
677  std::queue<dof_id_type> nodes_queue;
678  std::set<dof_id_type> visted_nodes;
679 
680  for (auto & pmap : processor_pair_to_nodes)
681  {
682  std::size_t n_own_nodes = pmap.second.size()/2;
683 
684  // Initialize node assignment
685  for (dof_id_type id : pmap.second)
686  mesh.node_ref(id).processor_id() = pmap.first.second;
687 
688  visted_nodes.clear();
689  for (dof_id_type id : pmap.second)
690  {
691  mesh.node_ref(id).processor_id() = pmap.first.second;
692 
693  if (visted_nodes.count(id))
694  continue;
695  else
696  {
697  nodes_queue.push(id);
698  visted_nodes.insert(id);
699  if (visted_nodes.size() >= n_own_nodes)
700  break;
701  }
702 
703  while (!nodes_queue.empty())
704  {
705  auto & node = mesh.node_ref(nodes_queue.front());
706  nodes_queue.pop();
707 
708  neighbors.clear();
709  MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
710  neighbors_order.clear();
711  for (auto & neighbor : neighbors)
712  neighbors_order.insert(neighbor->id());
713 
714  common_nodes.clear();
715  std::set_intersection(pmap.second.begin(), pmap.second.end(),
716  neighbors_order.begin(), neighbors_order.end(),
717  std::back_inserter(common_nodes));
718 
719  for (auto c_node : common_nodes)
720  if (!visted_nodes.count(c_node))
721  {
722  nodes_queue.push(c_node);
723  visted_nodes.insert(c_node);
724  if (visted_nodes.size() >= n_own_nodes)
725  goto queue_done;
726  }
727 
728  if (visted_nodes.size() >= n_own_nodes)
729  goto queue_done;
730  }
731  }
732  queue_done:
733  for (auto node : visted_nodes)
734  mesh.node_ref(node).processor_id() = pmap.first.first;
735  }
736 }
737 
739 {
740  libmesh_ignore(mesh); // Only used if LIBMESH_HAVE_PETSC
741 
742  // This function must be run on all processors at once
743  libmesh_parallel_only(mesh.comm());
744 
745 #if LIBMESH_HAVE_PETSC
746  std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
747 
748  processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
749 
750  std::vector<std::vector<const Elem *>> nodes_to_elem_map;
751 
752  MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);
753 
754  std::vector<const Node *> neighbors;
755  std::set<dof_id_type> neighbors_order;
756  std::vector<dof_id_type> common_nodes;
757 
758  std::vector<dof_id_type> rows;
759  std::vector<dof_id_type> cols;
760 
761  std::map<dof_id_type, dof_id_type> global_to_local;
762 
763  for (auto & pmap : processor_pair_to_nodes)
764  {
765  unsigned int i = 0;
766 
767  rows.clear();
768  rows.resize(pmap.second.size()+1);
769  cols.clear();
770  for (dof_id_type id : pmap.second)
771  global_to_local[id] = i++;
772 
773  i = 0;
774  for (auto id : pmap.second)
775  {
776  auto & node = mesh.node_ref(id);
777  neighbors.clear();
778  MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
779  neighbors_order.clear();
780  for (auto & neighbor : neighbors)
781  neighbors_order.insert(neighbor->id());
782 
783  common_nodes.clear();
784  std::set_intersection(pmap.second.begin(), pmap.second.end(),
785  neighbors_order.begin(), neighbors_order.end(),
786  std::back_inserter(common_nodes));
787 
788  rows[i+1] = rows[i] + cast_int<dof_id_type>(common_nodes.size());
789 
790  for (auto c_node : common_nodes)
791  cols.push_back(global_to_local[c_node]);
792 
793  i++;
794  }
795 
796  // Next we construct an IS from a MatPartitioning
798  {
799  PetscInt *adj_i, *adj_j;
800  LibmeshPetscCall2(mesh.comm(), PetscCalloc1(rows.size(), &adj_i));
801  LibmeshPetscCall2(mesh.comm(), PetscCalloc1(cols.size(), &adj_j));
802  PetscInt rows_size = cast_int<PetscInt>(rows.size());
803  for (PetscInt ii=0; ii<rows_size; ii++)
804  adj_i[ii] = rows[ii];
805 
806  PetscInt cols_size = cast_int<PetscInt>(cols.size());
807  for (PetscInt ii=0; ii<cols_size; ii++)
808  adj_j[ii] = cols[ii];
809 
810  const PetscInt sz = cast_int<PetscInt>(pmap.second.size());
811 
812  // Create sparse matrix representing an adjacency list
813  WrappedPetsc<Mat> adj;
814  LibmeshPetscCall2(mesh.comm(), MatCreateMPIAdj(PETSC_COMM_SELF, sz, sz, adj_i, adj_j, nullptr, adj.get()));
815 
816  // Create MatPartitioning object
818  LibmeshPetscCall2(mesh.comm(), MatPartitioningCreate(PETSC_COMM_SELF, part.get()));
819 
820  // Apply MatPartitioning, storing results in "is"
821  LibmeshPetscCall2(mesh.comm(), MatPartitioningSetAdjacency(part, adj));
822  LibmeshPetscCall2(mesh.comm(), MatPartitioningSetNParts(part, 2));
823  LibmeshPetscCall2(mesh.comm(), PetscObjectSetOptionsPrefix((PetscObject)(*part), "balance_"));
824  LibmeshPetscCall2(mesh.comm(), MatPartitioningSetFromOptions(part));
825  LibmeshPetscCall2(mesh.comm(), MatPartitioningApply(part, is.get()));
826  }
827 
828  PetscInt local_size;
829  const PetscInt *indices;
830  LibmeshPetscCall2(mesh.comm(), ISGetLocalSize(is, &local_size));
831  LibmeshPetscCall2(mesh.comm(), ISGetIndices(is, &indices));
832 
833  i = 0;
834  for (auto id : pmap.second)
835  {
836  auto & node = mesh.node_ref(id);
837  if (indices[i])
838  node.processor_id() = pmap.first.second;
839  else
840  node.processor_id() = pmap.first.first;
841 
842  i++;
843  }
844  LibmeshPetscCall2(mesh.comm(), ISRestoreIndices(is, &indices));
845  }
846 #else
847  libmesh_error_msg("PETSc is required");
848 #endif
849 }
850 
851 
853 {
854  LOG_SCOPE("set_node_processor_ids()","Partitioner");
855 
856  // This function must be run on all processors at once
857  libmesh_parallel_only(mesh.comm());
858 
859  // If we have any unpartitioned elements at this
860  // stage there is a problem
861  libmesh_assert (MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
862  mesh.unpartitioned_elements_end()) == 0);
863 
864  // Start from scratch here: nodes we used to own may not be
865  // eligible for us to own any more.
866  for (auto & node : mesh.node_ptr_range())
867  {
869  }
870 
871  // Loop over all the active elements
872  for (auto & elem : mesh.active_element_ptr_range())
873  {
874  libmesh_assert(elem);
875 
876  libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
877 
878  // Consider updating the processor id on this element's nodes
879  for (Node & node : elem->node_ref_range())
880  {
881  processor_id_type & pid = node.processor_id();
882  pid = node.choose_processor_id(pid, elem->processor_id());
883  }
884  }
885 
886  // How we finish off the node partitioning depends on our command
887  // line options.
888 
889  const bool load_balanced_nodes_linear =
890  libMesh::on_command_line ("--load-balanced-nodes-linear");
891 
892  const bool load_balanced_nodes_bfs =
893  libMesh::on_command_line ("--load-balanced-nodes-bfs");
894 
895  const bool load_balanced_nodes_petscpartition =
896  libMesh::on_command_line ("--load-balanced-nodes-petscpartitioner");
897 
898  unsigned int n_load_balance_options = load_balanced_nodes_linear;
899  n_load_balance_options += load_balanced_nodes_bfs;
900  n_load_balance_options += load_balanced_nodes_petscpartition;
901  libmesh_error_msg_if(n_load_balance_options > 1,
902  "Cannot perform more than one load balancing type at a time");
903 
904  if (load_balanced_nodes_linear)
906  else if (load_balanced_nodes_bfs)
908  else if (load_balanced_nodes_petscpartition)
910 
911  // Node balancing algorithm will response to assign owned nodes.
912  // We still need to sync PIDs
913  {
914  // For inactive elements, we will have already gotten most of
915  // these nodes, *except* for the case of a parent with a subset
916  // of active descendants which are remote elements. In that
917  // case some of the parent nodes will not have been properly
918  // handled yet on our processor.
919  //
920  // We don't want to inadvertently give one of them an incorrect
921  // processor id, but if we're not in serial then we have to
922  // assign them temporary pids to make querying work, so we'll
923  // save our *valid* pids before assigning temporaries.
924  //
925  // Even in serial we'll want to check and make sure we're not
926  // overwriting valid active node pids with pids from subactive
927  // elements.
928  std::unordered_set<dof_id_type> bad_pids;
929 
930  for (auto & node : mesh.node_ptr_range())
932  bad_pids.insert(node->id());
933 
934  // If we assign our temporary ids by looping from finer elements
935  // to coarser elements, we'll always get an id from the finest
936  // ghost element we can see, which will usually be "closer" to
937  // the true processor we want to query and so will reduce query
938  // cycles that don't reach that processor.
939 
940  // But we can still end up with a query cycle that dead-ends, so
941  // we need to prepare a "push" communication step here.
942 
943  const bool is_serial = mesh.is_serial();
944  std::unordered_map
946  std::unordered_map<dof_id_type, processor_id_type>>
947  potential_pids;
948 
949  const unsigned int n_levels = MeshTools::n_levels(mesh);
950  for (unsigned int level = n_levels; level > 0; --level)
951  {
952  for (auto & elem : as_range(mesh.level_elements_begin(level-1),
953  mesh.level_elements_end(level-1)))
954  {
955  libmesh_assert_not_equal_to (elem->processor_id(),
957 
958  const processor_id_type elem_pid = elem->processor_id();
959 
960  // Consider updating the processor id on this element's nodes
961  for (Node & node : elem->node_ref_range())
962  {
963  processor_id_type & pid = node.processor_id();
964  if (bad_pids.count(node.id()))
965  pid = node.choose_processor_id(pid, elem_pid);
966  else if (!is_serial)
967  potential_pids[elem_pid][node.id()] = pid;
968  }
969  }
970  }
971 
972  if (!is_serial)
973  {
974  std::unordered_map
976  std::vector<std::pair<dof_id_type, processor_id_type>>>
977  potential_pids_vecs;
978 
979  for (auto & pair : potential_pids)
980  potential_pids_vecs[pair.first].assign(pair.second.begin(), pair.second.end());
981 
982  auto pids_action_functor =
983  [& mesh, & bad_pids]
984  (processor_id_type /* src_pid */,
985  const std::vector<std::pair<dof_id_type, processor_id_type>> & data)
986  {
987  for (auto pair : data)
988  {
989  Node & node = mesh.node_ref(pair.first);
990  processor_id_type & pid = node.processor_id();
991  if (const auto it = bad_pids.find(pair.first);
992  it != bad_pids.end())
993  {
994  pid = pair.second;
995  bad_pids.erase(it);
996  }
997  else
998  pid = node.choose_processor_id(pid, pair.second);
999  }
1000  };
1001 
1002  Parallel::push_parallel_vector_data
1003  (mesh.comm(), potential_pids_vecs, pids_action_functor);
1004 
1005  // Using default libMesh options, we'll just need to sync
1006  // between processors now. The catch here is that we can't
1007  // initially trust Node::choose_processor_id() because some
1008  // of those node processor ids are the temporary ones.
1009  CorrectProcIds correct_pids(mesh, bad_pids);
1011  (mesh, mesh.elements_begin(), mesh.elements_end(),
1013  correct_pids);
1014 
1015  // But once we've got all the non-temporary pids synced, we
1016  // may need to sync again to get any pids on nodes only
1017  // connected to subactive elements, for which *only*
1018  // "temporary" pids are possible.
1019  bad_pids.clear();
1021  (mesh,
1022  mesh.elements_begin(), mesh.elements_end(),
1024  correct_pids);
1025  }
1026  }
1027 
1028  // We can't assert that all nodes are connected to elements, because
1029  // a DistributedMesh with NodeConstraints might have pulled in some
1030  // remote nodes solely for evaluating those constraints.
1031  // MeshTools::libmesh_assert_connected_nodes(mesh);
1032 
1033 #ifdef DEBUG
1034  MeshTools::libmesh_assert_valid_procids<Node>(mesh);
1035  //MeshTools::libmesh_assert_canonical_node_procids(mesh);
1036 #endif
1037 }
1038 
1039 
1040 
1042 {
1044 
1045  typedef std::unordered_map<dof_id_type, dof_id_type> map_type;
1046 
1047  SyncLocalIDs(map_type & _id_map) : id_map(_id_map) {}
1048 
1050 
1051  void gather_data (const std::vector<dof_id_type> & ids,
1052  std::vector<datum> & local_ids) const
1053  {
1054  local_ids.resize(ids.size());
1055 
1056  for (auto i : index_range(ids))
1057  local_ids[i] = id_map[ids[i]];
1058  }
1059 
1060  void act_on_data (const std::vector<dof_id_type> & ids,
1061  const std::vector<datum> & local_ids)
1062  {
1063  for (auto i : index_range(local_ids))
1064  id_map[ids[i]] = local_ids[i];
1065  }
1066 };
1067 
1069 {
1070  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
1071 
1072  // Find the number of active elements on each processor. We cannot use
1073  // mesh.n_active_elem_on_proc(pid) since that only returns the number of
1074  // elements assigned to pid which are currently stored on the calling
1075  // processor. This will not in general be correct for parallel meshes
1076  // when (pid!=mesh.processor_id()).
1077  auto n_proc = mesh.n_processors();
1078  _n_active_elem_on_proc.resize(n_proc);
1079  mesh.comm().allgather(n_active_local_elem, _n_active_elem_on_proc);
1080 
1081  std::vector<dof_id_type> n_active_elem_before_proc(mesh.n_processors());
1082 
1083  for (auto i : make_range(n_proc-1))
1084  n_active_elem_before_proc[i+1] =
1085  n_active_elem_before_proc[i] + _n_active_elem_on_proc[i];
1086 
1087  libMesh::BoundingBox bbox =
1089 
1090  _global_index_by_pid_map.clear();
1091 
1092  // create the mapping which is contiguous by processor
1094  mesh.active_local_elements_begin(),
1095  mesh.active_local_elements_end(),
1097 
1099 
1101  (mesh.comm(), mesh.active_elements_begin(), mesh.active_elements_end(), sync);
1102 
1103  for (const auto & elem : mesh.active_element_ptr_range())
1104  {
1105  const processor_id_type pid = elem->processor_id();
1106  libmesh_assert_less (_global_index_by_pid_map[elem->id()], _n_active_elem_on_proc[pid]);
1107 
1108  _global_index_by_pid_map[elem->id()] += n_active_elem_before_proc[pid];
1109  }
1110 }
1111 
1113 {
1114  LOG_SCOPE("build_graph()", "Partitioner");
1115 
1116  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
1117 
1118  // If we have boundary elements in this mesh, we want to account for
1119  // the connectivity between them and interior elements. We can find
1120  // interior elements from boundary elements, but we need to build up
1121  // a lookup map to do the reverse.
1122  typedef std::unordered_multimap<const Elem *, const Elem *> map_type;
1123  map_type interior_to_boundary_map;
1124 
1125  // If we have spline nodes in this mesh, we want to account for the
1126  // connectivity between them and integration elements. We can find
1127  // spline nodes from integration elements, but need a reverse map
1128  // {integration_elements} = elems_constrained_by[spline_nodeelem]
1129  map_type elems_constrained_by;
1130 
1131  const auto & mesh_constrained_nodes = mesh.get_constraint_rows();
1132 
1133  for (const Elem * elem : mesh.active_element_ptr_range())
1134  {
1135  if (!mesh_constrained_nodes.empty()) // quick test for non-IGA cases
1136  {
1137  const auto end_it = mesh_constrained_nodes.end();
1138 
1139  // Use a set to avoid duplicates. Use a well-defined method
1140  // of ordering that set to make debugging easier.
1141  std::set<const Elem *, CompareElemIdsByLevel> constraining_elems;
1142  for (const Node & node : elem->node_ref_range())
1143  {
1144  if (const auto row_it = mesh_constrained_nodes.find(&node);
1145  row_it != end_it)
1146  for (const auto & [pr, coef] : row_it->second)
1147  {
1148  libmesh_ignore(coef); // avoid gcc 7 warning
1149  constraining_elems.insert(pr.first);
1150  }
1151  }
1152  for (const Elem * constraining_elem : constraining_elems)
1153  elems_constrained_by.emplace(constraining_elem, elem);
1154  }
1155 
1156  // If we don't have an interior_parent and we don't have any
1157  // constrained nodes then there's nothing else to look up.
1158  if (elem->interior_parent())
1159  {
1160  // get all relevant interior elements
1161  std::set<const Elem *> neighbor_set;
1162  elem->find_interior_neighbors(neighbor_set);
1163 
1164  for (const auto & neighbor : neighbor_set)
1165  interior_to_boundary_map.emplace(neighbor, elem);
1166  }
1167  }
1168 
1169 #ifdef LIBMESH_ENABLE_AMR
1170  std::vector<const Elem *> neighbors_offspring;
1171 #endif
1172 
1173  // This is costly, and we only need to do it if the mesh has
1174  // changed since we last partitioned... but the mesh probably has
1175  // changed since we last partitioned, and if it hasn't we don't
1176  // have a reliable way to be sure of that.
1178 
1179  dof_id_type first_local_elem = 0;
1180  for (auto pid : make_range(mesh.processor_id()))
1181  first_local_elem += _n_active_elem_on_proc[pid];
1182 
1183  _dual_graph.clear();
1184  _dual_graph.resize(n_active_local_elem);
1185  _local_id_to_elem.resize(n_active_local_elem);
1186 
1187  // We may need to communicate constraint-row-based connections
1188  // between processors
1189  std::unordered_map<processor_id_type,
1190  std::vector<std::pair<dof_id_type, dof_id_type>>> connections_to_push;
1191 
1192  for (const auto & elem : mesh.active_local_element_ptr_range())
1193  {
1194  libmesh_assert (_global_index_by_pid_map.count(elem->id()));
1195  const dof_id_type global_index_by_pid =
1196  _global_index_by_pid_map[elem->id()];
1197 
1198  const dof_id_type local_index =
1199  global_index_by_pid - first_local_elem;
1200  libmesh_assert_less (local_index, n_active_local_elem);
1201 
1202  std::vector<dof_id_type> & graph_row = _dual_graph[local_index];
1203 
1204  // Save this off to make it easy to index later
1205  _local_id_to_elem[local_index] = const_cast<Elem*>(elem);
1206 
1207  // Loop over the element's neighbors. An element
1208  // adjacency corresponds to a face neighbor
1209  for (auto neighbor : elem->neighbor_ptr_range())
1210  {
1211  if (neighbor != nullptr)
1212  {
1213  // If the neighbor is active treat it
1214  // as a connection
1215  if (neighbor->active())
1216  {
1217  libmesh_assert(_global_index_by_pid_map.count(neighbor->id()));
1218  const dof_id_type neighbor_global_index_by_pid =
1219  _global_index_by_pid_map[neighbor->id()];
1220 
1221  graph_row.push_back(neighbor_global_index_by_pid);
1222  }
1223 
1224 #ifdef LIBMESH_ENABLE_AMR
1225 
1226  // Otherwise we need to find all of the
1227  // neighbor's children that are connected to
1228  // us and add them
1229  else
1230  {
1231  // The side of the neighbor to which
1232  // we are connected
1233  const unsigned int ns =
1234  neighbor->which_neighbor_am_i (elem);
1235  libmesh_assert_less (ns, neighbor->n_neighbors());
1236 
1237  // Get all the active children (& grandchildren, etc...)
1238  // of the neighbor
1239 
1240  // FIXME - this is the wrong thing, since we
1241  // should be getting the active family tree on
1242  // our side only. But adding too many graph
1243  // links may cause hanging nodes to tend to be
1244  // on partition interiors, which would reduce
1245  // communication overhead for constraint
1246  // equations, so we'll leave it.
1247 
1248  neighbor->active_family_tree (neighbors_offspring);
1249 
1250  // Get all the neighbor's children that
1251  // live on that side and are thus connected
1252  // to us
1253  for (const auto & child : neighbors_offspring)
1254  {
1255  // This does not assume a level-1 mesh.
1256  // Note that since children have sides numbered
1257  // coincident with the parent then this is a sufficient test.
1258  if (child->neighbor_ptr(ns) == elem)
1259  {
1260  libmesh_assert (child->active());
1261  libmesh_assert (_global_index_by_pid_map.count(child->id()));
1262  const dof_id_type child_global_index_by_pid =
1263  _global_index_by_pid_map[child->id()];
1264 
1265  graph_row.push_back(child_global_index_by_pid);
1266  }
1267  }
1268  }
1269 
1270 #endif /* ifdef LIBMESH_ENABLE_AMR */
1271 
1272 
1273  }
1274  }
1275 
1276  if ((elem->dim() < LIBMESH_DIM) &&
1277  elem->interior_parent())
1278  {
1279  // get all relevant interior elements
1280  std::set<const Elem *> neighbor_set;
1281  elem->find_interior_neighbors(neighbor_set);
1282 
1283  for (const auto & neighbor : neighbor_set)
1284  {
1285  const dof_id_type neighbor_global_index_by_pid =
1286  _global_index_by_pid_map[neighbor->id()];
1287 
1288  graph_row.push_back(neighbor_global_index_by_pid);
1289  }
1290  }
1291 
1292  // Check for any boundary neighbors
1293  for (const auto & pr : as_range(interior_to_boundary_map.equal_range(elem)))
1294  {
1295  const Elem * neighbor = pr.second;
1296 
1297  const dof_id_type neighbor_global_index_by_pid =
1298  _global_index_by_pid_map[neighbor->id()];
1299 
1300  graph_row.push_back(neighbor_global_index_by_pid);
1301  }
1302 
1303  // Check for any constraining elements
1304  if (!mesh_constrained_nodes.empty()) // quick test for non-IGA cases
1305  {
1306  const auto end_it = mesh_constrained_nodes.end();
1307 
1308  // Use a set to avoid duplicates. Use a well-defined method
1309  // of ordering that set to make debugging easier.
1310  std::set<const Elem *, CompareElemIdsByLevel> constraining_elems;
1311  for (const Node & node : elem->node_ref_range())
1312  {
1313  if (const auto row_it = mesh_constrained_nodes.find(&node);
1314  row_it != end_it)
1315  for (const auto & [pr, coef] : row_it->second)
1316  {
1317  libmesh_ignore(coef); // avoid gcc 7 warning
1318  constraining_elems.insert(pr.first);
1319  }
1320  }
1321  for (const Elem * constraining_elem : constraining_elems)
1322  {
1323  const dof_id_type constraining_global_index_by_pid =
1324  _global_index_by_pid_map[constraining_elem->id()];
1325 
1326  graph_row.push_back(constraining_global_index_by_pid);
1327 
1328  // We can't be sure if the constraining element's owner sees
1329  // the assembly element, so to get a symmetric connectivity
1330  // graph we'll need to tell them about us to be safe.
1331  if (constraining_elem->processor_id() != mesh.processor_id())
1332  connections_to_push[constraining_elem->processor_id()].emplace_back
1333  (global_index_by_pid, constraining_global_index_by_pid);
1334  }
1335  }
1336 
1337  // Check for any constrained elements
1338  for (const auto & pr : as_range(elems_constrained_by.equal_range(elem)))
1339  {
1340  const Elem * constrained = pr.second;
1341  const dof_id_type constrained_global_index_by_pid =
1342  _global_index_by_pid_map[constrained->id()];
1343 
1344  graph_row.push_back(constrained_global_index_by_pid);
1345 
1346  // We can't be sure if the constrained element's owner sees
1347  // the assembly element, so to get a symmetric connectivity
1348  // graph we'll need to tell them about us to be safe.
1349  if (constrained->processor_id() != mesh.processor_id())
1350  connections_to_push[constrained->processor_id()].emplace_back
1351  (global_index_by_pid, constrained_global_index_by_pid);
1352  }
1353  }
1354 
1355  // Partitioners like Parmetis require a symmetric adjacency matrix,
1356  // but if we have an assembly element constrained by a spline
1357  // NodeElem owned by another processor, it's possible that that
1358  // processor doesn't see our assembly element. Let's push those
1359  // entries, to ensure they're counted on both sides.
1360  auto symmetrize_entries =
1361  [this, first_local_elem]
1362  (processor_id_type /*src_pid*/,
1363  const std::vector<std::pair<dof_id_type, dof_id_type>> & incoming_entries)
1364  {
1365  for (auto [i, j] : incoming_entries)
1366  {
1367  libmesh_assert_greater_equal(j, first_local_elem);
1368  const std::size_t jl = j - first_local_elem;
1369  libmesh_assert_less(jl, _dual_graph.size());
1370  std::vector<dof_id_type> & graph_row = _dual_graph[jl];
1371  if (std::find(graph_row.begin(), graph_row.end(), i) == graph_row.end())
1372  {
1373 // std::cerr << "Pushing back (" << j << ", " << i << ") from " << src_pid << std::endl;
1374  graph_row.push_back(i);
1375  }
1376  }
1377  };
1378 
1379  Parallel::push_parallel_vector_data(mesh.comm(),
1380  connections_to_push,
1381  symmetrize_entries);
1382 
1383  // *Now* we should have a symmetric adjacency matrix. Let's check
1384  // that, so any failures get caught before they e.g. confuse
1385  // Parmetis in hard-to-debug ways. That's a global communication so
1386  // we'll only do it in debug mode.
1387 #ifdef DEBUG
1388  auto n_proc = mesh.n_processors();
1389  std::vector<dof_id_type> first_local_index_on_proc(n_proc, 0);
1390  for (auto pid : make_range(1u,n_proc))
1391  first_local_index_on_proc[pid] = first_local_index_on_proc[pid-1] + _n_active_elem_on_proc[pid-1];
1392 
1393  libmesh_assert_equal_to(first_local_index_on_proc[mesh.processor_id()],
1394  first_local_elem);
1395 
1396  std::unordered_map<processor_id_type, std::vector<std::pair<dof_id_type, dof_id_type>>> entries_to_send;
1397  for (auto il : index_range(_dual_graph))
1398  {
1399  const std::vector<dof_id_type> & graph_row = _dual_graph[il];
1400 
1401  const auto i = il + first_local_elem;
1402 
1403  for (auto j : graph_row)
1404  {
1405  // Stupid graph rows aren't sorted yet...
1406  processor_id_type target_pid = 0;
1407  while (target_pid+1 < n_proc &&
1408  j >= first_local_index_on_proc[target_pid+1])
1409  ++target_pid;
1410 
1411  entries_to_send[target_pid].emplace_back(i,j);
1412  }
1413  }
1414 
1415  std::vector<std::tuple<processor_id_type, dof_id_type, dof_id_type>> bad_entries;
1416 
1417  auto check_incoming_entries =
1418  [this, first_local_elem, &bad_entries]
1419  (processor_id_type src_pid,
1420  const std::vector<std::pair<dof_id_type, dof_id_type>> & incoming_entries)
1421  {
1422  for (auto [i, j] : incoming_entries)
1423  {
1424  if (j < first_local_elem)
1425  {
1426  bad_entries.emplace_back(src_pid,i,j);
1427  continue;
1428  }
1429  const std::size_t jl = j - first_local_elem;
1430  if (jl >= _dual_graph.size())
1431  {
1432  bad_entries.emplace_back(src_pid,i,j);
1433  continue;
1434  }
1435  const std::vector<dof_id_type> & graph_row = _dual_graph[jl];
1436  if (std::find(graph_row.begin(), graph_row.end(), i) ==
1437  graph_row.end())
1438  {
1439  bad_entries.emplace_back(src_pid,i,j);
1440  continue;
1441  }
1442  }
1443  };
1444 
1445  // Keep any failures in sync in parallel, for easier debugging in
1446  // unit tests where the failure exception will be caught.
1447  Parallel::push_parallel_vector_data
1448  (mesh.comm(), entries_to_send, check_incoming_entries);
1449  bool bad_entries_exist = !bad_entries.empty();
1450  mesh.comm().max(bad_entries_exist);
1451  if (bad_entries_exist)
1452  {
1453 #if 0 // Optional verbosity for if this breaks again...
1454  if (!bad_entries.empty())
1455  {
1456  std::cerr << "Bad entries on processor " << mesh.processor_id() << ": ";
1457  for (auto [p, i, j] : bad_entries)
1458  std::cerr << '(' << p << ", " << i << ", " << j << "), ";
1459  std::cerr << std::endl;
1460  }
1461 #endif
1462  libmesh_error_msg("Asymmetric partitioner graph detected");
1463  }
1464 #endif
1465 }
1466 
1467 void Partitioner::assign_partitioning (MeshBase & mesh, const std::vector<dof_id_type> & parts)
1468 {
1469  LOG_SCOPE("assign_partitioning()", "Partitioner");
1470 
1471  // This function must be run on all processors at once
1472  libmesh_parallel_only(mesh.comm());
1473 
1474  dof_id_type first_local_elem = 0;
1475  for (auto pid : make_range(mesh.processor_id()))
1476  first_local_elem += _n_active_elem_on_proc[pid];
1477 
1478 #ifndef NDEBUG
1479  const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
1480 #endif
1481 
1482  std::map<processor_id_type, std::vector<dof_id_type>>
1483  requested_ids;
1484 
1485  // Results to gather from each processor - kept in a map so we
1486  // do only one loop over elements after all receives are done.
1487  std::map<processor_id_type, std::vector<processor_id_type>>
1488  filled_request;
1489 
1490  for (auto & elem : mesh.active_element_ptr_range())
1491  {
1492  // we need to get the index from the owning processor
1493  // (note we cannot assign it now -- we are iterating
1494  // over elements again and this will be bad!)
1495  requested_ids[elem->processor_id()].push_back(elem->id());
1496  }
1497 
1498  auto gather_functor =
1499  [this,
1500  & parts,
1501 #ifndef NDEBUG
1502  & mesh,
1503  n_active_local_elem,
1504 #endif
1505  first_local_elem]
1506  (processor_id_type, const std::vector<dof_id_type> & ids,
1507  std::vector<processor_id_type> & data)
1508  {
1509  const std::size_t ids_size = ids.size();
1510  data.resize(ids.size());
1511 
1512  for (std::size_t i=0; i != ids_size; i++)
1513  {
1514  const dof_id_type requested_elem_index = ids[i];
1515 
1516  libmesh_assert(_global_index_by_pid_map.count(requested_elem_index));
1517 
1518  const dof_id_type global_index_by_pid =
1519  _global_index_by_pid_map[requested_elem_index];
1520 
1521  const dof_id_type local_index =
1522  global_index_by_pid - first_local_elem;
1523 
1524  libmesh_assert_less (local_index, parts.size());
1525  libmesh_assert_less (local_index, n_active_local_elem);
1526 
1527  const processor_id_type elem_procid =
1528  cast_int<processor_id_type>(parts[local_index]);
1529 
1530  libmesh_assert_less (elem_procid, mesh.n_partitions());
1531 
1532  data[i] = elem_procid;
1533  }
1534  };
1535 
1536  auto action_functor =
1537  [&filled_request]
1538  (processor_id_type pid,
1539  const std::vector<dof_id_type> &,
1540  const std::vector<processor_id_type> & new_procids)
1541  {
1542  filled_request[pid] = new_procids;
1543  };
1544 
1545  // Trade requests with other processors
1546  const processor_id_type * ex = nullptr;
1547  Parallel::pull_parallel_vector_data
1548  (mesh.comm(), requested_ids, gather_functor, action_functor, ex);
1549 
1550  // and finally assign the partitioning.
1551  // note we are iterating in exactly the same order
1552  // used to build up the request, so we can expect the
1553  // required entries to be in the proper sequence.
1554  std::vector<unsigned int> counters(mesh.n_processors(), 0);
1555  for (auto & elem : mesh.active_element_ptr_range())
1556  {
1557  const processor_id_type current_pid = elem->processor_id();
1558 
1559  libmesh_assert_less (counters[current_pid], requested_ids[current_pid].size());
1560 
1561  const processor_id_type elem_procid =
1562  filled_request[current_pid][counters[current_pid]++];
1563 
1564  libmesh_assert_less (elem_procid, mesh.n_partitions());
1565  elem->processor_id() = elem_procid;
1566  }
1567 }
1568 
1569 
1570 } // namespace libMesh
The definition of the element_iterator struct.
Definition: mesh_base.h:2198
void allgather(const T &send_data, std::vector< T, A > &recv_data) const
void find_global_indices(const Parallel::Communicator &communicator, const libMesh::BoundingBox &, const ForwardIterator &, const ForwardIterator &, std::vector< dof_id_type > &) const
This method determines a globally unique, partition-agnostic index for each object in the input range...
const Elem * parent() const
Definition: elem.h:3030
constraint_rows_type & get_constraint_rows()
Constraint rows accessors.
Definition: mesh_base.h:1703
static void set_interface_node_processor_ids_petscpartitioner(MeshBase &mesh)
Nodes on the partitioning interface are partitioned into two groups using a PETSc partitioner for each...
Definition: partitioner.C:738
virtual dof_id_type n_active_elem() const =0
A Node is like a Point, but with more information.
Definition: node.h:52
std::unordered_map< dof_id_type, dof_id_type > _global_index_by_pid_map
Maps active element ids into a contiguous range, as needed by parallel partitioner.
Definition: partitioner.h:286
void libmesh_assert_valid_remote_elems(const MeshBase &mesh)
A function for verifying that active local elements' neighbors are never remote elements.
Definition: mesh_tools.C:1287
bool single_partition(MeshBase &mesh)
Trivially "partitions" the mesh for one processor.
Definition: partitioner.C:298
static void set_interface_node_processor_ids_BFS(MeshBase &mesh)
Nodes on the partitioning interface are clustered into two groups using a BFS (Breadth First Search) scheme for...
Definition: partitioner.C:657
dof_id_type n_elem(const MeshBase::const_element_iterator &begin, const MeshBase::const_element_iterator &end)
Count up the number of elements of a specific type (as defined by an iterator range).
Definition: mesh_tools.C:969
bool single_partition_range(MeshBase::element_iterator it, MeshBase::element_iterator end)
Slightly generalized version of single_partition which acts on a range of elements defined by the pai...
Definition: partitioner.C:320
libMesh::BoundingBox create_bounding_box(const MeshBase &mesh)
Definition: mesh_tools.C:559
static void set_node_processor_ids(MeshBase &mesh)
This function is called after partitioning to set the processor IDs for the nodes.
Definition: partitioner.C:852
void act_on_data(const std::vector< dof_id_type > &ids, const std::vector< datum > &local_ids)
Definition: partitioner.C:1060
static void set_interface_node_processor_ids_linear(MeshBase &mesh)
Nodes on the partitioning interface are linearly assigned to each pair of processors.
Definition: partitioner.C:632
This is the base class from which all geometric element types are derived.
Definition: elem.h:94
MeshBase & mesh
void find_nodal_neighbors(const MeshBase &mesh, const Node &n, const std::vector< std::vector< const Elem *>> &nodes_to_elem_map, std::vector< const Node *> &neighbors)
Given a mesh and a node in the mesh, the vector will be filled with every node directly attached to t...
Definition: mesh_tools.C:1036
const Parallel::Communicator & comm() const
void assign_partitioning(MeshBase &mesh, const std::vector< dof_id_type > &parts)
Assign the computed partitioning to the mesh.
Definition: partitioner.C:1467
void build_nodes_to_elem_map(const MeshBase &mesh, std::vector< std::vector< dof_id_type >> &nodes_to_elem_map)
After calling this function the input vector nodes_to_elem_map will contain the node to element conne...
Definition: mesh_tools.C:449
The libMesh namespace provides an interface to certain functionality in the library.
virtual void build_graph(const MeshBase &mesh)
Build a dual graph for partitioner.
Definition: partitioner.C:1112
Real distance(const Point &p)
std::unordered_map< dof_id_type, dof_id_type > map_type
Definition: partitioner.C:1045
processor_id_type choose_processor_id(processor_id_type pid1, processor_id_type pid2) const
Return which of pid1 and pid2 would be preferred by the current load-balancing heuristic applied to t...
Definition: node.C:78
std::vector< Elem * > _local_id_to_elem
Definition: partitioner.h:305
uint8_t processor_id_type
Definition: id_types.h:104
This is the MeshBase class.
Definition: mesh_base.h:75
SimpleRange< ChildRefIter > child_ref_range()
Returns a range with all children of a parent element, usable in range-based for loops.
Definition: elem.h:2346
virtual void _do_repartition(MeshBase &mesh, const unsigned int n)
This is the actual re-partitioning method which can be overridden in derived classes.
Definition: partitioner.h:251
void repartition(MeshBase &mesh, const unsigned int n)
Repartitions the MeshBase into n parts.
Definition: partitioner.C:263
uint8_t processor_id_type
PartitionerType
Defines an enum for mesh partitioner types.
dof_id_type n_active_local_elem() const
Definition: mesh_base.h:565
processor_id_type n_processors() const
virtual bool is_serial() const
Definition: mesh_base.h:211
void libmesh_ignore(const Args &...)
const dof_id_type n_nodes
Definition: tecplot_io.C:67
This is the MeshCommunication class.
static std::unique_ptr< Partitioner > build(const PartitionerType solver_package)
Builds a Partitioner of the type specified by partitioner_type.
Definition: partitioner.C:159
dof_id_type id() const
Definition: dof_object.h:828
void min(const T &r, T &o, Request &req) const
static const processor_id_type invalid_processor_id
An invalid processor_id to distinguish DoFs that have not been assigned to a processor.
Definition: dof_object.h:493
static void processor_pairs_to_interface_nodes(MeshBase &mesh, std::map< std::pair< processor_id_type, processor_id_type >, std::set< dof_id_type >> &processor_pair_to_nodes)
On the partitioning interface, a surface is shared by two and only two processors.
Definition: partitioner.C:579
void sync_node_data_by_element_id(MeshBase &mesh, const MeshBase::const_element_iterator &range_begin, const MeshBase::const_element_iterator &range_end, const ElemCheckFunctor &elem_check, const NodeCheckFunctor &node_check, SyncFunctor &sync)
Synchronize data about a range of ghost nodes uniquely identified by an element id and local node id...
SimpleRange< IndexType > as_range(const std::pair< IndexType, IndexType > &p)
Helper function that allows us to treat a homogenous pair as a range.
Definition: simple_range.h:57
unsigned int n_levels(const MeshBase &mesh)
Definition: mesh_tools.C:802
virtual dof_id_type max_elem_id() const =0
libmesh_assert(ctx)
void find_local_indices(const libMesh::BoundingBox &, const ForwardIterator &, const ForwardIterator &, std::unordered_map< dof_id_type, dof_id_type > &) const
This method determines a locally unique, contiguous index for each object in the input range...
static const dof_id_type invalid_id
An invalid id to distinguish an uninitialized DofObject.
Definition: dof_object.h:482
PetscErrorCode PetscInt const PetscInt IS * is
virtual void _find_global_index_by_pid_map(const MeshBase &mesh)
Construct contiguous global indices for the current partitioning.
Definition: partitioner.C:1068
void sync_dofobject_data_by_id(const Communicator &comm, const Iterator &range_begin, const Iterator &range_end, SyncFunctor &sync)
Request data about a range of ghost dofobjects uniquely identified by their id.
void invalidate_processor_id()
Sets the processor id to invalid_processor_id.
Definition: dof_object.h:780
virtual void _do_partition(MeshBase &mesh, const unsigned int n)=0
This is the actual partitioning method which must be overridden in derived classes.
std::string enum_to_string(const T e)
static void partition_unpartitioned_elements(MeshBase &mesh)
These functions assign processor IDs to newly-created elements (in parallel) which are currently assi...
Definition: partitioner.C:345
unsigned int n_partitions() const
Definition: mesh_base.h:1345
Defines a Cartesian bounding box by the two corner extremum.
Definition: bounding_box.h:40
void max(const T &r, T &o, Request &req) const
static const dof_id_type communication_blocksize
The blocksize to use when doing blocked parallel communication.
Definition: partitioner.h:258
virtual void partition(MeshBase &mesh, const unsigned int n)
Partitions the MeshBase into n parts by setting processor_id() on Nodes and Elems.
Definition: partitioner.C:196
void gather_data(const std::vector< dof_id_type > &ids, std::vector< datum > &local_ids) const
Definition: partitioner.C:1051
IntRange< T > make_range(T beg, T end)
The 2-parameter make_range() helper function returns an IntRange<T> when both input parameters are of...
Definition: int_range.h:140
virtual PartitionerType type() const
Definition: partitioner.C:152
virtual const Node & node_ref(const dof_id_type i) const
Definition: mesh_base.h:596
static void set_parent_processor_ids(MeshBase &mesh)
This function is called after partitioning to set the processor IDs for the inactive parent elements...
Definition: partitioner.C:426
bool on_command_line(std::string arg)
Definition: libmesh.C:987
void total_family_tree(std::vector< const Elem *> &family, bool reset=true) const
Same as the family_tree() member, but also adds any subactive descendants.
Definition: elem.C:2117
virtual void update_post_partitioning()
Recalculate any cached data after elements and nodes have been repartitioned.
Definition: mesh_base.h:1177
processor_id_type processor_id() const
virtual void redistribute()
Redistribute elements between processors.
Definition: mesh_base.C:985
processor_id_type processor_id() const
Definition: dof_object.h:905
std::vector< std::vector< dof_id_type > > _dual_graph
A dual graph corresponds to the mesh, and it is typically used in paritioner.
Definition: partitioner.h:302
unsigned int & set_n_partitions()
Definition: mesh_base.h:1865
auto index_range(const T &sizable)
Helper function that returns an IntRange<std::size_t> representing all the indices of the passed-in v...
Definition: int_range.h:117
std::vector< dof_id_type > _n_active_elem_on_proc
The number of active elements on each processor.
Definition: partitioner.h:295
SyncLocalIDs(map_type &_id_map)
Definition: partitioner.C:1047
uint8_t dof_id_type
Definition: id_types.h:67