LCOV - code coverage report
Current view: top level - src/partitioning - partitioner.C (source / functions) Hit Total Coverage
Test: libMesh/libmesh: #4229 (6a9aeb) with base 727f46 Lines: 434 646 67.2 %
Date: 2025-08-19 19:27:09 Functions: 31 43 72.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // The libMesh Finite Element Library.
       2             : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
       3             : 
       4             : // This library is free software; you can redistribute it and/or
       5             : // modify it under the terms of the GNU Lesser General Public
       6             : // License as published by the Free Software Foundation; either
       7             : // version 2.1 of the License, or (at your option) any later version.
       8             : 
       9             : // This library is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12             : // Lesser General Public License for more details.
      13             : 
      14             : // You should have received a copy of the GNU Lesser General Public
      15             : // License along with this library; if not, write to the Free Software
      16             : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      17             : 
      18             : 
      19             : 
      20             : #include "libmesh/partitioner.h"
      21             : 
      22             : // libMesh includes
      23             : #include "libmesh/compare_elems_by_level.h"
      24             : #include "libmesh/elem.h"
      25             : #include "libmesh/enum_to_string.h"
      26             : #include "libmesh/int_range.h"
      27             : #include "libmesh/libmesh_logging.h"
      28             : #include "libmesh/mesh_base.h"
      29             : #include "libmesh/mesh_tools.h"
      30             : #include "libmesh/mesh_communication.h"
      31             : #include "libmesh/parallel_ghost_sync.h"
      32             : #include "libmesh/wrapped_petsc.h"
      33             : #include "libmesh/boundary_info.h"
      34             : 
      35             : // Subclasses to build()
      36             : #include "libmesh/enum_partitioner_type.h"
      37             : #include "libmesh/centroid_partitioner.h"
      38             : #include "libmesh/hilbert_sfc_partitioner.h"
      39             : #include "libmesh/linear_partitioner.h"
      40             : #include "libmesh/mapped_subdomain_partitioner.h"
      41             : #include "libmesh/metis_partitioner.h"
      42             : #include "libmesh/morton_sfc_partitioner.h"
      43             : #include "libmesh/parmetis_partitioner.h"
      44             : #include "libmesh/sfc_partitioner.h"
      45             : #include "libmesh/subdomain_partitioner.h"
      46             : 
      47             : // TIMPI includes
      48             : #include "timpi/parallel_implementation.h"
      49             : #include "timpi/parallel_sync.h"
      50             : 
      51             : // C/C++ includes
      52             : #ifdef LIBMESH_HAVE_PETSC
      53             : #include "libmesh/ignore_warnings.h"
      54             : #include "petscmat.h"
      55             : #include "libmesh/restore_warnings.h"
      56             : #include "libmesh/petsc_solver_exception.h"
      57             : #endif
      58             : 
      59             : 
      60             : namespace {
      61             : 
      62             : using namespace libMesh;
      63             : 
      64             : struct CorrectProcIds
      65             : {
      66             :   typedef processor_id_type datum;
      67             : 
      68         130 :   CorrectProcIds(MeshBase & _mesh,
      69       70944 :                  std::unordered_set<dof_id_type> & _bad_pids) :
      70       70944 :     mesh(_mesh), bad_pids(_bad_pids) {}
      71             : 
      72             :   MeshBase & mesh;
      73             :   std::unordered_set<dof_id_type> & bad_pids;
      74             : 
      75             :   // ------------------------------------------------------------
      76     1208638 :   void gather_data (const std::vector<dof_id_type> & ids,
      77             :                     std::vector<datum> & data)
      78             :   {
      79             :     // Find the processor id of each requested node
      80     1208638 :     data.resize(ids.size());
      81             : 
      82   117284566 :     for (auto i : index_range(ids))
      83             :       {
      84             :         // Look for this point in the mesh and make sure we have a
      85             :         // good pid for it before sending that on
      86   116150026 :         if (ids[i] != DofObject::invalid_id &&
      87   116060653 :             !bad_pids.count(ids[i]))
      88             :           {
      89   116040863 :             Node & node = mesh.node_ref(ids[i]);
      90             : 
      91             :             // Return the node's correct processor id,
      92   116040863 :             data[i] = node.processor_id();
      93             :           }
      94             :         else
      95       35065 :           data[i] = DofObject::invalid_processor_id;
      96             :       }
      97     1208638 :   }
      98             : 
      99             :   // ------------------------------------------------------------
     100     1208638 :   bool act_on_data (const std::vector<dof_id_type> & ids,
     101             :                     const std::vector<datum> proc_ids)
     102             :   {
     103         384 :     bool data_changed = false;
     104             : 
     105             :     // Set the ghost node processor ids we've now been informed of
     106   117284566 :     for (auto i : index_range(ids))
     107             :       {
     108   116075928 :         Node & node = mesh.node_ref(ids[i]);
     109             : 
     110       74098 :         processor_id_type & proc_id = node.processor_id();
     111   116075928 :         const processor_id_type new_proc_id = proc_ids[i];
     112             : 
     113   116075928 :         if (const auto it = bad_pids.find(ids[i]);
     114   116075928 :             (new_proc_id != proc_id || it != bad_pids.end()) &&
     115             :             new_proc_id != DofObject::invalid_processor_id)
     116             :           {
     117      514923 :             if (it != bad_pids.end())
     118             :               {
     119      253468 :                 proc_id = new_proc_id;
     120           0 :                 bad_pids.erase(it);
     121             :               }
     122             :             else
     123      261455 :               proc_id = node.choose_processor_id(proc_id, new_proc_id);
     124             : 
     125      514923 :             if (proc_id == new_proc_id)
     126           0 :               data_changed = true;
     127             :           }
     128             :       }
     129             : 
     130     1208638 :     return data_changed;
     131             :   }
     132             : };
     133             : 
     134             : }
     135             : 
     136             : namespace libMesh
     137             : {
     138             : 
     139             : 
     140             : 
     141             : // ------------------------------------------------------------
     142             : // Partitioner static data
     143             : const dof_id_type Partitioner::communication_blocksize =
     144             :   dof_id_type(1000000);
     145             : 
     146             : 
     147             : 
     148             : // ------------------------------------------------------------
     149             : // Partitioner implementation
     150             : 
     151             : 
     152           0 : PartitionerType Partitioner::type() const
     153             : {
     154           0 :   return INVALID_PARTITIONER;
     155             : }
     156             : 
     157             : 
     158             : std::unique_ptr<Partitioner>
     159      302994 : Partitioner::build (const PartitionerType partitioner_type)
     160             : {
     161      302994 :   switch (partitioner_type)
     162             :   {
     163          71 :     case CENTROID_PARTITIONER:
     164          71 :       return std::make_unique<CentroidPartitioner>();
     165         213 :     case LINEAR_PARTITIONER:
     166         213 :       return std::make_unique<LinearPartitioner>();
     167           0 :     case MAPPED_SUBDOMAIN_PARTITIONER:
     168           0 :       return std::make_unique<MappedSubdomainPartitioner>();
     169       49065 :     case METIS_PARTITIONER:
     170       49065 :       return std::make_unique<MetisPartitioner>();
     171      253432 :     case PARMETIS_PARTITIONER:
     172      253432 :       return std::make_unique<ParmetisPartitioner>();
     173          71 :     case HILBERT_SFC_PARTITIONER:
     174          71 :       return std::make_unique<HilbertSFCPartitioner>();
     175          71 :     case MORTON_SFC_PARTITIONER:
     176          71 :       return std::make_unique<MortonSFCPartitioner>();
     177          71 :     case SFC_PARTITIONER:
     178          71 :       return std::make_unique<SFCPartitioner>();
     179           0 :     case SUBDOMAIN_PARTITIONER:
     180           0 :       return std::make_unique<SubdomainPartitioner>();
     181           0 :     default:
     182           0 :       libmesh_error_msg("Invalid partitioner type: " <<
     183             :                         Utility::enum_to_string(partitioner_type));
     184             :   }
     185             : }
     186             : 
     187             : 
     188             : 
     189           0 : void Partitioner::partition (MeshBase & mesh)
     190             : {
     191           0 :   this->partition(mesh,mesh.n_processors());
     192           0 : }
     193             : 
     194             : 
     195             : 
     196      574482 : void Partitioner::partition (MeshBase & mesh,
     197             :                              const unsigned int n)
     198             : {
     199       11836 :   libmesh_parallel_only(mesh.comm());
     200             : 
     201             :   // BSK - temporary fix while redistribution is integrated 6/26/2008
     202             :   // Uncomment this to not repartition in parallel
     203             :   //   if (!mesh.is_serial())
     204             :   //     return;
     205             : 
     206             :   // we cannot partition into more pieces than we have
     207             :   // active elements!
     208             :   const unsigned int n_parts =
     209             :     static_cast<unsigned int>
     210      574482 :     (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));
     211             : 
     212             :   // Set the number of partitions in the mesh
     213      574482 :   mesh.set_n_partitions()=n_parts;
     214             : 
     215      574482 :   if (n_parts == 1)
     216             :     {
     217      106486 :       this->single_partition (mesh);
     218      106486 :       return;
     219             :     }
     220             : 
     221             :   // First assign a temporary partitioning to any unpartitioned elements
     222      467996 :   Partitioner::partition_unpartitioned_elements(mesh, n_parts);
     223             : 
     224             :   // Call the partitioning function
     225      467996 :   this->_do_partition(mesh,n_parts);
     226             : 
     227             :   // Set the parent's processor ids
     228      467996 :   Partitioner::set_parent_processor_ids(mesh);
     229             : 
     230             :   // Redistribute elements if necessary, before setting node processor
     231             :   // ids, to make sure those will be set consistently
     232      467996 :   mesh.redistribute();
     233             : 
     234             : #ifdef DEBUG
     235        8596 :   MeshTools::libmesh_assert_valid_remote_elems(mesh);
     236             : 
     237             :   // Messed up elem processor_id()s can leave us without the child
     238             :   // elements we need to restrict vectors on a distributed mesh
     239        8596 :   MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
     240             : #endif
     241             : 
     242             :   // Set the node's processor ids
     243      467996 :   Partitioner::set_node_processor_ids(mesh);
     244             : 
     245             : #ifdef DEBUG
     246        8596 :   MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
     247             : #endif
     248             : 
     249             :   // Give derived Mesh classes a chance to update any cached data to
     250             :   // reflect the new partitioning
     251      467996 :   mesh.update_post_partitioning();
     252             : }
     253             : 
     254             : 
     255             : 
     256           0 : void Partitioner::repartition (MeshBase & mesh)
     257             : {
     258           0 :   this->repartition(mesh,mesh.n_processors());
     259           0 : }
     260             : 
     261             : 
     262             : 
     263           0 : void Partitioner::repartition (MeshBase & mesh,
     264             :                                const unsigned int n)
     265             : {
     266             :   // we cannot partition into more pieces than we have
     267             :   // active elements!
     268             :   const unsigned int n_parts =
     269             :     static_cast<unsigned int>
     270           0 :     (std::min(mesh.n_active_elem(), static_cast<dof_id_type>(n)));
     271             : 
     272             :   // Set the number of partitions in the mesh
     273           0 :   mesh.set_n_partitions()=n_parts;
     274             : 
     275           0 :   if (n_parts == 1)
     276             :     {
     277           0 :       this->single_partition (mesh);
     278           0 :       return;
     279             :     }
     280             : 
     281             :   // First assign a temporary partitioning to any unpartitioned elements
     282           0 :   Partitioner::partition_unpartitioned_elements(mesh, n_parts);
     283             : 
     284             :   // Call the partitioning function
     285           0 :   this->_do_repartition(mesh,n_parts);
     286             : 
     287             :   // Set the parent's processor ids
     288           0 :   Partitioner::set_parent_processor_ids(mesh);
     289             : 
     290             :   // Set the node's processor ids
     291           0 :   Partitioner::set_node_processor_ids(mesh);
     292             : }
     293             : 
     294             : 
     295             : 
     296             : 
     297             : 
     298      106486 : bool Partitioner::single_partition (MeshBase & mesh)
     299             : {
     300             :   bool changed_pid =
     301      209732 :     this->single_partition_range(mesh.elements_begin(),
     302      312978 :                                  mesh.elements_end());
     303             : 
     304             :   // If we have a distributed mesh with an empty rank (or where rank
     305             :   // 0 has only its own component of a disconnected mesh, I guess),
     306             :   // that rank might need to be informed of a change.
     307      106486 :   mesh.comm().max(changed_pid);
     308             : 
     309             :   // We may need to redistribute, in case someone (like our unit
     310             :   // tests) is doing something silly (like moving a whole
     311             :   // already-distributed mesh back onto rank 0).
     312      106486 :   if (changed_pid)
     313       99532 :     mesh.redistribute();
     314             : 
     315      106486 :   return changed_pid;
     316             : }
     317             : 
     318             : 
     319             : 
     320      106486 : bool Partitioner::single_partition_range (MeshBase::element_iterator it,
     321             :                                           MeshBase::element_iterator end)
     322             : {
     323        3240 :   LOG_SCOPE("single_partition_range()", "Partitioner");
     324             : 
     325        3240 :   bool changed_pid = false;
     326             : 
     327     2713442 :   for (auto & elem : as_range(it, end))
     328             :     {
     329     1258252 :       if (elem->processor_id())
     330        5080 :         changed_pid = true;
     331     1258252 :       elem->processor_id() = 0;
     332             : 
     333             :       // Assign all this element's nodes to processor 0 as well.
     334    10950055 :       for (Node & node : elem->node_ref_range())
     335             :         {
     336     9691803 :           if (node.processor_id())
     337       28898 :             changed_pid = true;
     338     9691803 :           node.processor_id() = 0;
     339             :         }
     340      100006 :     }
     341             : 
     342      109726 :   return changed_pid;
     343             : }
     344             : 
     345           0 : void Partitioner::partition_unpartitioned_elements (MeshBase & mesh)
     346             : {
     347           0 :   Partitioner::partition_unpartitioned_elements(mesh, mesh.n_processors());
     348           0 : }
     349             : 
     350             : 
     351             : 
     352      467996 : void Partitioner::partition_unpartitioned_elements (MeshBase & mesh,
     353             :                                                     const unsigned int n_subdomains)
     354             : {
     355      467996 :   MeshBase::element_iterator       it  = mesh.unpartitioned_elements_begin();
     356      467996 :   const MeshBase::element_iterator end = mesh.unpartitioned_elements_end();
     357             : 
     358      927396 :   const dof_id_type n_unpartitioned_elements = MeshTools::n_elem (it, end);
     359             : 
     360             :   // the unpartitioned elements must exist on all processors. If the range is empty on one
     361             :   // it is empty on all, and we can quit right here.
     362      467996 :   if (!n_unpartitioned_elements)
     363        2832 :     return;
     364             : 
     365             :   // find the target subdomain sizes
     366      208014 :   std::vector<dof_id_type> subdomain_bounds(mesh.n_processors());
     367             : 
     368     2162796 :   for (auto pid : make_range(mesh.n_processors()))
     369             :     {
     370       11528 :       dof_id_type tgt_subdomain_size = 0;
     371             : 
     372             :       // watch out for the case that n_subdomains < n_processors
     373     1966310 :       if (pid < n_subdomains)
     374             :         {
     375     1113685 :           tgt_subdomain_size = n_unpartitioned_elements/n_subdomains;
     376             : 
     377     1113685 :           if (pid < n_unpartitioned_elements%n_subdomains)
     378      327145 :             tgt_subdomain_size++;
     379             : 
     380             :         }
     381             : 
     382             :       //libMesh::out << "pid, #= " << pid << ", " << tgt_subdomain_size << std::endl;
     383     1966310 :       if (pid == 0)
     384      196486 :         subdomain_bounds[0] = tgt_subdomain_size;
     385             :       else
     386     1781352 :         subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size;
     387             :     }
     388             : 
     389        5764 :   libmesh_assert_equal_to (subdomain_bounds.back(), n_unpartitioned_elements);
     390             : 
     391             :   // create the unique mapping for all unpartitioned elements independent of partitioning
     392             :   // determine the global indexing for all the unpartitioned elements
     393       11528 :   std::vector<dof_id_type> global_indices;
     394             : 
     395             :   // Calling this on all processors a unique range in [0,n_unpartitioned_elements) is constructed.
     396             :   // Only the indices for the elements we pass in are returned in the array.
     397      196486 :   MeshCommunication().find_global_indices (mesh.comm(),
     398      196486 :                                            MeshTools::create_bounding_box(mesh), it, end,
     399             :                                            global_indices);
     400             : 
     401        5764 :   dof_id_type cnt=0;
     402    12430784 :   for (auto & elem : as_range(it, end))
     403             :     {
     404      357974 :       libmesh_assert_less (cnt, global_indices.size());
     405             :       const dof_id_type global_index =
     406    12401550 :         global_indices[cnt++];
     407             : 
     408      357974 :       libmesh_assert_less (global_index, subdomain_bounds.back());
     409      357974 :       libmesh_assert_less (global_index, n_unpartitioned_elements);
     410             : 
     411             :       const processor_id_type subdomain_id =
     412             :         cast_int<processor_id_type>
     413    11685602 :         (std::distance(subdomain_bounds.begin(),
     414             :                        std::upper_bound(subdomain_bounds.begin(),
     415             :                                         subdomain_bounds.end(),
     416      357974 :                                         global_index)));
     417      357974 :       libmesh_assert_less (subdomain_id, n_subdomains);
     418             : 
     419    12043576 :       elem->processor_id() = subdomain_id;
     420             :       //libMesh::out << "assigning " << global_index << " to " << subdomain_id << std::endl;
     421      184958 :     }
     422             : }
     423             : 
     424             : 
     425             : 
     426      467996 : void Partitioner::set_parent_processor_ids(MeshBase & mesh)
     427             : {
     428             :   // Ignore the parameter when !LIBMESH_ENABLE_AMR
     429        8596 :   libmesh_ignore(mesh);
     430             : 
     431       17192 :   LOG_SCOPE("set_parent_processor_ids()", "Partitioner");
     432             : 
     433             : #ifdef LIBMESH_ENABLE_AMR
     434             : 
     435             :   // If the mesh is serial we have access to all the elements,
     436             :   // in particular all the active ones.  We can therefore set
     437             :   // the parent processor ids indirectly through their children, and
     438             :   // set the subactive processor ids while examining their active
     439             :   // ancestors.
     440             :   // By convention a parent is assigned to the minimum processor
     441             :   // of all its children, and a subactive is assigned to the processor
     442             :   // of its active ancestor.
     443      467996 :   if (mesh.is_serial())
     444             :     {
     445    44989100 :       for (auto & elem : mesh.active_element_ptr_range())
     446             :         {
     447             :           // First set descendents
     448     2132096 :           std::vector<Elem *> subactive_family;
     449    23156474 :           elem->total_family_tree(subactive_family);
     450    47371564 :           for (const auto & f : subactive_family)
     451    24215090 :             f->processor_id() = elem->processor_id();
     452             : 
     453             :           // Then set ancestors
     454    23156474 :           Elem * parent = elem->parent();
     455             : 
     456    52458616 :           while (parent)
     457             :             {
     458             :               // invalidate the parent id, otherwise the min below
     459             :               // will not work if the current parent id is less
     460             :               // than all the children!
     461     1892666 :               parent->invalidate_processor_id();
     462             : 
     463   153124364 :               for (auto & child : parent->child_ref_range())
     464             :                 {
     465     9133660 :                   libmesh_assert(!child.is_remote());
     466     9133660 :                   libmesh_assert_not_equal_to (child.processor_id(), DofObject::invalid_processor_id);
     467   123822222 :                   parent->processor_id() = std::min(parent->processor_id(),
     468     9133660 :                                                     child.processor_id());
     469             :                 }
     470     3785356 :               parent = parent->parent();
     471             :             }
     472      391405 :         }
     473             :     }
     474             : 
     475             :   // When the mesh is parallel we cannot guarantee that parents have access to
     476             :   // all their children.
     477             :   else
     478             :     {
     479             :       // Setting subactive processor ids is easy: we can guarantee
     480             :       // that children have access to all their parents.
     481             : 
     482             :       // Loop over all the active elements in the mesh
     483     9573948 :       for (auto & child : mesh.active_element_ptr_range())
     484             :         {
     485       19772 :           std::vector<Elem *> subactive_family;
     486     4737287 :           child->total_family_tree(subactive_family);
     487     9477883 :           for (const auto & f : subactive_family)
     488     4740596 :             f->processor_id() = child->processor_id();
     489       59399 :         }
     490             : 
     491             :       // When the mesh is parallel we cannot guarantee that parents have access to
     492             :       // all their children.
     493             : 
     494             :       // We will use a brute-force approach here.  Each processor finds its parent
     495             :       // elements and sets the parent pid to the minimum of its
     496             :       // semilocal descendants.
     497             :       // A global reduction is then performed to make sure the true minimum is found.
     498             :       // As noted, this is required because we cannot guarantee that a parent has
     499             :       // access to all its children on any single processor.
     500         116 :       libmesh_parallel_only(mesh.comm());
     501         116 :       libmesh_assert(MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
     502             :                                        mesh.unpartitioned_elements_end()) == 0);
     503             : 
     504       59631 :       const dof_id_type max_elem_id = mesh.max_elem_id();
     505             : 
     506             :       std::vector<processor_id_type>
     507         232 :         parent_processor_ids (std::min(communication_blocksize,
     508       59747 :                                        max_elem_id));
     509             : 
     510      119262 :       for (dof_id_type blk=0, last_elem_id=0; last_elem_id<max_elem_id; blk++)
     511             :         {
     512       59631 :           last_elem_id =
     513       59747 :             std::min(static_cast<dof_id_type>((blk+1)*communication_blocksize),
     514         116 :                      max_elem_id);
     515       59631 :           const dof_id_type first_elem_id = blk*communication_blocksize;
     516             : 
     517         116 :           std::fill (parent_processor_ids.begin(),
     518             :                      parent_processor_ids.end(),
     519             :                      DofObject::invalid_processor_id);
     520             : 
     521             :           // first build up local contributions to parent_processor_ids
     522         116 :           bool have_parent_in_block = false;
     523             : 
     524      119818 :           for (auto & parent : as_range(mesh.ancestor_elements_begin(),
     525     1552226 :                                         mesh.ancestor_elements_end()))
     526             :             {
     527      657303 :               const dof_id_type parent_idx = parent->id();
     528         672 :               libmesh_assert_less (parent_idx, max_elem_id);
     529             : 
     530      657975 :               if ((parent_idx >= first_elem_id) &&
     531      657303 :                   (parent_idx <  last_elem_id))
     532             :                 {
     533         672 :                   have_parent_in_block = true;
     534      657303 :                   processor_id_type parent_pid = DofObject::invalid_processor_id;
     535             : 
     536         672 :                   std::vector<const Elem *> active_family;
     537      657303 :                   parent->active_family_tree(active_family);
     538     7656314 :                   for (const auto & f : active_family)
     539     7748505 :                     parent_pid = std::min (parent_pid, f->processor_id());
     540             : 
     541      657303 :                   const dof_id_type packed_idx = parent_idx - first_elem_id;
     542         672 :                   libmesh_assert_less (packed_idx, parent_processor_ids.size());
     543             : 
     544      657975 :                   parent_processor_ids[packed_idx] = parent_pid;
     545             :                 }
     546       59399 :             }
     547             : 
     548             :           // then find the global minimum
     549       59631 :           mesh.comm().min (parent_processor_ids);
     550             : 
     551             :           // and assign the ids, if we have a parent in this block.
     552       59631 :           if (have_parent_in_block)
     553       43744 :             for (auto & parent : as_range(mesh.ancestor_elements_begin(),
     554     1400078 :                                           mesh.ancestor_elements_end()))
     555             :               {
     556      657303 :                 const dof_id_type parent_idx = parent->id();
     557             : 
     558      657975 :                 if ((parent_idx >= first_elem_id) &&
     559      656631 :                     (parent_idx <  last_elem_id))
     560             :                   {
     561      657303 :                     const dof_id_type packed_idx = parent_idx - first_elem_id;
     562         672 :                     libmesh_assert_less (packed_idx, parent_processor_ids.size());
     563             : 
     564             :                     const processor_id_type parent_pid =
     565      657303 :                       parent_processor_ids[packed_idx];
     566             : 
     567         672 :                     libmesh_assert_not_equal_to (parent_pid, DofObject::invalid_processor_id);
     568             : 
     569      657303 :                     parent->processor_id() = parent_pid;
     570             :                   }
     571       21476 :               }
     572             :         }
     573             :     }
     574             : 
     575             : #endif // LIBMESH_ENABLE_AMR
     576      467996 : }
     577             : 
     578             : void
     579           0 : Partitioner::processor_pairs_to_interface_nodes(MeshBase & mesh,
     580             :                                                 std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> & processor_pair_to_nodes)
     581             : {
     582             :   // This function must be run on all processors at once
     583           0 :   libmesh_parallel_only(mesh.comm());
     584             : 
     585           0 :   processor_pair_to_nodes.clear();
     586             : 
     587           0 :   std::set<dof_id_type> mynodes;
     588           0 :   std::set<dof_id_type> neighbor_nodes;
     589           0 :   std::vector<dof_id_type> common_nodes;
     590             : 
     591             :   // Loop over all the active elements
     592           0 :   for (auto & elem : mesh.active_element_ptr_range())
     593             :     {
     594           0 :       libmesh_assert(elem);
     595             : 
     596           0 :       libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
     597             : 
     598           0 :       auto n_nodes = elem->n_nodes();
     599             : 
     600             :       // prepare data for this element
     601           0 :       mynodes.clear();
     602           0 :       neighbor_nodes.clear();
     603           0 :       common_nodes.clear();
     604             : 
     605           0 :       for (unsigned int inode = 0; inode < n_nodes; inode++)
     606           0 :         mynodes.insert(elem->node_id(inode));
     607             : 
     608           0 :       for (auto i : elem->side_index_range())
     609             :         {
     610           0 :           auto neigh = elem->neighbor_ptr(i);
     611           0 :           if (neigh && !neigh->is_remote() && neigh->processor_id() != elem->processor_id())
     612             :             {
     613           0 :               neighbor_nodes.clear();
     614           0 :               common_nodes.clear();
     615           0 :               auto neigh_n_nodes = neigh->n_nodes();
     616           0 :               for (unsigned int inode = 0; inode < neigh_n_nodes; inode++)
     617           0 :                 neighbor_nodes.insert(neigh->node_id(inode));
     618             : 
     619           0 :               std::set_intersection(mynodes.begin(), mynodes.end(),
     620             :                                     neighbor_nodes.begin(), neighbor_nodes.end(),
     621           0 :                                     std::back_inserter(common_nodes));
     622             : 
     623           0 :               auto & map_set = processor_pair_to_nodes[std::make_pair(std::min(elem->processor_id(), neigh->processor_id()),
     624           0 :                                                                       std::max(elem->processor_id(), neigh->processor_id()))];
     625           0 :               for (auto global_node_id : common_nodes)
     626           0 :                 map_set.insert(global_node_id);
     627             :             }
     628             :         }
     629           0 :     }
     630           0 : }
     631             : 
     632           0 : void Partitioner::set_interface_node_processor_ids_linear(MeshBase & mesh)
     633             : {
     634             :   // This function must be run on all processors at once
     635           0 :   libmesh_parallel_only(mesh.comm());
     636             : 
     637           0 :   std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
     638             : 
     639           0 :   processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
     640             : 
     641           0 :   for (auto & pmap : processor_pair_to_nodes)
     642             :     {
     643           0 :       std::size_t n_own_nodes = pmap.second.size()/2, i = 0;
     644             : 
     645           0 :       for (dof_id_type id : pmap.second)
     646             :         {
     647           0 :           auto & node = mesh.node_ref(id);
     648           0 :           if (i <= n_own_nodes)
     649           0 :             node.processor_id() = pmap.first.first;
     650             :           else
     651           0 :             node.processor_id() = pmap.first.second;
     652           0 :           i++;
     653             :         }
     654             :     }
     655           0 : }
     656             : 
     657           0 : void Partitioner::set_interface_node_processor_ids_BFS(MeshBase & mesh)
     658             : {
     659             :   // This function must be run on all processors at once
     660           0 :   libmesh_parallel_only(mesh.comm());
     661             : 
     662             :   // I see occasional consistency failures when using this on a
     663             :   // distributed mesh
     664             :   libmesh_experimental();
     665             : 
     666           0 :   std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
     667             : 
     668           0 :   processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
     669             : 
     670           0 :   std::unordered_map<dof_id_type, std::vector<const Elem *>> nodes_to_elem_map;
     671             : 
     672           0 :   MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);
     673             : 
     674           0 :   std::vector<const Node *>  neighbors;
     675           0 :   std::set<dof_id_type> neighbors_order;
     676           0 :   std::vector<dof_id_type> common_nodes;
     677           0 :   std::queue<dof_id_type> nodes_queue;
     678           0 :   std::set<dof_id_type> visted_nodes;
     679             : 
     680           0 :   for (auto & pmap : processor_pair_to_nodes)
     681             :     {
     682           0 :       std::size_t n_own_nodes = pmap.second.size()/2;
     683             : 
     684             :       // Initialize node assignment
     685           0 :       for (dof_id_type id : pmap.second)
     686           0 :         mesh.node_ref(id).processor_id() = pmap.first.second;
     687             : 
     688           0 :       visted_nodes.clear();
     689           0 :       for (dof_id_type id : pmap.second)
     690             :         {
     691           0 :           mesh.node_ref(id).processor_id() = pmap.first.second;
     692             : 
     693           0 :           if (visted_nodes.count(id))
     694           0 :             continue;
     695             :           else
     696             :             {
     697           0 :               nodes_queue.push(id);
     698           0 :               visted_nodes.insert(id);
     699           0 :               if (visted_nodes.size() >= n_own_nodes)
     700           0 :                 break;
     701             :             }
     702             : 
     703           0 :           while (!nodes_queue.empty())
     704             :             {
     705           0 :               auto & node = mesh.node_ref(nodes_queue.front());
     706           0 :               nodes_queue.pop();
     707             : 
     708           0 :               neighbors.clear();
     709           0 :               MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
     710           0 :               neighbors_order.clear();
     711           0 :               for (auto & neighbor : neighbors)
     712           0 :                 neighbors_order.insert(neighbor->id());
     713             : 
     714           0 :               common_nodes.clear();
     715           0 :               std::set_intersection(pmap.second.begin(), pmap.second.end(),
     716             :                                     neighbors_order.begin(), neighbors_order.end(),
     717           0 :                                     std::back_inserter(common_nodes));
     718             : 
     719           0 :               for (auto c_node : common_nodes)
     720           0 :                 if (!visted_nodes.count(c_node))
     721             :                   {
     722           0 :                     nodes_queue.push(c_node);
     723           0 :                     visted_nodes.insert(c_node);
     724           0 :                     if (visted_nodes.size() >= n_own_nodes)
     725           0 :                       goto queue_done;
     726             :                   }
     727             : 
     728           0 :               if (visted_nodes.size() >= n_own_nodes)
     729           0 :                 goto queue_done;
     730             :             }
     731             :         }
     732           0 :     queue_done:
     733           0 :       for (auto node : visted_nodes)
     734           0 :         mesh.node_ref(node).processor_id() = pmap.first.first;
     735             :     }
     736           0 : }
     737             : 
     738           0 : void Partitioner::set_interface_node_processor_ids_petscpartitioner(MeshBase & mesh)
     739             : {
     740           0 :   libmesh_ignore(mesh); // Only used if LIBMESH_HAVE_PETSC
     741             : 
     742             :   // This function must be run on all processors at once
     743           0 :   libmesh_parallel_only(mesh.comm());
     744             : 
     745             : #if LIBMESH_HAVE_PETSC
     746           0 :   std::map<std::pair<processor_id_type, processor_id_type>, std::set<dof_id_type>> processor_pair_to_nodes;
     747             : 
     748           0 :   processor_pairs_to_interface_nodes(mesh, processor_pair_to_nodes);
     749             : 
     750           0 :   std::vector<std::vector<const Elem *>> nodes_to_elem_map;
     751             : 
     752           0 :   MeshTools::build_nodes_to_elem_map(mesh, nodes_to_elem_map);
     753             : 
     754           0 :   std::vector<const Node *>  neighbors;
     755           0 :   std::set<dof_id_type> neighbors_order;
     756           0 :   std::vector<dof_id_type> common_nodes;
     757             : 
     758           0 :   std::vector<dof_id_type> rows;
     759           0 :   std::vector<dof_id_type> cols;
     760             : 
     761           0 :   std::map<dof_id_type, dof_id_type> global_to_local;
     762             : 
     763           0 :   for (auto & pmap : processor_pair_to_nodes)
     764             :     {
     765           0 :       unsigned int i = 0;
     766             : 
     767           0 :       rows.clear();
     768           0 :       rows.resize(pmap.second.size()+1);
     769           0 :       cols.clear();
     770           0 :       for (dof_id_type id : pmap.second)
     771           0 :         global_to_local[id] = i++;
     772             : 
     773           0 :       i = 0;
     774           0 :       for (auto id : pmap.second)
     775             :         {
     776           0 :           auto & node = mesh.node_ref(id);
     777           0 :           neighbors.clear();
     778           0 :           MeshTools::find_nodal_neighbors(mesh, node, nodes_to_elem_map, neighbors);
     779           0 :           neighbors_order.clear();
     780           0 :           for (auto & neighbor : neighbors)
     781           0 :             neighbors_order.insert(neighbor->id());
     782             : 
     783           0 :           common_nodes.clear();
     784           0 :           std::set_intersection(pmap.second.begin(), pmap.second.end(),
     785             :                                 neighbors_order.begin(), neighbors_order.end(),
     786           0 :                                 std::back_inserter(common_nodes));
     787             : 
     788           0 :           rows[i+1] = rows[i] + cast_int<dof_id_type>(common_nodes.size());
     789             : 
     790           0 :           for (auto c_node : common_nodes)
     791           0 :             cols.push_back(global_to_local[c_node]);
     792             : 
     793           0 :           i++;
     794             :         }
     795             : 
     796             :       // Next we construct an IS from a MatPartitioning
     797           0 :       WrappedPetsc<IS> is;
     798             :       {
     799             :         PetscInt *adj_i, *adj_j;
     800           0 :         LibmeshPetscCall2(mesh.comm(), PetscCalloc1(rows.size(), &adj_i));
     801           0 :         LibmeshPetscCall2(mesh.comm(), PetscCalloc1(cols.size(), &adj_j));
     802           0 :         PetscInt rows_size = cast_int<PetscInt>(rows.size());
     803           0 :         for (PetscInt ii=0; ii<rows_size; ii++)
     804           0 :           adj_i[ii] = rows[ii];
     805             : 
     806           0 :         PetscInt cols_size = cast_int<PetscInt>(cols.size());
     807           0 :         for (PetscInt ii=0; ii<cols_size; ii++)
     808           0 :           adj_j[ii] = cols[ii];
     809             : 
     810           0 :         const PetscInt sz = cast_int<PetscInt>(pmap.second.size());
     811             : 
     812             :         // Create sparse matrix representing an adjacency list
     813           0 :         WrappedPetsc<Mat> adj;
     814           0 :         LibmeshPetscCall2(mesh.comm(), MatCreateMPIAdj(PETSC_COMM_SELF, sz, sz, adj_i, adj_j, nullptr, adj.get()));
     815             : 
     816             :         // Create MatPartitioning object
     817           0 :         WrappedPetsc<MatPartitioning> part;
     818           0 :         LibmeshPetscCall2(mesh.comm(), MatPartitioningCreate(PETSC_COMM_SELF, part.get()));
     819             : 
     820             :         // Apply MatPartitioning, storing results in "is"
     821           0 :         LibmeshPetscCall2(mesh.comm(), MatPartitioningSetAdjacency(part, adj));
     822           0 :         LibmeshPetscCall2(mesh.comm(), MatPartitioningSetNParts(part, 2));
     823           0 :         LibmeshPetscCall2(mesh.comm(), PetscObjectSetOptionsPrefix((PetscObject)(*part), "balance_"));
     824           0 :         LibmeshPetscCall2(mesh.comm(), MatPartitioningSetFromOptions(part));
     825           0 :         LibmeshPetscCall2(mesh.comm(), MatPartitioningApply(part, is.get()));
     826             :       }
     827             : 
     828             :       PetscInt local_size;
     829             :       const PetscInt *indices;
     830           0 :       LibmeshPetscCall2(mesh.comm(), ISGetLocalSize(is, &local_size));
     831           0 :       LibmeshPetscCall2(mesh.comm(), ISGetIndices(is, &indices));
     832             : 
     833           0 :       i = 0;
     834           0 :       for (auto id : pmap.second)
     835             :         {
     836           0 :           auto & node = mesh.node_ref(id);
     837           0 :           if (indices[i])
     838           0 :             node.processor_id() = pmap.first.second;
     839             :           else
     840           0 :             node.processor_id() = pmap.first.first;
     841             : 
     842           0 :           i++;
     843             :         }
     844           0 :       LibmeshPetscCall2(mesh.comm(), ISRestoreIndices(is, &indices));
     845             :     }
     846             : #else
     847           0 :   libmesh_error_msg("PETSc is required");
     848             : #endif
     849           0 : }
     850             : 
     851             : 
     852      493936 : void Partitioner::set_node_processor_ids(MeshBase & mesh)
     853             : {
     854       20392 :   LOG_SCOPE("set_node_processor_ids()","Partitioner");
     855             : 
     856             :   // This function must be run on all processors at once
     857       10196 :   libmesh_parallel_only(mesh.comm());
     858             : 
     859             :   // If we have any unpartitioned elements at this
     860             :   // stage there is a problem
     861       10196 :   libmesh_assert (MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
     862             :                                     mesh.unpartitioned_elements_end()) == 0);
     863             : 
     864             :   // Start from scratch here: nodes we used to own may not be
     865             :   // eligible for us to own any more.
     866    77490756 :   for (auto & node : mesh.node_ptr_range())
     867             :     {
     868    76513080 :       node->processor_id() = DofObject::invalid_processor_id;
     869      473544 :     }
     870             : 
     871             :   // Loop over all the active elements
     872    67318690 :   for (auto & elem : mesh.active_element_ptr_range())
     873             :     {
     874     1669900 :       libmesh_assert(elem);
     875             : 
     876     1669900 :       libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);
     877             : 
     878             :       // Consider updating the processor id on this element's nodes
     879   295315318 :       for (Node & node : elem->node_ref_range())
     880             :         {
     881    11830293 :           processor_id_type & pid = node.processor_id();
     882   258805005 :           pid = node.choose_processor_id(pid, elem->processor_id());
     883             :         }
     884      473544 :     }
     885             : 
     886             :   // How we finish off the node partitioning depends on our command
     887             :   // line options.
     888             : 
     889             :   const bool load_balanced_nodes_linear =
     890      493936 :       libMesh::on_command_line ("--load-balanced-nodes-linear");
     891             : 
     892             :   const bool load_balanced_nodes_bfs =
     893      493936 :        libMesh::on_command_line ("--load-balanced-nodes-bfs");
     894             : 
     895             :   const bool load_balanced_nodes_petscpartition =
     896      493936 :       libMesh::on_command_line ("--load-balanced-nodes-petscpartitioner");
     897             : 
     898      493936 :   unsigned int n_load_balance_options = load_balanced_nodes_linear;
     899      493936 :   n_load_balance_options += load_balanced_nodes_bfs;
     900      493936 :   n_load_balance_options += load_balanced_nodes_petscpartition;
     901      493936 :   libmesh_error_msg_if(n_load_balance_options > 1,
     902             :                        "Cannot perform more than one load balancing type at a time");
     903             : 
     904      493936 :   if (load_balanced_nodes_linear)
     905           0 :     set_interface_node_processor_ids_linear(mesh);
     906      493936 :   else if (load_balanced_nodes_bfs)
     907           0 :     set_interface_node_processor_ids_BFS(mesh);
     908      493936 :   else if (load_balanced_nodes_petscpartition)
     909           0 :     set_interface_node_processor_ids_petscpartitioner(mesh);
     910             : 
     911             :    // Node balancing algorithm will response to assign owned nodes.
     912             :    // We still need to sync PIDs
     913             :   {
     914             :     // For inactive elements, we will have already gotten most of
     915             :     // these nodes, *except* for the case of a parent with a subset
     916             :     // of active descendants which are remote elements.  In that
     917             :     // case some of the parent nodes will not have been properly
     918             :     // handled yet on our processor.
     919             :     //
     920             :     // We don't want to inadvertently give one of them an incorrect
     921             :     // processor id, but if we're not in serial then we have to
     922             :     // assign them temporary pids to make querying work, so we'll
     923             :     // save our *valid* pids before assigning temporaries.
     924             :     //
     925             :     // Even in serial we'll want to check and make sure we're not
     926             :     // overwriting valid active node pids with pids from subactive
     927             :     // elements.
     928       20392 :     std::unordered_set<dof_id_type> bad_pids;
     929             : 
     930   147608618 :     for (auto & node : mesh.node_ptr_range())
     931    76513080 :       if (node->processor_id() == DofObject::invalid_processor_id)
     932     1562221 :         bad_pids.insert(node->id());
     933             : 
     934             :     // If we assign our temporary ids by looping from finer elements
     935             :     // to coarser elements, we'll always get an id from the finest
     936             :     // ghost element we can see, which will usually be "closer" to
     937             :     // the true processor we want to query and so will reduce query
     938             :     // cycles that don't reach that processor.
     939             : 
     940             :     // But we can still end up with a query cycle that dead-ends, so
     941             :     // we need to prepare a "push" communication step here.
     942             : 
     943      493936 :     const bool is_serial = mesh.is_serial();
     944             :     std::unordered_map
     945             :       <processor_id_type,
     946             :        std::unordered_map<dof_id_type, processor_id_type>>
     947       20392 :       potential_pids;
     948             : 
     949      493936 :     const unsigned int n_levels = MeshTools::n_levels(mesh);
     950     1172111 :     for (unsigned int level = n_levels; level > 0; --level)
     951             :       {
     952     3367484 :         for (auto & elem : as_range(mesh.level_elements_begin(level-1),
     953    83218324 :                                     mesh.level_elements_end(level-1)))
     954             :           {
     955     2028952 :             libmesh_assert_not_equal_to (elem->processor_id(),
     956             :                                          DofObject::invalid_processor_id);
     957             : 
     958    41285110 :             const processor_id_type elem_pid = elem->processor_id();
     959             : 
     960             :             // Consider updating the processor id on this element's nodes
     961   327134044 :             for (Node & node : elem->node_ref_range())
     962             :               {
     963    13548801 :                 processor_id_type & pid = node.processor_id();
     964   285848934 :                 if (bad_pids.count(node.id()))
     965     3945556 :                   pid = node.choose_processor_id(pid, elem_pid);
     966   281903378 :                 else if (!is_serial)
     967    52937350 :                   potential_pids[elem_pid][node.id()] = pid;
     968             :               }
     969      642539 :           }
     970             :       }
     971             : 
     972      493936 :     if (!is_serial)
     973             :       {
     974             :         std::unordered_map
     975             :           <processor_id_type,
     976             :            std::vector<std::pair<dof_id_type, processor_id_type>>>
     977         130 :           potential_pids_vecs;
     978             : 
     979      499649 :         for (auto & pair : potential_pids)
     980      856930 :           potential_pids_vecs[pair.first].assign(pair.second.begin(), pair.second.end());
     981             : 
     982             :         auto pids_action_functor =
     983      428225 :           [& mesh, & bad_pids]
     984             :           (processor_id_type /* src_pid */,
     985    19177097 :            const std::vector<std::pair<dof_id_type, processor_id_type>> & data)
     986             :           {
     987    19507327 :             for (auto pair : data)
     988             :               {
     989    19078622 :                 Node & node = mesh.node_ref(pair.first);
     990       32745 :                 processor_id_type & pid = node.processor_id();
     991    19078622 :                 if (const auto it = bad_pids.find(pair.first);
     992       32745 :                     it != bad_pids.end())
     993             :                   {
     994       14298 :                     pid = pair.second;
     995           0 :                     bad_pids.erase(it);
     996             :                   }
     997             :                 else
     998    19064324 :                   pid = node.choose_processor_id(pid, pair.second);
     999             :               }
    1000       71294 :           };
    1001             : 
    1002             :         Parallel::push_parallel_vector_data
    1003       70944 :           (mesh.comm(), potential_pids_vecs, pids_action_functor);
    1004             : 
    1005             :         // Using default libMesh options, we'll just need to sync
    1006             :         // between processors now.  The catch here is that we can't
    1007             :         // initially trust Node::choose_processor_id() because some
    1008             :         // of those node processor ids are the temporary ones.
    1009         130 :         CorrectProcIds correct_pids(mesh, bad_pids);
    1010             :         Parallel::sync_node_data_by_element_id
    1011      212572 :           (mesh, mesh.elements_begin(), mesh.elements_end(),
    1012       70814 :            Parallel::SyncEverything(), Parallel::SyncEverything(),
    1013             :            correct_pids);
    1014             : 
    1015             :         // But once we've got all the non-temporary pids synced, we
    1016             :         // may need to sync again to get any pids on nodes only
    1017             :         // connected to subactive elements, for which *only*
    1018             :         // "temporary" pids are possible.
    1019         130 :         bad_pids.clear();
    1020             :         Parallel::sync_node_data_by_element_id
    1021      141888 :           (mesh,
    1022      212962 :            mesh.elements_begin(), mesh.elements_end(),
    1023       70944 :            Parallel::SyncEverything(), Parallel::SyncEverything(),
    1024             :            correct_pids);
    1025             :       }
    1026             :   }
    1027             : 
    1028             :   // We can't assert that all nodes are connected to elements, because
    1029             :   // a DistributedMesh with NodeConstraints might have pulled in some
    1030             :   // remote nodes solely for evaluating those constraints.
    1031             :   // MeshTools::libmesh_assert_connected_nodes(mesh);
    1032             : 
    1033             : #ifdef DEBUG
    1034       10196 :   MeshTools::libmesh_assert_valid_procids<Node>(mesh);
    1035             :   //MeshTools::libmesh_assert_canonical_node_procids(mesh);
    1036             : #endif
    1037      493936 : }
    1038             : 
    1039             : 
    1040             : 
    1041             : struct SyncLocalIDs
    1042             : {
    1043             :   typedef dof_id_type datum;
    1044             : 
    1045             :   typedef std::unordered_map<dof_id_type, dof_id_type> map_type;
    1046             : 
    1047      460230 :   SyncLocalIDs(map_type & _id_map) : id_map(_id_map) {}
    1048             : 
    1049             :   map_type & id_map;
    1050             : 
    1051     2405834 :   void gather_data (const std::vector<dof_id_type> & ids,
    1052             :                     std::vector<datum> & local_ids) const
    1053             :   {
    1054     2406510 :     local_ids.resize(ids.size());
    1055             : 
    1056    27662472 :     for (auto i : index_range(ids))
    1057    25256638 :       local_ids[i] = id_map[ids[i]];
    1058     2405834 :   }
    1059             : 
    1060     2405834 :   void act_on_data (const std::vector<dof_id_type> & ids,
    1061             :                     const std::vector<datum> & local_ids)
    1062             :   {
    1063    27662472 :     for (auto i : index_range(local_ids))
    1064    25271312 :       id_map[ids[i]] = local_ids[i];
    1065     2405834 :   }
    1066             : };
    1067             : 
    1068      460230 : void Partitioner::_find_global_index_by_pid_map(const MeshBase & mesh)
    1069             : {
    1070      460230 :   const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
    1071             : 
    1072             :   // Find the number of active elements on each processor.  We cannot use
    1073             :   // mesh.n_active_elem_on_proc(pid) since that only returns the number of
    1074             :   // elements assigned to pid which are currently stored on the calling
    1075             :   // processor. This will not in general be correct for parallel meshes
    1076             :   // when (pid!=mesh.processor_id()).
    1077        1376 :   auto n_proc = mesh.n_processors();
    1078      460230 :   _n_active_elem_on_proc.resize(n_proc);
    1079      460230 :   mesh.comm().allgather(n_active_local_elem, _n_active_elem_on_proc);
    1080             : 
    1081      461606 :   std::vector<dof_id_type> n_active_elem_before_proc(mesh.n_processors());
    1082             : 
    1083     4941982 :   for (auto i : make_range(n_proc-1))
    1084     4481752 :     n_active_elem_before_proc[i+1] =
    1085     4483128 :       n_active_elem_before_proc[i] + _n_active_elem_on_proc[i];
    1086             : 
    1087             :   libMesh::BoundingBox bbox =
    1088      460230 :     MeshTools::create_bounding_box(mesh);
    1089             : 
    1090         688 :   _global_index_by_pid_map.clear();
    1091             : 
    1092             :   // create the mapping which is contiguous by processor
    1093      460230 :   MeshCommunication().find_local_indices (bbox,
    1094      920460 :                                           mesh.active_local_elements_begin(),
    1095      919772 :                                           mesh.active_local_elements_end(),
    1096      460230 :                                           _global_index_by_pid_map);
    1097             : 
    1098         688 :   SyncLocalIDs sync(_global_index_by_pid_map);
    1099             : 
    1100             :   Parallel::sync_dofobject_data_by_id
    1101      919772 :       (mesh.comm(), mesh.active_elements_begin(), mesh.active_elements_end(), sync);
    1102             : 
    1103    31165204 :   for (const auto & elem : mesh.active_element_ptr_range())
    1104             :     {
    1105    30245432 :       const processor_id_type pid = elem->processor_id();
    1106       35680 :       libmesh_assert_less (_global_index_by_pid_map[elem->id()], _n_active_elem_on_proc[pid]);
    1107             : 
    1108    30281112 :       _global_index_by_pid_map[elem->id()] += n_active_elem_before_proc[pid];
    1109      458854 :     }
    1110      460230 : }
    1111             : 
    1112      230115 : void Partitioner::build_graph (const MeshBase & mesh)
    1113             : {
    1114         688 :   LOG_SCOPE("build_graph()", "Partitioner");
    1115             : 
    1116         344 :   const dof_id_type n_active_local_elem  = mesh.n_active_local_elem();
    1117             : 
    1118             :   // If we have boundary elements in this mesh, we want to account for
    1119             :   // the connectivity between them and interior elements.  We can find
    1120             :   // interior elements from boundary elements, but we need to build up
    1121             :   // a lookup map to do the reverse.
    1122             :   typedef std::unordered_multimap<const Elem *, const Elem *> map_type;
    1123         688 :   map_type interior_to_boundary_map;
    1124             : 
    1125             :   // If we have spline nodes in this mesh, we want to account for the
    1126             :   // connectivity between them and integration elements.  We can find
    1127             :   // spline nodes from integration elements, but need a reverse map
    1128             :   // {integration_elements} = elems_constrained_by[spline_nodeelem]
    1129         688 :   map_type elems_constrained_by;
    1130             : 
    1131         344 :   const auto & mesh_constrained_nodes = mesh.get_constraint_rows();
    1132             : 
    1133    45774514 :   for (const Elem * elem : mesh.active_element_ptr_range())
    1134             :     {
    1135    15122716 :       if (!mesh_constrained_nodes.empty()) // quick test for non-IGA cases
    1136             :         {
    1137           0 :           const auto end_it = mesh_constrained_nodes.end();
    1138             : 
    1139             :           // Use a set to avoid duplicates.  Use a well-defined method
    1140             :           // of ordering that set to make debugging easier.
    1141           0 :           std::set<const Elem *, CompareElemIdsByLevel> constraining_elems;
    1142     1209187 :           for (const Node & node : elem->node_ref_range())
    1143             :             {
    1144      980466 :               if (const auto row_it = mesh_constrained_nodes.find(&node);
    1145           0 :                   row_it != end_it)
    1146     3946461 :                 for (const auto & [pr, coef] : row_it->second)
    1147             :                   {
    1148           0 :                     libmesh_ignore(coef); // avoid gcc 7 warning
    1149     3092771 :                     constraining_elems.insert(pr.first);
    1150             :                   }
    1151             :             }
    1152     1583959 :           for (const Elem * constraining_elem : constraining_elems)
    1153           0 :             elems_constrained_by.emplace(constraining_elem, elem);
    1154             :         }
    1155             : 
    1156             :       // If we don't have an interior_parent and we don't have any
    1157             :       // constrained nodes then there's nothing else to look up.
    1158    15122716 :       if (elem->interior_parent())
    1159             :         {
    1160             :           // get all relevant interior elements
    1161           0 :           std::set<const Elem *> neighbor_set;
    1162        1094 :           elem->find_interior_neighbors(neighbor_set);
    1163             : 
    1164        2188 :           for (const auto & neighbor : neighbor_set)
    1165           0 :             interior_to_boundary_map.emplace(neighbor, elem);
    1166             :         }
    1167      229427 :     }
    1168             : 
    1169             : #ifdef LIBMESH_ENABLE_AMR
    1170         688 :   std::vector<const Elem *> neighbors_offspring;
    1171             : #endif
    1172             : 
    1173             :   // This is costly, and we only need to do it if the mesh has
    1174             :   // changed since we last partitioned... but the mesh probably has
    1175             :   // changed since we last partitioned, and if it hasn't we don't
    1176             :   // have a reliable way to be sure of that.
    1177      230115 :   _find_global_index_by_pid_map(mesh);
    1178             : 
    1179         344 :   dof_id_type first_local_elem = 0;
    1180     1350553 :   for (auto pid : make_range(mesh.processor_id()))
    1181     1120610 :      first_local_elem += _n_active_elem_on_proc[pid];
    1182             : 
    1183      229771 :   _dual_graph.clear();
    1184      230115 :   _dual_graph.resize(n_active_local_elem);
    1185      230115 :   _local_id_to_elem.resize(n_active_local_elem);
    1186             : 
    1187             :   // We may need to communicate constraint-row-based connections
    1188             :   // between processors
    1189             :   std::unordered_map<processor_id_type,
    1190         688 :     std::vector<std::pair<dof_id_type, dof_id_type>>> connections_to_push;
    1191             : 
    1192     5427674 :   for (const auto & elem : mesh.active_local_element_ptr_range())
    1193             :     {
    1194       10503 :       libmesh_assert (_global_index_by_pid_map.count(elem->id()));
    1195             :       const dof_id_type global_index_by_pid =
    1196     2494397 :         _global_index_by_pid_map[elem->id()];
    1197             : 
    1198     2494397 :       const dof_id_type local_index =
    1199       10503 :         global_index_by_pid - first_local_elem;
    1200       10503 :       libmesh_assert_less (local_index, n_active_local_elem);
    1201             : 
    1202       21006 :       std::vector<dof_id_type> & graph_row = _dual_graph[local_index];
    1203             : 
    1204             :       // Save this off to make it easy to index later
    1205     2494397 :       _local_id_to_elem[local_index] = const_cast<Elem*>(elem);
    1206             : 
    1207             :       // Loop over the element's neighbors.  An element
    1208             :       // adjacency corresponds to a face neighbor
    1209    13088368 :       for (auto neighbor : elem->neighbor_ptr_range())
    1210             :         {
    1211    10583468 :           if (neighbor != nullptr)
    1212             :             {
    1213             :               // If the neighbor is active treat it
    1214             :               // as a connection
    1215       44176 :               if (neighbor->active())
    1216             :                 {
    1217       44016 :                   libmesh_assert(_global_index_by_pid_map.count(neighbor->id()));
    1218             :                   const dof_id_type neighbor_global_index_by_pid =
    1219     9640091 :                     _global_index_by_pid_map[neighbor->id()];
    1220             : 
    1221     9640091 :                   graph_row.push_back(neighbor_global_index_by_pid);
    1222             :                 }
    1223             : 
    1224             : #ifdef LIBMESH_ENABLE_AMR
    1225             : 
    1226             :               // Otherwise we need to find all of the
    1227             :               // neighbor's children that are connected to
    1228             :               // us and add them
    1229             :               else
    1230             :                 {
    1231             :                   // The side of the neighbor to which
    1232             :                   // we are connected
    1233             :                   const unsigned int ns =
    1234       44297 :                     neighbor->which_neighbor_am_i (elem);
    1235         160 :                   libmesh_assert_less (ns, neighbor->n_neighbors());
    1236             : 
    1237             :                   // Get all the active children (& grandchildren, etc...)
    1238             :                   // of the neighbor
    1239             : 
    1240             :                   // FIXME - this is the wrong thing, since we
    1241             :                   // should be getting the active family tree on
    1242             :                   // our side only.  But adding too many graph
    1243             :                   // links may cause hanging nodes to tend to be
    1244             :                   // on partition interiors, which would reduce
    1245             :                   // communication overhead for constraint
    1246             :                   // equations, so we'll leave it.
    1247             : 
    1248       44297 :                   neighbor->active_family_tree (neighbors_offspring);
    1249             : 
    1250             :                   // Get all the neighbor's children that
    1251             :                   // live on that side and are thus connected
    1252             :                   // to us
    1253      293888 :                   for (const auto & child : neighbors_offspring)
    1254             :                     {
    1255             :                       // This does not assume a level-1 mesh.
    1256             :                       // Note that since children have sides numbered
    1257             :                       // coincident with the parent then this is a sufficient test.
    1258      250345 :                       if (child->neighbor_ptr(ns) == elem)
    1259             :                         {
    1260         320 :                           libmesh_assert (child->active());
    1261         320 :                           libmesh_assert (_global_index_by_pid_map.count(child->id()));
    1262             :                           const dof_id_type child_global_index_by_pid =
    1263      118243 :                             _global_index_by_pid_map[child->id()];
    1264             : 
    1265      118243 :                           graph_row.push_back(child_global_index_by_pid);
    1266             :                         }
    1267             :                     }
    1268             :                 }
    1269             : 
    1270             : #endif /* ifdef LIBMESH_ENABLE_AMR */
    1271             : 
    1272             : 
    1273             :             }
    1274             :         }
    1275             : 
    1276     3351142 :       if ((elem->dim() < LIBMESH_DIM) &&
    1277      856745 :           elem->interior_parent())
    1278             :         {
    1279             :           // get all relevant interior elements
    1280           0 :           std::set<const Elem *> neighbor_set;
    1281         896 :           elem->find_interior_neighbors(neighbor_set);
    1282             : 
    1283        1792 :           for (const auto & neighbor : neighbor_set)
    1284             :             {
    1285             :               const dof_id_type neighbor_global_index_by_pid =
    1286         896 :                 _global_index_by_pid_map[neighbor->id()];
    1287             : 
    1288         896 :               graph_row.push_back(neighbor_global_index_by_pid);
    1289             :             }
    1290             :         }
    1291             : 
    1292             :       // Check for any boundary neighbors
    1293     2495293 :       for (const auto & pr : as_range(interior_to_boundary_map.equal_range(elem)))
    1294             :         {
    1295         896 :           const Elem * neighbor = pr.second;
    1296             : 
    1297             :           const dof_id_type neighbor_global_index_by_pid =
    1298         896 :             _global_index_by_pid_map[neighbor->id()];
    1299             : 
    1300         896 :           graph_row.push_back(neighbor_global_index_by_pid);
    1301             :         }
    1302             : 
    1303             :       // Check for any constraining elements
    1304     2494397 :       if (!mesh_constrained_nodes.empty()) // quick test for non-IGA cases
    1305             :         {
    1306           0 :           const auto end_it = mesh_constrained_nodes.end();
    1307             : 
    1308             :           // Use a set to avoid duplicates.  Use a well-defined method
    1309             :           // of ordering that set to make debugging easier.
    1310           0 :           std::set<const Elem *, CompareElemIdsByLevel> constraining_elems;
    1311      146202 :           for (const Node & node : elem->node_ref_range())
    1312             :             {
    1313      118363 :               if (const auto row_it = mesh_constrained_nodes.find(&node);
    1314           0 :                   row_it != end_it)
    1315      555100 :                 for (const auto & [pr, coef] : row_it->second)
    1316             :                   {
    1317           0 :                     libmesh_ignore(coef); // avoid gcc 7 warning
    1318      451304 :                     constraining_elems.insert(pr.first);
    1319             :                   }
    1320             :             }
    1321      240695 :           for (const Elem * constraining_elem : constraining_elems)
    1322             :             {
    1323             :               const dof_id_type constraining_global_index_by_pid =
    1324      212856 :                 _global_index_by_pid_map[constraining_elem->id()];
    1325             : 
    1326      212856 :               graph_row.push_back(constraining_global_index_by_pid);
    1327             : 
    1328             :               // We can't be sure if the constraining element's owner sees
    1329             :               // the assembly element, so to get a symmetric connectivity
    1330             :               // graph we'll need to tell them about us to be safe.
    1331      212856 :               if (constraining_elem->processor_id() != mesh.processor_id())
    1332      237014 :                 connections_to_push[constraining_elem->processor_id()].emplace_back
    1333      118507 :                   (global_index_by_pid, constraining_global_index_by_pid);
    1334             :             }
    1335             :         }
    1336             : 
    1337             :       // Check for any constrained elements
    1338     2670770 :       for (const auto & pr : as_range(elems_constrained_by.equal_range(elem)))
    1339             :         {
    1340      176373 :           const Elem * constrained = pr.second;
    1341             :           const dof_id_type constrained_global_index_by_pid =
    1342      176373 :             _global_index_by_pid_map[constrained->id()];
    1343             : 
    1344      176373 :           graph_row.push_back(constrained_global_index_by_pid);
    1345             : 
    1346             :           // We can't be sure if the constrained element's owner sees
    1347             :           // the assembly element, so to get a symmetric connectivity
    1348             :           // graph we'll need to tell them about us to be safe.
    1349      176373 :           if (constrained->processor_id() != mesh.processor_id())
    1350      164048 :             connections_to_push[constrained->processor_id()].emplace_back
    1351       82024 :               (global_index_by_pid, constrained_global_index_by_pid);
    1352             :         }
    1353      229427 :     }
    1354             : 
    1355             :   // Partitioners like Parmetis require a symmetric adjacency matrix,
    1356             :   // but if we have an assembly element constrained by a spline
    1357             :   // NodeElem owned by another processor, it's possible that that
    1358             :   // processor doesn't see our assembly element.  Let's push those
    1359             :   // entries, to ensure they're counted on both sides.
    1360             :   auto symmetrize_entries =
    1361        4059 :     [this, first_local_elem]
    1362             :     (processor_id_type /*src_pid*/,
    1363      200531 :      const std::vector<std::pair<dof_id_type, dof_id_type>> & incoming_entries)
    1364             :     {
    1365      204590 :       for (auto [i, j] : incoming_entries)
    1366             :         {
    1367           0 :           libmesh_assert_greater_equal(j, first_local_elem);
    1368      200531 :           const std::size_t jl = j - first_local_elem;
    1369           0 :           libmesh_assert_less(jl, _dual_graph.size());
    1370           0 :           std::vector<dof_id_type> & graph_row = _dual_graph[jl];
    1371      200531 :           if (std::find(graph_row.begin(), graph_row.end(), i) == graph_row.end())
    1372             :           {
    1373             : //            std::cerr << "Pushing back (" << j << ", " << i << ") from " << src_pid << std::endl;
    1374       36483 :             graph_row.push_back(i);
    1375             :           }
    1376             :         }
    1377      229771 :     };
    1378             : 
    1379      230115 :   Parallel::push_parallel_vector_data(mesh.comm(),
    1380             :                                       connections_to_push,
    1381             :                                       symmetrize_entries);
    1382             : 
    1383             :   // *Now* we should have a symmetric adjacency matrix.  Let's check
    1384             :   // that, so any failures get caught before they e.g. confuse
    1385             :   // Parmetis in hard-to-debug ways.  That's a global communication so
    1386             :   // we'll only do it in debug mode.
    1387             : #ifdef DEBUG
    1388         344 :   auto n_proc = mesh.n_processors();
    1389         688 :   std::vector<dof_id_type> first_local_index_on_proc(n_proc, 0);
    1390         688 :   for (auto pid : make_range(1u,n_proc))
    1391         344 :     first_local_index_on_proc[pid] = first_local_index_on_proc[pid-1] + _n_active_elem_on_proc[pid-1];
    1392             : 
    1393         344 :   libmesh_assert_equal_to(first_local_index_on_proc[mesh.processor_id()],
    1394             :                           first_local_elem);
    1395             : 
    1396         688 :   std::unordered_map<processor_id_type, std::vector<std::pair<dof_id_type, dof_id_type>>> entries_to_send;
    1397       10847 :   for (auto il : index_range(_dual_graph))
    1398             :     {
    1399       10503 :       const std::vector<dof_id_type> & graph_row = _dual_graph[il];
    1400             : 
    1401       10503 :       const auto i = il + first_local_elem;
    1402             : 
    1403       54839 :       for (auto j : graph_row)
    1404             :         {
    1405             :           // Stupid graph rows aren't sorted yet...
    1406       44336 :           processor_id_type target_pid = 0;
    1407      110637 :           while (target_pid+1 < n_proc &&
    1408       44336 :                  j >= first_local_index_on_proc[target_pid+1])
    1409       21965 :             ++target_pid;
    1410             : 
    1411       44336 :           entries_to_send[target_pid].emplace_back(i,j);
    1412             :         }
    1413             :     }
    1414             : 
    1415         688 :   std::vector<std::tuple<processor_id_type, dof_id_type, dof_id_type>> bad_entries;
    1416             : 
    1417             :   auto check_incoming_entries =
    1418             :     [this, first_local_elem, &bad_entries]
    1419             :     (processor_id_type src_pid,
    1420      178001 :      const std::vector<std::pair<dof_id_type, dof_id_type>> & incoming_entries)
    1421             :     {
    1422       44993 :       for (auto [i, j] : incoming_entries)
    1423             :         {
    1424       44336 :           if (j < first_local_elem)
    1425             :             {
    1426           0 :               bad_entries.emplace_back(src_pid,i,j);
    1427           0 :               continue;
    1428             :             }
    1429       88672 :           const std::size_t jl = j - first_local_elem;
    1430       44336 :           if (jl >= _dual_graph.size())
    1431             :             {
    1432           0 :               bad_entries.emplace_back(src_pid,i,j);
    1433           0 :               continue;
    1434             :             }
    1435       44336 :           const std::vector<dof_id_type> & graph_row = _dual_graph[jl];
    1436       44336 :           if (std::find(graph_row.begin(), graph_row.end(), i) ==
    1437       88672 :               graph_row.end())
    1438             :             {
    1439           0 :               bad_entries.emplace_back(src_pid,i,j);
    1440           0 :               continue;
    1441             :             }
    1442             :         }
    1443         657 :     };
    1444             : 
    1445             :   // Keep any failures in sync in parallel, for easier debugging in
    1446             :   // unit tests where the failure exception will be caught.
    1447             :   Parallel::push_parallel_vector_data
    1448         344 :     (mesh.comm(), entries_to_send, check_incoming_entries);
    1449         344 :   bool bad_entries_exist = !bad_entries.empty();
    1450         344 :   mesh.comm().max(bad_entries_exist);
    1451         344 :   if (bad_entries_exist)
    1452             :     {
    1453             : #if 0  // Optional verbosity for if this breaks again...
    1454             :       if (!bad_entries.empty())
    1455             :         {
    1456             :           std::cerr << "Bad entries on processor " << mesh.processor_id() << ": ";
    1457             :           for (auto [p, i, j] : bad_entries)
    1458             :             std::cerr << '(' << p << ", " << i << ", " << j << "), ";
    1459             :           std::cerr << std::endl;
    1460             :         }
    1461             : #endif
    1462           0 :       libmesh_error_msg("Asymmetric partitioner graph detected");
    1463             :     }
    1464             : #endif
    1465      230115 : }
    1466             : 
    1467       42816 : void Partitioner::assign_partitioning (MeshBase & mesh, const std::vector<dof_id_type> & parts)
    1468             : {
    1469         532 :   LOG_SCOPE("assign_partitioning()", "Partitioner");
    1470             : 
    1471             :   // This function must be run on all processors at once
    1472         266 :   libmesh_parallel_only(mesh.comm());
    1473             : 
    1474         266 :   dof_id_type first_local_elem = 0;
    1475      216895 :   for (auto pid : make_range(mesh.processor_id()))
    1476      174212 :     first_local_elem += _n_active_elem_on_proc[pid];
    1477             : 
    1478             : #ifndef NDEBUG
    1479         266 :   const dof_id_type n_active_local_elem = mesh.n_active_local_elem();
    1480             : #endif
    1481             : 
    1482             :   std::map<processor_id_type, std::vector<dof_id_type>>
    1483         532 :     requested_ids;
    1484             : 
    1485             :   // Results to gather from each processor - kept in a map so we
    1486             :   // do only one loop over elements after all receives are done.
    1487             :   std::map<processor_id_type, std::vector<processor_id_type>>
    1488         532 :     filled_request;
    1489             : 
    1490    13006978 :   for (auto & elem : mesh.active_element_ptr_range())
    1491             :     {
    1492             :       // we need to get the index from the owning processor
    1493             :       // (note we cannot assign it now -- we are iterating
    1494             :       // over elements again and this will be bad!)
    1495    12938898 :       requested_ids[elem->processor_id()].push_back(elem->id());
    1496       42284 :     }
    1497             : 
    1498             :   auto gather_functor =
    1499      344737 :     [this,
    1500             :      & parts,
    1501             : #ifndef NDEBUG
    1502             :      & mesh,
    1503             :      n_active_local_elem,
    1504             : #endif
    1505             :      first_local_elem]
    1506             :     (processor_id_type, const std::vector<dof_id_type> & ids,
    1507    25930718 :      std::vector<processor_id_type> & data)
    1508             :     {
    1509        1064 :       const std::size_t ids_size = ids.size();
    1510      345801 :       data.resize(ids.size());
    1511             : 
    1512    13267413 :       for (std::size_t i=0; i != ids_size; i++)
    1513             :         {
    1514    12938898 :           const dof_id_type requested_elem_index = ids[i];
    1515             : 
    1516       17286 :           libmesh_assert(_global_index_by_pid_map.count(requested_elem_index));
    1517             : 
    1518             :           const dof_id_type global_index_by_pid =
    1519    12921612 :             _global_index_by_pid_map[requested_elem_index];
    1520             : 
    1521    12921612 :           const dof_id_type local_index =
    1522       17286 :             global_index_by_pid - first_local_elem;
    1523             : 
    1524       17286 :           libmesh_assert_less (local_index, parts.size());
    1525       17286 :           libmesh_assert_less (local_index, n_active_local_elem);
    1526             : 
    1527             :           const processor_id_type elem_procid =
    1528    12921612 :             cast_int<processor_id_type>(parts[local_index]);
    1529             : 
    1530       17286 :           libmesh_assert_less (elem_procid, mesh.n_partitions());
    1531             : 
    1532    12938898 :           data[i] = elem_procid;
    1533             :         }
    1534      388351 :     };
    1535             : 
    1536             :   auto action_functor =
    1537      344737 :     [&filled_request]
    1538             :     (processor_id_type pid,
    1539             :      const std::vector<dof_id_type> &,
    1540      345269 :      const std::vector<processor_id_type> & new_procids)
    1541             :     {
    1542      345801 :       filled_request[pid] = new_procids;
    1543       43348 :     };
    1544             : 
    1545             :   // Trade requests with other processors
    1546         266 :   const processor_id_type * ex = nullptr;
    1547             :   Parallel::pull_parallel_vector_data
    1548       42816 :     (mesh.comm(), requested_ids, gather_functor, action_functor, ex);
    1549             : 
    1550             :   // and finally assign the partitioning.
    1551             :   // note we are iterating in exactly the same order
    1552             :   // used to build up the request, so we can expect the
    1553             :   // required entries to be in the proper sequence.
    1554       43348 :   std::vector<unsigned int> counters(mesh.n_processors(), 0);
    1555    13006978 :   for (auto & elem : mesh.active_element_ptr_range())
    1556             :     {
    1557    12921612 :       const processor_id_type current_pid = elem->processor_id();
    1558             : 
    1559       17286 :       libmesh_assert_less (counters[current_pid], requested_ids[current_pid].size());
    1560             : 
    1561             :       const processor_id_type elem_procid =
    1562    12921612 :         filled_request[current_pid][counters[current_pid]++];
    1563             : 
    1564       17286 :       libmesh_assert_less (elem_procid, mesh.n_partitions());
    1565    12921612 :       elem->processor_id() = elem_procid;
    1566       42284 :     }
    1567       42816 : }
    1568             : 
    1569             : 
    1570             : } // namespace libMesh

Generated by: LCOV version 1.14