LCOV - code coverage report
Current view: top level - src/partitioner - CopyMeshPartitioner.C (source / functions)
Test: idaholab/moose framework: 2bf808
Date: 2025-07-17 01:28:37
Coverage: Lines: 69 of 81 hit (85.2 %) | Functions: 6 of 6 hit (100.0 %)

//* This file is part of the MOOSE framework
//* https://mooseframework.inl.gov
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#include "CopyMeshPartitioner.h"

#include "MooseApp.h"
#include "MooseMesh.h"
#include "MooseRandom.h"

#include "libmesh/elem.h"
#include "libmesh/parallel_algebra.h"

// TIMPI includes
#include "timpi/communicator.h"
#include "timpi/parallel_sync.h"

registerMooseObject("MooseApp", CopyMeshPartitioner);

InputParameters
CopyMeshPartitioner::validParams()
{
  InputParameters params = MoosePartitioner::validParams();
  params.addClassDescription("Assigns elements to match the partitioning of another mesh. If in a "
                             "child application, defaults to the parent app mesh if the other mesh "
                             "is not specified programmatically.");
  params.addPrivateParam<MooseMesh *>("source_mesh");

  return params;
}

CopyMeshPartitioner::CopyMeshPartitioner(const InputParameters & params) : MoosePartitioner(params)
{
  if (isParamValid("source_mesh"))
    _base_mesh = getParam<MooseMesh *>("source_mesh");
  else if (!_app.isUltimateMaster() && _app.masterMesh())
    _base_mesh = _app.masterMesh();
  else
    mooseError("Expecting either a parent app with a mesh to copy the partitioning from, or a "
               "'source_mesh' (private) parameter to be set programmatically.");
}
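
// A minimal usage sketch (not part of this file; the names below are illustrative assumptions,
// not the framework's actual call sites). Programmatically, an application could point this
// partitioner at another mesh through the private "source_mesh" parameter, e.g. with a Factory
// reference it already holds and a hypothetical mesh pointer 'other_moose_mesh':
//
//   InputParameters pars = factory.getValidParams("CopyMeshPartitioner");
//   pars.set<MooseMesh *>("source_mesh") = &other_moose_mesh;
//
// In an input file, a child application can simply select the partitioner and let it default to
// the parent application mesh:
//
//   [Mesh]
//     [Partitioner]
//       type = CopyMeshPartitioner
//     []
//   []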

std::unique_ptr<Partitioner>
CopyMeshPartitioner::clone() const
{
  return _app.getFactory().clone(*this);
}

void
CopyMeshPartitioner::_do_partition(MeshBase & mesh, const unsigned int /*n*/)
{
  mooseAssert(_base_mesh, "Should have a base mesh to copy the partitioning from");

  // First check that it makes sense to copy the partitioning.
  // If we use the same number of procs to partition this mesh as to partition the source mesh, we
  // are effectively matching regions to processes the same way in both meshes. That makes sense.
  // If we use more procs, this will leave some processes with no elements. It is not ideal, so we
  // just give a warning. This does not happen in practice with MultiApps.
  // If we use fewer procs (N_source > N_current), we error. If we could avoid erroring, then
  // either:
  // - the elements on our mesh always get matched to elements from only N_current processes among
  //   the N_source processes. We can accommodate that.
  // - the elements on our mesh get matched to more processes than we have. We would
  //   need a heuristic (for example a nested partitioner) to re-distribute these elements. We do
  //   not support that.
  std::set<unsigned int> pids_used;

  // We cannot get the point locator only on some ranks of the base mesh, so we must error here
  if (_base_mesh->comm().size() > mesh.comm().size())
    mooseError("This partitioner does not support using fewer ranks to partition the mesh than are "
               "used to partition the source mesh (the mesh we are copying the partitioning from)");
  mooseAssert(_base_mesh->comm().rank() == mesh.comm().rank(),
              "Should be the same rank on both mesh MPI communicators");

  // Get point locator
  auto pl = _base_mesh->getPointLocator();
  if (!_base_mesh->getMesh().is_serial())
    pl->enable_out_of_mesh_mode();

  // We use the pull_parallel_vector_data algorithm.
  // This code is very similar to Partitioner::assign_partitioning in libMesh's partitioner.C.
  // We need to define three things (see the toy sketch below):
  // - the list of requests: the elements in our local mesh we are trying to assign pids to
  // - how to gather results: not all processors know which element belongs where; they will each
  //   process the requests sent to them and send back the elem pids if they know them
  // - how to fill the requests: from the data gathered and sent back, we need to decide which data
  //   is good and save it into the output map
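  //
  // A minimal toy sketch of that query/gather/act pattern (illustrative only, not code from this
  // file): each rank asks rank 0 for the squares of a few integers.
  //
  //   std::map<processor_id_type, std::vector<int>> queries;
  //   queries[0] = {2, 3}; // requests this rank sends to rank 0
  //   auto gather = [](processor_id_type /*asking_rank*/, const std::vector<int> & qs,
  //                    std::vector<int> & answers)
  //   {
  //     // runs on the rank receiving the requests; fill in what it knows locally
  //     answers.resize(qs.size());
  //     for (const auto i : index_range(qs))
  //       answers[i] = qs[i] * qs[i];
  //   };
  //   auto act = [](processor_id_type /*answering_rank*/, const std::vector<int> & qs,
  //                 const std::vector<int> & answers)
  //   {
  //     // runs back on the asking rank; answers[i] corresponds to the query qs[i]
  //   };
  //   const int * ex = nullptr; // example datum pointer, selects the answer type
  //   Parallel::pull_parallel_vector_data(mesh.comm(), queries, gather, act, ex);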

  // For each processor id, gather all the elements in our mesh that will request to know their new
  // processor id. If the target mesh is replicated, that is all the elements from that mesh, but
  // the source mesh may be distributed so we still need the communication. If the target mesh is
  // distributed, that is only the local and ghosted elements.
  // We send the target element vertex average, enough to find it in the source mesh.
  // We send the target element pid, to know where to save the result in the filled requests.
  // We send an index for the ordering of the requests to facilitate retrieving the results.
  // NOTE: in partitioner.C they manage to unpack the filled requests in the same order that
  //       the requests are sent, thus saving the need for that last index. Possible rework.
  std::map<processor_id_type, std::vector<std::tuple<Point, processor_id_type, unsigned int>>>
      requested_elements;
  const bool distributed_mesh = !_base_mesh->getMesh().is_serial();
  unsigned int request_i = 0;
  for (auto & elem : mesh.active_element_ptr_range())
  {
    // We don't know which processor of the source mesh owns this point.
    // As a first try, we send the request to every processor id.
    // TODO: a simple bounding box heuristic would do fine!
    const auto elem_pt = elem->vertex_average();
    if (distributed_mesh)
      for (const auto pid : make_range(mesh.comm().size()))
        requested_elements[pid].push_back({elem_pt, elem->processor_id(), request_i});
    // the source mesh is replicated, so any rank can answer: just ask ourselves (the sending rank)
    else
      requested_elements[processor_id()].push_back({elem_pt, processor_id(), request_i});
    request_i++;
  }

  // For each request, find which process is able to fill it.
  // Every processor fills the requests sent to it as best it can.
  auto gather_functor =
      [&pl](
          processor_id_type /*pid*/,
          const std::vector<std::tuple<Point, processor_id_type, unsigned int>> & incoming_elements,
          std::vector<processor_id_type> & outgoing_pids)
  {
    outgoing_pids.resize(incoming_elements.size(), libMesh::invalid_uint);

    // Try the pl on the incoming element
    for (const auto i : index_range(incoming_elements))
    {
      const auto & elem = (*pl)(Point(std::get<0>(incoming_elements[i])));
      if (elem)
        outgoing_pids[i] = elem->processor_id();
    }
  };

  // Results to gather from each processor.
  // For each processor id, we have a vector indexed by request with the new processor id.
  // Note that filled_request should match the ordering of requested_elements.
  std::map<processor_id_type, std::vector<processor_id_type>> filled_request;
  for (const auto i : make_range(mesh.comm().size()))
    filled_request[i].resize(requested_elements.count(i) ? requested_elements[i].size() : 0);

  // How to act on the exchanged data, and fill the filled_request (output we sought)
  auto action_functor =
      [&filled_request](
          processor_id_type,
          const std::vector<std::tuple<Point, processor_id_type, unsigned int>> & elems,
          const std::vector<processor_id_type> & new_procids)
  {
    for (const auto i : index_range(new_procids))
      if (new_procids[i] != libMesh::invalid_uint)
      {
        const auto elem_pid = std::get<1>(elems[i]);
        const auto request_i = std::get<2>(elems[i]);
        filled_request[elem_pid][request_i] = new_procids[i];
      }
  };

  // Trade requests with other processors
  // NOTE: We could try to use the base mesh communicator because that is where we gather the
  // information. However, if that communicator had more processes than ours, that would be trouble.
  const processor_id_type * ex = nullptr;
  Parallel::pull_parallel_vector_data(
      mesh.comm(), requested_elements, gather_functor, action_functor, ex);

  // Assign the partitioning.
  request_i = 0;
  for (auto & elem : mesh.active_element_ptr_range())
  {
    const processor_id_type current_pid = elem->processor_id();
    const auto lookup_pid = distributed_mesh ? current_pid : processor_id();
    const processor_id_type elem_procid = filled_request[lookup_pid][request_i++];

    elem->processor_id() = elem_procid;
    // Keep track of processor ids used in case we need to do a pass at re-assigning
    pids_used.insert(elem_procid);
  }

  // Synchronize the pids used across all ranks
  // NOTE: we could have gathered this earlier
  std::vector<unsigned int> pids_used_vec(pids_used.begin(), pids_used.end());
  mesh.comm().allgather(pids_used_vec);
  pids_used.insert(pids_used_vec.begin(), pids_used_vec.end());

  // Check the pids used
  // We cannot use more process ids to partition the mesh than the current app is using
  const auto max_pid = mesh.comm().size();
  if (pids_used.size() > max_pid)
    mooseError("Partitioning copy used more regions (" + std::to_string(pids_used.size()) +
               ") than the number of parallel processes (" + std::to_string(mesh.comm().size()) +
               ")");
  if (pids_used.size() < max_pid)
    mooseWarning(
        "Some parallel (MPI) processes were not assigned any element during partitioning. These "
        "processes will not perform any work.");

  // The number of pids used is acceptable, but some used pid falls outside the valid rank range
  // (0 to max_pid - 1): renumber the process id assignment. For example, used pids {0, 3, 5} on
  // 4 ranks would be remapped to {0, 1, 2}.
  if (*pids_used.rbegin() >= max_pid)
  {
    std::unordered_map<unsigned int, unsigned int> source_to_current_pid_map;

    // TODO: This is a naive remapping. There might be remappings with better locality, e.g.
    // keeping MPI ranks on the same node to limit communication. Once we can have multiple
    // partitioners, we should look into having a nested partitioner handle the outliers.
    unsigned int i = 0;
    for (const auto pid_set : pids_used)
      source_to_current_pid_map[pid_set] = i++;

    for (auto & elem_ptr : mesh.active_element_ptr_range())
      elem_ptr->processor_id() = source_to_current_pid_map[elem_ptr->processor_id()];
  }
}

Generated by: LCOV version 1.14