//* This file is part of the MOOSE framework
//* https://mooseframework.inl.gov
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#include "CopyMeshPartitioner.h"

#include "MooseApp.h"
#include "MooseMesh.h"
#include "MooseRandom.h"

#include "libmesh/elem.h"
#include "libmesh/parallel_algebra.h"

// TIMPI includes
#include "timpi/communicator.h"
#include "timpi/parallel_sync.h"

registerMooseObject("MooseApp", CopyMeshPartitioner);

InputParameters
CopyMeshPartitioner::validParams()
{
  InputParameters params = MoosePartitioner::validParams();
  params.addClassDescription("Assigns elements to match the partitioning of another mesh. If in a "
                             "child application, defaults to the parent app mesh if the other mesh "
                             "is not specified programmatically.");
  params.addPrivateParam<MooseMesh *>("source_mesh");

  return params;
}

CopyMeshPartitioner::CopyMeshPartitioner(const InputParameters & params) : MoosePartitioner(params)
{
  if (isParamValid("source_mesh"))
    _base_mesh = getParam<MooseMesh *>("source_mesh");
  else if (!_app.isUltimateMaster() && _app.masterMesh())
    _base_mesh = _app.masterMesh();
  else
    mooseError("Expecting either a parent app with a mesh to copy the partitioning from, or a "
               "'source_mesh' (private) parameter to be set programmatically.");
}

std::unique_ptr<Partitioner>
CopyMeshPartitioner::clone() const
{
  return _app.getFactory().clone(*this);
}

void
CopyMeshPartitioner::_do_partition(MeshBase & mesh, const unsigned int /*n*/)
{
  mooseAssert(_base_mesh, "Should have a base mesh to copy the partitioning from");

  // First check that it makes sense to copy the partitioning.
  // If we use the same number of procs to partition this mesh as were used to partition the source
  // mesh, we are effectively matching regions to processes the same way in both meshes. That makes
  // sense.
  // If we use more procs, this will leave some processes with no elements. It is not ideal, so we
  // just give a warning. This does not happen in practice with MultiApps.
  // If we use fewer procs (N_source > N_current), we error. If we could avoid erroring, then
  // either:
  // - the elements on our mesh always get matched to elements from only N_current processes among
  //   the N_source processes. We can accommodate that
  // - the elements on our mesh get matched to more processes than we have. We would need a
  //   heuristic (for example a nested partitioner) to re-distribute these elements.
  //   We won't support it.
  std::set<unsigned int> pids_used;

  // We cannot get the point locator on only some ranks of the base mesh, so we must error here
  if (_base_mesh->comm().size() > mesh.comm().size())
    mooseError("This partitioner does not support using fewer ranks to partition the mesh than are "
               "used to partition the source mesh (the mesh we are copying the partitioning from)");
  mooseAssert(_base_mesh->comm().rank() == mesh.comm().rank(),
              "Should be the same rank on both mesh MPI communicators");

  // Get a point locator on the source mesh
  auto pl = _base_mesh->getPointLocator();
  if (!_base_mesh->getMesh().is_serial())
    pl->enable_out_of_mesh_mode();

  // We use the pull_parallel_vector_data algorithm.
  // This code is very similar to Partitioner::assign_partitioning in libMesh's partitioner.C.
  // We need to define three things:
  // - the list of requests: the elements in our local mesh we are trying to assign pids to
  // - how to gather results: not all processors know which element belongs where, so they will
  //   each process the requests sent to them and send back the elem pids if they know them
  // - how to fill the requests: from the data gathered and sent back, we need to decide which data
  //   is good and save it into the output map

  // For each processor id, all the elements in our mesh that will request to know their new
  // processor id. If the target mesh is replicated, that's all the elements from that mesh (the
  // source mesh may be distributed, so we still need the communication). If the target mesh is
  // distributed, that's only the local and ghosted elements.
  // We send the target element vertex average, enough to find it in the source mesh.
  // We send the target element pid, to know where to save the result in the filled requests.
  // We send an index for the ordering of the requests, to facilitate retrieving the results.
  // NOTE: in partitioner.C they manage to unpack the filled requests in the same order that
  // the requests are sent, thus saving the need for that last index. Possible rework.
  std::map<processor_id_type, std::vector<std::tuple<Point, processor_id_type, unsigned int>>>
      requested_elements;
  const bool distributed_mesh = !_base_mesh->getMesh().is_serial();
  unsigned int request_i = 0;
  for (auto & elem : mesh.active_element_ptr_range())
  {
    // We don't know which processor of the source mesh owns this point.
    // As a first try, we add this request for every processor id.
    // TODO: a simple bounding box heuristic would do fine!
    const auto elem_pt = elem->vertex_average();
    if (distributed_mesh)
      for (const auto pid : make_range(mesh.comm().size()))
        requested_elements[pid].push_back({elem_pt, elem->processor_id(), request_i});
    // Source mesh is replicated, let's just ask the sending processor what the processor id is
    else
      requested_elements[processor_id()].push_back({elem_pt, processor_id(), request_i});
    request_i++;
  }
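
  // For illustration (hypothetical values): with a replicated source mesh, rank 0 only queries
  // itself, e.g.
  //   requested_elements[0] = { {vertex_avg(elem_0), 0, 0}, {vertex_avg(elem_1), 0, 1}, ... }
  // whereas with a distributed source mesh the same requests are pushed to every rank, since we
  // do not know which rank of the source mesh can locate each point.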

  // For each request, find which process is able to fill it.
  // Every processor fills these requests as best it can.
  auto gather_functor =
      [&pl](
          processor_id_type /*pid*/,
          const std::vector<std::tuple<Point, processor_id_type, unsigned int>> & incoming_elements,
          std::vector<processor_id_type> & outgoing_pids)
  {
    outgoing_pids.resize(incoming_elements.size(), libMesh::invalid_uint);

    // Try the point locator on each incoming element's vertex average
    for (const auto i : index_range(incoming_elements))
    {
      const auto & elem = (*pl)(Point(std::get<0>(incoming_elements[i])));
      if (elem)
        outgoing_pids[i] = elem->processor_id();
    }
  };

  // Results to gather from each processor.
  // For each processor id, we have a vector indexed by request with the new processor id.
  // Note that the filled_request should match the ordering of the requested_elements.
  std::map<processor_id_type, std::vector<processor_id_type>> filled_request;
  for (const auto i : make_range(mesh.comm().size()))
    filled_request[i].resize(requested_elements.count(i) ? requested_elements[i].size() : 0);

  // How to act on the exchanged data, and fill the filled_request (the output we sought)
  auto action_functor =
      [&filled_request](
          processor_id_type,
          const std::vector<std::tuple<Point, processor_id_type, unsigned int>> & elems,
          const std::vector<processor_id_type> & new_procids)
  {
    for (const auto i : index_range(new_procids))
      if (new_procids[i] != libMesh::invalid_uint)
      {
        const auto elem_pid = std::get<1>(elems[i]);
        const auto request_i = std::get<2>(elems[i]);
        filled_request[elem_pid][request_i] = new_procids[i];
      }
  };

  // Trade requests with other processors.
  // NOTE: We could try to use the base mesh communicator, because that's where we gather the
  // information. However, if that mesh communicator has more processes than we do, that would be
  // trouble.
  const processor_id_type * ex = nullptr;
  Parallel::pull_parallel_vector_data(
      mesh.comm(), requested_elements, gather_functor, action_functor, ex);

  // Assign the partitioning
  request_i = 0;
  for (auto & elem : mesh.active_element_ptr_range())
  {
    const processor_id_type current_pid = elem->processor_id();
    const auto lookup_pid = distributed_mesh ? current_pid : processor_id();
    const processor_id_type elem_procid = filled_request[lookup_pid][request_i++];

    elem->processor_id() = elem_procid;
    // Keep track of processor ids used in case we need to do a pass at re-assigning
    pids_used.insert(elem_procid);
  }

  // Synchronize the pids used across all ranks
  // NOTE: we could have gathered this earlier
  std::vector<unsigned int> pids_used_vec(pids_used.begin(), pids_used.end());
  mesh.comm().allgather(pids_used_vec);
  pids_used.insert(pids_used_vec.begin(), pids_used_vec.end());

  // Check the pids used.
  // We cannot use more process ids to partition the mesh than the current app is using.
  const auto max_pid = mesh.comm().size();
  if (pids_used.size() > max_pid)
    mooseError("Partitioning copy used more regions (" + std::to_string(pids_used.size()) +
               ") than the number of parallel processes (" + std::to_string(mesh.comm().size()) +
               ")");
  if (pids_used.size() < max_pid)
    mooseWarning(
        "Some parallel (MPI) processes were not assigned any element during partitioning. These "
        "processes will not perform any work.");

  // The pids are not too many, but their numbering may not be contiguous (some may exceed the
  // number of ranks). In that case, renumber the process id assignment.
  if (*pids_used.rbegin() > max_pid)
  {
    std::unordered_map<unsigned int, unsigned int> source_to_current_pid_map;

    // TODO: This is a naive remapping. There might be remappings with better locality, e.g.
    // keeping MPI ranks on the same node to limit communications. Once we can have multiple
    // partitioners, we should look into having a nested partitioner handle the outliers.
    unsigned int i = 0;
    for (const auto pid_set : pids_used)
      source_to_current_pid_map[pid_set] = i++;

    for (auto & elem_ptr : mesh.active_element_ptr_range())
      elem_ptr->processor_id() = source_to_current_pid_map[elem_ptr->processor_id()];
  }
}
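
// Typical usage, as a sketch: in a child application of a MultiApp the parent application's mesh
// is picked up automatically (see the constructor above), so selecting the partitioner in the
// child input file is usually sufficient:
//
//   [Mesh]
//     [Partitioner]
//       type = CopyMeshPartitioner
//     []
//   []
//
// Outside a MultiApp hierarchy, the private 'source_mesh' parameter must instead be set
// programmatically to point at the mesh whose partitioning should be copied.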