Line data Source code
1 : //* This file is part of the MOOSE framework 2 : //* https://mooseframework.inl.gov 3 : //* 4 : //* All rights reserved, see COPYRIGHT for full restrictions 5 : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT 6 : //* 7 : //* Licensed under LGPL 2.1, please see LICENSE for details 8 : //* https://www.gnu.org/licenses/lgpl-2.1.html 9 : 10 : #include "CopyMeshPartitioner.h" 11 : 12 : #include "MooseApp.h" 13 : #include "MooseMesh.h" 14 : #include "MooseRandom.h" 15 : 16 : #include "libmesh/elem.h" 17 : #include "libmesh/parallel_algebra.h" 18 : 19 : // TIMPI includes 20 : #include "timpi/communicator.h" 21 : #include "timpi/parallel_sync.h" 22 : 23 : registerMooseObject("MooseApp", CopyMeshPartitioner); 24 : 25 : InputParameters 26 3189 : CopyMeshPartitioner::validParams() 27 : { 28 3189 : InputParameters params = MoosePartitioner::validParams(); 29 6378 : params.addClassDescription("Assigns element to match the partitioning of another mesh. If in a " 30 : "child application, defaults to the parent app mesh if the other mesh " 31 : "is not specified programmatically."); 32 3189 : params.addPrivateParam<MooseMesh *>("source_mesh"); 33 : 34 3189 : return params; 35 0 : } 36 : 37 96 : CopyMeshPartitioner::CopyMeshPartitioner(const InputParameters & params) : MoosePartitioner(params) 38 : { 39 288 : if (isParamValid("source_mesh")) 40 108 : _base_mesh = getParam<MooseMesh *>("source_mesh"); 41 60 : else if (!_app.isUltimateMaster() && _app.masterMesh()) 42 60 : _base_mesh = _app.masterMesh(); 43 : else 44 0 : mooseError("Expecting either a parent app with a mesh to copy the partitioning from, a " 45 : "'source_mesh' (private) parameter to be set programmatically."); 46 96 : } 47 : 48 : std::unique_ptr<Partitioner> 49 64 : CopyMeshPartitioner::clone() const 50 : { 51 64 : return _app.getFactory().clone(*this); 52 : } 53 : 54 : void 55 24 : CopyMeshPartitioner::_do_partition(MeshBase & mesh, const unsigned int /*n*/) 56 : { 57 : mooseAssert(_base_mesh, "Should have a base mesh to copy the partitioning from"); 58 : 59 : // First check that it makes sense to copy the partitioning 60 : // If we use the same number of procs to partition this mesh than to partition the source mesh, we 61 : // are effectively matching regions to processes the same way in both meshes. Makes sense. 62 : // If we use more procs, this will leave some processes with no elements. It is not ideal, let's 63 : // just give a warning. This does not happen in practice with MultiApps. 64 : // If we use fewer procs (N_source > N_current), we error. If we could avoid erroring, then 65 : // either: 66 : // - the elements on our mesh always get matched to elements from only N_current processes among 67 : // the N_source processes. We can accomodate that 68 : // - the elements on our mesh get matched to more processes than we have. We would 69 : // need a heuristic (for example a nested partitioner) to re-distribute these elements. We won't 70 : // support it 71 24 : std::set<unsigned int> pids_used; 72 : 73 : // We cannot get the point locator only on some ranks of the base mesh, so we must error here 74 24 : if (_base_mesh->comm().size() > mesh.comm().size()) 75 0 : mooseError("This partitioner does not support using fewer ranks to partition the mesh than are " 76 : "used to partition the source mesh (the mesh we are copying the partitioning from)"); 77 : mooseAssert(_base_mesh->comm().rank() == mesh.comm().rank(), 78 : "Should be the same rank on both mesh MPI communicators"); 79 : 80 : // Get point locator 81 24 : auto pl = _base_mesh->getPointLocator(); 82 24 : if (!_base_mesh->getMesh().is_serial()) 83 4 : pl->enable_out_of_mesh_mode(); 84 : 85 : // We use the pull_parallel_vector_data algorithm. 86 : // This code is very similar to partitioner::assign_partitioning in libMesh partitioner.C 87 : // We need to define three things: 88 : // - the list of requests: the elements in our local mesh we are trying to assign pids to 89 : // - how to gather results: not all processors know which element belongs where, they will each 90 : // process the requests sent to them and send back the elem pids if they know them. 91 : // - how to fill the requests: from the data gathered and sent back, we need to decide which data 92 : // is good and save it into the output map 93 : 94 : // For each processor id, all the elements in our mesh that will request to know their new 95 : // processor id. If the target mesh is replicated, that's all the elements from that mesh, but the 96 : // source mesh may be distributed so we still need the communication. If a distributed mesh, then 97 : // that's only the local and ghosted elements. 98 : // We send the target element vertex average, enough to find it in the source mesh. 99 : // We send the target element pid, to know where to save the result in the filled requests 100 : // We send an index for the ordering of the requests to facilitate retrieving the results 101 : // NOTE: in partitioner.C they manage to unpack the filled requests in the same order that 102 : // the requests are sent, thus saving the need for that last index. Possible rework.. 103 : std::map<processor_id_type, std::vector<std::tuple<Point, processor_id_type, unsigned int>>> 104 24 : requested_elements; 105 24 : const bool distributed_mesh = !_base_mesh->getMesh().is_serial(); 106 24 : unsigned int request_i = 0; 107 1107 : for (auto & elem : mesh.active_element_ptr_range()) 108 : { 109 : // we don't know which processor owns this point. 110 : // As a first try, we add this to every processor id 111 : // TODO: a simple bounding box heuristic would do fine! 112 1083 : const auto elem_pt = elem->vertex_average(); 113 1083 : if (distributed_mesh) 114 81 : for (const auto pid : make_range(mesh.comm().size())) 115 54 : requested_elements[pid].push_back({elem_pt, elem->processor_id(), request_i}); 116 : // source mesh is replicated, let's just ask the sending processor what the processor id is 117 : else 118 1056 : requested_elements[processor_id()].push_back({elem_pt, processor_id(), request_i}); 119 1083 : request_i++; 120 24 : } 121 : 122 : // For each requests, find which process is able to fill the request 123 : // Every processor fills these requests as best it can. 124 : auto gather_functor = 125 28 : [&pl]( 126 : processor_id_type /*pid*/, 127 : const std::vector<std::tuple<Point, processor_id_type, unsigned int>> & incoming_elements, 128 : std::vector<processor_id_type> & outgoing_pids) 129 : { 130 28 : outgoing_pids.resize(incoming_elements.size(), libMesh::invalid_uint); 131 : 132 : // Try the pl on the incoming element 133 1138 : for (const auto i : index_range(incoming_elements)) 134 : { 135 1110 : const auto & elem = (*pl)(Point(std::get<0>(incoming_elements[i]))); 136 1110 : if (elem) 137 1102 : outgoing_pids[i] = elem->processor_id(); 138 : } 139 28 : }; 140 : 141 : // Results to gather from each processor 142 : // For each processor id, we have a vector indexed by element with the new processor id 143 : // Note that the filled_requests should match the ordering of the requested_elements 144 24 : std::map<processor_id_type, std::vector<processor_id_type>> filled_request; 145 72 : for (const auto i : make_range(mesh.comm().size())) 146 48 : filled_request[i].resize(requested_elements.count(i) ? requested_elements[i].size() : 0); 147 : 148 : // How to act on the exchanged data, and fill the filled_request (output we sought) 149 : auto action_functor = 150 28 : [&filled_request]( 151 : processor_id_type, 152 : const std::vector<std::tuple<Point, processor_id_type, unsigned int>> & elems, 153 : const std::vector<processor_id_type> & new_procids) 154 : { 155 1138 : for (const auto i : index_range(new_procids)) 156 1110 : if (new_procids[i] != libMesh::invalid_uint) 157 : { 158 1102 : const auto elem_pid = std::get<1>(elems[i]); 159 1102 : const auto request_i = std::get<2>(elems[i]); 160 1102 : filled_request[elem_pid][request_i] = new_procids[i]; 161 : } 162 28 : }; 163 : 164 : // Trade requests with other processors 165 : // NOTE: We could try to use base mesh communicator because that's where we gather the information 166 : // However, if that mesh communicator has more processes than we do, that would be trouble. 167 24 : const processor_id_type * ex = nullptr; 168 24 : Parallel::pull_parallel_vector_data( 169 24 : mesh.comm(), requested_elements, gather_functor, action_functor, ex); 170 : 171 : // Assign the partitioning. 172 24 : request_i = 0; 173 1107 : for (auto & elem : mesh.active_element_ptr_range()) 174 : { 175 1083 : const processor_id_type current_pid = elem->processor_id(); 176 1083 : const auto lookup_pid = distributed_mesh ? current_pid : processor_id(); 177 1083 : const processor_id_type elem_procid = filled_request[lookup_pid][request_i++]; 178 : 179 1083 : elem->processor_id() = elem_procid; 180 : // Keep track of processor ids used in case we need to do a pass at re-assigning 181 1083 : pids_used.insert(elem_procid); 182 24 : } 183 : 184 : // Synchronize the pids used across all ranks 185 : // NOTE: we could have gathered this earlier 186 24 : std::vector<unsigned int> pids_used_vec(pids_used.begin(), pids_used.end()); 187 24 : mesh.comm().allgather(pids_used_vec); 188 24 : pids_used.insert(pids_used_vec.begin(), pids_used_vec.end()); 189 : 190 : // Check the pids used 191 : // We cannot use more process ids to partition the mesh than the current app is using 192 24 : const auto max_pid = mesh.comm().size(); 193 24 : if (pids_used.size() > max_pid) 194 0 : mooseError("Partitioning copy used more regions (" + std::to_string(pids_used.size()) + 195 0 : ") than the number of parallel processes (" + std::to_string(mesh.comm().size()) + 196 : ")"); 197 24 : if (pids_used.size() < max_pid) 198 6 : mooseWarning( 199 : "Some parallel (MPI) processes were not assigned any element during partitioning. These " 200 : "processes will not perform any work."); 201 : 202 : // The pids are not too many, but their numbering is not contiguous, renumber the process id 203 : // assignment 204 24 : if (*pids_used.rbegin() > max_pid) 205 : { 206 0 : std::unordered_map<unsigned int, unsigned int> source_to_current_pid_map; 207 : 208 : // TODO: This is a naive remapping. There might be remapping that have optimal locality. E.g. 209 : // the mpi ranks are on the same node, limiting communications. Once we can have multiple 210 : // partitioners, we should look into having a nested partitioner handle the outliers 211 0 : unsigned int i = 0; 212 0 : for (const auto pid_set : pids_used) 213 0 : source_to_current_pid_map[pid_set] = i++; 214 : 215 0 : for (auto & elem_ptr : mesh.active_element_ptr_range()) 216 0 : elem_ptr->processor_id() = source_to_current_pid_map[elem_ptr->processor_id()]; 217 0 : } 218 24 : }