Line data Source code
1 : // The libMesh Finite Element Library.
2 : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 :
4 : // This library is free software; you can redistribute it and/or
5 : // modify it under the terms of the GNU Lesser General Public
6 : // License as published by the Free Software Foundation; either
7 : // version 2.1 of the License, or (at your option) any later version.
8 :
9 : // This library is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 : // Lesser General Public License for more details.
13 :
14 : // You should have received a copy of the GNU Lesser General Public
15 : // License along with this library; if not, write to the Free Software
16 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :
18 :
19 :
20 : // Local includes
21 : #include "libmesh/mesh_communication.h"
22 :
23 : // libMesh includes
24 : #include "libmesh/libmesh_config.h"
25 : #include "libmesh/libmesh_common.h"
26 : #include "libmesh/libmesh_logging.h"
27 : #include "libmesh/mesh_base.h"
28 : #include "libmesh/mesh_tools.h"
29 : #include "libmesh/parallel_hilbert.h"
30 : #include "libmesh/parallel_sort.h"
31 : #include "libmesh/elem.h"
32 : #include "libmesh/elem_range.h"
33 : #include "libmesh/node_range.h"
34 :
35 : // TIMPI includes
36 : #include "timpi/parallel_implementation.h"
37 : #include "timpi/parallel_sync.h"
38 :
39 : // C/C++ includes
40 : #ifdef LIBMESH_HAVE_LIBHILBERT
41 : # include "hilbert.h"
42 : #endif
43 :
44 :
45 : #if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
46 :
47 : namespace { // anonymous namespace for helper functions
48 :
49 : using namespace libMesh;
50 :
51 :
52 : // Trying to divide per mesh, not per point
 : // Returns a Point whose i-th component is 1/(bbox width in dimension i),
 : // or 0 when the bbox is degenerate (zero width) in that dimension so we
 : // never divide by zero; callers multiply by this instead of dividing.
53 1123994 : Point invert_bbox (const BoundingBox & bbox)
54 : {
55 15358 : Point p;
56 :
57 1154710 : p(0) = (bbox.first(0) == bbox.second(0)) ? 0. :
58 1139352 : 1/(bbox.second(0)-bbox.first(0));
59 :
60 : #if LIBMESH_DIM > 1
61 1153688 : p(1) = (bbox.first(1) == bbox.second(1)) ? 0. :
62 1061084 : 1/(bbox.second(1)-bbox.first(1));
63 : #endif
64 :
65 : #if LIBMESH_DIM > 2
66 1154276 : p(2) = (bbox.first(2) == bbox.second(2)) ? 0. :
67 577059 : 1/(bbox.second(2)-bbox.first(2));
68 : #endif
69 :
70 1123994 : return p;
71 : }
72 :
73 :
74 :
75 : // Utility function to map (x,y,z) in [bbox.min, bbox.max]^3 into
76 : // [0,max_inttype]^3 for computing Hilbert keys
 : // p       : the physical point to map
 : // bbox    : global bounding box of the mesh
 : // bboxinv : per-dimension reciprocal widths from invert_bbox() (0 for a
 : // degenerate dimension, which pins that coordinate to 0)
 : // icoords : output integer coordinates, one CFixBitVec per dimension
77 90961287 : void get_hilbert_coords (const Point & p,
78 : const BoundingBox & bbox,
79 : const Point & bboxinv,
80 : CFixBitVec icoords[3])
81 : {
 : // All ones in Hilbert::inttype == the largest representable coordinate
82 : static const Hilbert::inttype max_inttype = static_cast<Hilbert::inttype>(-1);
83 :
84 : const Real // put (x,y,z) in [0,1]^3 (don't divide by 0)
85 90961287 : x = (p(0)-bbox.first(0)) * bboxinv(0),
86 :
87 : #if LIBMESH_DIM > 1
88 90961287 : y = (p(1)-bbox.first(1)) * bboxinv(1),
89 : #else
90 : y = 0.,
91 : #endif
92 :
93 : #if LIBMESH_DIM > 2
94 90961287 : z = (p(2)-bbox.first(2)) * bboxinv(2);
95 : #else
96 : z = 0.;
97 : #endif
98 :
99 : // (icoords) in [0,max_inttype]^3
100 90961287 : icoords[0] = static_cast<Hilbert::inttype>(x*max_inttype);
101 90961287 : icoords[1] = static_cast<Hilbert::inttype>(y*max_inttype);
102 90961287 : icoords[2] = static_cast<Hilbert::inttype>(z*max_inttype);
103 90961287 : }
104 :
105 :
 : // Compute the sort key for an element: the Hilbert index of its vertex
 : // average (paired with its unique_id when unique ids are enabled, which
 : // breaks ties between elements whose centers coincide, e.g. parents and
 : // children in an AMR hierarchy).
107 : Parallel::DofObjectKey
108 69627463 : get_dofobject_key (const Elem & e,
109 : const BoundingBox & bbox,
110 : const Point & bboxinv)
111 : {
112 : static const unsigned int sizeof_inttype = sizeof(Hilbert::inttype);
113 :
114 4294548 : Hilbert::HilbertIndices index;
115 278509852 : CFixBitVec icoords[3];
116 4294548 : Hilbert::BitVecType bv;
117 69627463 : get_hilbert_coords (e.vertex_average(), bbox, bboxinv, icoords);
 : // 8*sizeof_inttype bits per coordinate, 3 coordinates
118 4294548 : Hilbert::coordsToIndex (icoords, 8*sizeof_inttype, 3, bv);
119 69627463 : index = bv;
120 :
121 : #ifdef LIBMESH_ENABLE_UNIQUE_ID
122 73922011 : return std::make_pair(index, e.unique_id());
123 : #else
124 : return index;
125 : #endif
126 : }
127 :
128 :
130 : // Compute the hilbert index
 : // Node overload: a Node is-a Point, so its own coordinates are used
 : // directly (no vertex averaging as in the Elem overload above).
131 : Parallel::DofObjectKey
132 21333824 : get_dofobject_key (const Node & n,
133 : const BoundingBox & bbox,
134 : const Point & bboxinv)
135 : {
136 : static const unsigned int sizeof_inttype = sizeof(Hilbert::inttype);
137 :
138 2286785 : Hilbert::HilbertIndices index;
139 85335296 : CFixBitVec icoords[3];
140 2286785 : Hilbert::BitVecType bv;
141 21333824 : get_hilbert_coords (n, bbox, bboxinv, icoords);
 : // 8*sizeof_inttype bits per coordinate, 3 coordinates
142 2286785 : Hilbert::coordsToIndex (icoords, 8*sizeof_inttype, 3, bv);
143 21333824 : index = bv;
144 :
145 : #ifdef LIBMESH_ENABLE_UNIQUE_ID
146 23620609 : return std::make_pair(index, n.unique_id());
147 : #else
148 : return index;
149 : #endif
150 : }
151 :
152 : // Helper class for threaded Hilbert key computation
 : // Function object handed to Threads::parallel_for; fills _keys[i] with the
 : // DofObjectKey of the i-th object in the range. Writes are race-free
 : // because each subrange covers a disjoint [first_idx, ...) slice of _keys.
153 : class ComputeHilbertKeys
154 : {
155 : public:
 : // Note: _bbox and _bboxinv are stored by value (copied per body), while
 : // the shared output vector is held by reference.
156 1124 : ComputeHilbertKeys (const BoundingBox & bbox,
157 : const Point & bboxinv,
158 38980 : std::vector<Parallel::DofObjectKey> & keys) :
159 36732 : _bbox(bbox),
160 36732 : _bboxinv(bboxinv),
161 38980 : _keys(keys)
162 1124 : {}
163 :
164 : // computes the hilbert index for a node
165 21146 : void operator() (const ConstNodeRange & range) const
166 : {
167 2232 : std::size_t pos = range.first_idx();
168 5053610 : for (const auto & node : range)
169 : {
170 457357 : libmesh_assert(node);
171 457357 : libmesh_assert_less (pos, _keys.size());
172 5032464 : _keys[pos++] = get_dofobject_key (*node, _bbox, _bboxinv);
173 : }
174 21146 : }
175 :
176 : // computes the hilbert index for an element
177 21146 : void operator() (const ConstElemRange & range) const
178 : {
179 2232 : std::size_t pos = range.first_idx();
180 6281382 : for (const auto & elem : range)
181 : {
182 570240 : libmesh_assert(elem);
183 570240 : libmesh_assert_less (pos, _keys.size());
184 6260236 : _keys[pos++] = get_dofobject_key (*elem, _bbox, _bboxinv);
185 : }
186 21146 : }
187 :
188 : private:
189 : const BoundingBox _bbox;
190 : const Point _bboxinv;
191 : std::vector<Parallel::DofObjectKey> & _keys;
192 : };
193 :
194 : }
195 : #endif // defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
196 :
197 :
198 : namespace libMesh
199 : {
200 :
201 : // ------------------------------------------------------------
202 : // MeshCommunication class members
203 : #if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
204 19490 : void MeshCommunication::assign_global_indices (MeshBase & mesh) const
205 : {
206 1124 : LOG_SCOPE ("assign_global_indices()", "MeshCommunication");
207 :
208 : // This method determines partition-agnostic global indices
209 : // for nodes and elements.
210 :
211 : // Algorithm:
212 : // (1) compute the Hilbert key for each local node/element
213 : // (2) perform a parallel sort of the Hilbert key
214 : // (3) get the min/max value on each processor
215 : // (4) determine the position in the global ranking for
216 : // each local object
217 :
218 1124 : const Parallel::Communicator & communicator (mesh.comm());
219 :
220 : #ifdef DEBUG
221 : // This is going to be a mess if geometry is out of sync
222 562 : MeshTools::libmesh_assert_equal_points(mesh);
223 562 : MeshTools::libmesh_assert_equal_connectivity(mesh);
224 : #endif
225 :
226 : // Global bounding box. We choose the nodal bounding box for
227 : // backwards compatibility; the element bounding box may be looser
228 : // on curved elements.
229 : const BoundingBox bbox =
230 19490 : MeshTools::create_nodal_bounding_box (mesh);
231 :
232 18928 : const Point bboxinv = invert_bbox(bbox);
233 :
234 : //-------------------------------------------------------------
235 : // (1) compute Hilbert keys
236 : std::vector<Parallel::DofObjectKey>
237 1124 : node_keys, elem_keys;
238 :
239 : {
240 : // Nodes first
241 : {
242 20614 : ConstNodeRange nr (mesh.local_nodes_begin(),
243 77960 : mesh.local_nodes_end());
244 19490 : node_keys.resize (nr.size());
245 19490 : Threads::parallel_for (nr, ComputeHilbertKeys (bbox, bboxinv, node_keys));
246 :
247 : // // It's O(N^2) to check that these keys don't duplicate before the
248 : // // sort...
249 : //
250 : // MeshBase::const_node_iterator nodei = mesh.local_nodes_begin();
251 : // for (std::size_t i = 0; i != node_keys.size(); ++i, ++nodei)
252 : // {
253 : // MeshBase::const_node_iterator nodej = mesh.local_nodes_begin();
254 : // for (std::size_t j = 0; j != i; ++j, ++nodej)
255 : // {
256 : // if (node_keys[i] == node_keys[j])
257 : // {
258 : // CFixBitVec icoords[3], jcoords[3];
259 : // get_hilbert_coords(**nodej, bbox, bboxinv, jcoords);
260 : // libMesh::err << "node " << (*nodej)->id() << ", " << static_cast<Point &>(**nodej) << " has HilbertIndices " << node_keys[j] << std::endl;
261 : // get_hilbert_coords(**nodei, bbox, bboxinv, icoords);
262 : // libMesh::err << "node " << (*nodei)->id() << ", " << static_cast<Point &>(**nodei) << " has HilbertIndices " << node_keys[i] << std::endl;
263 : // libmesh_error_msg("Error: nodes with duplicate Hilbert keys!");
264 : // }
265 : // }
266 : // }
267 : }
268 :
269 : // Elements next
270 : {
271 20614 : ConstElemRange er (mesh.local_elements_begin(),
272 77960 : mesh.local_elements_end());
273 19490 : elem_keys.resize (er.size());
274 19490 : Threads::parallel_for (er, ComputeHilbertKeys (bbox, bboxinv, elem_keys));
275 :
276 : // // For elements, the keys can be (and in the case of TRI, are
277 : // // expected to be) duplicates, but only if the elements are at
278 : // // different levels
279 : // MeshBase::const_element_iterator elemi = mesh.local_elements_begin();
280 : // for (std::size_t i = 0; i != elem_keys.size(); ++i, ++elemi)
281 : // {
282 : // MeshBase::const_element_iterator elemj = mesh.local_elements_begin();
283 : // for (std::size_t j = 0; j != i; ++j, ++elemj)
284 : // {
285 : // if ((elem_keys[i] == elem_keys[j]) &&
286 : // ((*elemi)->level() == (*elemj)->level()))
287 : // {
288 : // libMesh::err << "level " << (*elemj)->level()
289 : // << " elem\n" << (**elemj)
290 : // << " vertex average " << (*elemj)->vertex_average()
291 : // << " has HilbertIndices " << elem_keys[j]
292 : // << " or " << get_dofobject_key((**elemj), bbox, bboxinv)
293 : // << std::endl;
294 : // libMesh::err << "level " << (*elemi)->level()
295 : // << " elem\n" << (**elemi)
296 : // << " vertex average " << (*elemi)->vertex_average()
297 : // << " has HilbertIndices " << elem_keys[i]
298 : // << " or " << get_dofobject_key((**elemi), bbox, bboxinv)
299 : // << std::endl;
300 : // libmesh_error_msg("Error: level " << (*elemi)->level() << " elements with duplicate Hilbert keys!");
301 : // }
302 : // }
303 : // }
304 : }
305 : } // done computing Hilbert keys
306 :
307 :
308 :
309 : //-------------------------------------------------------------
310 : // (2) parallel sort the Hilbert keys
 : // After sorting, each processor's "bin" holds a globally-contiguous,
 : // sorted slice of all keys; bin p's keys all precede bin p+1's.
311 : Parallel::Sort<Parallel::DofObjectKey> node_sorter (communicator,
312 20614 : node_keys);
313 19490 : node_sorter.sort(); /* done with node_keys */ //node_keys.clear();
314 :
315 : const std::vector<Parallel::DofObjectKey> & my_node_bin =
316 19490 : node_sorter.bin();
317 :
318 : Parallel::Sort<Parallel::DofObjectKey> elem_sorter (communicator,
319 20614 : elem_keys);
320 19490 : elem_sorter.sort(); /* done with elem_keys */ //elem_keys.clear();
321 :
322 : const std::vector<Parallel::DofObjectKey> & my_elem_bin =
323 19490 : elem_sorter.bin();
324 :
325 :
326 :
327 : //-------------------------------------------------------------
328 : // (3) get the max value on each processor
 : // upper_bounds[p] == largest key in processor p's bin; used below with
 : // std::lower_bound to find which processor's bin contains a given key.
329 : std::vector<Parallel::DofObjectKey>
330 20052 : node_upper_bounds(communicator.size()),
331 20052 : elem_upper_bounds(communicator.size());
332 :
333 : { // limit scope of temporaries
334 20052 : std::vector<Parallel::DofObjectKey> recvbuf(2*communicator.size());
335 : std::vector<unsigned short int> /* do not use a vector of bools here since it is not always so! */
336 20052 : empty_nodes (communicator.size()),
337 20052 : empty_elem (communicator.size());
338 20052 : std::vector<Parallel::DofObjectKey> my_max(2);
339 :
340 20052 : communicator.allgather (static_cast<unsigned short int>(my_node_bin.empty()), empty_nodes);
341 19490 : communicator.allgather (static_cast<unsigned short int>(my_elem_bin.empty()), empty_elem);
342 :
343 19490 : if (!my_node_bin.empty()) my_max[0] = my_node_bin.back();
344 19490 : if (!my_elem_bin.empty()) my_max[1] = my_elem_bin.back();
345 :
346 19490 : communicator.allgather (my_max, /* identical_buffer_sizes = */ true);
347 :
348 : // Be careful here. The *_upper_bounds will be used to find the processor
349 : // a given object belongs to. So, if a processor contains no objects (possible!)
350 : // then copy the bound from the lower processor id.
351 211908 : for (auto p : make_range(communicator.size()))
352 : {
353 193542 : node_upper_bounds[p] = my_max[2*p+0];
354 193542 : elem_upper_bounds[p] = my_max[2*p+1];
355 :
356 192418 : if (p > 0) // default hilbert index value is the OK upper bound for processor 0.
357 : {
358 173490 : if (empty_nodes[p]) node_upper_bounds[p] = node_upper_bounds[p-1];
359 173490 : if (empty_elem[p]) elem_upper_bounds[p] = elem_upper_bounds[p-1];
360 : }
361 : }
362 : }
363 :
364 :
365 :
366 : //-------------------------------------------------------------
367 : // (4) determine the position in the global ranking for
368 : // each local object
369 : {
370 : //----------------------------------------------
371 : // Nodes first -- all nodes, not just local ones
372 : {
373 : // Request sets to send to each processor
374 : std::map<processor_id_type, std::vector<Parallel::DofObjectKey>>
375 1124 : requested_ids;
376 : // Results to gather from each processor - kept in a map so we
377 : // do only one loop over nodes after all receives are done.
 : // NOTE(review): keyed by dof_id_type here (and in the elem section
 : // below), while the values stored are processor ids --
 : // processor_id_type looks intended; verify before changing.
378 : std::map<dof_id_type, std::vector<dof_id_type>>
379 1124 : filled_request;
380 :
381 : // build up list of requests
 : // For each node, recompute its key and ask the processor whose
 : // sorted bin contains that key (found via lower_bound on the
 : // upper-bound table) for the key's global rank.
382 14510350 : for (const auto & node : mesh.node_ptr_range())
383 : {
384 914714 : libmesh_assert(node);
385 : const Parallel::DofObjectKey hi =
386 8150680 : get_dofobject_key (*node, bbox, bboxinv);
387 : const processor_id_type pid =
388 : cast_int<processor_id_type>
389 8150680 : (std::distance (node_upper_bounds.begin(),
390 : std::lower_bound(node_upper_bounds.begin(),
391 : node_upper_bounds.end(),
392 8150680 : hi)));
393 :
394 914714 : libmesh_assert_less (pid, communicator.size());
395 :
396 8150680 : requested_ids[pid].push_back(hi);
397 18366 : }
398 :
399 : // The number of objects in my_node_bin on each processor
400 20052 : std::vector<dof_id_type> node_bin_sizes(communicator.size());
401 20052 : communicator.allgather (static_cast<dof_id_type>(my_node_bin.size()), node_bin_sizes);
402 :
403 : // The offset of my first global index
 : // (global indices are contiguous across bins, in rank order)
404 562 : dof_id_type my_offset = 0;
405 105954 : for (auto pid : make_range(communicator.rank()))
406 86745 : my_offset += node_bin_sizes[pid];
407 :
 : // Answer incoming key requests: a key's global index is its position
 : // in my sorted bin, offset by my_offset. Requested keys must exist
 : // in the bin (every key was placed there by the parallel sort).
408 : auto gather_functor =
409 86619 : [
410 : #ifndef NDEBUG
411 : & node_upper_bounds,
412 : & communicator,
413 : #endif
414 : & my_node_bin,
415 : my_offset
416 : ]
417 : (processor_id_type,
418 : const std::vector<Parallel::DofObjectKey> & keys,
419 14555926 : std::vector<dof_id_type> & global_ids)
420 : {
421 : // Fill the requests
422 2248 : const std::size_t keys_size = keys.size();
423 88867 : global_ids.reserve(keys_size);
424 8239547 : for (std::size_t idx=0; idx != keys_size; idx++)
425 : {
426 1829428 : const Parallel::DofObjectKey & hi = keys[idx];
427 914714 : libmesh_assert_less_equal (hi, node_upper_bounds[communicator.rank()]);
428 :
429 : // find the requested index in my node bin
430 : std::vector<Parallel::DofObjectKey>::const_iterator pos =
431 8150680 : std::lower_bound (my_node_bin.begin(), my_node_bin.end(), hi);
432 914714 : libmesh_assert (pos != my_node_bin.end());
433 914714 : libmesh_assert_equal_to (*pos, hi);
434 :
435 : // Finally, assign the global index based off the position of the index
436 : // in my array, properly offset.
437 9980108 : global_ids.push_back(cast_int<dof_id_type>(std::distance(my_node_bin.begin(), pos) + my_offset));
438 : }
439 107795 : };
440 :
441 : auto action_functor =
442 86619 : [&filled_request]
443 : (processor_id_type pid,
444 : const std::vector<Parallel::DofObjectKey> &,
445 87743 : const std::vector<dof_id_type> & global_ids)
446 : {
447 88867 : filled_request[pid] = global_ids;
448 107233 : };
449 :
450 : // Trade requests with other processors
 : // ex is a null example pointer used only to deduce the datum type.
451 562 : const dof_id_type * ex = nullptr;
452 : Parallel::pull_parallel_vector_data
453 19490 : (communicator, requested_ids, gather_functor, action_functor, ex);
454 :
455 : // We now have all the filled requests, so we can loop through our
456 : // nodes once and assign the global index to each one.
 : // Replies preserve request order, so walking the nodes in the same
 : // order as the request loop lets us consume each processor's reply
 : // vector sequentially via a per-processor cursor.
457 : {
458 : std::map<dof_id_type, std::vector<dof_id_type>::const_iterator>
459 1124 : next_obj_on_proc;
460 108357 : for (auto & p : filled_request)
461 88867 : next_obj_on_proc[p.first] = p.second.begin();
462 :
463 8212584 : for (auto & node : mesh.node_ptr_range())
464 : {
465 914714 : libmesh_assert(node);
466 : const Parallel::DofObjectKey hi =
467 8150680 : get_dofobject_key (*node, bbox, bboxinv);
468 : const processor_id_type pid =
469 : cast_int<processor_id_type>
470 8150680 : (std::distance (node_upper_bounds.begin(),
471 : std::lower_bound(node_upper_bounds.begin(),
472 : node_upper_bounds.end(),
473 1852914 : hi)));
474 :
475 914714 : libmesh_assert_less (pid, communicator.size());
476 914714 : libmesh_assert (next_obj_on_proc[pid] != filled_request[pid].end());
477 :
478 8150680 : const dof_id_type global_index = *next_obj_on_proc[pid];
479 914714 : libmesh_assert_less (global_index, mesh.n_nodes());
480 8150680 : node->set_id() = global_index;
481 :
482 8150680 : ++next_obj_on_proc[pid];
483 18366 : }
484 : }
485 : }
486 :
487 : //---------------------------------------------------
488 : // elements next -- all elements, not just local ones
 : // Same four-step exchange as for nodes, using the element key/bin data.
489 : {
490 : // Request sets to send to each processor
491 : std::map<processor_id_type, std::vector<Parallel::DofObjectKey>>
492 1124 : requested_ids;
493 : // Results to gather from each processor - kept in a map so we
494 : // do only one loop over elements after all receives are done.
495 : std::map<dof_id_type, std::vector<dof_id_type>>
496 1124 : filled_request;
497 :
498 16104832 : for (const auto & elem : mesh.element_ptr_range())
499 : {
500 1140480 : libmesh_assert(elem);
501 : const Parallel::DofObjectKey hi =
502 9173687 : get_dofobject_key (*elem, bbox, bboxinv);
503 : const processor_id_type pid =
504 : cast_int<processor_id_type>
505 9173687 : (std::distance (elem_upper_bounds.begin(),
506 : std::lower_bound(elem_upper_bounds.begin(),
507 : elem_upper_bounds.end(),
508 9173687 : hi)));
509 :
510 1140480 : libmesh_assert_less (pid, communicator.size());
511 :
512 9173687 : requested_ids[pid].push_back(hi);
513 18366 : }
514 :
515 : // The number of objects in my_elem_bin on each processor
516 20052 : std::vector<dof_id_type> elem_bin_sizes(communicator.size());
517 20052 : communicator.allgather (static_cast<dof_id_type>(my_elem_bin.size()), elem_bin_sizes);
518 :
519 : // The offset of my first global index
520 562 : dof_id_type my_offset = 0;
521 105954 : for (auto pid : make_range(communicator.rank()))
522 86745 : my_offset += elem_bin_sizes[pid];
523 :
524 : auto gather_functor =
525 59247 : [
526 : #ifndef NDEBUG
527 : & elem_upper_bounds,
528 : & communicator,
529 : #endif
530 : & my_elem_bin,
531 : my_offset
532 : ]
533 : (processor_id_type,
534 : const std::vector<Parallel::DofObjectKey> & keys,
535 17159295 : std::vector<dof_id_type> & global_ids)
536 : {
537 : // Fill the requests
538 2248 : const std::size_t keys_size = keys.size();
539 61495 : global_ids.reserve(keys_size);
540 9235182 : for (std::size_t idx=0; idx != keys_size; idx++)
541 : {
542 2280960 : const Parallel::DofObjectKey & hi = keys[idx];
543 1140480 : libmesh_assert_less_equal (hi, elem_upper_bounds[communicator.rank()]);
544 :
545 : // find the requested index in my elem bin
546 : std::vector<Parallel::DofObjectKey>::const_iterator pos =
547 9173687 : std::lower_bound (my_elem_bin.begin(), my_elem_bin.end(), hi);
548 1140480 : libmesh_assert (pos != my_elem_bin.end());
549 1140480 : libmesh_assert_equal_to (*pos, hi);
550 :
551 : // Finally, assign the global index based off the position of the index
552 : // in my array, properly offset.
553 11454647 : global_ids.push_back (cast_int<dof_id_type>(std::distance(my_elem_bin.begin(), pos) + my_offset));
554 : }
555 80423 : };
556 :
557 : auto action_functor =
558 59247 : [&filled_request]
559 : (processor_id_type pid,
560 : const std::vector<Parallel::DofObjectKey> &,
561 60371 : const std::vector<dof_id_type> & global_ids)
562 : {
563 61495 : filled_request[pid] = global_ids;
564 79861 : };
565 :
566 : // Trade requests with other processors
567 562 : const dof_id_type * ex = nullptr;
568 : Parallel::pull_parallel_vector_data
569 19490 : (communicator, requested_ids, gather_functor, action_functor, ex);
570 :
571 : // We now have all the filled requests, so we can loop through our
572 : // elements once and assign the global index to each one.
 : // Here the per-processor cursors live in a vector indexed by pid
 : // (covering all ranks), unlike the map used in the node section.
573 : {
574 : std::vector<std::vector<dof_id_type>::const_iterator>
575 20052 : next_obj_on_proc; next_obj_on_proc.reserve(communicator.size());
576 211908 : for (auto pid : make_range(communicator.size()))
577 383712 : next_obj_on_proc.push_back(filled_request[pid].begin());
578 :
579 16104832 : for (auto & elem : mesh.element_ptr_range())
580 : {
581 1140480 : libmesh_assert(elem);
582 : const Parallel::DofObjectKey hi =
583 9173687 : get_dofobject_key (*elem, bbox, bboxinv);
584 : const processor_id_type pid =
585 : cast_int<processor_id_type>
586 9173687 : (std::distance (elem_upper_bounds.begin(),
587 : std::lower_bound(elem_upper_bounds.begin(),
588 : elem_upper_bounds.end(),
589 1140480 : hi)));
590 :
591 1140480 : libmesh_assert_less (pid, communicator.size());
592 1140480 : libmesh_assert (next_obj_on_proc[pid] != filled_request[pid].end());
593 :
594 9173687 : const dof_id_type global_index = *next_obj_on_proc[pid];
595 1140480 : libmesh_assert_less (global_index, mesh.n_elem());
596 9173687 : elem->set_id() = global_index;
597 :
598 1140480 : ++next_obj_on_proc[pid];
599 18366 : }
600 : }
601 : }
602 : }
603 37856 : }
604 : #else // LIBMESH_HAVE_LIBHILBERT, LIBMESH_HAVE_MPI
 : // Without libHilbert + MPI there is nothing to do: ids are left as-is.
605 : void MeshCommunication::assign_global_indices (MeshBase &) const
606 : {
607 : }
608 : #endif // LIBMESH_HAVE_LIBHILBERT, LIBMESH_HAVE_MPI
609 :
610 :
611 : #if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
 : // Debug utility: recompute every local Hilbert key and do an O(N^2)
 : // pairwise scan for duplicates. Duplicate node keys are always an error;
 : // duplicate element keys are an error only between elements at the same
 : // refinement level (same-center parent/child pairs legitimately collide).
612 0 : void MeshCommunication::check_for_duplicate_global_indices (MeshBase & mesh) const
613 : {
614 0 : LOG_SCOPE ("check_for_duplicate_global_indices()", "MeshCommunication");
615 :
616 : // Global bounding box. We choose the nodal bounding box for
617 : // backwards compatibility; the element bounding box may be looser
618 : // on curved elements.
619 : const BoundingBox bbox =
620 0 : MeshTools::create_nodal_bounding_box (mesh);
621 :
622 0 : const Point bboxinv = invert_bbox(bbox);
623 :
624 : std::vector<Parallel::DofObjectKey>
625 0 : node_keys, elem_keys;
626 :
627 : {
628 : // Nodes first
629 : {
630 0 : ConstNodeRange nr (mesh.local_nodes_begin(),
631 0 : mesh.local_nodes_end());
632 0 : node_keys.resize (nr.size());
633 0 : Threads::parallel_for (nr, ComputeHilbertKeys (bbox, bboxinv, node_keys));
634 :
635 : // It's O(N^2) to check that these keys don't duplicate before the
636 : // sort...
637 0 : MeshBase::const_node_iterator nodei = mesh.local_nodes_begin();
638 0 : for (std::size_t i = 0; i != node_keys.size(); ++i, ++nodei)
639 : {
640 0 : MeshBase::const_node_iterator nodej = mesh.local_nodes_begin();
641 0 : for (std::size_t j = 0; j != i; ++j, ++nodej)
642 : {
643 0 : if (node_keys[i] == node_keys[j])
644 : {
 : // Report both offending nodes before aborting
645 0 : CFixBitVec icoords[3], jcoords[3];
646 0 : get_hilbert_coords(**nodej, bbox, bboxinv, jcoords);
647 : libMesh::err <<
648 0 : "node " << (*nodej)->id() << ", " <<
649 0 : *(const Point *)(*nodej) << " has HilbertIndices " <<
650 0 : node_keys[j] << std::endl;
651 0 : get_hilbert_coords(**nodei, bbox, bboxinv, icoords);
652 : libMesh::err <<
653 0 : "node " << (*nodei)->id() << ", " <<
654 0 : *(const Point *)(*nodei) << " has HilbertIndices " <<
655 0 : node_keys[i] << std::endl;
656 0 : libmesh_error_msg("Error: nodes with duplicate Hilbert keys!");
657 : }
658 : }
659 : }
660 : }
661 :
662 : // Elements next
663 : {
664 0 : ConstElemRange er (mesh.local_elements_begin(),
665 0 : mesh.local_elements_end());
666 0 : elem_keys.resize (er.size());
667 0 : Threads::parallel_for (er, ComputeHilbertKeys (bbox, bboxinv, elem_keys));
668 :
669 : // For elements, the keys can be (and in the case of TRI, are
670 : // expected to be) duplicates, but only if the elements are at
671 : // different levels
672 0 : MeshBase::const_element_iterator elemi = mesh.local_elements_begin();
673 0 : for (std::size_t i = 0; i != elem_keys.size(); ++i, ++elemi)
674 : {
675 0 : MeshBase::const_element_iterator elemj = mesh.local_elements_begin();
676 0 : for (std::size_t j = 0; j != i; ++j, ++elemj)
677 : {
678 0 : if ((elem_keys[i] == elem_keys[j]) &&
679 0 : ((*elemi)->level() == (*elemj)->level()))
680 : {
681 : libMesh::err <<
682 0 : "level " << (*elemj)->level() << " elem\n" <<
683 0 : (**elemj) << " vertex average " <<
684 0 : (*elemj)->vertex_average() << " has HilbertIndices " <<
685 0 : elem_keys[j] << " or " <<
686 0 : get_dofobject_key((**elemj), bbox, bboxinv) <<
687 0 : std::endl;
688 : libMesh::err <<
689 0 : "level " << (*elemi)->level() << " elem\n" <<
690 0 : (**elemi) << " vertex average " <<
691 0 : (*elemi)->vertex_average() << " has HilbertIndices " <<
692 0 : elem_keys[i] << " or " <<
693 0 : get_dofobject_key((**elemi), bbox, bboxinv) <<
694 0 : std::endl;
695 0 : libmesh_error_msg("Error: level " << (*elemi)->level() << " elements with duplicate Hilbert keys!");
696 : }
697 : }
698 : }
699 : }
700 : } // done checking Hilbert keys
701 0 : }
702 : #else // LIBMESH_HAVE_LIBHILBERT, LIBMESH_HAVE_MPI
 : // Without libHilbert + MPI no Hilbert keys exist, so nothing to check.
703 : void MeshCommunication::check_for_duplicate_global_indices (MeshBase &) const
704 : {
705 : }
706 : #endif // LIBMESH_HAVE_LIBHILBERT, LIBMESH_HAVE_MPI
707 :
708 : #if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
 : // Builds index_map : object id -> local rank (0..n-1) of the object when
 : // the [begin,end) range is ordered by Hilbert key. Purely local; no
 : // communication.
709 : template <typename ForwardIterator>
710 460230 : void MeshCommunication::find_local_indices (const BoundingBox & bbox,
711 : const ForwardIterator & begin,
712 : const ForwardIterator & end,
713 : std::unordered_map<dof_id_type, dof_id_type> & index_map) const
714 : {
715 1376 : LOG_SCOPE ("find_local_indices()", "MeshCommunication");
716 :
717 : // This method determines id-agnostic local indices
718 : // for nodes and elements by sorting Hilbert keys.
719 :
720 688 : index_map.clear();
721 :
722 459542 : const Point bboxinv = invert_bbox(bbox);
723 :
724 : //-------------------------------------------------------------
725 : // (1) compute Hilbert keys
726 : // These aren't trivial to compute, and we will need them again.
727 : // But the binsort will sort the input vector, trashing the order
728 : // that we'd like to rely on. So, two vectors...
 : // The std::map both sorts by key and maps each key back to its id.
 : // NOTE(review): emplace() drops a duplicate key, so objects with
 : // identical keys would share one entry -- presumably keys are unique
 : // here (unique_id tie-breaker when enabled); verify for the
 : // non-unique-id build.
729 1376 : std::map<Parallel::DofObjectKey, dof_id_type> hilbert_keys;
730 : {
731 1376 : LOG_SCOPE("local_hilbert_indices", "MeshCommunication");
732 5449024 : for (ForwardIterator it=begin; it!=end; ++it)
733 : {
734 4988794 : const Parallel::DofObjectKey hi(get_dofobject_key ((**it), bbox, bboxinv));
735 4988794 : hilbert_keys.emplace(hi, (*it)->id());
736 : }
737 : }
738 :
739 : {
 : // Walk the map in key order, handing out consecutive local indices.
740 688 : dof_id_type cnt = 0;
741 5449024 : for (auto key_val : hilbert_keys)
742 4988794 : index_map[key_val.second] = cnt++;
743 : }
744 460230 : }
745 :
746 :
747 : template <typename ForwardIterator>
748 659632 : void MeshCommunication::find_global_indices (const Parallel::Communicator & communicator,
749 : const BoundingBox & bbox,
750 : const ForwardIterator & begin,
751 : const ForwardIterator & end,
752 : std::vector<dof_id_type> & index_map) const
753 : {
754 28216 : LOG_SCOPE ("find_global_indices()", "MeshCommunication");
755 :
756 : // This method determines partition-agnostic global indices
757 : // for nodes and elements.
758 :
759 : // Algorithm:
760 : // (1) compute the Hilbert key for each local node/element
761 : // (2) perform a parallel sort of the Hilbert key
762 : // (3) get the min/max value on each processor
763 : // (4) determine the position in the global ranking for
764 : // each local object
765 14108 : index_map.clear();
766 659632 : std::size_t n_objects = std::distance (begin, end);
767 659632 : index_map.reserve(n_objects);
768 :
769 645524 : const Point bboxinv = invert_bbox(bbox);
770 :
771 : //-------------------------------------------------------------
772 : // (1) compute Hilbert keys
773 : // These aren't trivial to compute, and we will need them again.
774 : // But the binsort will sort the input vector, trashing the order
775 : // that we'd like to rely on. So, two vectors...
776 : std::vector<Parallel::DofObjectKey>
777 28216 : sorted_hilbert_keys,
778 28216 : hilbert_keys;
779 659632 : sorted_hilbert_keys.reserve(n_objects);
780 659632 : hilbert_keys.reserve(n_objects);
781 : {
782 28216 : LOG_SCOPE("compute_hilbert_indices()", "MeshCommunication");
783 28216 : const processor_id_type my_pid = communicator.rank();
784 77877064 : for (ForwardIterator it=begin; it!=end; ++it)
785 : {
786 40031059 : const Parallel::DofObjectKey hi(get_dofobject_key (**it, bbox, bboxinv));
787 40031059 : hilbert_keys.push_back(hi);
788 :
789 40031059 : const processor_id_type pid = (*it)->processor_id();
790 :
791 40031059 : if (pid == my_pid)
792 5036537 : sorted_hilbert_keys.push_back(hi);
793 :
794 : // someone needs to take care of unpartitioned objects!
795 41453403 : if ((my_pid == 0) &&
796 38608717 : (pid == DofObject::invalid_processor_id))
797 1752532 : sorted_hilbert_keys.push_back(hi);
798 : }
799 : }
800 :
801 : //-------------------------------------------------------------
802 : // (2) parallel sort the Hilbert keys
803 28216 : START_LOG ("parallel_sort()", "MeshCommunication");
804 687848 : Parallel::Sort<Parallel::DofObjectKey> sorter (communicator,
805 : sorted_hilbert_keys);
806 659632 : sorter.sort();
807 28216 : STOP_LOG ("parallel_sort()", "MeshCommunication");
808 659632 : const std::vector<Parallel::DofObjectKey> & my_bin = sorter.bin();
809 :
810 : // The number of objects in my_bin on each processor
811 673740 : std::vector<unsigned int> bin_sizes(communicator.size());
812 673740 : communicator.allgather (static_cast<unsigned int>(my_bin.size()), bin_sizes);
813 :
814 : // The offset of my first global index
815 14108 : unsigned int my_offset = 0;
816 3731530 : for (auto pid : make_range(communicator.rank()))
817 3078952 : my_offset += bin_sizes[pid];
818 :
819 : //-------------------------------------------------------------
820 : // (3) get the max value on each processor
821 : std::vector<Parallel::DofObjectKey>
822 673740 : upper_bounds(1);
823 :
824 659632 : if (!my_bin.empty())
825 28212 : upper_bounds[0] = my_bin.back();
826 :
827 659632 : communicator.allgather (upper_bounds, /* identical_buffer_sizes = */ true);
828 :
829 : // Be careful here. The *_upper_bounds will be used to find the processor
830 : // a given object belongs to. So, if a processor contains no objects (possible!)
831 : // then copy the bound from the lower processor id.
832 6803428 : for (auto p : IntRange<processor_id_type>(1, communicator.size()))
833 6157904 : if (!bin_sizes[p]) upper_bounds[p] = upper_bounds[p-1];
834 :
835 :
836 : //-------------------------------------------------------------
837 : // (4) determine the position in the global ranking for
838 : // each local object
839 : {
840 : //----------------------------------------------
841 : // all objects, not just local ones
842 :
843 : // Request sets to send to each processor
844 : std::map<processor_id_type, std::vector<Parallel::DofObjectKey>>
845 28216 : requested_ids;
846 : // Results to gather from each processor
847 : std::map<processor_id_type, std::vector<dof_id_type>>
848 28216 : filled_request;
849 :
850 : // build up list of requests
851 14108 : std::vector<Parallel::DofObjectKey>::const_iterator hi =
852 14108 : hilbert_keys.begin();
853 :
854 40690804 : for (ForwardIterator it = begin; it != end; ++it)
855 : {
856 1422342 : libmesh_assert (hi != hilbert_keys.end());
857 :
858 : std::vector<Parallel::DofObjectKey>::iterator lb =
859 40031059 : std::lower_bound(upper_bounds.begin(), upper_bounds.end(),
860 1422342 : *hi);
861 :
862 40031059 : const processor_id_type pid =
863 : cast_int<processor_id_type>
864 1422342 : (std::distance (upper_bounds.begin(), lb));
865 :
866 1422342 : libmesh_assert_less (pid, communicator.size());
867 :
868 40031059 : requested_ids[pid].push_back(*hi);
869 :
870 1422342 : ++hi;
871 : // go ahead and put pid in index_map, that way we
872 : // don't have to repeat the std::lower_bound()
873 40031059 : index_map.push_back(pid);
874 : }
875 :
876 4983086 : auto gather_functor =
877 40811124 : [
878 : #ifndef NDEBUG
879 : & upper_bounds,
880 : & communicator,
881 : #endif
882 : & bbox,
883 : & my_bin,
884 : my_offset
885 : ]
886 : (processor_id_type, const std::vector<Parallel::DofObjectKey> & keys,
887 56422 : std::vector<dof_id_type> & global_ids)
888 : {
889 : // Ignore unused lambda capture warnings in devel mode
890 28211 : libmesh_ignore(bbox);
891 :
892 : // Fill the requests
893 56422 : const std::size_t keys_size = keys.size();
894 28211 : global_ids.clear();
895 3681173 : global_ids.reserve(keys_size);
896 43712232 : for (std::size_t idx=0; idx != keys_size; idx++)
897 : {
898 2844686 : const Parallel::DofObjectKey & hilbert_indices = keys[idx];
899 1422342 : libmesh_assert_less_equal (hilbert_indices, upper_bounds[communicator.rank()]);
900 :
901 : // find the requested index in my node bin
902 : std::vector<Parallel::DofObjectKey>::const_iterator pos =
903 40031059 : std::lower_bound (my_bin.begin(), my_bin.end(), hilbert_indices);
904 1422342 : libmesh_assert (pos != my_bin.end());
905 : #ifdef DEBUG
906 : // If we could not find the requested Hilbert index in
907 : // my_bin, something went terribly wrong, possibly the
908 : // Mesh was displaced differently on different processors,
909 : // and therefore the Hilbert indices don't agree.
910 1422342 : if (*pos != hilbert_indices)
911 : {
912 : // The input will be hilbert_indices. We convert it
913 : // to BitVecType using the operator= provided by the
914 : // BitVecType class. BitVecType is a CBigBitVec!
915 0 : Hilbert::BitVecType input;
916 : #ifdef LIBMESH_ENABLE_UNIQUE_ID
917 0 : input = hilbert_indices.first;
918 : #else
919 : input = hilbert_indices;
920 : #endif
921 :
922 : // Get output in a vector of CBigBitVec
923 0 : std::vector<CBigBitVec> output(3);
924 :
925 : // Call the indexToCoords function
926 0 : Hilbert::indexToCoords(output.data(), 8*sizeof(Hilbert::inttype), 3, input);
927 :
928 : // The entries in the output racks are integers in the
929 : // range [0, Hilbert::inttype::max] which can be
930 : // converted to floating point values in [0,1] and
931 : // finally to actual values using the bounding box.
932 0 : const Real max_int_as_real =
933 : static_cast<Real>(std::numeric_limits<Hilbert::inttype>::max());
934 :
935 : // Get the points in [0,1]^3. The zeroth rack of each entry in
936 : // 'output' maps to the normalized x, y, and z locations,
937 : // respectively.
938 0 : Point p_hat(static_cast<Real>(output[0].racks()[0]) / max_int_as_real,
939 0 : static_cast<Real>(output[1].racks()[0]) / max_int_as_real,
940 0 : static_cast<Real>(output[2].racks()[0]) / max_int_as_real);
941 :
942 : // Convert the points from [0,1]^3 to their actual (x,y,z) locations
943 : Real
944 0 : xmin = bbox.first(0),
945 0 : xmax = bbox.second(0),
946 0 : ymin = bbox.first(1),
947 0 : ymax = bbox.second(1),
948 0 : zmin = bbox.first(2),
949 0 : zmax = bbox.second(2);
950 :
951 : // Convert the points from [0,1]^3 to their actual (x,y,z) locations
952 0 : Point p(xmin + (xmax-xmin)*p_hat(0),
953 0 : ymin + (ymax-ymin)*p_hat(1),
954 0 : zmin + (zmax-zmin)*p_hat(2));
955 :
956 0 : libmesh_error_msg("Could not find hilbert indices: "
957 : << hilbert_indices
958 : << " corresponding to point " << p);
959 : }
960 : #endif
961 :
962 : // Finally, assign the global index based off the position of the index
963 : // in my array, properly offset.
964 42875747 : global_ids.push_back (cast_int<dof_id_type>(std::distance(my_bin.begin(), pos) + my_offset));
965 : }
966 : };
967 :
968 716054 : auto action_functor =
969 7249502 : [&filled_request]
970 : (processor_id_type pid,
971 : const std::vector<Parallel::DofObjectKey> &,
972 28211 : const std::vector<dof_id_type> & global_ids)
973 : {
974 3681173 : filled_request[pid] = global_ids;
975 : };
976 :
977 14108 : const dof_id_type * ex = nullptr;
978 : Parallel::pull_parallel_vector_data
979 659632 : (communicator, requested_ids, gather_functor, action_functor, ex);
980 :
981 : // We now have all the filled requests, so we can loop through our
982 : // nodes once and assign the global index to each one.
983 : {
984 : std::vector<std::vector<dof_id_type>::const_iterator>
985 673740 : next_obj_on_proc; next_obj_on_proc.reserve(communicator.size());
986 7463060 : for (auto pid : make_range(communicator.size()))
987 13578640 : next_obj_on_proc.push_back(filled_request[pid].begin());
988 :
989 14108 : unsigned int cnt=0;
990 42113035 : for (ForwardIterator it = begin; it != end; ++it, cnt++)
991 : {
992 1422342 : const processor_id_type pid = cast_int<processor_id_type>
993 40031059 : (index_map[cnt]);
994 :
995 1422342 : libmesh_assert_less (pid, communicator.size());
996 1422342 : libmesh_assert (next_obj_on_proc[pid] != filled_request[pid].end());
997 :
998 40031059 : const dof_id_type global_index = *next_obj_on_proc[pid];
999 40031059 : index_map[cnt] = global_index;
1000 :
1001 1422342 : ++next_obj_on_proc[pid];
1002 : }
1003 : }
1004 : }
1005 :
1006 14108 : libmesh_assert_equal_to(index_map.size(), n_objects);
1007 1291048 : }
1008 : #else // LIBMESH_HAVE_LIBHILBERT, LIBMESH_HAVE_MPI
1009 : template <typename ForwardIterator>
1010 : void MeshCommunication::find_local_indices (const libMesh::BoundingBox &,
1011 : const ForwardIterator & begin,
1012 : const ForwardIterator & end,
1013 : std::unordered_map<dof_id_type, dof_id_type> & index_map) const
1014 : {
1015 : index_map.clear();
1016 : dof_id_type index = 0;
1017 : for (ForwardIterator it=begin; it!=end; ++it)
1018 : index_map[(*it)->id()] = (index++);
1019 : }
1020 :
1021 : template <typename ForwardIterator>
1022 : void MeshCommunication::find_global_indices (const Parallel::Communicator &,
1023 : const libMesh::BoundingBox &,
1024 : const ForwardIterator & begin,
1025 : const ForwardIterator & end,
1026 : std::vector<dof_id_type> & index_map) const
1027 : {
1028 : index_map.clear();
1029 : index_map.reserve(std::distance (begin, end));
1030 : unsigned int index = 0;
1031 : for (ForwardIterator it=begin; it!=end; ++it)
1032 : index_map.push_back(index++);
1033 : }
1034 : #endif // LIBMESH_HAVE_LIBHILBERT, LIBMESH_HAVE_MPI
1035 :
1036 :
1037 :
//------------------------------------------------------------------
// Explicit template instantiations.  The template definitions live in
// this translation unit, so every iterator type the library calls
// these methods with must be instantiated here: find_global_indices()
// for const/non-const node and element iterators, and
// find_local_indices() for const element iterators only.
template LIBMESH_EXPORT void MeshCommunication::find_global_indices<MeshBase::const_node_iterator> (const Parallel::Communicator &,
                                                                                                    const libMesh::BoundingBox &,
                                                                                                    const MeshBase::const_node_iterator &,
                                                                                                    const MeshBase::const_node_iterator &,
                                                                                                    std::vector<dof_id_type> &) const;

template LIBMESH_EXPORT void MeshCommunication::find_global_indices<MeshBase::const_element_iterator> (const Parallel::Communicator &,
                                                                                                       const libMesh::BoundingBox &,
                                                                                                       const MeshBase::const_element_iterator &,
                                                                                                       const MeshBase::const_element_iterator &,
                                                                                                       std::vector<dof_id_type> &) const;
template LIBMESH_EXPORT void MeshCommunication::find_global_indices<MeshBase::node_iterator> (const Parallel::Communicator &,
                                                                                              const libMesh::BoundingBox &,
                                                                                              const MeshBase::node_iterator &,
                                                                                              const MeshBase::node_iterator &,
                                                                                              std::vector<dof_id_type> &) const;

template LIBMESH_EXPORT void MeshCommunication::find_global_indices<MeshBase::element_iterator> (const Parallel::Communicator &,
                                                                                                 const libMesh::BoundingBox &,
                                                                                                 const MeshBase::element_iterator &,
                                                                                                 const MeshBase::element_iterator &,
                                                                                                 std::vector<dof_id_type> &) const;
template LIBMESH_EXPORT void MeshCommunication::find_local_indices<MeshBase::const_element_iterator> (const libMesh::BoundingBox &,
                                                                                                      const MeshBase::const_element_iterator &,
                                                                                                      const MeshBase::const_element_iterator &,
                                                                                                      std::unordered_map<dof_id_type, dof_id_type> &) const;
1065 :
1066 : } // namespace libMesh
|