Line data Source code
1 : // The libMesh Finite Element Library.
2 : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 :
4 : // This library is free software; you can redistribute it and/or
5 : // modify it under the terms of the GNU Lesser General Public
6 : // License as published by the Free Software Foundation; either
7 : // version 2.1 of the License, or (at your option) any later version.
8 :
9 : // This library is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 : // Lesser General Public License for more details.
13 :
14 : // You should have received a copy of the GNU Lesser General Public
15 : // License along with this library; if not, write to the Free Software
16 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :
18 :
19 : // Local includes
20 : #include "libmesh/parallel_sort.h"
21 :
22 : // libMesh includes
23 : #include "libmesh/libmesh_common.h"
24 : #include "libmesh/parallel_bin_sorter.h"
25 : #include "libmesh/parallel_hilbert.h"
26 :
27 : // TIMPI includes
28 : #include "timpi/parallel_implementation.h"
29 : #include "timpi/parallel_sync.h"
30 :
31 : // C++ includes
32 : #include <algorithm>
33 : #include <iostream>
34 :
35 :
36 : namespace libMesh
37 : {
38 :
39 :
40 : namespace Parallel {
41 :
42 : // The Constructor sorts the local data using
43 : // std::sort(). Therefore, the construction of
44 : // a Parallel::Sort object takes O(n log n) time,
45 : // where n is the length of _data.
46 : template <typename KeyType, typename IdxType>
47 698683 : Sort<KeyType,IdxType>::Sort(const Parallel::Communicator & comm_in,
48 : std::vector<KeyType> & d) :
49 : ParallelObject(comm_in),
50 698683 : _n_procs(cast_int<processor_id_type>(comm_in.size())),
51 698683 : _proc_id(cast_int<processor_id_type>(comm_in.rank())),
52 668215 : _bin_is_sorted(false),
53 729151 : _data(d)
54 : {
55 698683 : std::sort(_data.begin(), _data.end());
56 :
57 : // Allocate storage
58 698683 : _local_bin_sizes.resize(_n_procs);
59 698683 : }
60 :
61 :
62 :
63 : template <typename KeyType, typename IdxType>
64 698683 : void Sort<KeyType,IdxType>::sort()
65 : {
66 : // Find the global data size. The sorting
67 : // algorithms assume they have a range to
68 : // work with, so catch the degenerate cases here
69 698683 : IdxType global_data_size = cast_int<IdxType>(_data.size());
70 :
71 698683 : this->comm().sum (global_data_size);
72 :
73 698683 : if (global_data_size < 2)
74 : {
75 : // the entire global range is either empty
76 : // or contains only one element
77 140 : _my_bin = _data;
78 :
79 144 : this->comm().allgather (static_cast<IdxType>(_my_bin.size()),
80 140 : _local_bin_sizes);
81 : }
82 : else
83 : {
84 713773 : if (this->n_processors() > 1)
85 : {
86 697941 : this->binsort();
87 697941 : this->communicate_bins();
88 : }
89 : else
90 602 : _my_bin = _data;
91 :
92 698543 : this->sort_local_bin();
93 : }
94 :
95 : // Set sorted flag to true
96 698683 : _bin_is_sorted = true;
97 698683 : }
98 :
99 :
100 :
101 : template <typename KeyType, typename IdxType>
102 69 : void Sort<KeyType,IdxType>::binsort()
103 : {
104 : // Find the global min and max from all the
105 : // processors.
106 71 : std::vector<KeyType> global_min_max(2);
107 :
108 : // Insert the local min and max for this processor
109 71 : global_min_max[0] = -_data.front();
110 69 : global_min_max[1] = _data.back();
111 :
112 : // Communicate to determine the global
113 : // min and max for all processors.
114 69 : this->comm().max(global_min_max);
115 :
116 : // Multiply the min by -1 to obtain the true min
117 69 : global_min_max[0] *= -1;
118 :
119 : // Bin-Sort based on the global min and max
120 71 : Parallel::BinSorter<KeyType> bs(this->comm(), _data);
121 69 : bs.binsort(_n_procs, global_min_max[1], global_min_max[0]);
122 :
123 : // Now save the local bin sizes in a vector so
124 : // we don't have to keep around the BinSorter.
125 760 : for (processor_id_type i=0; i<_n_procs; ++i)
126 695 : _local_bin_sizes[i] = bs.sizeof_bin(i);
127 69 : }
128 :
129 :
130 :
131 : #if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
// Full specialization for HilbertIndices, there is a fair amount of
// code duplication here that could potentially be consolidated with the
// above method
template <>
void Sort<Parallel::DofObjectKey,unsigned int>::binsort()
{
  // Find the global min and max from all the
  // processors.  Do this using MPI_Allreduce.
  Parallel::DofObjectKey
    local_min, local_max,
    global_min, global_max;

  if (_data.empty())
    {
      // This processor has no data; contribute sentinel values
      // that lose every comparison in the custom min/max
      // reductions below: all-ones Hilbert racks (and max unique
      // id) as the "min", all-zeros (and 0) as the "max".
#ifdef LIBMESH_ENABLE_UNIQUE_ID
      local_min.first.rack0 = local_min.first.rack1 = local_min.first.rack2 = static_cast<Hilbert::inttype>(-1);
      local_min.second = std::numeric_limits<unique_id_type>::max();
      local_max.first.rack0 = local_max.first.rack1 = local_max.first.rack2 = 0;
      local_max.second = 0;
#else
      local_min.rack0 = local_min.rack1 = local_min.rack2 = static_cast<Hilbert::inttype>(-1);
      local_max.rack0 = local_max.rack1 = local_max.rack2 = 0;
#endif
    }
  else
    {
      // _data was sorted at construction, so the local extrema
      // are simply its first and last entries.
      local_min = _data.front();
      local_max = _data.back();
    }

  MPI_Op hilbert_max, hilbert_min;

  // MPI's built-in MPI_MIN/MPI_MAX do not understand the
  // DofObjectKey type, so register custom reduction operations.
  // The 'true' flag marks them commutative.
  MPI_Op_create ((MPI_User_function*)dofobjectkey_max_op, true, &hilbert_max);
  MPI_Op_create ((MPI_User_function*)dofobjectkey_min_op, true, &hilbert_min);

  // Communicate to determine the global
  // min and max for all processors.
  MPI_Allreduce(&local_min,
                &global_min,
                1,
                Parallel::StandardType<Parallel::DofObjectKey>(&local_min),
                hilbert_min,
                this->comm().get());

  MPI_Allreduce(&local_max,
                &global_max,
                1,
                Parallel::StandardType<Parallel::DofObjectKey>(&local_max),
                hilbert_max,
                this->comm().get());

  // Release the custom reduction operations now that both
  // reductions are complete.
  MPI_Op_free (&hilbert_max);
  MPI_Op_free (&hilbert_min);

  // Bin-Sort based on the global min and max
  Parallel::BinSorter<Parallel::DofObjectKey> bs(this->comm(),_data);
  bs.binsort(_n_procs, global_max, global_min);

  // Now save the local bin sizes in a vector so
  // we don't have to keep around the BinSorter.
  for (processor_id_type i=0; i<_n_procs; ++i)
    _local_bin_sizes[i] = bs.sizeof_bin(i);
}
195 :
196 : #endif // #ifdef LIBMESH_HAVE_LIBHILBERT
197 :
198 :
199 : template <typename KeyType, typename IdxType>
200 697941 : void Sort<KeyType,IdxType>::communicate_bins()
201 : {
202 : #ifdef LIBMESH_HAVE_MPI
203 : // Find each section of our data to send
204 15230 : IdxType local_offset = 0;
205 30460 : std::map<processor_id_type, std::vector<KeyType> > pushed_keys, received_keys;
206 :
207 7884912 : for (processor_id_type i=0; i != _n_procs; ++i)
208 : {
209 7186971 : IdxType next_offset = local_offset + _local_bin_sizes[i];
210 7186971 : if (_local_bin_sizes[i])
211 : {
212 642370 : auto begin = _data.begin() + local_offset;
213 19713 : auto end = _data.begin() + next_offset;
214 642370 : pushed_keys[i].assign(begin, end);
215 : }
216 :
217 30460 : local_offset = next_offset;
218 : }
219 :
220 737367 : auto keys_action_functor =
221 1205888 : [& received_keys]
222 : (processor_id_type pid,
223 19713 : const std::vector<KeyType> & keys)
224 : {
225 642370 : received_keys[pid] = keys;
226 : };
227 :
228 : Parallel::push_parallel_vector_data
229 697941 : (this->comm(), pushed_keys, keys_action_functor);
230 :
231 15230 : std::size_t my_bin_size = 0;
232 1340311 : for (auto & p : received_keys)
233 662083 : my_bin_size += p.second.size();
234 :
235 697941 : _my_bin.clear();
236 697941 : _my_bin.reserve(my_bin_size);
237 :
238 1340311 : for (auto & p : received_keys)
239 642370 : _my_bin.insert(_my_bin.end(), p.second.begin(), p.second.end());
240 :
241 : #ifdef DEBUG
242 30460 : std::vector<IdxType> global_bin_sizes = _local_bin_sizes;
243 :
244 15230 : this->comm().sum(global_bin_sizes);
245 :
246 15230 : libmesh_assert_equal_to
247 : (global_bin_sizes[this->processor_id()], _my_bin.size());
248 : #endif
249 :
250 : #endif // LIBMESH_HAVE_MPI
251 697941 : }
252 :
253 :
254 :
255 : template <typename KeyType, typename IdxType>
256 698543 : void Sort<KeyType,IdxType>::sort_local_bin()
257 : {
258 698543 : std::sort(_my_bin.begin(), _my_bin.end());
259 698543 : }
260 :
261 :
262 :
263 : template <typename KeyType, typename IdxType>
264 698683 : const std::vector<KeyType> & Sort<KeyType,IdxType>::bin()
265 : {
266 698683 : if (!_bin_is_sorted)
267 : {
268 0 : libMesh::out << "Warning! Bin is not yet sorted!" << std::endl;
269 : }
270 :
271 698683 : return _my_bin;
272 : }
273 :
274 : }
275 :
276 :
277 :
// Explicitly instantiate the supported key types: int, double,
// and (when Hilbert and MPI support are enabled) DofObjectKey.
template class LIBMESH_EXPORT Parallel::Sort<int, unsigned int>;
template class LIBMESH_EXPORT Parallel::Sort<double, unsigned int>;
#if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
template class LIBMESH_EXPORT Parallel::Sort<Parallel::DofObjectKey, unsigned int>;
#endif
284 :
285 : } // namespace libMesh
|