libMesh
parallel_sort.C
// The libMesh Finite Element Library.
// Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.

// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA

// Local includes
#include "libmesh/parallel_sort.h"

// libMesh includes
#include "libmesh/libmesh_common.h"
#include "libmesh/parallel_bin_sorter.h"
#include "libmesh/parallel_hilbert.h"

// TIMPI includes
#include "timpi/parallel_implementation.h"
#include "timpi/parallel_sync.h"

// C++ includes
#include <algorithm>
#include <iostream>


namespace libMesh
{


namespace Parallel {

// The Constructor sorts the local data using
// std::sort().  Therefore, the construction of
// a Parallel::Sort object takes O(n log n) time,
// where n is the length of _data.
template <typename KeyType, typename IdxType>
Sort<KeyType,IdxType>::Sort(const Parallel::Communicator & comm_in,
                            std::vector<KeyType> & d) :
  ParallelObject(comm_in),
  _n_procs(cast_int<processor_id_type>(comm_in.size())),
  _proc_id(cast_int<processor_id_type>(comm_in.rank())),
  _bin_is_sorted(false),
  _data(d)
{
  std::sort(_data.begin(), _data.end());

  // Allocate storage
  _local_bin_sizes.resize(_n_procs);
}


template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::sort()
{
  // Find the global data size.  The sorting
  // algorithms assume they have a range to
  // work with, so catch the degenerate cases here
  IdxType global_data_size = cast_int<IdxType>(_data.size());

  this->comm().sum (global_data_size);

  if (global_data_size < 2)
    {
      // the entire global range is either empty
      // or contains only one element
      _my_bin = _data;

      this->comm().allgather (static_cast<IdxType>(_my_bin.size()),
                              _local_bin_sizes);
    }
  else
    {
      if (this->n_processors() > 1)
        {
          this->binsort();
          this->communicate_bins();
        }
      else
        _my_bin = _data;

      this->sort_local_bin();
    }

  // Set sorted flag to true
  _bin_is_sorted = true;
}


template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::binsort()
{
  // Find the global min and max from all the
  // processors.
  std::vector<KeyType> global_min_max(2);

  // Insert the local min and max for this processor
  global_min_max[0] = -_data.front();
  global_min_max[1] =  _data.back();

  // Communicate to determine the global
  // min and max for all processors.
  this->comm().max(global_min_max);

  // Multiply the min by -1 to obtain the true min
  global_min_max[0] *= -1;

  // Bin-Sort based on the global min and max
  Parallel::BinSorter<KeyType> bs(this->comm(), _data);
  bs.binsort(_n_procs, global_min_max[1], global_min_max[0]);

  // Now save the local bin sizes in a vector so
  // we don't have to keep around the BinSorter.
  for (processor_id_type i=0; i<_n_procs; ++i)
    _local_bin_sizes[i] = bs.sizeof_bin(i);
}
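
The negated-minimum trick above lets a single element-wise max reduction recover both the global minimum and the global maximum at once. Below is a minimal standalone sketch of the same idea without MPI; the two hard-coded vectors are hypothetical per-processor values, not libMesh code.

#include <algorithm>
#include <iostream>
#include <vector>

int main()
{
  // Pretend these are the {-local_min, local_max} pairs from two processors,
  // with the minimum already negated as in binsort().
  std::vector<double> rank0 = {-3.0, 9.0};   // local min 3, local max 9
  std::vector<double> rank1 = {-1.0, 7.0};   // local min 1, local max 7

  // An element-wise max stands in for this->comm().max(global_min_max):
  // max(-a, -b) == -min(a, b), so the first slot still encodes the min.
  std::vector<double> reduced = {std::max(rank0[0], rank1[0]),
                                 std::max(rank0[1], rank1[1])};

  // Multiply by -1 to obtain the true global min.
  reduced[0] *= -1;

  std::cout << "global min = " << reduced[0]    // prints 1
            << ", global max = " << reduced[1]  // prints 9
            << std::endl;
}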


#if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
// Full specialization for HilbertIndices, there is a fair amount of
// code duplication here that could potentially be consolidated with the
// above method
template <>
void Sort<Parallel::DofObjectKey,unsigned int>::binsort()
{
  // Find the global min and max from all the
  // processors.  Do this using MPI_Allreduce.
  Parallel::DofObjectKey
    local_min,  local_max,
    global_min, global_max;

  if (_data.empty())
    {
#ifdef LIBMESH_ENABLE_UNIQUE_ID
      local_min.first.rack0 = local_min.first.rack1 = local_min.first.rack2 = static_cast<Hilbert::inttype>(-1);
      local_min.second = std::numeric_limits<unique_id_type>::max();
      local_max.first.rack0 = local_max.first.rack1 = local_max.first.rack2 = 0;
      local_max.second = 0;
#else
      local_min.rack0 = local_min.rack1 = local_min.rack2 = static_cast<Hilbert::inttype>(-1);
      local_max.rack0 = local_max.rack1 = local_max.rack2 = 0;
#endif
    }
  else
    {
      local_min = _data.front();
      local_max = _data.back();
    }

  MPI_Op hilbert_max, hilbert_min;

  MPI_Op_create ((MPI_User_function*)dofobjectkey_max_op, true, &hilbert_max);
  MPI_Op_create ((MPI_User_function*)dofobjectkey_min_op, true, &hilbert_min);

  // Communicate to determine the global
  // min and max for all processors.
  MPI_Allreduce(&local_min,
                &global_min,
                1,
                Parallel::StandardType<Parallel::DofObjectKey>(&local_min),
                hilbert_min,
                this->comm().get());

  MPI_Allreduce(&local_max,
                &global_max,
                1,
                Parallel::StandardType<Parallel::DofObjectKey>(&local_max),
                hilbert_max,
                this->comm().get());

  MPI_Op_free (&hilbert_max);
  MPI_Op_free (&hilbert_min);

  // Bin-Sort based on the global min and max
  Parallel::BinSorter<Parallel::DofObjectKey> bs(this->comm(),_data);
  bs.binsort(_n_procs, global_max, global_min);

  // Now save the local bin sizes in a vector so
  // we don't have to keep around the BinSorter.
  for (processor_id_type i=0; i<_n_procs; ++i)
    _local_bin_sizes[i] = bs.sizeof_bin(i);
}

#endif // #ifdef LIBMESH_HAVE_LIBHILBERT


template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::communicate_bins()
{
#ifdef LIBMESH_HAVE_MPI
  // Find each section of our data to send
  IdxType local_offset = 0;
  std::map<processor_id_type, std::vector<KeyType> > pushed_keys, received_keys;

  for (processor_id_type i=0; i != _n_procs; ++i)
    {
      IdxType next_offset = local_offset + _local_bin_sizes[i];
      if (_local_bin_sizes[i])
        {
          auto begin = _data.begin() + local_offset;
          auto end = _data.begin() + next_offset;
          pushed_keys[i].assign(begin, end);
        }

      local_offset = next_offset;
    }

  auto keys_action_functor =
    [& received_keys]
    (processor_id_type pid,
     const std::vector<KeyType> & keys)
    {
      received_keys[pid] = keys;
    };

  Parallel::push_parallel_vector_data
    (this->comm(), pushed_keys, keys_action_functor);

  std::size_t my_bin_size = 0;
  for (auto & p : received_keys)
    my_bin_size += p.second.size();

  _my_bin.clear();
  _my_bin.reserve(my_bin_size);

  for (auto & p : received_keys)
    _my_bin.insert(_my_bin.end(), p.second.begin(), p.second.end());

#ifdef DEBUG
  std::vector<IdxType> global_bin_sizes = _local_bin_sizes;

  this->comm().sum(global_bin_sizes);

  libmesh_assert_equal_to
    (global_bin_sizes[this->processor_id()], _my_bin.size());
#endif

#endif // LIBMESH_HAVE_MPI
}


template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::sort_local_bin()
{
  std::sort(_my_bin.begin(), _my_bin.end());
}


template <typename KeyType, typename IdxType>
const std::vector<KeyType> & Sort<KeyType,IdxType>::bin()
{
  if (!_bin_is_sorted)
    {
      libMesh::out << "Warning! Bin is not yet sorted!" << std::endl;
    }

  return _my_bin;
}

}



// Explicitly instantiate for int, double
template class LIBMESH_EXPORT Parallel::Sort<int, unsigned int>;
template class LIBMESH_EXPORT Parallel::Sort<double, unsigned int>;
#if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
template class LIBMESH_EXPORT Parallel::Sort<Parallel::DofObjectKey, unsigned int>;
#endif

} // namespace libMesh
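
A rough usage sketch for the class defined in this file (not part of the libMesh sources; the key values are illustrative, and the global communicator from LibMeshInit is assumed): construct the sorter with a communicator and the local keys, call sort(), then read back the locally owned bin.

#include "libmesh/libmesh.h"
#include "libmesh/parallel_sort.h"

#include <iostream>
#include <vector>

int main (int argc, char ** argv)
{
  libMesh::LibMeshInit init (argc, argv);

  // Each processor contributes its own (unsorted) local keys.
  std::vector<double> local_keys = {3.14, -1.0, 42.0};

  // Construct the sorter; the constructor already sorts local_keys locally.
  libMesh::Parallel::Sort<double, unsigned int>
    sorter (init.comm(), local_keys);

  // Perform the parallel bin sort across all processors.
  sorter.sort();

  // Each processor now owns one globally-ordered, locally-sorted bin.
  const std::vector<double> & my_bin = sorter.bin();

  std::cout << "Bin on rank " << init.comm().rank()
            << " holds " << my_bin.size() << " keys" << std::endl;

  return 0;
}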