https://mooseframework.inl.gov
Shuffle.h
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://mooseframework.inl.gov
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
10 #pragma once
11 
12 #include "MooseRandom.h"
13 #include "libmesh/communicator.h"
14 #include "libmesh/parallel.h"
15 #include "libmesh/parallel_sync.h"
16 #include "libmesh/libmesh_common.h"
17 #include <list>
18 #include <memory>
19 #include <iterator>
20 #include <algorithm>
21 
22 namespace MooseUtils
23 {
25 
32 template <typename T>
33 void swap(std::vector<T> & data,
34  const std::size_t idx0,
35  const std::size_t idx1,
36  const libMesh::Parallel::Communicator & comm);
37 template <typename T>
38 void swap(std::vector<T> & data,
39  const std::size_t idx0,
40  const std::size_t idx1,
41  const libMesh::Parallel::Communicator * comm_ptr = nullptr);
43 
45 
73 template <typename T>
74 void shuffle(std::vector<T> & data, MooseRandom & generator, const std::size_t seed_index = 0);
75 template <typename T>
76 void shuffle(std::vector<T> & data,
77  MooseRandom & generator,
78  const libMesh::Parallel::Communicator & comm);
79 template <typename T>
80 void shuffle(std::vector<T> & data,
81  MooseRandom & generator,
82  const std::size_t seed_index,
83  const libMesh::Parallel::Communicator & comm);
84 template <typename T>
85 void shuffle(std::vector<T> & data,
86  MooseRandom & generator,
87  const std::size_t seed_index,
88  const libMesh::Parallel::Communicator * comm_ptr);
90 
92 
100 template <typename T>
101 std::vector<T>
102 resample(const std::vector<T> & data, MooseRandom & generator, const std::size_t seed_index = 0);
103 template <typename T>
104 std::vector<T> resample(const std::vector<T> & data,
105  MooseRandom & generator,
106  const libMesh::Parallel::Communicator & comm);
107 template <typename T>
108 std::vector<T> resample(const std::vector<T> & data,
109  MooseRandom & generator,
110  const std::size_t seed_index,
111  const libMesh::Parallel::Communicator & comm);
112 template <typename T>
113 std::vector<T> resample(const std::vector<T> & data,
114  MooseRandom & generator,
115  const std::size_t seed_index,
116  const libMesh::Parallel::Communicator * comm_ptr);
118 
120 
129 template <typename T, typename ActionFunctor>
130 void resampleWithFunctor(const std::vector<T> & data,
131  const ActionFunctor & functor,
132  MooseRandom & generator,
133  const std::size_t seed_index = 0);
134 template <typename T, typename ActionFunctor>
135 void resampleWithFunctor(const std::vector<T> & data,
136  const ActionFunctor & functor,
137  MooseRandom & generator,
138  const libMesh::Parallel::Communicator & comm);
139 template <typename T, typename ActionFunctor>
140 void resampleWithFunctor(const std::vector<T> & data,
141  const ActionFunctor & functor,
142  MooseRandom & generator,
143  const std::size_t seed_index,
144  const libMesh::Parallel::Communicator & comm);
145 template <typename T, typename ActionFunctor>
146 void resampleWithFunctor(const std::vector<T> & data,
147  const ActionFunctor & functor,
148  MooseRandom & generator,
149  const std::size_t seed_index,
150  const libMesh::Parallel::Communicator * comm_ptr);
152 }
153 
154 template <typename T>
155 void
156 MooseUtils::swap(std::vector<T> & data,
157  const std::size_t idx0,
158  const std::size_t idx1,
159  const libMesh::Parallel::Communicator * comm_ptr)
160 {
161  if (!comm_ptr || comm_ptr->size() == 1)
162  {
163  mooseAssert(idx0 < data.size(),
164  "idx0 (" << idx0 << ") out of range, data.size() is " << data.size());
165  mooseAssert(idx1 < data.size(),
166  "idx1 (" << idx1 << ") out of range, data.size() is " << data.size());
167  std::swap(data[idx0], data[idx1]);
168  }
169 
170  else
171  {
172  // Size of the local input data
173  const auto n_local = data.size();
174  const auto rank = comm_ptr->rank();
175 
176  // Compute the global size of the vector
177  std::size_t n_global = n_local;
178  comm_ptr->sum(n_global);
179  mooseAssert(idx0 < n_global,
180  "idx0 (" << idx0 << ") out of range, the global data size is " << n_global);
181  mooseAssert(idx1 < n_global,
182  "idx1 (" << idx1 << ") out of range, the global data size is " << n_global);
183 
184  // Compute the vector data offsets, the scope cleans up the "n_local" vector
185  std::vector<std::size_t> offsets(comm_ptr->size());
186  {
187  std::vector<std::size_t> local_sizes;
188  comm_ptr->allgather(n_local, local_sizes);
189  for (std::size_t i = 0; i < local_sizes.size() - 1; ++i)
190  offsets[i + 1] = offsets[i] + local_sizes[i];
191  }
192 
193  // Locate the rank and local index of the data to swap
194  auto idx0_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx0));
195  auto idx0_rank = std::distance(offsets.begin(), idx0_offset_iter);
196  auto idx0_local_idx = idx0 - *idx0_offset_iter;
197 
198  auto idx1_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx1));
199  auto idx1_rank = std::distance(offsets.begin(), idx1_offset_iter);
200  auto idx1_local_idx = idx1 - *idx1_offset_iter;
201 
202  // The values, if any, needed from other rank
203  std::unordered_map<processor_id_type, std::vector<std::size_t>> needs;
204  if (idx0_rank != rank && idx1_rank == rank)
205  needs[idx0_rank].push_back(idx0_local_idx);
206  if (idx0_rank == rank && idx1_rank != rank)
207  needs[idx1_rank].push_back(idx1_local_idx);
208 
209  // Collect the values needed by this processor
210  std::unordered_map<processor_id_type, std::vector<T>> returns;
211  auto return_functor =
212  [&data, &returns](processor_id_type pid, const std::vector<std::size_t> & indices)
213  {
214  auto & returns_pid = returns[pid];
215  for (auto idx : indices)
216  returns_pid.push_back(data[idx]);
217  };
218  Parallel::push_parallel_vector_data(*comm_ptr, needs, return_functor);
219 
220  // Receive needed values from the others processors
221  std::vector<T> incoming;
222  auto recv_functor = [&incoming](processor_id_type /*pid*/, const std::vector<T> & values)
223  { incoming = values; };
224  Parallel::push_parallel_vector_data(*comm_ptr, returns, recv_functor);
225 
226  if (idx0_rank == rank && idx1_rank == rank)
227  MooseUtils::swap(data, idx0_local_idx, idx1_local_idx);
228 
229  else if (idx0_rank == rank)
230  {
231  mooseAssert(incoming.size() == 1, "Only one value should be received");
232  data[idx0_local_idx] = incoming[0];
233  }
234  else if (idx1_rank == rank)
235  {
236  mooseAssert(incoming.size() == 1, "Only one value should be received");
237  data[idx1_local_idx] = incoming[0];
238  }
239  }
240 }
241 
242 template <typename T>
243 void
244 MooseUtils::shuffle(std::vector<T> & data,
245  MooseRandom & generator,
246  const std::size_t seed_index,
247  const libMesh::Parallel::Communicator * comm_ptr)
248 {
249  // REPLICATED data
250  if (!comm_ptr || comm_ptr->size() == 1)
251  {
252  std::size_t n_global = data.size();
253  for (std::size_t i = n_global - 1; i > 0; --i)
254  {
255  auto j = generator.randl(seed_index, 0, i);
256  MooseUtils::swap(data, i, j, nullptr);
257  }
258  }
259 
260  // DISTRIBUTED data
261  else
262  {
263  // Local/global size
264  std::size_t n_local = data.size();
265  std::size_t n_global = n_local;
266  comm_ptr->sum(n_global);
267 
268  // Compute the vector data offsets, the scope cleans up the "n_local" vector
269  std::vector<std::size_t> offsets(comm_ptr->size());
270  {
271  std::vector<std::size_t> local_sizes;
272  comm_ptr->allgather(n_local, local_sizes);
273  for (std::size_t i = 0; i < local_sizes.size() - 1; ++i)
274  offsets[i + 1] = offsets[i] + local_sizes[i];
275  }
276 
277  // Perform swaps
278  auto rank = comm_ptr->rank();
279  for (std::size_t idx0 = n_global - 1; idx0 > 0; --idx0)
280  {
281  auto idx1 = generator.randl(seed_index, 0, idx0);
282 
283  // Locate the rank and local index of the data to swap
284  auto idx0_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx0));
285  auto idx0_rank = std::distance(offsets.begin(), idx0_offset_iter);
286  auto idx0_local_idx = idx0 - *idx0_offset_iter;
287 
288  auto idx1_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx1));
289  auto idx1_rank = std::distance(offsets.begin(), idx1_offset_iter);
290  auto idx1_local_idx = idx1 - *idx1_offset_iter;
291 
292  // The values, if any, needed from other rank
293  std::unordered_map<processor_id_type, std::vector<std::size_t>> needs;
294  if (idx0_rank != rank && idx1_rank == rank)
295  needs[idx0_rank].push_back(idx0_local_idx);
296  if (idx0_rank == rank && idx1_rank != rank)
297  needs[idx1_rank].push_back(idx1_local_idx);
298 
299  // Collect the values needed by this processor
300  std::unordered_map<processor_id_type, std::vector<T>> returns;
301  auto return_functor =
302  [&data, &returns](processor_id_type pid, const std::vector<std::size_t> & indices)
303  {
304  auto & returns_pid = returns[pid];
305  for (auto idx : indices)
306  returns_pid.push_back(data[idx]);
307  };
308  Parallel::push_parallel_vector_data(*comm_ptr, needs, return_functor);
309 
310  // Receive needed values from the others processors
311  std::vector<T> incoming;
312  auto recv_functor = [&incoming](processor_id_type /*pid*/, const std::vector<T> & values)
313  { incoming = values; };
314  Parallel::push_parallel_vector_data(*comm_ptr, returns, recv_functor);
315 
316  if (idx0_rank == rank && idx1_rank == rank)
317  MooseUtils::swap(data, idx0_local_idx, idx1_local_idx);
318 
319  else if (idx0_rank == rank)
320  {
321  mooseAssert(incoming.size() == 1, "Only one value should be received");
322  data[idx0_local_idx] = incoming[0];
323  }
324  else if (idx1_rank == rank)
325  {
326  mooseAssert(incoming.size() == 1, "Only one value should be received");
327  data[idx1_local_idx] = incoming[0];
328  }
329  }
330  }
331 }
332 
333 template <typename T>
334 std::vector<T>
335 MooseUtils::resample(const std::vector<T> & data,
336  MooseRandom & generator,
337  const std::size_t seed_index,
338  const libMesh::Parallel::Communicator * comm_ptr)
339 {
340  // Size of the local input data
341  const std::size_t n_local = data.size();
342 
343  // Re-sampled data vector to be returned
344  std::vector<T> replicate(n_local);
345 
346  // REPLICATED data
347  if (!comm_ptr || comm_ptr->size() == 1)
348  {
349  replicate.resize(n_local);
350  for (std::size_t j = 0; j < n_local; ++j)
351  {
352  auto index = generator.randl(seed_index, 0, n_local);
353  replicate[j] = data[index];
354  }
355  }
356 
357  // DISTRIBUTED data
358  else
359  {
360  // Compute the global size of the vector
361  std::size_t n_global = n_local;
362  comm_ptr->sum(n_global);
363 
364  // Compute the vector data offsets, the scope cleans up the "n_local" vector
365  std::vector<std::size_t> offsets(comm_ptr->size());
366  {
367  std::vector<std::size_t> local_sizes;
368  comm_ptr->allgather(n_local, local_sizes);
369  for (std::size_t i = 0; i < local_sizes.size() - 1; ++i)
370  offsets[i + 1] = offsets[i] + local_sizes[i];
371  }
372 
373  // Advance the random number generator to the current offset
374  const auto rank = comm_ptr->rank();
375  for (std::size_t i = 0; i < offsets[rank]; ++i)
376  generator.randl(seed_index, 0, n_global);
377 
378  // Compute the needs for this processor
379  std::unordered_map<processor_id_type, std::vector<std::pair<std::size_t, std::size_t>>> needs;
380  for (std::size_t i = 0; i < n_local; ++i)
381  {
382  const auto idx = generator.randl(seed_index, 0, n_global); // random global index
383 
384  // Locate the rank and local index of the data desired
385  const auto idx_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx));
386  const auto idx_rank = std::distance(offsets.begin(), idx_offset_iter);
387  const auto idx_local_idx = idx - *idx_offset_iter;
388 
389  // Local available data can be inserted into the re-sample, non-local data is add the the
390  // needs from other ranks
391  if (idx_rank == rank)
392  replicate[i] = data[idx_local_idx];
393  else
394  needs[idx_rank].emplace_back(idx_local_idx, i);
395  }
396 
397  // Advance the random number generator to the end of the global vector
398  for (std::size_t i = offsets[rank] + n_local; i < n_global; ++i)
399  generator.randl(seed_index, 0, n_global);
400 
401  // Collect the values to be returned to the various processors
402  std::unordered_map<processor_id_type, std::vector<std::pair<T, std::size_t>>> returns;
403  auto return_functor =
404  [&data, &returns](processor_id_type pid,
405  const std::vector<std::pair<std::size_t, std::size_t>> & indices)
406  {
407  auto & returns_pid = returns[pid];
408  for (const auto & idx : indices)
409  returns_pid.emplace_back(data[idx.first], idx.second);
410  };
411  Parallel::push_parallel_vector_data(*comm_ptr, needs, return_functor);
412 
413  // Receive resampled values from the various processors
414  auto recv_functor =
415  [&replicate](processor_id_type, const std::vector<std::pair<T, std::size_t>> & values)
416  {
417  for (const auto & value : values)
418  replicate[value.second] = value.first;
419  };
420  Parallel::push_parallel_vector_data(*comm_ptr, returns, recv_functor);
421  }
422  return replicate;
423 }
424 
425 template <typename T, typename ActionFunctor>
426 void
427 MooseUtils::resampleWithFunctor(const std::vector<T> & data,
428  const ActionFunctor & functor,
429  MooseRandom & generator,
430  const std::size_t seed_index,
431  const libMesh::Parallel::Communicator * comm_ptr)
432 {
433  const std::size_t n_local = data.size();
434 
435  if (!comm_ptr || comm_ptr->size() == 1)
436  {
437  for (std::size_t j = 0; j < n_local; ++j)
438  {
439  auto index = generator.randl(seed_index, 0, n_local);
440  functor(data[index]);
441  }
442  }
443  else
444  {
445  // Compute the global size of the vector
446  std::size_t n_global = n_local;
447  comm_ptr->sum(n_global);
448 
449  // Compute the vector data offsets, the scope cleans up the "n_local" vector
450  std::vector<std::size_t> offsets(comm_ptr->size());
451  {
452  std::vector<std::size_t> local_sizes;
453  comm_ptr->allgather(n_local, local_sizes);
454  for (std::size_t i = 0; i < local_sizes.size() - 1; ++i)
455  offsets[i + 1] = offsets[i] + local_sizes[i];
456  }
457 
458  // Advance the random number generator to the current offset
459  const auto rank = comm_ptr->rank();
460  for (std::size_t i = 0; i < offsets[rank]; ++i)
461  generator.randl(seed_index, 0, n_global);
462 
463  // Compute the needs for this processor
464  std::unordered_map<processor_id_type, std::vector<std::size_t>> indices;
465  for (std::size_t i = 0; i < n_local; ++i)
466  {
467  const auto idx = generator.randl(seed_index, 0, n_global); // random global index
468 
469  // Locate the rank and local index of the data desired
470  const auto idx_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx));
471  const auto idx_rank = std::distance(offsets.begin(), idx_offset_iter);
472  const auto idx_local_idx = idx - *idx_offset_iter;
473 
474  // Push back the index to appropriate rank
475  indices[idx_rank].push_back(idx_local_idx);
476  }
477 
478  // Advance the random number generator to the end of the global vector
479  for (std::size_t i = offsets[rank] + n_local; i < n_global; ++i)
480  generator.randl(seed_index, 0, n_global);
481 
482  // Send the indices to the appropriate rank and have the calculator do its work
483  auto act_functor =
484  [&functor, &data](processor_id_type /*pid*/, const std::vector<std::size_t> & indices)
485  {
486  for (const auto & idx : indices)
487  functor(data[idx]);
488  };
489  Parallel::push_parallel_vector_data(*comm_ptr, indices, act_functor);
490  }
491 }
492 
493 template <typename T>
494 void
495 MooseUtils::swap(std::vector<T> & data,
496  const std::size_t idx0,
497  const std::size_t idx1,
498  const libMesh::Parallel::Communicator & comm)
499 {
500  MooseUtils::swap<T>(data, idx0, idx1, &comm);
501 }
502 
503 template <typename T>
504 void
505 MooseUtils::shuffle(std::vector<T> & data, MooseRandom & generator, const std::size_t seed_index)
506 {
507  return MooseUtils::shuffle(data, generator, seed_index, nullptr);
508 }
509 
510 template <typename T>
511 void
512 MooseUtils::shuffle(std::vector<T> & data,
513  MooseRandom & generator,
514  const libMesh::Parallel::Communicator & comm)
515 {
516  return MooseUtils::shuffle(data, generator, 0, &comm);
517 }
518 
519 template <typename T>
520 void
521 MooseUtils::shuffle(std::vector<T> & data,
522  MooseRandom & generator,
523  const std::size_t seed_index,
524  const libMesh::Parallel::Communicator & comm)
525 {
526  return MooseUtils::shuffle(data, generator, seed_index, &comm);
527 }
528 
529 template <typename T>
530 std::vector<T>
531 MooseUtils::resample(const std::vector<T> & data,
532  MooseRandom & generator,
533  const std::size_t seed_index)
534 {
535  return MooseUtils::resample(data, generator, seed_index, nullptr);
536 }
537 
538 template <typename T>
539 std::vector<T>
540 MooseUtils::resample(const std::vector<T> & data,
541  MooseRandom & generator,
542  const libMesh::Parallel::Communicator & comm)
543 {
544  return MooseUtils::resample(data, generator, 0, &comm);
545 }
546 
547 template <typename T>
548 std::vector<T>
549 MooseUtils::resample(const std::vector<T> & data,
550  MooseRandom & generator,
551  const std::size_t seed_index,
552  const libMesh::Parallel::Communicator & comm)
553 {
554  return MooseUtils::resample(data, generator, seed_index, &comm);
555 }
556 
557 template <typename T, typename ActionFunctor>
558 void
559 MooseUtils::resampleWithFunctor(const std::vector<T> & data,
560  const ActionFunctor & functor,
561  MooseRandom & generator,
562  const std::size_t seed_index)
563 {
564  return MooseUtils::resampleWithFunctor(data, functor, generator, seed_index, nullptr);
565 }
566 
567 template <typename T, typename ActionFunctor>
568 void
569 MooseUtils::resampleWithFunctor(const std::vector<T> & data,
570  const ActionFunctor & functor,
571  MooseRandom & generator,
572  const libMesh::Parallel::Communicator & comm)
573 {
574  return MooseUtils::resampleWithFunctor(data, functor, generator, 0, &comm);
575 }
576 
577 template <typename T, typename ActionFunctor>
578 void
579 MooseUtils::resampleWithFunctor(const std::vector<T> & data,
580  const ActionFunctor & functor,
581  MooseRandom & generator,
582  const std::size_t seed_index,
583  const libMesh::Parallel::Communicator & comm)
584 {
585  return MooseUtils::resampleWithFunctor(data, functor, generator, seed_index, &comm);
586 }
void allgather(const T &send_data, std::vector< T, A > &recv_data) const
static uint32_t randl()
This method returns the next random number (long format) from the generator.
Definition: MooseRandom.h:71
void swap(std::vector< T > &data, const std::size_t idx0, const std::size_t idx1, const libMesh::Parallel::Communicator &comm)
Swap function for serial or distributed vector of data.
Definition: Shuffle.h:495
void shuffle(std::vector< T > &data, MooseRandom &generator, const std::size_t seed_index=0)
Shuffle function for serial or distributed vector of data that shuffles in place. ...
Definition: Shuffle.h:505
processor_id_type rank() const
std::vector< T > resample(const std::vector< T > &data, MooseRandom &generator, const std::size_t seed_index=0)
Randomly resample a vector of data, allowing a value to be repeated.
Definition: Shuffle.h:531
processor_id_type size() const
void resampleWithFunctor(const std::vector< T > &data, const ActionFunctor &functor, MooseRandom &generator, const std::size_t seed_index=0)
Randomly resample a vector of data and apply a functor, allowing a value to be repeated.
Definition: Shuffle.h:559
uint8_t processor_id_type
void swap(std::vector< T > &data, const std::size_t idx0, const std::size_t idx1, const libMesh::Parallel::Communicator *comm_ptr=nullptr)
Definition: Shuffle.h:156
Real value(unsigned n, unsigned alpha, unsigned beta, Real x)
This class encapsulates a useful, consistent, cross-platform random number generator with multiple ut...
Definition: MooseRandom.h:37
unsigned int idx(const ElemType type, const unsigned int nx, const unsigned int i, const unsigned int j)