https://mooseframework.inl.gov
Shuffle.h
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://mooseframework.inl.gov
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
10 #pragma once
11 #include "MooseRandom.h"
12 #include "libmesh/communicator.h"
13 #include "libmesh/parallel.h"
14 #include "libmesh/parallel_sync.h"
15 #include "libmesh/libmesh_common.h"
16 #include <list>
17 #include <memory>
18 #include <iterator>
19 #include <algorithm>
20 
21 namespace MooseUtils
22 {
24 
31 template <typename T>
32 void swap(std::vector<T> & data,
33  const std::size_t idx0,
34  const std::size_t idx1,
35  const libMesh::Parallel::Communicator & comm);
36 template <typename T>
37 void swap(std::vector<T> & data,
38  const std::size_t idx0,
39  const std::size_t idx1,
40  const libMesh::Parallel::Communicator * comm_ptr = nullptr);
42 
44 
72 template <typename T>
73 void shuffle(std::vector<T> & data, MooseRandom & generator, const std::size_t seed_index = 0);
74 template <typename T>
75 void shuffle(std::vector<T> & data,
76  MooseRandom & generator,
77  const libMesh::Parallel::Communicator & comm);
78 template <typename T>
79 void shuffle(std::vector<T> & data,
80  MooseRandom & generator,
81  const std::size_t seed_index,
82  const libMesh::Parallel::Communicator & comm);
83 template <typename T>
84 void shuffle(std::vector<T> & data,
85  MooseRandom & generator,
86  const std::size_t seed_index,
87  const libMesh::Parallel::Communicator * comm_ptr);
89 
91 
99 template <typename T>
100 std::vector<T>
101 resample(const std::vector<T> & data, MooseRandom & generator, const std::size_t seed_index = 0);
102 template <typename T>
103 std::vector<T> resample(const std::vector<T> & data,
104  MooseRandom & generator,
105  const libMesh::Parallel::Communicator & comm);
106 template <typename T>
107 std::vector<T> resample(const std::vector<T> & data,
108  MooseRandom & generator,
109  const std::size_t seed_index,
110  const libMesh::Parallel::Communicator & comm);
111 template <typename T>
112 std::vector<T> resample(const std::vector<T> & data,
113  MooseRandom & generator,
114  const std::size_t seed_index,
115  const libMesh::Parallel::Communicator * comm_ptr);
117 
119 
128 template <typename T, typename ActionFunctor>
129 void resampleWithFunctor(const std::vector<T> & data,
130  const ActionFunctor & functor,
131  MooseRandom & generator,
132  const std::size_t seed_index = 0);
133 template <typename T, typename ActionFunctor>
134 void resampleWithFunctor(const std::vector<T> & data,
135  const ActionFunctor & functor,
136  MooseRandom & generator,
137  const libMesh::Parallel::Communicator & comm);
138 template <typename T, typename ActionFunctor>
139 void resampleWithFunctor(const std::vector<T> & data,
140  const ActionFunctor & functor,
141  MooseRandom & generator,
142  const std::size_t seed_index,
143  const libMesh::Parallel::Communicator & comm);
144 template <typename T, typename ActionFunctor>
145 void resampleWithFunctor(const std::vector<T> & data,
146  const ActionFunctor & functor,
147  MooseRandom & generator,
148  const std::size_t seed_index,
149  const libMesh::Parallel::Communicator * comm_ptr);
151 }
152 
153 template <typename T>
154 void
155 MooseUtils::swap(std::vector<T> & data,
156  const std::size_t idx0,
157  const std::size_t idx1,
158  const libMesh::Parallel::Communicator * comm_ptr)
159 {
160  if (!comm_ptr || comm_ptr->size() == 1)
161  {
162  mooseAssert(idx0 < data.size(),
163  "idx0 (" << idx0 << ") out of range, data.size() is " << data.size());
164  mooseAssert(idx1 < data.size(),
165  "idx1 (" << idx1 << ") out of range, data.size() is " << data.size());
166  std::swap(data[idx0], data[idx1]);
167  }
168 
169  else
170  {
171  // Size of the local input data
172  const auto n_local = data.size();
173  const auto rank = comm_ptr->rank();
174 
175  // Compute the global size of the vector
176  std::size_t n_global = n_local;
177  comm_ptr->sum(n_global);
178  mooseAssert(idx0 < n_global,
179  "idx0 (" << idx0 << ") out of range, the global data size is " << n_global);
180  mooseAssert(idx1 < n_global,
181  "idx1 (" << idx1 << ") out of range, the global data size is " << n_global);
182 
183  // Compute the vector data offsets, the scope cleans up the "n_local" vector
184  std::vector<std::size_t> offsets(comm_ptr->size());
185  {
186  std::vector<std::size_t> local_sizes;
187  comm_ptr->allgather(n_local, local_sizes);
188  for (std::size_t i = 0; i < local_sizes.size() - 1; ++i)
189  offsets[i + 1] = offsets[i] + local_sizes[i];
190  }
191 
192  // Locate the rank and local index of the data to swap
193  auto idx0_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx0));
194  auto idx0_rank = std::distance(offsets.begin(), idx0_offset_iter);
195  auto idx0_local_idx = idx0 - *idx0_offset_iter;
196 
197  auto idx1_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx1));
198  auto idx1_rank = std::distance(offsets.begin(), idx1_offset_iter);
199  auto idx1_local_idx = idx1 - *idx1_offset_iter;
200 
201  // The values, if any, needed from other rank
202  std::unordered_map<processor_id_type, std::vector<std::size_t>> needs;
203  if (idx0_rank != rank && idx1_rank == rank)
204  needs[idx0_rank].push_back(idx0_local_idx);
205  if (idx0_rank == rank && idx1_rank != rank)
206  needs[idx1_rank].push_back(idx1_local_idx);
207 
208  // Collect the values needed by this processor
209  std::unordered_map<processor_id_type, std::vector<T>> returns;
210  auto return_functor =
211  [&data, &returns](processor_id_type pid, const std::vector<std::size_t> & indices)
212  {
213  auto & returns_pid = returns[pid];
214  for (auto idx : indices)
215  returns_pid.push_back(data[idx]);
216  };
217  Parallel::push_parallel_vector_data(*comm_ptr, needs, return_functor);
218 
219  // Receive needed values from the others processors
220  std::vector<T> incoming;
221  auto recv_functor = [&incoming](processor_id_type /*pid*/, const std::vector<T> & values)
222  { incoming = values; };
223  Parallel::push_parallel_vector_data(*comm_ptr, returns, recv_functor);
224 
225  if (idx0_rank == rank && idx1_rank == rank)
226  MooseUtils::swap(data, idx0_local_idx, idx1_local_idx);
227 
228  else if (idx0_rank == rank)
229  {
230  mooseAssert(incoming.size() == 1, "Only one value should be received");
231  data[idx0_local_idx] = incoming[0];
232  }
233  else if (idx1_rank == rank)
234  {
235  mooseAssert(incoming.size() == 1, "Only one value should be received");
236  data[idx1_local_idx] = incoming[0];
237  }
238  }
239 }
240 
241 template <typename T>
242 void
243 MooseUtils::shuffle(std::vector<T> & data,
244  MooseRandom & generator,
245  const std::size_t seed_index,
246  const libMesh::Parallel::Communicator * comm_ptr)
247 {
248  // REPLICATED data
249  if (!comm_ptr || comm_ptr->size() == 1)
250  {
251  std::size_t n_global = data.size();
252  for (std::size_t i = n_global - 1; i > 0; --i)
253  {
254  auto j = generator.randl(seed_index, 0, i);
255  MooseUtils::swap(data, i, j, nullptr);
256  }
257  }
258 
259  // DISTRIBUTED data
260  else
261  {
262  // Local/global size
263  std::size_t n_local = data.size();
264  std::size_t n_global = n_local;
265  comm_ptr->sum(n_global);
266 
267  // Compute the vector data offsets, the scope cleans up the "n_local" vector
268  std::vector<std::size_t> offsets(comm_ptr->size());
269  {
270  std::vector<std::size_t> local_sizes;
271  comm_ptr->allgather(n_local, local_sizes);
272  for (std::size_t i = 0; i < local_sizes.size() - 1; ++i)
273  offsets[i + 1] = offsets[i] + local_sizes[i];
274  }
275 
276  // Perform swaps
277  auto rank = comm_ptr->rank();
278  for (std::size_t idx0 = n_global - 1; idx0 > 0; --idx0)
279  {
280  auto idx1 = generator.randl(seed_index, 0, idx0);
281 
282  // Locate the rank and local index of the data to swap
283  auto idx0_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx0));
284  auto idx0_rank = std::distance(offsets.begin(), idx0_offset_iter);
285  auto idx0_local_idx = idx0 - *idx0_offset_iter;
286 
287  auto idx1_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx1));
288  auto idx1_rank = std::distance(offsets.begin(), idx1_offset_iter);
289  auto idx1_local_idx = idx1 - *idx1_offset_iter;
290 
291  // The values, if any, needed from other rank
292  std::unordered_map<processor_id_type, std::vector<std::size_t>> needs;
293  if (idx0_rank != rank && idx1_rank == rank)
294  needs[idx0_rank].push_back(idx0_local_idx);
295  if (idx0_rank == rank && idx1_rank != rank)
296  needs[idx1_rank].push_back(idx1_local_idx);
297 
298  // Collect the values needed by this processor
299  std::unordered_map<processor_id_type, std::vector<T>> returns;
300  auto return_functor =
301  [&data, &returns](processor_id_type pid, const std::vector<std::size_t> & indices)
302  {
303  auto & returns_pid = returns[pid];
304  for (auto idx : indices)
305  returns_pid.push_back(data[idx]);
306  };
307  Parallel::push_parallel_vector_data(*comm_ptr, needs, return_functor);
308 
309  // Receive needed values from the others processors
310  std::vector<T> incoming;
311  auto recv_functor = [&incoming](processor_id_type /*pid*/, const std::vector<T> & values)
312  { incoming = values; };
313  Parallel::push_parallel_vector_data(*comm_ptr, returns, recv_functor);
314 
315  if (idx0_rank == rank && idx1_rank == rank)
316  MooseUtils::swap(data, idx0_local_idx, idx1_local_idx);
317 
318  else if (idx0_rank == rank)
319  {
320  mooseAssert(incoming.size() == 1, "Only one value should be received");
321  data[idx0_local_idx] = incoming[0];
322  }
323  else if (idx1_rank == rank)
324  {
325  mooseAssert(incoming.size() == 1, "Only one value should be received");
326  data[idx1_local_idx] = incoming[0];
327  }
328  }
329  }
330 }
331 
332 template <typename T>
333 std::vector<T>
334 MooseUtils::resample(const std::vector<T> & data,
335  MooseRandom & generator,
336  const std::size_t seed_index,
337  const libMesh::Parallel::Communicator * comm_ptr)
338 {
339  // Size of the local input data
340  const std::size_t n_local = data.size();
341 
342  // Re-sampled data vector to be returned
343  std::vector<T> replicate(n_local);
344 
345  // REPLICATED data
346  if (!comm_ptr || comm_ptr->size() == 1)
347  {
348  replicate.resize(n_local);
349  for (std::size_t j = 0; j < n_local; ++j)
350  {
351  auto index = generator.randl(seed_index, 0, n_local);
352  replicate[j] = data[index];
353  }
354  }
355 
356  // DISTRIBUTED data
357  else
358  {
359  // Compute the global size of the vector
360  std::size_t n_global = n_local;
361  comm_ptr->sum(n_global);
362 
363  // Compute the vector data offsets, the scope cleans up the "n_local" vector
364  std::vector<std::size_t> offsets(comm_ptr->size());
365  {
366  std::vector<std::size_t> local_sizes;
367  comm_ptr->allgather(n_local, local_sizes);
368  for (std::size_t i = 0; i < local_sizes.size() - 1; ++i)
369  offsets[i + 1] = offsets[i] + local_sizes[i];
370  }
371 
372  // Advance the random number generator to the current offset
373  const auto rank = comm_ptr->rank();
374  for (std::size_t i = 0; i < offsets[rank]; ++i)
375  generator.randl(seed_index, 0, n_global);
376 
377  // Compute the needs for this processor
378  std::unordered_map<processor_id_type, std::vector<std::pair<std::size_t, std::size_t>>> needs;
379  for (std::size_t i = 0; i < n_local; ++i)
380  {
381  const auto idx = generator.randl(seed_index, 0, n_global); // random global index
382 
383  // Locate the rank and local index of the data desired
384  const auto idx_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx));
385  const auto idx_rank = std::distance(offsets.begin(), idx_offset_iter);
386  const auto idx_local_idx = idx - *idx_offset_iter;
387 
388  // Local available data can be inserted into the re-sample, non-local data is add the the
389  // needs from other ranks
390  if (idx_rank == rank)
391  replicate[i] = data[idx_local_idx];
392  else
393  needs[idx_rank].emplace_back(idx_local_idx, i);
394  }
395 
396  // Advance the random number generator to the end of the global vector
397  for (std::size_t i = offsets[rank] + n_local; i < n_global; ++i)
398  generator.randl(seed_index, 0, n_global);
399 
400  // Collect the values to be returned to the various processors
401  std::unordered_map<processor_id_type, std::vector<std::pair<T, std::size_t>>> returns;
402  auto return_functor =
403  [&data, &returns](processor_id_type pid,
404  const std::vector<std::pair<std::size_t, std::size_t>> & indices)
405  {
406  auto & returns_pid = returns[pid];
407  for (const auto & idx : indices)
408  returns_pid.emplace_back(data[idx.first], idx.second);
409  };
410  Parallel::push_parallel_vector_data(*comm_ptr, needs, return_functor);
411 
412  // Receive resampled values from the various processors
413  auto recv_functor =
414  [&replicate](processor_id_type, const std::vector<std::pair<T, std::size_t>> & values)
415  {
416  for (const auto & value : values)
417  replicate[value.second] = value.first;
418  };
419  Parallel::push_parallel_vector_data(*comm_ptr, returns, recv_functor);
420  }
421  return replicate;
422 }
423 
424 template <typename T, typename ActionFunctor>
425 void
426 MooseUtils::resampleWithFunctor(const std::vector<T> & data,
427  const ActionFunctor & functor,
428  MooseRandom & generator,
429  const std::size_t seed_index,
430  const libMesh::Parallel::Communicator * comm_ptr)
431 {
432  const std::size_t n_local = data.size();
433 
434  if (!comm_ptr || comm_ptr->size() == 1)
435  {
436  for (std::size_t j = 0; j < n_local; ++j)
437  {
438  auto index = generator.randl(seed_index, 0, n_local);
439  functor(data[index]);
440  }
441  }
442  else
443  {
444  // Compute the global size of the vector
445  std::size_t n_global = n_local;
446  comm_ptr->sum(n_global);
447 
448  // Compute the vector data offsets, the scope cleans up the "n_local" vector
449  std::vector<std::size_t> offsets(comm_ptr->size());
450  {
451  std::vector<std::size_t> local_sizes;
452  comm_ptr->allgather(n_local, local_sizes);
453  for (std::size_t i = 0; i < local_sizes.size() - 1; ++i)
454  offsets[i + 1] = offsets[i] + local_sizes[i];
455  }
456 
457  // Advance the random number generator to the current offset
458  const auto rank = comm_ptr->rank();
459  for (std::size_t i = 0; i < offsets[rank]; ++i)
460  generator.randl(seed_index, 0, n_global);
461 
462  // Compute the needs for this processor
463  std::unordered_map<processor_id_type, std::vector<std::size_t>> indices;
464  for (std::size_t i = 0; i < n_local; ++i)
465  {
466  const auto idx = generator.randl(seed_index, 0, n_global); // random global index
467 
468  // Locate the rank and local index of the data desired
469  const auto idx_offset_iter = std::prev(std::upper_bound(offsets.begin(), offsets.end(), idx));
470  const auto idx_rank = std::distance(offsets.begin(), idx_offset_iter);
471  const auto idx_local_idx = idx - *idx_offset_iter;
472 
473  // Push back the index to appropriate rank
474  indices[idx_rank].push_back(idx_local_idx);
475  }
476 
477  // Advance the random number generator to the end of the global vector
478  for (std::size_t i = offsets[rank] + n_local; i < n_global; ++i)
479  generator.randl(seed_index, 0, n_global);
480 
481  // Send the indices to the appropriate rank and have the calculator do its work
482  auto act_functor =
483  [&functor, &data](processor_id_type /*pid*/, const std::vector<std::size_t> & indices)
484  {
485  for (const auto & idx : indices)
486  functor(data[idx]);
487  };
488  Parallel::push_parallel_vector_data(*comm_ptr, indices, act_functor);
489  }
490 }
491 
492 template <typename T>
493 void
494 MooseUtils::swap(std::vector<T> & data,
495  const std::size_t idx0,
496  const std::size_t idx1,
497  const libMesh::Parallel::Communicator & comm)
498 {
499  MooseUtils::swap<T>(data, idx0, idx1, &comm);
500 }
501 
502 template <typename T>
503 void
504 MooseUtils::shuffle(std::vector<T> & data, MooseRandom & generator, const std::size_t seed_index)
505 {
506  return MooseUtils::shuffle(data, generator, seed_index, nullptr);
507 }
508 
509 template <typename T>
510 void
511 MooseUtils::shuffle(std::vector<T> & data,
512  MooseRandom & generator,
513  const libMesh::Parallel::Communicator & comm)
514 {
515  return MooseUtils::shuffle(data, generator, 0, &comm);
516 }
517 
518 template <typename T>
519 void
520 MooseUtils::shuffle(std::vector<T> & data,
521  MooseRandom & generator,
522  const std::size_t seed_index,
523  const libMesh::Parallel::Communicator & comm)
524 {
525  return MooseUtils::shuffle(data, generator, seed_index, &comm);
526 }
527 
528 template <typename T>
529 std::vector<T>
530 MooseUtils::resample(const std::vector<T> & data,
531  MooseRandom & generator,
532  const std::size_t seed_index)
533 {
534  return MooseUtils::resample(data, generator, seed_index, nullptr);
535 }
536 
537 template <typename T>
538 std::vector<T>
539 MooseUtils::resample(const std::vector<T> & data,
540  MooseRandom & generator,
541  const libMesh::Parallel::Communicator & comm)
542 {
543  return MooseUtils::resample(data, generator, 0, &comm);
544 }
545 
546 template <typename T>
547 std::vector<T>
548 MooseUtils::resample(const std::vector<T> & data,
549  MooseRandom & generator,
550  const std::size_t seed_index,
551  const libMesh::Parallel::Communicator & comm)
552 {
553  return MooseUtils::resample(data, generator, seed_index, &comm);
554 }
555 
556 template <typename T, typename ActionFunctor>
557 void
558 MooseUtils::resampleWithFunctor(const std::vector<T> & data,
559  const ActionFunctor & functor,
560  MooseRandom & generator,
561  const std::size_t seed_index)
562 {
563  return MooseUtils::resampleWithFunctor(data, functor, generator, seed_index, nullptr);
564 }
565 
566 template <typename T, typename ActionFunctor>
567 void
568 MooseUtils::resampleWithFunctor(const std::vector<T> & data,
569  const ActionFunctor & functor,
570  MooseRandom & generator,
571  const libMesh::Parallel::Communicator & comm)
572 {
573  return MooseUtils::resampleWithFunctor(data, functor, generator, 0, &comm);
574 }
575 
576 template <typename T, typename ActionFunctor>
577 void
578 MooseUtils::resampleWithFunctor(const std::vector<T> & data,
579  const ActionFunctor & functor,
580  MooseRandom & generator,
581  const std::size_t seed_index,
582  const libMesh::Parallel::Communicator & comm)
583 {
584  return MooseUtils::resampleWithFunctor(data, functor, generator, seed_index, &comm);
585 }
void allgather(const T &send_data, std::vector< T, A > &recv_data) const
static uint32_t randl()
This method returns the next random number (long format) from the generator.
Definition: MooseRandom.h:71
void swap(std::vector< T > &data, const std::size_t idx0, const std::size_t idx1, const libMesh::Parallel::Communicator &comm)
Swap function for serial or distributed vector of data.
Definition: Shuffle.h:494
void shuffle(std::vector< T > &data, MooseRandom &generator, const std::size_t seed_index=0)
Shuffle function for serial or distributed vector of data that shuffles in place. ...
Definition: Shuffle.h:504
processor_id_type rank() const
std::vector< T > resample(const std::vector< T > &data, MooseRandom &generator, const std::size_t seed_index=0)
Randomly resample a vector of data, allowing a value to be repeated.
Definition: Shuffle.h:530
processor_id_type size() const
void resampleWithFunctor(const std::vector< T > &data, const ActionFunctor &functor, MooseRandom &generator, const std::size_t seed_index=0)
Randomly resample a vector of data and apply a functor, allowing a value to be repeated.
Definition: Shuffle.h:558
uint8_t processor_id_type
void swap(std::vector< T > &data, const std::size_t idx0, const std::size_t idx1, const libMesh::Parallel::Communicator *comm_ptr=nullptr)
Definition: Shuffle.h:155
Real value(unsigned n, unsigned alpha, unsigned beta, Real x)
This class encapsulates a useful, consistent, cross-platform random number generator with multiple ut...
Definition: MooseRandom.h:37
unsigned int idx(const ElemType type, const unsigned int nx, const unsigned int i, const unsigned int j)