LCOV - code coverage report
Current view: top level - src/parallel - parallel_sort.C (source / functions) Hit Total Coverage
Test: libMesh/libmesh: #4229 (6a9aeb) with base 727f46 Lines: 90 91 98.9 %
Date: 2025-08-19 19:27:09 Functions: 14 21 66.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // The libMesh Finite Element Library.
       2             : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
       3             : 
       4             : // This library is free software; you can redistribute it and/or
       5             : // modify it under the terms of the GNU Lesser General Public
       6             : // License as published by the Free Software Foundation; either
       7             : // version 2.1 of the License, or (at your option) any later version.
       8             : 
       9             : // This library is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12             : // Lesser General Public License for more details.
      13             : 
      14             : // You should have received a copy of the GNU Lesser General Public
      15             : // License along with this library; if not, write to the Free Software
      16             : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      17             : 
      18             : 
      19             : // Local includes
      20             : #include "libmesh/parallel_sort.h"
      21             : 
      22             : // libMesh includes
      23             : #include "libmesh/libmesh_common.h"
      24             : #include "libmesh/parallel_bin_sorter.h"
      25             : #include "libmesh/parallel_hilbert.h"
      26             : 
      27             : // TIMPI includes
      28             : #include "timpi/parallel_implementation.h"
      29             : #include "timpi/parallel_sync.h"
      30             : 
// C++ includes
#include <algorithm>
#include <iostream>
#include <limits>
      34             : 
      35             : 
      36             : namespace libMesh
      37             : {
      38             : 
      39             : 
      40             : namespace Parallel {
      41             : 
      42             : // The Constructor sorts the local data using
      43             : // std::sort().  Therefore, the construction of
      44             : // a Parallel::Sort object takes O(n log n) time,
      45             : // where n is the length of _data.
      46             : template <typename KeyType, typename IdxType>
      47      698683 : Sort<KeyType,IdxType>::Sort(const Parallel::Communicator & comm_in,
      48             :                             std::vector<KeyType> & d) :
      49             :   ParallelObject(comm_in),
      50      698683 :   _n_procs(cast_int<processor_id_type>(comm_in.size())),
      51      698683 :   _proc_id(cast_int<processor_id_type>(comm_in.rank())),
      52      668215 :   _bin_is_sorted(false),
      53      729151 :   _data(d)
      54             : {
      55      698683 :   std::sort(_data.begin(), _data.end());
      56             : 
      57             :   // Allocate storage
      58      698683 :   _local_bin_sizes.resize(_n_procs);
      59      698683 : }
      60             : 
      61             : 
      62             : 
      63             : template <typename KeyType, typename IdxType>
      64      698683 : void Sort<KeyType,IdxType>::sort()
      65             : {
      66             :   // Find the global data size.  The sorting
      67             :   // algorithms assume they have a range to
      68             :   // work with, so catch the degenerate cases here
      69      698683 :   IdxType global_data_size = cast_int<IdxType>(_data.size());
      70             : 
      71      698683 :   this->comm().sum (global_data_size);
      72             : 
      73      698683 :   if (global_data_size < 2)
      74             :     {
      75             :       // the entire global range is either empty
      76             :       // or contains only one element
      77         140 :       _my_bin = _data;
      78             : 
      79         144 :       this->comm().allgather (static_cast<IdxType>(_my_bin.size()),
      80         140 :                               _local_bin_sizes);
      81             :     }
      82             :   else
      83             :     {
      84      713773 :       if (this->n_processors() > 1)
      85             :         {
      86      697941 :           this->binsort();
      87      697941 :           this->communicate_bins();
      88             :         }
      89             :       else
      90         602 :         _my_bin = _data;
      91             : 
      92      698543 :       this->sort_local_bin();
      93             :     }
      94             : 
      95             :   // Set sorted flag to true
      96      698683 :   _bin_is_sorted = true;
      97      698683 : }
      98             : 
      99             : 
     100             : 
     101             : template <typename KeyType, typename IdxType>
     102          69 : void Sort<KeyType,IdxType>::binsort()
     103             : {
     104             :   // Find the global min and max from all the
     105             :   // processors.
     106          71 :   std::vector<KeyType> global_min_max(2);
     107             : 
     108             :   // Insert the local min and max for this processor
     109          71 :   global_min_max[0] = -_data.front();
     110          69 :   global_min_max[1] =  _data.back();
     111             : 
     112             :   // Communicate to determine the global
     113             :   // min and max for all processors.
     114          69 :   this->comm().max(global_min_max);
     115             : 
     116             :   // Multiply the min by -1 to obtain the true min
     117          69 :   global_min_max[0] *= -1;
     118             : 
     119             :   // Bin-Sort based on the global min and max
     120          71 :   Parallel::BinSorter<KeyType> bs(this->comm(), _data);
     121          69 :   bs.binsort(_n_procs, global_min_max[1], global_min_max[0]);
     122             : 
     123             :   // Now save the local bin sizes in a vector so
     124             :   // we don't have to keep around the BinSorter.
     125         760 :   for (processor_id_type i=0; i<_n_procs; ++i)
     126         695 :     _local_bin_sizes[i] = bs.sizeof_bin(i);
     127          69 : }
     128             : 
     129             : 
     130             : 
#if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
// Full specialization for HilbertIndices, there is a fair amount of
// code duplication here that could potentially be consolidated with the
// above method
template <>
void Sort<Parallel::DofObjectKey,unsigned int>::binsort()
{
  // Find the global min and max from all the
  // processors.  Do this using MPI_Allreduce.
  Parallel::DofObjectKey
    local_min,  local_max,
    global_min, global_max;

  // A processor with no local data contributes identity elements:
  // all-ones racks (the largest representable Hilbert index) for the
  // min reduction and all-zeros for the max reduction, so an empty
  // rank can never win either reduction.
  if (_data.empty())
    {
#ifdef LIBMESH_ENABLE_UNIQUE_ID
      local_min.first.rack0 = local_min.first.rack1 = local_min.first.rack2 = static_cast<Hilbert::inttype>(-1);
      local_min.second = std::numeric_limits<unique_id_type>::max();
      local_max.first.rack0 = local_max.first.rack1 = local_max.first.rack2 = 0;
      local_max.second = 0;
#else
      local_min.rack0 = local_min.rack1 = local_min.rack2 = static_cast<Hilbert::inttype>(-1);
      local_max.rack0 = local_max.rack1 = local_max.rack2 = 0;
#endif
    }
  else
    {
      // _data is sorted (see the constructor), so front()/back() are
      // the local extrema.
      local_min = _data.front();
      local_max = _data.back();
    }

  // DofObjectKey has no built-in MPI reduction, so register custom
  // commutative ops for its min/max.  These must be freed below.
  MPI_Op hilbert_max, hilbert_min;

  MPI_Op_create       ((MPI_User_function*)dofobjectkey_max_op, true, &hilbert_max);
  MPI_Op_create       ((MPI_User_function*)dofobjectkey_min_op, true, &hilbert_min);

  // Communicate to determine the global
  // min and max for all processors.
  MPI_Allreduce(&local_min,
                &global_min,
                1,
                Parallel::StandardType<Parallel::DofObjectKey>(&local_min),
                hilbert_min,
                this->comm().get());

  MPI_Allreduce(&local_max,
                &global_max,
                1,
                Parallel::StandardType<Parallel::DofObjectKey>(&local_max),
                hilbert_max,
                this->comm().get());

  // Release the custom reduction operators now that both reductions
  // are complete.
  MPI_Op_free   (&hilbert_max);
  MPI_Op_free   (&hilbert_min);

  // Bin-Sort based on the global min and max
  Parallel::BinSorter<Parallel::DofObjectKey> bs(this->comm(),_data);
  bs.binsort(_n_procs, global_max, global_min);

  // Now save the local bin sizes in a vector so
  // we don't have to keep around the BinSorter.
  for (processor_id_type i=0; i<_n_procs; ++i)
    _local_bin_sizes[i] = bs.sizeof_bin(i);
}

#endif // #ifdef LIBMESH_HAVE_LIBHILBERT
     197             : 
     198             : 
     199             : template <typename KeyType, typename IdxType>
     200      697941 : void Sort<KeyType,IdxType>::communicate_bins()
     201             : {
     202             : #ifdef LIBMESH_HAVE_MPI
     203             :   // Find each section of our data to send
     204       15230 :   IdxType local_offset = 0;
     205       30460 :   std::map<processor_id_type, std::vector<KeyType> > pushed_keys, received_keys;
     206             : 
     207     7884912 :   for (processor_id_type i=0; i != _n_procs; ++i)
     208             :     {
     209     7186971 :       IdxType next_offset = local_offset + _local_bin_sizes[i];
     210     7186971 :         if (_local_bin_sizes[i])
     211             :           {
     212      642370 :             auto begin = _data.begin() + local_offset;
     213       19713 :             auto end = _data.begin() + next_offset;
     214      642370 :             pushed_keys[i].assign(begin, end);
     215             :           }
     216             : 
     217       30460 :       local_offset = next_offset;
     218             :     }
     219             : 
     220      737367 :   auto keys_action_functor =
     221     1205888 :     [& received_keys]
     222             :     (processor_id_type pid,
     223       19713 :      const std::vector<KeyType> & keys)
     224             :     {
     225      642370 :       received_keys[pid] = keys;
     226             :     };
     227             : 
     228             :   Parallel::push_parallel_vector_data
     229      697941 :     (this->comm(), pushed_keys, keys_action_functor);
     230             : 
     231       15230 :   std::size_t my_bin_size = 0;
     232     1340311 :   for (auto & p : received_keys)
     233      662083 :     my_bin_size += p.second.size();
     234             : 
     235      697941 :   _my_bin.clear();
     236      697941 :   _my_bin.reserve(my_bin_size);
     237             : 
     238     1340311 :   for (auto & p : received_keys)
     239      642370 :     _my_bin.insert(_my_bin.end(), p.second.begin(), p.second.end());
     240             : 
     241             : #ifdef DEBUG
     242       30460 :   std::vector<IdxType> global_bin_sizes = _local_bin_sizes;
     243             : 
     244       15230 :   this->comm().sum(global_bin_sizes);
     245             : 
     246       15230 :   libmesh_assert_equal_to
     247             :     (global_bin_sizes[this->processor_id()], _my_bin.size());
     248             : #endif
     249             : 
     250             : #endif // LIBMESH_HAVE_MPI
     251      697941 : }
     252             : 
     253             : 
     254             : 
// Sort the contents of the locally-held bin (filled by
// communicate_bins() or, in degenerate/serial cases, copied directly
// from _data) into ascending order.
template <typename KeyType, typename IdxType>
void Sort<KeyType,IdxType>::sort_local_bin()
{
  std::sort(_my_bin.begin(), _my_bin.end());
}
     260             : 
     261             : 
     262             : 
     263             : template <typename KeyType, typename IdxType>
     264      698683 : const std::vector<KeyType> & Sort<KeyType,IdxType>::bin()
     265             : {
     266      698683 :   if (!_bin_is_sorted)
     267             :     {
     268           0 :       libMesh::out << "Warning! Bin is not yet sorted!" << std::endl;
     269             :     }
     270             : 
     271      698683 :   return _my_bin;
     272             : }
     273             : 
} // namespace Parallel



// Explicitly instantiate for int, double
template class LIBMESH_EXPORT Parallel::Sort<int, unsigned int>;
template class LIBMESH_EXPORT Parallel::Sort<double, unsigned int>;
// The DofObjectKey instantiation uses the MPI/libHilbert binsort()
// specialization above, so it only exists when both are available.
#if defined(LIBMESH_HAVE_LIBHILBERT) && defined(LIBMESH_HAVE_MPI)
template class LIBMESH_EXPORT Parallel::Sort<Parallel::DofObjectKey, unsigned int>;
#endif

} // namespace libMesh

Generated by: LCOV version 1.14