LCOV - code coverage report
Current view: top level - include/parallel - threads_pthread.h (source / functions) Hit Total Coverage
Test: libMesh/libmesh: #4229 (6a9aeb) with base 727f46 Lines: 90 90 100.0 %
Date: 2025-08-19 19:27:09 Functions: 113 122 92.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // The libMesh Finite Element Library.
       2             : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
       3             : 
       4             : // This library is free software; you can redistribute it and/or
       5             : // modify it under the terms of the GNU Lesser General Public
       6             : // License as published by the Free Software Foundation; either
       7             : // version 2.1 of the License, or (at your option) any later version.
       8             : 
       9             : // This library is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12             : // Lesser General Public License for more details.
      13             : 
      14             : // You should have received a copy of the GNU Lesser General Public
      15             : // License along with this library; if not, write to the Free Software
      16             : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      17             : 
      18             : #ifndef LIBMESH_THREADS_PTHREAD_H
      19             : #define LIBMESH_THREADS_PTHREAD_H
      20             : 
      21             : // Do not try to #include this header directly, it is designed to be
      22             : // #included directly by threads.h
      23             : #ifndef LIBMESH_SQUASH_HEADER_WARNING
      24             : # warning "This file is designed to be included through libmesh/threads.h"
      25             : #else
      26             : 
      27             : #ifdef LIBMESH_HAVE_PTHREAD
      28             : 
      29             : // C++ includes
      30             : #ifdef LIBMESH_HAVE_CXX11_THREAD
      31             : # include <thread>
      32             : #endif
      33             : 
      34             : #include <pthread.h>
      35             : #include <algorithm>
      36             : #include <vector>
      37             : #include <memory> // std::unique_ptr, std::make_unique
      38             : 
      39             : #ifdef __APPLE__
      40             : #  ifdef __MAC_10_12
      41             : #    include <os/lock.h>
      42             : #else
      43             : #    include <libkern/OSAtomic.h>
      44             : #  endif
      45             : #endif
      46             : 
      47             : // Thread-Local-Storage macros
      48             : #ifdef LIBMESH_HAVE_CXX11_THREAD
      49             : #  define LIBMESH_TLS_TYPE(type)  thread_local type
      50             : #  define LIBMESH_TLS_REF(value)  (value)
      51             : #else // Maybe support gcc __thread eventually?
      52             : #  define LIBMESH_TLS_TYPE(type)  type
      53             : #  define LIBMESH_TLS_REF(value)  (value)
      54             : #endif
      55             : 
      56             : namespace libMesh
      57             : {
      58             : 
      59             : namespace Threads
      60             : {
      61             : 
      62             : 
      63             : #ifdef LIBMESH_HAVE_CXX11_THREAD
      64             : /**
      65             :  * Use std::thread when available.
      66             :  */
      67             : typedef std::thread Thread;
      68             : 
      69             : #else
      70             : 
      71             : /**
      72             :  * Use the non-concurrent placeholder.
      73             :  */
      74             : typedef NonConcurrentThread Thread;
      75             : 
      76             : #endif // LIBMESH_HAVE_CXX11_THREAD
      77             : 
      78             : 
      79             : /**
      80             :  * Spin mutex.  Implements mutual exclusion by busy-waiting in user
      81             :  * space for the lock to be acquired.
      82             :  */
      83             : #ifdef __APPLE__
      84             : #ifdef __MAC_10_12
      85             : class spin_mutex
      86             : {
      87             : public:
      88             :   spin_mutex() { ulock = OS_UNFAIR_LOCK_INIT; }
      89             :   ~spin_mutex() = default;
      90             : 
      91             :   void lock () { os_unfair_lock_lock(&ulock); }
      92             :   void unlock () { os_unfair_lock_unlock(&ulock); }
      93             : 
      94             :   class scoped_lock
      95             :   {
      96             :   public:
      97             :     scoped_lock () : smutex(nullptr) {}
      98             :     explicit scoped_lock ( spin_mutex & in_smutex ) : smutex(&in_smutex) { smutex->lock(); }
      99             : 
     100             :     ~scoped_lock () { release(); }
     101             : 
     102             :     void acquire ( spin_mutex & in_smutex ) { smutex = &in_smutex; smutex->lock(); }
     103             :     void release () { if (smutex) smutex->unlock(); smutex = nullptr; }
     104             : 
     105             :   private:
     106             :     spin_mutex * smutex;
     107             :   };
     108             : 
     109             : private:
     110             :   os_unfair_lock ulock;
     111             : };
     112             : #else
class spin_mutex
{
public:
  // Fallback for pre-10.12 macOS SDKs (the __MAC_10_12 branch above uses
  // os_unfair_lock instead).  NOTE(review): OSSpinLock is deprecated by
  // Apple as of 10.12 -- this branch only compiles on older SDKs.
  spin_mutex() : slock(0) {} // The convention is that the lock being zero is _unlocked_
  ~spin_mutex() = default;

  // Busy-wait until the lock can be taken.
  void lock () { OSSpinLockLock(&slock); }
  // Release the lock.
  void unlock () { OSSpinLockUnlock(&slock); }

  /**
   * RAII helper: locks on construction (or acquire()), unlocks on
   * destruction (or release()).
   */
  class scoped_lock
  {
  public:
    // Default construction holds nothing until acquire() is called.
    scoped_lock () : smutex(nullptr) {}
    explicit scoped_lock ( spin_mutex & in_smutex ) : smutex(&in_smutex) { smutex->lock(); }

    ~scoped_lock () { release(); }

    // Note: does not release a previously-acquired mutex first.
    void acquire ( spin_mutex & in_smutex ) { smutex = &in_smutex; smutex->lock(); }
    // Unlock (if holding) and detach; safe to call repeatedly.
    void release () { if (smutex) smutex->unlock(); smutex = nullptr; }

  private:
    spin_mutex * smutex; // currently-held mutex, or nullptr
  };

private:
  OSSpinLock slock; // zero when unlocked
};
     140             : #endif
     141             : #else
     142             : class spin_mutex
     143             : {
     144             : public:
     145             :   // Might want to use PTHREAD_MUTEX_ADAPTIVE_NP on Linux, but it's not available on OSX.
     146       31208 :   spin_mutex() { pthread_spin_init(&slock, PTHREAD_PROCESS_PRIVATE); }
     147      137580 :   ~spin_mutex() { pthread_spin_destroy(&slock); }
     148             : 
     149  7467916156 :   void lock () { pthread_spin_lock(&slock); }
     150  6451589454 :   void unlock () { pthread_spin_unlock(&slock); }
     151             : 
     152             :   class scoped_lock
     153             :   {
     154             :   public:
     155             :     scoped_lock () : smutex(nullptr) {}
     156      287218 :     explicit scoped_lock ( spin_mutex & in_smutex ) : smutex(&in_smutex) { smutex->lock(); }
     157             : 
     158      287218 :     ~scoped_lock () { release(); }
     159             : 
     160           8 :     void acquire ( spin_mutex & in_smutex ) { smutex = &in_smutex; smutex->lock(); }
     161      355024 :     void release () { if (smutex) smutex->unlock(); smutex = nullptr; }
     162             : 
     163             :   private:
     164             :     spin_mutex * smutex;
     165             :   };
     166             : 
     167             : private:
     168             :   pthread_spinlock_t slock;
     169             : };
     170             : #endif // __APPLE__
     171             : 
     172             : 
     173             : 
/**
 * Recursive mutex.  Built on a pthread mutex with the
 * PTHREAD_MUTEX_RECURSIVE attribute, so the thread already holding the
 * lock may acquire it again without deadlocking (one unlock per lock).
 */
     178             : class recursive_mutex
     179             : {
     180             : public:
     181             :   // Might want to use PTHREAD_MUTEX_ADAPTIVE_NP on Linux, but it's not available on OSX.
     182       16389 :   recursive_mutex()
     183       16389 :   {
     184       16389 :     pthread_mutexattr_init(&attr);
     185       16389 :     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
     186       16389 :     pthread_mutex_init(&mutex, &attr);
     187       16389 :   }
     188       16389 :   ~recursive_mutex() { pthread_mutex_destroy(&mutex); }
     189             : 
     190             :   void lock () { pthread_mutex_lock(&mutex); }
     191             :   void unlock () { pthread_mutex_unlock(&mutex); }
     192             : 
     193             :   class scoped_lock
     194             :   {
     195             :   public:
     196             :     scoped_lock () : rmutex(nullptr) {}
     197             :     explicit scoped_lock ( recursive_mutex & in_rmutex ) : rmutex(&in_rmutex) { rmutex->lock(); }
     198             : 
     199             :     ~scoped_lock () { release(); }
     200             : 
     201             :     void acquire ( recursive_mutex & in_rmutex ) { rmutex = &in_rmutex; rmutex->lock(); }
     202             :     void release () { if (rmutex) rmutex->unlock(); rmutex = nullptr; }
     203             : 
     204             :   private:
     205             :     recursive_mutex * rmutex;
     206             :   };
     207             : 
     208             : private:
     209             :   pthread_mutex_t mutex;
     210             :   pthread_mutexattr_t attr;
     211             : };
     212             : 
     213             : template <typename Range>
     214      177398 : unsigned int num_pthreads(Range & range)
     215             : {
     216      365410 :   std::size_t min = std::min((std::size_t)libMesh::n_threads(), range.size());
     217      271404 :   return min > 0 ? cast_int<unsigned int>(min) : 1;
     218             : }
     219             : 
     220             : template <typename Range, typename Body>
     221             : class RangeBody
     222             : {
     223             : public:
     224             :   Range * range;
     225             :   Body * body;
     226             : };
     227             : 
     228             : template <typename Range, typename Body>
     229      133783 : void * run_body(void * args)
     230             : {
     231      133783 :   RangeBody<Range, Body> * range_body = (RangeBody<Range, Body> *)args;
     232             : 
     233      133783 :   Body & body = *range_body->body;
     234      133783 :   Range & range = *range_body->range;
     235             : 
     236      133783 :   body(range);
     237             : 
     238      133783 :   return nullptr;
     239             : }
     240             : 
     241             : /**
     242             :  * Scheduler to manage threads.
     243             :  */
     244             : class task_scheduler_init
     245             : {
     246             : public:
     247             :   static const int automatic = -1;
     248         462 :   explicit task_scheduler_init (int = automatic) {}
     249             :   void initialize (int = automatic) {}
     250             :   void terminate () {}
     251             : };
     252             : 
     253             : //-------------------------------------------------------------------
/**
 * Dummy "splitting object" used to distinguish splitting constructors
 * from copy constructors.  parallel_reduce() below passes a
 * Threads::split() when copying the user's Body for each extra thread.
 */
class split {};
     259             : 
     260             : 
     261             : 
     262             : 
     263             : //-------------------------------------------------------------------
     264             : /**
     265             :  * Execute the provided function object in parallel on the specified
     266             :  * range.
     267             :  */
template <typename Range, typename Body>
inline
void parallel_for (const Range & range, const Body & body)
{
  // RAII flag marking that we are inside a threaded region for the
  // duration of this call (BoolAcquire/in_threads come from threads.h).
  Threads::BoolAcquire b(Threads::in_threads);

  // If we're running in serial - just run!
  if (libMesh::n_threads() == 1)
  {
    body(range);
    return;
  }

  // NOTE(review): presumably suspends PerfLog instrumentation, which is
  // not safe to use concurrently -- confirm in the PerfLog headers.
  DisablePerfLogInScope disable_perf;

  // Never spawn more threads than there are items of work (min 1).
  unsigned int n_threads = num_pthreads(range);

  std::vector<std::unique_ptr<Range>> ranges(n_threads);
  std::vector<RangeBody<const Range, const Body>> range_bodies(n_threads);
  std::vector<pthread_t> threads(n_threads);

  // Create the ranges for each thread
  std::size_t range_size = range.size() / n_threads;

  typename Range::const_iterator current_beginning = range.begin();

  for (unsigned int i=0; i<n_threads; i++)
    {
      std::size_t this_range_size = range_size;

      if (i+1 == n_threads)
        this_range_size += range.size() % n_threads; // Give the last one the remaining work to do

      // Each thread gets its own copy of a contiguous subrange
      // [current_beginning, current_beginning + this_range_size).
      ranges[i] = std::make_unique<Range>(range, current_beginning, current_beginning + this_range_size);

      current_beginning = current_beginning + this_range_size;
    }

  // Create the RangeBody arguments.  All threads share the same (const)
  // Body, so applying it concurrently must be safe.
  for (unsigned int i=0; i<n_threads; i++)
    {
      range_bodies[i].range = ranges[i].get();
      range_bodies[i].body = &body;
    }

  // Create the threads.  It may seem redundant to wrap a pragma in
  // #ifdefs... but GCC warns about an "unknown pragma" if it
  // encounters this line of code when -fopenmp is not passed to the
  // compiler.
#ifdef LIBMESH_HAVE_OPENMP
#pragma omp parallel for schedule (static)
#endif
  for (int i=0; i<static_cast<int>(n_threads); i++)
    {
#if !LIBMESH_HAVE_OPENMP
      pthread_create(&threads[i], nullptr, &run_body<Range, Body>, (void *)&range_bodies[i]);
#else
      run_body<Range, Body>((void *)&range_bodies[i]);
#endif
    }

#if !LIBMESH_HAVE_OPENMP
  // Wait for them to finish

  // The use of 'int' instead of unsigned for the iteration variable
  // is deliberate here.  This is an OpenMP loop, and some older
  // compilers warn when you don't use int for the loop index.  The
  // reason has to do with signed vs. unsigned integer overflow
  // behavior and optimization.
  // http://blog.llvm.org/2011/05/what-every-c-programmer-should-know.html
  for (int i=0; i<static_cast<int>(n_threads); i++)
    pthread_join(threads[i], nullptr);
#endif
}
     342             : 
     343             : /**
     344             :  * Execute the provided function object in parallel on the specified
     345             :  * range with the specified partitioner.
     346             :  */
template <typename Range, typename Body, typename Partitioner>
inline
void parallel_for (const Range & range, const Body & body, const Partitioner &)
{
  // The pthreads backend has no partitioner support: ignore it and
  // delegate to the two-argument overload.
  parallel_for (range, body);
}
     353             : 
     354             : /**
     355             :  * Execute the provided reduction operation in parallel on the specified
     356             :  * range.
     357             :  */
template <typename Range, typename Body>
inline
void parallel_reduce (const Range & range, Body & body)
{
  // RAII flag marking that we are inside a threaded region for the
  // duration of this call (BoolAcquire/in_threads come from threads.h).
  Threads::BoolAcquire b(Threads::in_threads);

  // If we're running in serial - just run!
  if (libMesh::n_threads() == 1)
  {
    body(range);
    return;
  }

  // NOTE(review): presumably suspends PerfLog instrumentation, which is
  // not safe to use concurrently -- confirm in the PerfLog headers.
  DisablePerfLogInScope disable_perf;

  // Never spawn more threads than there are items of work (min 1).
  unsigned int n_threads = num_pthreads(range);

  std::vector<std::unique_ptr<Range>> ranges(n_threads);
  std::vector<std::unique_ptr<Body>> managed_bodies(n_threads); // bodies we are responsible for
  std::vector<Body *> bodies(n_threads); // dumb pointers to managed_bodies
  std::vector<RangeBody<Range, Body>> range_bodies(n_threads);

  // Create n_threads-1 copies of "body". We manage the lifetime of
  // these copies with std::unique_ptrs.  The Body(body, split) form is
  // the TBB-style "splitting constructor" convention.
  for (unsigned int i=1; i<n_threads; i++)
    managed_bodies[i] = std::make_unique<Body>(body, Threads::split());

  // Set up the "bodies" vector. Use the passed in body for the first
  // one, point to managed_bodies entries for the others.
  bodies[0] = &body;
  for (unsigned int i=1; i<n_threads; i++)
    bodies[i] = managed_bodies[i].get();

  // Create the ranges for each thread
  std::size_t range_size = range.size() / n_threads;

  typename Range::const_iterator current_beginning = range.begin();

  for (unsigned int i=0; i<n_threads; i++)
    {
      std::size_t this_range_size = range_size;

      if (i+1 == n_threads)
        this_range_size += range.size() % n_threads; // Give the last one the remaining work to do

      // Each thread gets its own copy of a contiguous subrange
      // [current_beginning, current_beginning + this_range_size).
      ranges[i] = std::make_unique<Range>(range, current_beginning, current_beginning + this_range_size);

      current_beginning = current_beginning + this_range_size;
    }

  // Create the RangeBody arguments: thread i reduces subrange i into
  // its own private Body copy.
  for (unsigned int i=0; i<n_threads; i++)
    {
      range_bodies[i].range = ranges[i].get();
      range_bodies[i].body = bodies[i];
    }

  // Create the threads
  std::vector<pthread_t> threads(n_threads);

  // It may seem redundant to wrap a pragma in #ifdefs... but GCC
  // warns about an "unknown pragma" if it encounters this line of
  // code when -fopenmp is not passed to the compiler.
#ifdef LIBMESH_HAVE_OPENMP
#pragma omp parallel for schedule (static)
#endif
  // The use of 'int' instead of unsigned for the iteration variable
  // is deliberate here.  This is an OpenMP loop, and some older
  // compilers warn when you don't use int for the loop index.  The
  // reason has to do with signed vs. unsigned integer overflow
  // behavior and optimization.
  // http://blog.llvm.org/2011/05/what-every-c-programmer-should-know.html
  for (int i=0; i<static_cast<int>(n_threads); i++)
    {
#if !LIBMESH_HAVE_OPENMP
      pthread_create(&threads[i], nullptr, &run_body<Range, Body>, (void *)&range_bodies[i]);
#else
      run_body<Range, Body>((void *)&range_bodies[i]);
#endif
    }

#if !LIBMESH_HAVE_OPENMP
  // Wait for them to finish
  for (unsigned int i=0; i<n_threads; i++)
    pthread_join(threads[i], nullptr);
#endif

  // Join them all down to the original Body: fold right-to-left so the
  // final result accumulates into the caller's "body" (bodies[0]).
  for (unsigned int i=n_threads-1; i != 0; i--)
    bodies[i-1]->join(*bodies[i]);
}
     449             : 
     450             : /**
     451             :  * Execute the provided reduction operation in parallel on the specified
     452             :  * range with the specified partitioner.
     453             :  */
template <typename Range, typename Body, typename Partitioner>
inline
void parallel_reduce (const Range & range, Body & body, const Partitioner &)
{
  // The pthreads backend has no partitioner support: ignore it and
  // delegate to the two-argument overload.
  parallel_reduce(range, body);
}
     460             : 
     461             : 
     462             : /**
     463             :  * Defines atomic operations which can only be executed on a
     464             :  * single thread at a time.
     465             :  */
     466             : template <typename T>
     467             : class atomic
     468             : {
     469             : public:
     470         462 :   atomic () : val(0) {}
     471       16381 :   operator T () { return val; }
     472             : 
     473             :   T operator=( T value )
     474             :   {
     475             :     spin_mutex::scoped_lock lock(smutex);
     476             :     val = value;
     477             :     return val;
     478             :   }
     479             : 
     480             :   atomic<T> & operator=( const atomic<T> & value )
     481             :   {
     482             :     spin_mutex::scoped_lock lock(smutex);
     483             :     val = value;
     484             :     return *this;
     485             :   }
     486             : 
     487             : 
     488             :   T operator+=(T value)
     489             :   {
     490             :     spin_mutex::scoped_lock lock(smutex);
     491             :     val += value;
     492             :     return val;
     493             :   }
     494             : 
     495             :   T operator-=(T value)
     496             :   {
     497             :     spin_mutex::scoped_lock lock(smutex);
     498             :     val -= value;
     499             :     return val;
     500             :   }
     501             : 
     502  1567026230 :   T operator++()
     503             :   {
     504    42729276 :     spin_mutex::scoped_lock lock(smutex);
     505  2404658778 :     val++;
     506  1567026230 :     return val;
     507             :   }
     508             : 
     509             :   T operator++(int)
     510             :   {
     511             :     spin_mutex::scoped_lock lock(smutex);
     512             :     val++;
     513             :     return val;
     514             :   }
     515             : 
     516  4633270039 :   T operator--()
     517             :   {
     518    12207561 :     spin_mutex::scoped_lock lock(smutex);
     519  4811964193 :     val--;
     520  4633270039 :     return val;
     521             :   }
     522             : 
     523             :   T operator--(int)
     524             :   {
     525             :     spin_mutex::scoped_lock lock(smutex);
     526             :     val--;
     527             :     return val;
     528             :   }
     529             : 
     530             : private:
     531             :   T val;
     532             :   spin_mutex smutex;
     533             : };
     534             : 
     535             : } // namespace Threads
     536             : 
     537             : } // namespace libMesh
     538             : 
     539             : #endif // #ifdef LIBMESH_HAVE_PTHREAD
     540             : 
     541             : #endif // LIBMESH_SQUASH_HEADER_WARNING
     542             : 
     543             : #endif // LIBMESH_THREADS_PTHREAD_H

Generated by: LCOV version 1.14