doxygen/libmesh/threads__pthread_8h_source.html

 // The libMesh Finite Element Library.
 // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner

 // This library is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
 // version 2.1 of the License, or (at your option) any later version.

 // This library is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 // Lesser General Public License for more details.

 // You should have received a copy of the GNU Lesser General Public
 // License along with this library; if not, write to the Free Software
 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 #ifndef LIBMESH_THREADS_PTHREAD_H
 #define LIBMESH_THREADS_PTHREAD_H

 // Do not #include this header directly; it is designed to be
 // #included only through threads.h
 #ifndef LIBMESH_SQUASH_HEADER_WARNING
 # warning "This file is designed to be included through libmesh/threads.h"
 #else

 #ifdef LIBMESH_HAVE_PTHREAD

 // C++ includes
 #ifdef LIBMESH_HAVE_CXX11_THREAD
 # include <thread>
 #endif

 #include <pthread.h>
 #include <algorithm>
 #include <vector>
 #include <memory> // std::unique_ptr, std::make_unique

 #ifdef __APPLE__
 #  ifdef __MAC_10_12
 #    include <os/lock.h>
 #else
 #    include <libkern/OSAtomic.h>
 #  endif
 #endif

 // Thread-local-storage helper macros.
 //
 // LIBMESH_TLS_TYPE(type) spells the type of a variable that should be
 // thread-local: it adds the thread_local keyword when C++11 threading
 // support is available and degrades to the bare type otherwise.
 // (Maybe support gcc __thread eventually?)
 //
 // LIBMESH_TLS_REF(value) accesses such a variable; it expands to the
 // parenthesized value in either configuration.
 #if defined(LIBMESH_HAVE_CXX11_THREAD)
 #  define LIBMESH_TLS_TYPE(type)  thread_local type
 #else
 #  define LIBMESH_TLS_TYPE(type)  type
 #endif
 #define LIBMESH_TLS_REF(value)  (value)

 namespace libMesh
 {

 namespace Threads
 {


 // Select the Thread type: std::thread when C++11 threading support
 // is available, otherwise the NonConcurrentThread placeholder.
 #if defined(LIBMESH_HAVE_CXX11_THREAD)
 using Thread = std::thread;
 #else
 using Thread = NonConcurrentThread;
 #endif // LIBMESH_HAVE_CXX11_THREAD


 #ifdef __APPLE__
 #ifdef __MAC_10_12
 /**
  * Spin mutex backed by an os_unfair_lock.  This variant is compiled
  * on Apple platforms when __MAC_10_12 is defined.
  */
 class spin_mutex
 {
 public:
   spin_mutex()
   {
     ulock = OS_UNFAIR_LOCK_INIT;
   }

   ~spin_mutex() = default;

   /// Acquires the underlying os_unfair_lock.
   void lock ()
   {
     os_unfair_lock_lock(&ulock);
   }

   /// Releases the underlying os_unfair_lock.
   void unlock ()
   {
     os_unfair_lock_unlock(&ulock);
   }

   /**
    * Guard object that unlocks the spin_mutex it refers to (if any)
    * when it is released or destroyed.
    */
   class scoped_lock
   {
   public:
     /// Creates a guard that is not attached to any mutex.
     scoped_lock () : smutex(nullptr) {}

     /// Attaches to \p in_smutex and locks it immediately.
     explicit scoped_lock ( spin_mutex & in_smutex ) :
       smutex(&in_smutex)
     {
       in_smutex.lock();
     }

     ~scoped_lock ()
     {
       release();
     }

     /// Attaches to \p in_smutex and locks it.
     void acquire ( spin_mutex & in_smutex )
     {
       smutex = &in_smutex;
       in_smutex.lock();
     }

     /// Unlocks the attached mutex, if there is one, and detaches.
     void release ()
     {
       if (smutex != nullptr)
         {
           smutex->unlock();
           smutex = nullptr;
         }
     }

   private:
     spin_mutex * smutex;
   };

 private:
   os_unfair_lock ulock;
 };
 #else
 /**
  * Spin mutex backed by an OSSpinLock.  This variant is compiled on
  * Apple platforms when __MAC_10_12 is not defined.
  */
 class spin_mutex
 {
 public:
   // The convention is that the lock being zero is _unlocked_
   spin_mutex() : slock(0)
   {
   }

   ~spin_mutex() = default;

   /// Acquires the underlying OSSpinLock.
   void lock ()
   {
     OSSpinLockLock(&slock);
   }

   /// Releases the underlying OSSpinLock.
   void unlock ()
   {
     OSSpinLockUnlock(&slock);
   }

   /**
    * Guard object that unlocks the spin_mutex it refers to (if any)
    * when it is released or destroyed.
    */
   class scoped_lock
   {
   public:
     /// Creates a guard that is not attached to any mutex.
     scoped_lock () : smutex(nullptr) {}

     /// Attaches to \p in_smutex and locks it immediately.
     explicit scoped_lock ( spin_mutex & in_smutex ) :
       smutex(&in_smutex)
     {
       in_smutex.lock();
     }

     ~scoped_lock ()
     {
       release();
     }

     /// Attaches to \p in_smutex and locks it.
     void acquire ( spin_mutex & in_smutex )
     {
       smutex = &in_smutex;
       in_smutex.lock();
     }

     /// Unlocks the attached mutex, if there is one, and detaches.
     void release ()
     {
       if (smutex != nullptr)
         {
           smutex->unlock();
           smutex = nullptr;
         }
     }

   private:
     spin_mutex * smutex;
   };

 private:
   OSSpinLock slock;
 };
 #endif
 #else
 /**
  * Spin mutex backed by a process-private pthread spinlock.  This
  * variant is compiled on non-Apple platforms.
  */
 class spin_mutex
 {
 public:
   // Might want to use PTHREAD_MUTEX_ADAPTIVE_NP on Linux, but it's not available on OSX.
   spin_mutex()
   {
     pthread_spin_init(&slock, PTHREAD_PROCESS_PRIVATE);
   }

   ~spin_mutex()
   {
     pthread_spin_destroy(&slock);
   }

   /// Acquires the underlying pthread spinlock.
   void lock ()
   {
     pthread_spin_lock(&slock);
   }

   /// Releases the underlying pthread spinlock.
   void unlock ()
   {
     pthread_spin_unlock(&slock);
   }

   /**
    * Guard object that unlocks the spin_mutex it refers to (if any)
    * when it is released or destroyed.
    */
   class scoped_lock
   {
   public:
     /// Creates a guard that is not attached to any mutex.
     scoped_lock () : smutex(nullptr) {}

     /// Attaches to \p in_smutex and locks it immediately.
     explicit scoped_lock ( spin_mutex & in_smutex ) :
       smutex(&in_smutex)
     {
       in_smutex.lock();
     }

     ~scoped_lock ()
     {
       release();
     }

     /// Attaches to \p in_smutex and locks it.
     void acquire ( spin_mutex & in_smutex )
     {
       smutex = &in_smutex;
       in_smutex.lock();
     }

     /// Unlocks the attached mutex, if there is one, and detaches.
     void release ()
     {
       if (smutex != nullptr)
         {
           smutex->unlock();
           smutex = nullptr;
         }
     }

   private:
     spin_mutex * smutex;
   };

 private:
   pthread_spinlock_t slock;
 };
 #endif // __APPLE__


 /**
  * Recursive mutex backed by a pthread mutex of type
  * PTHREAD_MUTEX_RECURSIVE, so the owning thread may lock it again
  * without blocking as long as every lock() is matched by an unlock().
  */
 class recursive_mutex
 {
 public:
   // Might want to use PTHREAD_MUTEX_ADAPTIVE_NP on Linux, but it's not available on OSX.
   recursive_mutex()
   {
     pthread_mutexattr_init(&attr);
     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
     pthread_mutex_init(&mutex, &attr);

     // The attributes object is only consulted while the mutex is
     // being initialized.  Destroy it right away so that whatever
     // pthread_mutexattr_init() set up is not leaked; POSIX guarantees
     // this does not affect the already-initialized mutex.
     pthread_mutexattr_destroy(&attr);
   }

   ~recursive_mutex() { pthread_mutex_destroy(&mutex); }

   /// Locks the underlying pthread mutex.
   void lock () { pthread_mutex_lock(&mutex); }

   /// Unlocks the underlying pthread mutex.
   void unlock () { pthread_mutex_unlock(&mutex); }

   /**
    * Guard object that unlocks the recursive_mutex it refers to (if
    * any) when it is released or destroyed.
    */
   class scoped_lock
   {
   public:
     /// Creates a guard that is not attached to any mutex.
     scoped_lock () : rmutex(nullptr) {}

     /// Attaches to \p in_rmutex and locks it immediately.
     explicit scoped_lock ( recursive_mutex & in_rmutex ) : rmutex(&in_rmutex) { rmutex->lock(); }

     ~scoped_lock () { release(); }

     /// Attaches to \p in_rmutex and locks it.
     void acquire ( recursive_mutex & in_rmutex ) { rmutex = &in_rmutex; rmutex->lock(); }

     /// Unlocks the attached mutex, if there is one, and detaches.
     void release () { if (rmutex) rmutex->unlock(); rmutex = nullptr; }

   private:
     recursive_mutex * rmutex;
   };

 private:
   pthread_mutex_t mutex;

   // Only used while constructing; already destroyed once the
   // constructor has returned.
   pthread_mutexattr_t attr;
 };

 /**
  * \returns The number of threads to use for \p range: the smaller of
  * libMesh::n_threads() and range.size(), but never less than 1.
  */
 template <typename Range>
 unsigned int num_pthreads(Range & range)
 {
   const std::size_t configured = static_cast<std::size_t>(libMesh::n_threads());
   const std::size_t usable = std::min(configured, range.size());

   // An empty range still gets one thread.
   if (usable == 0)
     return 1;

   return cast_int<unsigned int>(usable);
 }

 /**
  * Bundles a pointer to a Range with a pointer to a Body.  run_body()
  * receives one of these through its void * argument and applies the
  * body to the range.  Both pointers are plain, non-owning pointers.
  */
 template <typename Range, typename Body>
 class RangeBody
 {
 public:
   // The range to hand to the body.
   Range * range;
   // The function object to invoke on the range.
   Body * body;
 };

 template <typename Range, typename Body>
 void * run_body(void * args)
 {
   RangeBody<Range, Body> * range_body = (RangeBody<Range, Body> *)args;

   Body & body = *range_body->body;
   Range & range = *range_body->range;

   body(range);

   return nullptr;
 }

 /**
  * Placeholder scheduler object for the pthread backend.  The
  * constructor, initialize() and terminate() all ignore their
  * arguments and do nothing.
  */
 class task_scheduler_init
 {
 public:
   static const int automatic = -1;

   explicit task_scheduler_init (int = automatic)
   {
   }

   void initialize (int = automatic)
   {
   }

   void terminate ()
   {
   }
 };

 //-------------------------------------------------------------------
 // Empty tag type.  parallel_reduce() passes a Threads::split() as the
 // second constructor argument when it creates its per-thread copies
 // of a Body.
 class split {};


 //-------------------------------------------------------------------
 template <typename Range, typename Body>
 inline
 void parallel_for (const Range & range, const Body & body)
 {
   Threads::BoolAcquire b(Threads::in_threads);

   // If we're running in serial - just run!
   if (libMesh::n_threads() == 1)
   {
     body(range);
     return;
   }

   DisablePerfLogInScope disable_perf;

   unsigned int n_threads = num_pthreads(range);

   std::vector<std::unique_ptr<Range>> ranges(n_threads);
   std::vector<RangeBody<const Range, const Body>> range_bodies(n_threads);
   std::vector<pthread_t> threads(n_threads);

   // Create the ranges for each thread
   std::size_t range_size = range.size() / n_threads;

   typename Range::const_iterator current_beginning = range.begin();

   for (unsigned int i=0; i<n_threads; i++)
     {
       std::size_t this_range_size = range_size;

       if (i+1 == n_threads)
         this_range_size += range.size() % n_threads; // Give the last one the remaining work to do

       ranges[i] = std::make_unique<Range>(range, current_beginning, current_beginning + this_range_size);

       current_beginning = current_beginning + this_range_size;
     }

   // Create the RangeBody arguments
   for (unsigned int i=0; i<n_threads; i++)
     {
       range_bodies[i].range = ranges[i].get();
       range_bodies[i].body = &body;
     }

   // Create the threads.  It may seem redundant to wrap a pragma in
   // #ifdefs... but GCC warns about an "unknown pragma" if it
   // encounters this line of code when -fopenmp is not passed to the
   // compiler.
 #ifdef LIBMESH_HAVE_OPENMP
 #pragma omp parallel for schedule (static)
 #endif
   for (int i=0; i<static_cast<int>(n_threads); i++)
     {
 #if !LIBMESH_HAVE_OPENMP
       pthread_create(&threads[i], nullptr, &run_body<Range, Body>, (void *)&range_bodies[i]);
 #else
       run_body<Range, Body>((void *)&range_bodies[i]);
 #endif
     }

 #if !LIBMESH_HAVE_OPENMP
   // Wait for them to finish

   // The use of 'int' instead of unsigned for the iteration variable
   // is deliberate here.  This is an OpenMP loop, and some older
   // compilers warn when you don't use int for the loop index.  The
   // reason has to do with signed vs. unsigned integer overflow
   // behavior and optimization.
   // http://blog.llvm.org/2011/05/what-every-c-programmer-should-know.html
   for (int i=0; i<static_cast<int>(n_threads); i++)
     pthread_join(threads[i], nullptr);
 #endif
 }

 /**
  * Overload of parallel_for() that accepts a partitioner argument.
  * The partitioner is ignored; the call is forwarded to the
  * two-argument parallel_for().
  */
 template <typename Range, typename Body, typename Partitioner>
 inline
 void parallel_for (const Range & range, const Body & body, const Partitioner &)
 {
   parallel_for (range, body);
 }

 /**
  * Execute the provided reduction operation in parallel on the
  * specified range.
  *
  * When libMesh::n_threads() is 1 the body is simply invoked on the
  * whole range.  Otherwise the range is cut into num_pthreads(range)
  * contiguous sub-ranges (the last one also receives the remainder).
  * The passed-in body handles the first sub-range; every other
  * sub-range gets its own copy created as Body(body, Threads::split()).
  * Once all sub-ranges are done, the copies are merged back into
  * \p body with Body::join(), from the last copy down to the first.
  *
  * \param range The range to process.  Range must be constructible
  * from (range, begin, end) and provide size() and begin().
  * \param body The reduction object; on return it holds the joined
  * result.
  */
 template <typename Range, typename Body>
 inline
 void parallel_reduce (const Range & range, Body & body)
 {
   Threads::BoolAcquire b(Threads::in_threads);

   // If we're running in serial - just run!
   if (libMesh::n_threads() == 1)
   {
     body(range);
     return;
   }

   DisablePerfLogInScope disable_perf;

   unsigned int n_threads = num_pthreads(range);

   std::vector<std::unique_ptr<Range>> ranges(n_threads);
   std::vector<std::unique_ptr<Body>> managed_bodies(n_threads); // bodies we are responsible for
   std::vector<Body *> bodies(n_threads); // dumb pointers to managed_bodies
   std::vector<RangeBody<Range, Body>> range_bodies(n_threads);

   // Create n_threads-1 copies of "body". We manage the lifetime of
   // these copies with std::unique_ptrs.
   for (unsigned int i=1; i<n_threads; i++)
     managed_bodies[i] = std::make_unique<Body>(body, Threads::split());

   // Set up the "bodies" vector. Use the passed in body for the first
   // one, point to managed_bodies entries for the others.
   bodies[0] = &body;
   for (unsigned int i=1; i<n_threads; i++)
     bodies[i] = managed_bodies[i].get();

   // Create the ranges for each thread
   std::size_t range_size = range.size() / n_threads;

   typename Range::const_iterator current_beginning = range.begin();

   for (unsigned int i=0; i<n_threads; i++)
     {
       std::size_t this_range_size = range_size;

       if (i+1 == n_threads)
         this_range_size += range.size() % n_threads; // Give the last one the remaining work to do

       ranges[i] = std::make_unique<Range>(range, current_beginning, current_beginning + this_range_size);

       current_beginning = current_beginning + this_range_size;
     }

   // Create the RangeBody arguments
   for (unsigned int i=0; i<n_threads; i++)
     {
       range_bodies[i].range = ranges[i].get();
       range_bodies[i].body = bodies[i];
     }

   // It may seem redundant to wrap a pragma in #ifdefs... but GCC
   // warns about an "unknown pragma" if it encounters this line of
   // code when -fopenmp is not passed to the compiler.
 #ifdef LIBMESH_HAVE_OPENMP
   // The use of 'int' instead of unsigned for the iteration variable
   // is deliberate here.  This is an OpenMP loop, and some older
   // compilers warn when you don't use int for the loop index.  The
   // reason has to do with signed vs. unsigned integer overflow
   // behavior and optimization.
   // http://blog.llvm.org/2011/05/what-every-c-programmer-should-know.html
 #pragma omp parallel for schedule (static)
   for (int i=0; i<static_cast<int>(n_threads); i++)
     run_body<Range, Body>(&range_bodies[i]);
 #else
   // Create the threads.  Reserve up front so that nothing below can
   // throw while threads are already running on our local data.
   std::vector<pthread_t> threads;
   threads.reserve(n_threads);
   std::vector<unsigned int> not_launched;
   not_launched.reserve(n_threads);

   for (unsigned int i=0; i<n_threads; i++)
     {
       pthread_t thread;
       if (pthread_create(&thread, nullptr, &run_body<Range, Body>, &range_bodies[i]) == 0)
         threads.push_back(thread);
       else
         not_launched.push_back(i); // could not start a thread for this chunk
     }

   // Wait for them to finish
   for (const pthread_t & thread : threads)
     pthread_join(thread, nullptr);

   // Any chunk whose thread could not be created is processed here,
   // in the calling thread, so that its contribution is not missing
   // from the joined result.
   for (const unsigned int i : not_launched)
     run_body<Range, Body>(&range_bodies[i]);
 #endif

   // Join them all down to the original Body
   for (unsigned int i=n_threads-1; i != 0; i--)
     bodies[i-1]->join(*bodies[i]);
 }

 /**
  * Overload of parallel_reduce() that accepts a partitioner argument.
  * The partitioner is ignored; the call is forwarded to the
  * two-argument parallel_reduce().
  */
 template <typename Range, typename Body, typename Partitioner>
 inline
 void parallel_reduce (const Range & range, Body & body, const Partitioner &)
 {
   parallel_reduce(range, body);
 }


 /**
  * Defines atomic operations which can only be executed on a single
  * thread at a time.  Every read and every modification of the stored
  * value happens while holding an internal spin_mutex.
  *
  * The increment and decrement operators follow the usual built-in
  * conventions: the prefix forms return the updated value, the
  * postfix forms return the value held before the update.
  */
 template <typename T>
 class atomic
 {
 public:
   /// Initializes the stored value to 0.
   atomic () : val(0) {}

   /// \returns A copy of the stored value, read under the lock.
   operator T () const
   {
     spin_mutex::scoped_lock lock(smutex);
     return val;
   }

   /// Stores \p value and returns the newly stored value.
   T operator=( T value )
   {
     spin_mutex::scoped_lock lock(smutex);
     val = value;
     return val;
   }

   /// Copies the value currently stored in \p value.
   atomic<T> & operator=( const atomic<T> & value )
   {
     // Read the source before taking our own lock: the read locks the
     // source's mutex, so doing it first means we never hold two locks
     // at once and self-assignment cannot deadlock on a
     // non-recursive spin lock.
     const T new_val = value;

     spin_mutex::scoped_lock lock(smutex);
     val = new_val;
     return *this;
   }


   /// Adds \p value and returns the updated value.
   T operator+=(T value)
   {
     spin_mutex::scoped_lock lock(smutex);
     val += value;
     return val;
   }

   /// Subtracts \p value and returns the updated value.
   T operator-=(T value)
   {
     spin_mutex::scoped_lock lock(smutex);
     val -= value;
     return val;
   }

   /// Pre-increment: returns the incremented value.
   T operator++()
   {
     spin_mutex::scoped_lock lock(smutex);
     val++;
     return val;
   }

   /// Post-increment: returns the value held before the increment.
   T operator++(int)
   {
     spin_mutex::scoped_lock lock(smutex);
     T old_val = val;
     val++;
     return old_val;
   }

   /// Pre-decrement: returns the decremented value.
   T operator--()
   {
     spin_mutex::scoped_lock lock(smutex);
     val--;
     return val;
   }

   /// Post-decrement: returns the value held before the decrement.
   T operator--(int)
   {
     spin_mutex::scoped_lock lock(smutex);
     T old_val = val;
     val--;
     return old_val;
   }

 private:
   T val;

   // mutable so that the const conversion operator can lock it.
   mutable spin_mutex smutex;
 };

 } // namespace Threads

 } // namespace libMesh

 #endif // #ifdef LIBMESH_HAVE_PTHREAD

 #endif // LIBMESH_SQUASH_HEADER_WARNING

 #endif // LIBMESH_THREADS_PTHREAD_H
libMesh::Threads::recursive_mutex::recursive_mutex
recursive_mutex()
Definition: threads_pthread.h:182

libMesh::Threads::RangeBody
Definition: threads_pthread.h:221

libMesh::Threads::spin_mutex::slock
OSSpinLock slock
Definition: threads_pthread.h:138

libMesh::Threads::spin_mutex::slock
pthread_spinlock_t slock
Definition: threads_pthread.h:168

libMesh::Threads::recursive_mutex::scoped_lock::scoped_lock
scoped_lock()
Definition: threads_pthread.h:196

libMesh::Threads::spin_mutex::ulock
os_unfair_lock ulock
Definition: threads_pthread.h:110

libMesh::Threads::task_scheduler_init::terminate
void terminate()
Definition: threads_pthread.h:250

libMesh::Threads::recursive_mutex::scoped_lock::scoped_lock
scoped_lock(recursive_mutex &in_rmutex)
Definition: threads_pthread.h:197

libMesh::Threads::Thread
NonConcurrentThread Thread
Use the non-concurrent placeholder.
Definition: threads_none.h:43

libMesh::Threads::spin_mutex::unlock
void unlock()
Definition: threads_pthread.h:92

libMesh::n_threads
unsigned int n_threads()
Definition: libmesh_base.h:96

libMesh::Threads::atomic::operator--
T operator--()
Definition: threads_pthread.h:516

libMesh::Threads::recursive_mutex::scoped_lock::~scoped_lock
~scoped_lock()
Definition: threads_pthread.h:199

libMesh::Threads::recursive_mutex::scoped_lock::acquire
void acquire(recursive_mutex &in_rmutex)
Definition: threads_pthread.h:201

libMesh::Threads::recursive_mutex::unlock
void unlock()
Definition: threads_pthread.h:191

libMesh::Threads::task_scheduler_init::initialize
void initialize(int=automatic)
Definition: threads_pthread.h:249

libMesh::Threads::parallel_for
void parallel_for(const Range &range, const Body &body)
Execute the provided function object in parallel on the specified range.
Definition: threads_none.h:73

libMesh::Threads::atomic::operator++
T operator++(int)
Definition: threads_pthread.h:509

libMesh::Threads::run_body
void * run_body(void *args)
Definition: threads_pthread.h:229

libMesh::Threads::atomic::atomic
atomic()
Definition: threads_pthread.h:470

libMesh::Threads::spin_mutex::scoped_lock::release
void release()
Definition: threads_none.h:140

libMesh::Threads::spin_mutex
Spin mutex.
Definition: threads_none.h:127

libMesh::Threads::RangeBody::range
Range * range
Definition: threads_pthread.h:224

libMesh
The libMesh namespace provides an interface to certain functionality in the library.

libMesh::Threads::in_threads
bool in_threads
A boolean which is true iff we are in a Threads:: function. It may be useful to assert(!Threads::in_threads)...
Definition: threads.C:32

libMesh::Threads::spin_mutex::~spin_mutex
~spin_mutex()
Definition: threads_pthread.h:147

libMesh::Threads::recursive_mutex::attr
pthread_mutexattr_t attr
Definition: threads_pthread.h:210

libMesh::Threads::task_scheduler_init
tbb::task_scheduler_init task_scheduler_init
Scheduler to manage threads.
Definition: threads_tbb.h:71

libMesh::Threads::atomic::operator+=
T operator+=(T value)
Definition: threads_pthread.h:488

libMesh::Threads::spin_mutex::~spin_mutex
~spin_mutex()=default

libMesh::Threads::spin_mutex::scoped_lock::~scoped_lock
~scoped_lock()
Definition: threads_pthread.h:100

libMesh::Threads::spin_mutex::scoped_lock::scoped_lock
scoped_lock(spin_mutex &in_smutex)
Definition: threads_pthread.h:98

libMesh::Threads::spin_mutex::scoped_lock::acquire
void acquire(spin_mutex &in_smutex)
Definition: threads_pthread.h:102

libMesh::Threads::atomic::operator++
T operator++()
Definition: threads_pthread.h:502

libMesh::Threads::recursive_mutex::scoped_lock::rmutex
recursive_mutex * rmutex
Definition: threads_pthread.h:205

libMesh::Threads::recursive_mutex::~recursive_mutex
~recursive_mutex()
Definition: threads_pthread.h:188

libMesh::Threads::atomic::val
T val
Definition: threads_pthread.h:531

libMesh::Threads::spin_mutex::lock
void lock()
Definition: threads_pthread.h:91

libMesh::Threads::recursive_mutex::scoped_lock::release
void release()
Definition: threads_none.h:161

libMesh::Threads::atomic::operator-=
T operator-=(T value)
Definition: threads_pthread.h:495

libMesh::Threads::atomic::operator=
T operator=(T value)
Definition: threads_pthread.h:473

libMesh::Threads::spin_mutex::spin_mutex
spin_mutex()
Definition: threads_pthread.h:88

libMesh::Threads::spin_mutex
tbb::spin_mutex spin_mutex
Spin mutex.
Definition: threads_tbb.h:167

libMesh::Threads::split
tbb::split split
Dummy "splitting object" used to distinguish splitting constructors from copy constructors.
Definition: threads_tbb.h:77

libMesh::Threads::atomic
Defines atomic operations which can only be executed on a single thread at a time.
Definition: threads_none.h:172

libMesh::Threads::atomic::operator=
atomic< T > & operator=(const atomic< T > &value)
Definition: threads_pthread.h:480

libMesh::Threads::atomic::operator--
T operator--(int)
Definition: threads_pthread.h:523

libMesh::Threads::spin_mutex::scoped_lock::scoped_lock
scoped_lock()
Definition: threads_pthread.h:97

libMesh::Threads::spin_mutex::scoped_lock::smutex
spin_mutex * smutex
Definition: threads_pthread.h:106

value
static const bool value
Definition: xdr_io.C:54

libMesh::Threads::parallel_reduce
void parallel_reduce(const Range &range, Body &body)
Execute the provided reduction operation in parallel on the specified range.
Definition: threads_none.h:101

libMesh::Threads::recursive_mutex
Recursive mutex.
Definition: threads_none.h:150

libMesh::Threads::task_scheduler_init::automatic
static const int automatic
Definition: threads_none.h:51

libMesh::Threads::recursive_mutex::mutex
pthread_mutex_t mutex
Definition: threads_pthread.h:209

libMesh::Threads::recursive_mutex::lock
void lock()
Definition: threads_pthread.h:190

libMesh::Threads::atomic::smutex
spin_mutex smutex
Definition: threads_pthread.h:532

libMesh::Threads::num_pthreads
unsigned int num_pthreads(Range &range)
Definition: threads_pthread.h:214

libMesh::Threads::task_scheduler_init::task_scheduler_init
task_scheduler_init(int=automatic)
Definition: threads_pthread.h:248

libMesh::Threads::RangeBody::body
Body * body
Definition: threads_pthread.h:225