libMesh
threads_tbb.h
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 #ifndef LIBMESH_THREADS_TBB_H
20 #define LIBMESH_THREADS_TBB_H
21 
22 // Do not #include this header directly; it is designed to be
23 // #included only through libmesh/threads.h.
24 #ifndef LIBMESH_SQUASH_HEADER_WARNING
25 # warning "This file is designed to be included through libmesh/threads.h"
26 #else
27 
28 #ifdef LIBMESH_HAVE_TBB_API
29 
30 // Standard library includes needed for oneTBB compatibility wrappers.
31 // Placed outside the warning-suppressor block since these are standard headers.
32 #ifdef LIBMESH_HAVE_ONETBB
33 # include <atomic>
34 # include <memory>
35 # include <mutex>
36 # include <thread>
37 #endif
38 
39 // libMesh includes
40 #include "libmesh/ignore_warnings.h"
41 
42 // Threading building blocks includes — era-specific
43 #ifdef LIBMESH_HAVE_ONETBB
44 // oneTBB (>= 2021): tbb_stddef.h, task_scheduler_init.h, atomic.h,
45 // tbb_thread.h, and recursive_mutex.h no longer exist.
46 # include "tbb/version.h"
47 # include "tbb/global_control.h"
48 #else
49 // Legacy Intel TBB (< 2021)
50 # include "tbb/tbb_stddef.h"
51 # include "tbb/task_scheduler_init.h"
52 # include "tbb/atomic.h"
53 # include "tbb/tbb_thread.h"
54 # include "tbb/recursive_mutex.h"
55 #endif
56 
57 // Headers present in both eras (oneTBB provides tbb/*.h compatibility wrappers)
58 #include "tbb/blocked_range.h"
59 #include "tbb/parallel_for.h"
60 #include "tbb/parallel_reduce.h"
61 #include "tbb/partitioner.h"
62 #include "tbb/spin_mutex.h"
63 #include "tbb/enumerable_thread_specific.h"
64 #include "tbb/task_arena.h"
65 
66 #include "libmesh/restore_warnings.h"
67 
/**
 * Evaluates to 1 when the detected TBB version
 * (LIBMESH_DETECTED_TBB_VERSION_MAJOR.LIBMESH_DETECTED_TBB_VERSION_MINOR)
 * is strictly older than \p major.\p minor, and to 0 otherwise.
 *
 * When the major versions are equal the minor versions decide;
 * otherwise only the major versions are compared.
 */
#define TBB_VERSION_LESS_THAN(major,minor)                      \
  (((LIBMESH_DETECTED_TBB_VERSION_MAJOR == (major))             \
    ? (LIBMESH_DETECTED_TBB_VERSION_MINOR < (minor))            \
    : (LIBMESH_DETECTED_TBB_VERSION_MAJOR < (major))) ? 1 : 0)
71 
// Thread-Local-Storage macros.
//
// LIBMESH_TLS_TYPE(T) names the tbb::enumerable_thread_specific
// container for objects of type T; LIBMESH_TLS_REF(obj) calls
// local() on such a container.
#define LIBMESH_TLS_TYPE(T) tbb::enumerable_thread_specific<T>
#define LIBMESH_TLS_REF(tls_object) (tls_object).local()
75 
76 namespace libMesh
77 {
78 
79 namespace Threads
80 {
81 
/**
 * Thread type exposed by the TBB threading backend: std::thread when
 * building against oneTBB, tbb::tbb_thread when building against
 * legacy TBB.
 */
#ifdef LIBMESH_HAVE_ONETBB
using Thread = std::thread;
#else
using Thread = tbb::tbb_thread;
#endif
93 
101 #ifndef LIBMESH_HAVE_ONETBB
103 #else
105 {
106 public:
107  static const int automatic = -1;
108 
109  explicit task_scheduler_init (int n = automatic)
110  {
111  if (n != automatic && n > 0)
112  _gc = std::make_unique<tbb::global_control>(
113  tbb::global_control::max_allowed_parallelism,
114  static_cast<std::size_t>(n));
115  }
116 
117  void initialize (int n = automatic)
118  {
119  if (n != automatic && n > 0)
120  _gc = std::make_unique<tbb::global_control>(
121  tbb::global_control::max_allowed_parallelism,
122  static_cast<std::size_t>(n));
123  }
124 
125  void terminate () { _gc.reset(); }
126 
127 private:
128  std::unique_ptr<tbb::global_control> _gc;
129 };
130 #endif // LIBMESH_HAVE_ONETBB
131 
137 
142 template <typename Range, typename Body>
143 inline
144 void parallel_for (const Range & range, const Body & body,
145  unsigned int n_threads = libMesh::n_threads())
146 {
147  libmesh_error_msg_if(n_threads > libMesh::n_threads(),
148  "Requested n_threads (" << n_threads << ") exceeds the "
149  "global thread count (" << libMesh::n_threads() << ").");
150  BoolAcquire set_in_threads(in_threads);
151 
152  if (n_threads > 1)
153  {
155 
156  DisablePerfLogInScope disable_perf;
157  if (n_threads == libMesh::n_threads())
158  tbb::parallel_for (range, body, tbb::auto_partitioner());
159  else
160  {
161  tbb::task_arena arena(static_cast<int>(n_threads));
162  arena.execute([&]{ tbb::parallel_for(range, body, tbb::auto_partitioner()); });
163  }
164  }
165  else
166  body(range);
167 }
168 
169 
170 
175 template <typename Range, typename Body, typename Partitioner>
176 inline
177 void parallel_for (const Range & range, const Body & body, const Partitioner & partitioner,
178  unsigned int n_threads = libMesh::n_threads())
179 {
180  libmesh_error_msg_if(n_threads > libMesh::n_threads(),
181  "Requested n_threads (" << n_threads << ") exceeds the "
182  "global thread count (" << libMesh::n_threads() << ").");
183  BoolAcquire set_in_threads(in_threads);
184 
185  if (n_threads > 1)
186  {
188 
189  DisablePerfLogInScope disable_perf;
190  if (n_threads == libMesh::n_threads())
191  tbb::parallel_for (range, body, partitioner);
192  else
193  {
194  tbb::task_arena arena(static_cast<int>(n_threads));
195  arena.execute([&]{ tbb::parallel_for(range, body, partitioner); });
196  }
197  }
198  else
199  body(range);
200 }
201 
202 
203 
208 template <typename Range, typename Body>
209 inline
210 void parallel_reduce (const Range & range, Body & body,
211  unsigned int n_threads = libMesh::n_threads())
212 {
213  libmesh_error_msg_if(n_threads > libMesh::n_threads(),
214  "Requested n_threads (" << n_threads << ") exceeds the "
215  "global thread count (" << libMesh::n_threads() << ").");
216  BoolAcquire set_in_threads(in_threads);
217 
218  if (n_threads > 1)
219  {
220  Threads::RAIIAcquire<int> set_active_threads(Threads::active_threads, n_threads);
221 
222  DisablePerfLogInScope disable_perf;
223  if (n_threads == libMesh::n_threads())
224  tbb::parallel_reduce (range, body, tbb::auto_partitioner());
225  else
226  {
227  tbb::task_arena arena(static_cast<int>(n_threads));
228  arena.execute([&]{ tbb::parallel_reduce(range, body, tbb::auto_partitioner()); });
229  }
230  }
231  else
232  body(range);
233 }
234 
235 
236 
241 template <typename Range, typename Body, typename Partitioner>
242 inline
243 void parallel_reduce (const Range & range, Body & body, const Partitioner & partitioner,
244  unsigned int n_threads = libMesh::n_threads())
245 {
246  libmesh_error_msg_if(n_threads > libMesh::n_threads(),
247  "Requested n_threads (" << n_threads << ") exceeds the "
248  "global thread count (" << libMesh::n_threads() << ").");
249  BoolAcquire set_in_threads(in_threads);
250 
251  if (n_threads > 1)
252  {
253  Threads::RAIIAcquire<int> set_active_threads(Threads::active_threads, n_threads);
254 
255  DisablePerfLogInScope disable_perf;
256  if (n_threads == libMesh::n_threads())
257  tbb::parallel_reduce (range, body, partitioner);
258  else
259  {
260  tbb::task_arena arena(static_cast<int>(n_threads));
261  arena.execute([&]{ tbb::parallel_reduce(range, body, partitioner); });
262  }
263  }
264  else
265  body(range);
266 }
267 
268 
269 
277 
/**
 * Recursive mutex.
 *
 * With legacy TBB this is simply tbb::recursive_mutex.  With oneTBB
 * it is a wrapper around std::recursive_mutex that also provides a
 * nested scoped_lock class with acquire() / release() members.
 */
#ifndef LIBMESH_HAVE_ONETBB
typedef tbb::recursive_mutex recursive_mutex;
#else
class recursive_mutex
{
public:
  void lock () { _m.lock(); }
  void unlock () { _m.unlock(); }

  /**
   * Scoped guard for a recursive_mutex.  Whatever lock it still
   * holds is released when the guard is destroyed.
   */
  class scoped_lock
  {
  public:
    /**
     * Creates a guard that holds no lock.
     */
    scoped_lock () : _rm(nullptr) {}

    /**
     * Creates a guard and immediately locks \p rm.
     */
    explicit scoped_lock (recursive_mutex & rm) : _rm(nullptr) { acquire(rm); }

    /**
     * Releases the held lock, if any.
     */
    ~scoped_lock () { release(); }

    // A copy would unlock the same mutex a second time on destruction.
    scoped_lock (const scoped_lock &) = delete;
    scoped_lock & operator= (const scoped_lock &) = delete;

    /**
     * Locks \p rm and remembers it for release().  The pointer is
     * recorded only after lock() returns, so a throwing lock() leaves
     * the guard holding nothing.
     */
    void acquire (recursive_mutex & rm) { rm.lock(); _rm = &rm; }

    /**
     * Unlocks the remembered mutex; does nothing if no lock is held.
     */
    void release () { if (_rm) { _rm->unlock(); _rm = nullptr; } }

  private:
    /**
     * The mutex currently locked through this guard, or null.
     */
    recursive_mutex * _rm;
  };

private:
  std::recursive_mutex _m;
};
#endif // LIBMESH_HAVE_ONETBB
313 
/**
 * Atomic wrapper: derives from tbb::atomic<T> with legacy TBB and
 * from std::atomic<T> with oneTBB.
 *
 * In the oneTBB case the default constructor value-initializes the
 * stored object, which also works for types that are not implicitly
 * convertible from the literal 0 (e.g. scoped enumerations).
 */
#ifndef LIBMESH_HAVE_ONETBB
template <typename T>
class atomic : public tbb::atomic<T> {};
#else
template <typename T>
class atomic : public std::atomic<T>
{
public:
  atomic () : std::atomic<T>(T()) {}

  // The implicitly declared (deleted) copy assignment of this class
  // would otherwise hide std::atomic<T>::operator=(T).
  using std::atomic<T>::operator=;
};
#endif // LIBMESH_HAVE_ONETBB
333 
334 } // namespace Threads
335 
336 } // namespace libMesh
337 
338 #endif // LIBMESH_HAVE_TBB_API
339 
340 #endif // LIBMESH_SQUASH_HEADER_WARNING
341 
342 #endif // LIBMESH_THREADS_TBB_H
void parallel_for(const Range &range, const Body &body, unsigned int n_threads=libMesh::n_threads())
Execute the provided function object in parallel on the specified range.
Definition: threads_none.h:73
unsigned int n_threads()
Definition: libmesh_base.h:109
We use a class to turn Threads::in_threads on and off, to be exception-safe.
Definition: threads.h:65
The libMesh namespace provides an interface to certain functionality in the library.
std::unique_ptr< tbb::global_control > _gc
Definition: threads_tbb.h:128
bool in_threads
A boolean which is true iff we are in a Threads:: function. It may be useful to assert(!Threads::in_threads)...
Definition: threads.C:33
We use a class to turn perf logging off and on within threads, to be exception-safe and to avoid forc...
Definition: threads.h:94
void initialize(int n=automatic)
Definition: threads_tbb.h:117
The Partitioner class provides a uniform interface for partitioning algorithms.
Definition: partitioner.h:51
tbb::task_scheduler_init task_scheduler_init
Scheduler to manage the TBB thread pool.
Definition: threads_tbb.h:102
void parallel_reduce(const Range &range, Body &body, unsigned int n_threads=libMesh::n_threads())
Execute the provided reduction operation in parallel on the specified range.
Definition: threads_none.h:109
tbb::spin_mutex spin_mutex
Spin mutex.
tbb::split split
Dummy "splitting object" used to distinguish splitting constructors from copy constructors.
Definition: threads_tbb.h:136
Defines atomic operations which can only be executed on a single thread at a time.
Definition: threads_none.h:188
int active_threads
An integer which is set to the number of active threads when we are in a Threads:: parallel operation...
Definition: threads.C:32
RAIIAcquire< bool, true, true > BoolAcquire
Definition: threads.h:84
Scheduler to manage threads.
Definition: threads_none.h:48
tbb::recursive_mutex recursive_mutex
Recursive mutex.
Definition: threads_tbb.h:287
NonConcurrentThread Thread
Use the non-concurrent placeholder.
Definition: threads_none.h:43