TIMPI
op_function.h
Go to the documentation of this file.
1 // The TIMPI Message-Passing Parallelism Library.
2 // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 #ifndef TIMPI_OP_FUNCTION_H
20 #define TIMPI_OP_FUNCTION_H
21 
22 #include "timpi/timpi_config.h"
23 
24 #include "timpi/semipermanent.h"
25 #include "timpi/timpi_init.h"
26 #include "timpi/timpi_call_mpi.h"
27 
28 #ifdef TIMPI_HAVE_MPI
29 # include "timpi/ignore_warnings.h"
30 # include "mpi.h"
31 # include "timpi/restore_warnings.h"
32 #endif // TIMPI_HAVE_MPI
33 
34 // Boost include if necessary for float128
35 #ifdef TIMPI_DEFAULT_QUADRUPLE_PRECISION
36 # include <boost/multiprecision/float128.hpp>
37 #endif
38 
39 // C++ includes
40 #include <functional>
41 #include <type_traits>
42 
43 
44 
45 namespace TIMPI
46 {
47 #ifdef TIMPI_DEFAULT_QUADRUPLE_PRECISION
48 # ifdef TIMPI_HAVE_MPI
/*
 * Generates timpi_mpi_quad_<funcname>: a callback with the
 * MPI_User_function signature that folds buffer `a` into buffer `b`
 * entry by entry using the two-argument std::<funcname>.  Both
 * buffers are read as arrays of *len TIMPI_DEFAULT_SCALAR_TYPE values;
 * the datatype argument is ignored.
 */
# define TIMPI_MPI_QUAD_BINARY(funcname) \
inline void \
timpi_mpi_quad_##funcname(void * a, void * b, int * len, MPI_Datatype *) \
{ \
  TIMPI_DEFAULT_SCALAR_TYPE * const incoming = \
    static_cast<TIMPI_DEFAULT_SCALAR_TYPE*>(a); \
  TIMPI_DEFAULT_SCALAR_TYPE * const accum = \
    static_cast<TIMPI_DEFAULT_SCALAR_TYPE*>(b); \
 \
  const int n_entries = *len; \
  for (int k = 0; k != n_entries; ++k) \
    accum[k] = std::funcname(incoming[k], accum[k]); \
}
60 
/*
 * Generates timpi_mpi_quad_<funcname>_location: a callback with the
 * MPI_User_function signature implementing a value-with-location
 * reduction.  Both buffers are read as arrays of *len
 * std::pair<TIMPI_DEFAULT_SCALAR_TYPE, int> entries (value, location);
 * the datatype argument is ignored.
 *
 * For each entry the value becomes std::<funcname>(in, inout).  The
 * location follows whichever side supplied the winning value; when
 * both sides hold the same value the smaller location is kept, which
 * matches the tie-breaking of MPI_MAXLOC / MPI_MINLOC and keeps the
 * result independent of the order in which a commutative reduction is
 * evaluated.
 */
# define TIMPI_MPI_QUAD_LOCATOR(funcname) \
inline void \
timpi_mpi_quad_##funcname##_location(void * a, void * b, int * len, MPI_Datatype *) \
{ \
  const int size = *len; \
 \
  typedef std::pair<TIMPI_DEFAULT_SCALAR_TYPE, int> dtype; \
 \
  const dtype *in = static_cast<dtype*>(a); \
  dtype *inout = static_cast<dtype*>(b); \
  for (int i=0; i != size; ++i) \
    { \
      const TIMPI_DEFAULT_SCALAR_TYPE old_inout = inout[i].first; \
      inout[i].first = std::funcname(in[i].first,inout[i].first); \
      if (old_inout != inout[i].first) \
        inout[i].second = in[i].second; \
      /* Tie: prefer the smaller location, as MPI_MAXLOC/MINLOC do */ \
      else if (in[i].first == old_inout && \
               in[i].second < inout[i].second) \
        inout[i].second = in[i].second; \
    } \
}
79 
80 
/*
 * Generates timpi_mpi_quad_<funcname>: a callback with the
 * MPI_User_function signature that folds buffer `a` into buffer `b`
 * entry by entry using the standard function object
 * std::<funcname><TIMPI_DEFAULT_SCALAR_TYPE>.  Both buffers are read
 * as arrays of *len TIMPI_DEFAULT_SCALAR_TYPE values; the datatype
 * argument is ignored.
 */
# define TIMPI_MPI_QUAD_BINARY_FUNCTOR(funcname) \
inline void \
timpi_mpi_quad_##funcname(void * a, void * b, int * len, MPI_Datatype *) \
{ \
  TIMPI_DEFAULT_SCALAR_TYPE * const incoming = \
    static_cast<TIMPI_DEFAULT_SCALAR_TYPE*>(a); \
  TIMPI_DEFAULT_SCALAR_TYPE * const accum = \
    static_cast<TIMPI_DEFAULT_SCALAR_TYPE*>(b); \
 \
  const int n_entries = *len; \
  for (int k = 0; k != n_entries; ++k) \
    accum[k] = \
      std::funcname<TIMPI_DEFAULT_SCALAR_TYPE>()(incoming[k], accum[k]); \
}
92 
93 
// Instantiate the quadruple-precision reduction callbacks.  In order,
// these expand to timpi_mpi_quad_max and timpi_mpi_quad_min,
// timpi_mpi_quad_max_location and timpi_mpi_quad_min_location, and
// timpi_mpi_quad_plus and timpi_mpi_quad_multiplies.
94 TIMPI_MPI_QUAD_BINARY(max)
95 TIMPI_MPI_QUAD_BINARY(min)
96 TIMPI_MPI_QUAD_LOCATOR(max)
97 TIMPI_MPI_QUAD_LOCATOR(min)
98 TIMPI_MPI_QUAD_BINARY_FUNCTOR(plus)
99 TIMPI_MPI_QUAD_BINARY_FUNCTOR(multiplies)
100 
101 # endif // TIMPI_HAVE_MPI
102 #endif // TIMPI_DEFAULT_QUADRUPLE_PRECISION
103 
104 
105 //-------------------------------------------------------------------
106 
// Always-false trait whose value nevertheless depends on a template
// parameter, so that a static_assert written in terms of it is only
// evaluated when the enclosing template is actually instantiated.
template <typename Unused>
struct opfunction_dependent_false : std::integral_constant<bool, false>
{
};
111 
119 template <typename T>
121 {
122  // Get a slightly better compiler diagnostic if we have C++11
123  static_assert(opfunction_dependent_false<T>::value,
124  "Only specializations of OpFunction may be used, did you forget to include a header file (e.g. parallel_algebra.h)?");
125 
126  /*
127  * The unspecialized class defines none of these functions;
128  * specializations will need to define any functions that need to be
129  * usable.
130  *
131  * Most specializations will just return MPI_MIN, etc, but we'll use
132  * a whitelist rather than a default implementation, so that any
133  * attempt to perform a reduction on an unspecialized type will be a
134  * compile-time rather than a run-time failure.
135  */
136  // static MPI_Op max();
137  // static MPI_Op min();
138  // static MPI_Op sum();
139  // static MPI_Op product();
140  // static MPI_Op logical_and();
141  // static MPI_Op bitwise_and();
142  // static MPI_Op logical_or();
143  // static MPI_Op bitwise_or();
144  // static MPI_Op logical_xor();
145  // static MPI_Op bitwise_xor();
146  // static MPI_Op max_loc();
147  // static MPI_Op min_loc();
148 };
149 
150 
151 
152 // ------------------------------------------------------------
153 // Declare OpFunction specializations for C++ built-in types
154 
155 #ifdef TIMPI_HAVE_MPI
156 
// Defines an explicit OpFunction specialization for the integer type
// `cxxtype`.  Each static member simply returns the corresponding
// predefined MPI reduction handle: arithmetic (MPI_MAX, MPI_MIN,
// MPI_SUM, MPI_PROD), logical and bitwise (MPI_LAND, MPI_BAND, MPI_LOR,
// MPI_BOR, MPI_LXOR, MPI_BXOR) and value-with-location (MPI_MAXLOC,
// MPI_MINLOC).  The expansion ends without a semicolon; the invocation
// supplies it.
157 #define TIMPI_PARALLEL_INTEGER_OPS(cxxtype) \
158  template<> \
159  class OpFunction<cxxtype> \
160  { \
161  public: \
162  static MPI_Op max() { return MPI_MAX; } \
163  static MPI_Op min() { return MPI_MIN; } \
164  static MPI_Op sum() { return MPI_SUM; } \
165  static MPI_Op product() { return MPI_PROD; } \
166  static MPI_Op logical_and() { return MPI_LAND; } \
167  static MPI_Op bitwise_and() { return MPI_BAND; } \
168  static MPI_Op logical_or() { return MPI_LOR; } \
169  static MPI_Op bitwise_or() { return MPI_BOR; } \
170  static MPI_Op logical_xor() { return MPI_LXOR; } \
171  static MPI_Op bitwise_xor() { return MPI_BXOR; } \
172  static MPI_Op max_location() { return MPI_MAXLOC; } \
173  static MPI_Op min_location() { return MPI_MINLOC; } \
174  }
175 
// Defines an explicit OpFunction specialization for the floating point
// type `cxxtype`.  Only the arithmetic (MPI_MAX, MPI_MIN, MPI_SUM,
// MPI_PROD) and value-with-location (MPI_MAXLOC, MPI_MINLOC) handles
// are exposed; unlike the integer variant there are no logical or
// bitwise members.  The expansion ends without a semicolon; the
// invocation supplies it.
176 #define TIMPI_PARALLEL_FLOAT_OPS(cxxtype) \
177  template<> \
178  class OpFunction<cxxtype> \
179  { \
180  public: \
181  static MPI_Op max() { return MPI_MAX; } \
182  static MPI_Op min() { return MPI_MIN; } \
183  static MPI_Op sum() { return MPI_SUM; } \
184  static MPI_Op product() { return MPI_PROD; } \
185  static MPI_Op max_location() { return MPI_MAXLOC; } \
186  static MPI_Op min_location() { return MPI_MINLOC; } \
187  }
188 
189 #else
190 
// Without MPI the integer specialization is an empty class: the type
// is still marked as specialized (so the primary template's
// static_assert is not hit) but no reduction handles are provided.
191 #define TIMPI_PARALLEL_INTEGER_OPS(cxxtype) \
192  template<> \
193  class OpFunction<cxxtype> \
194  { \
195  }
196 
// Without MPI the floating point specialization is likewise an empty
// class.
197 #define TIMPI_PARALLEL_FLOAT_OPS(cxxtype) \
198  template<> \
199  class OpFunction<cxxtype> \
200  { \
201  }
202 
203 #endif
204 
206 TIMPI_PARALLEL_INTEGER_OPS(signed char);
207 TIMPI_PARALLEL_INTEGER_OPS(unsigned char);
208 TIMPI_PARALLEL_INTEGER_OPS(short int);
209 TIMPI_PARALLEL_INTEGER_OPS(unsigned short int);
211 TIMPI_PARALLEL_INTEGER_OPS(unsigned int);
213 TIMPI_PARALLEL_INTEGER_OPS(long long);
214 TIMPI_PARALLEL_INTEGER_OPS(unsigned long);
215 TIMPI_PARALLEL_INTEGER_OPS(unsigned long long);
216 
219 TIMPI_PARALLEL_FLOAT_OPS(long double);
220 
221 #ifdef TIMPI_HAVE_MPI
222 // Helper class to avoid leaking MPI_Op when TIMPI exits
223 class ManageOp : public SemiPermanent
224 {
225 public:
226  ManageOp(MPI_User_function * func, int commute, MPI_Op * op)
227  : _op(op)
228  {
229  timpi_call_mpi(MPI_Op_create(func, commute, _op));
230  }
231 
232  virtual ~ManageOp() override {
233  MPI_Op_free(_op);
234  }
235 private:
236  MPI_Op * _op;
237 };
238 #endif
239 
/*
 * Generates a static member function mpiname() returning the MPI_Op
 * that wraps the callback timpi_mpi_<funcname>.  The handle lives in a
 * function-local static: while it is MPI_OP_NULL, a ManageOp (which
 * creates the operation and writes the handle into that static) is
 * constructed and handed to SemiPermanent::add; otherwise the existing
 * handle is returned as-is.  The operation is registered with the
 * commute flag set to true.
 */
#define TIMPI_MPI_OPFUNCTION(mpiname, funcname) \
  static MPI_Op mpiname() \
  { \
    static MPI_Op TIMPI_MPI_##mpiname = MPI_OP_NULL; \
    if (TIMPI_MPI_##mpiname != MPI_OP_NULL) \
      return TIMPI_MPI_##mpiname; \
 \
    SemiPermanent::add \
      (std::make_unique<ManageOp>(timpi_mpi_##funcname, true, &TIMPI_MPI_##mpiname)); \
    return TIMPI_MPI_##mpiname; \
  }
248 
// OpFunction specialization for the quadruple-precision scalar type.
249 #ifdef TIMPI_DEFAULT_QUADRUPLE_PRECISION
250 # ifdef TIMPI_HAVE_MPI
// With MPI, every reduction is a user-defined MPI_Op built on first
// use by TIMPI_MPI_OPFUNCTION around the matching timpi_mpi_quad_*
// callback, rather than one of the predefined MPI_MAX-style handles.
251  template<>
252  class OpFunction<TIMPI_DEFAULT_SCALAR_TYPE>
253  {
254  public:
255  TIMPI_MPI_OPFUNCTION(max, quad_max)
256  TIMPI_MPI_OPFUNCTION(min, quad_min)
257  TIMPI_MPI_OPFUNCTION(sum, quad_plus)
258  TIMPI_MPI_OPFUNCTION(product, quad_multiplies)
259 
// Value-with-location reductions
260  TIMPI_MPI_OPFUNCTION(max_location, quad_max_location)
261  TIMPI_MPI_OPFUNCTION(min_location, quad_min_location)
262  };
263 
264 # else
// Without MPI this expands to an empty specialization.
265  TIMPI_PARALLEL_FLOAT_OPS(TIMPI_DEFAULT_SCALAR_TYPE);
266 # endif
267 #endif // TIMPI_DEFAULT_QUADRUPLE_PRECISION
268 
269 #ifdef TIMPI_HAVE_MPI
270 
/*
 * Generates the static function timpi_mpi_pair_<funcname>: a callback
 * with the MPI_User_function signature that folds buffer `a` into
 * buffer `b`, applying the two-argument std::<funcname> separately to
 * the .first and the .second member of each entry.  Both buffers are
 * read as arrays of *len std::pair<T,U> values, where T and U come
 * from the scope the macro is expanded in; the datatype argument is
 * ignored.
 */
# define TIMPI_MPI_PAIR_BINARY(funcname) \
static inline void \
timpi_mpi_pair_##funcname(void * a, void * b, int * len, MPI_Datatype *) \
{ \
  typedef std::pair<T,U> pair_type; \
 \
  const pair_type * const incoming = static_cast<pair_type *>(a); \
  pair_type * const accum = static_cast<pair_type *>(b); \
 \
  const int n_entries = *len; \
  for (int k = 0; k != n_entries; ++k) \
    { \
      const pair_type & src = incoming[k]; \
      pair_type & dst = accum[k]; \
      dst.first = std::funcname(src.first, dst.first); \
      dst.second = std::funcname(src.second, dst.second); \
    } \
}
285 
/*
 * Generates the static function timpi_mpi_pair_<funcname>_location: a
 * callback with the MPI_User_function signature implementing a
 * value-with-location reduction over std::pair<T,U> values.  Both
 * buffers are read as arrays of *len
 * std::pair<std::pair<T,U>, int> entries (value, location), where T
 * and U come from the scope the macro is expanded in; the datatype
 * argument is ignored.
 *
 * The value is combined member by member with std::<funcname>.  If
 * that changes the stored value, the incoming location replaces the
 * stored one.  If both sides hold the same value, the smaller location
 * is kept, which matches the tie-breaking of MPI_MAXLOC / MPI_MINLOC
 * and keeps the result independent of the order in which a
 * commutative reduction is evaluated.
 *
 * NOTE(review): because the members are combined independently, the
 * result can be a mix that neither side held (e.g. max of (1,5) and
 * (3,2) is (3,5)); in that case the incoming location is reported.
 */
# define TIMPI_MPI_PAIR_LOCATOR(funcname) \
static inline void \
timpi_mpi_pair_##funcname##_location(void * a, void * b, int * len, MPI_Datatype *) \
{ \
  const int size = *len; \
 \
  typedef std::pair<std::pair<T,U>, int> dtype; \
 \
  const dtype *in = static_cast<dtype*>(a); \
  dtype *inout = static_cast<dtype*>(b); \
  for (int i=0; i != size; ++i) \
    { \
      const std::pair<T,U> old_inout = inout[i].first; \
      inout[i].first.first = std::funcname(in[i].first.first, inout[i].first.first); \
      inout[i].first.second = std::funcname(in[i].first.second,inout[i].first.second); \
      if (old_inout != inout[i].first) \
        inout[i].second = in[i].second; \
      /* Tie: prefer the smaller location, as MPI_MAXLOC/MINLOC do */ \
      else if (in[i].first == old_inout && \
               in[i].second < inout[i].second) \
        inout[i].second = in[i].second; \
    } \
}
305 
306 
/*
 * Generates the static function timpi_mpi_pair_<funcname>: a callback
 * with the MPI_User_function signature that folds buffer `a` into
 * buffer `b` using the standard function object templates
 * std::<funcname>.  Both buffers are read as arrays of *len
 * std::pair<T,U> values, where T and U come from the scope the macro
 * is expanded in; the datatype argument is ignored.
 *
 * Each member is combined in its own type: .first with
 * std::<funcname><T> and .second with std::<funcname><U>, so that a
 * U value is never converted to T before the operation.
 */
# define TIMPI_MPI_PAIR_BINARY_FUNCTOR(funcname) \
static inline void \
timpi_mpi_pair_##funcname(void * a, void * b, int * len, MPI_Datatype *) \
{ \
  const int size = *len; \
 \
  const std::pair<T,U> * in = static_cast<std::pair<T,U> *>(a); \
  std::pair<T,U> * inout = static_cast<std::pair<T,U> *>(b); \
  for (int i=0; i != size; ++i) \
    { \
      inout[i].first = std::funcname<T>()(in[i].first, inout[i].first); \
      inout[i].second = std::funcname<U>()(in[i].second,inout[i].second); \
    } \
}
321 
322 
// OpFunction partial specialization for std::pair<T,U>.  Every
// reduction is a user-defined MPI_Op built on first use by
// TIMPI_MPI_OPFUNCTION around one of the callbacks generated below.
323  template<typename T, typename U>
324  class OpFunction<std::pair<T,U>>
325  {
// Reduction callbacks: static members that are private, since they
// are declared before the `public:` label.  They expand to
// timpi_mpi_pair_max / _min, timpi_mpi_pair_max_location /
// _min_location and timpi_mpi_pair_plus / _multiplies.
326  TIMPI_MPI_PAIR_BINARY(max)
327  TIMPI_MPI_PAIR_BINARY(min)
328  TIMPI_MPI_PAIR_LOCATOR(max)
329  TIMPI_MPI_PAIR_LOCATOR(min)
330  TIMPI_MPI_PAIR_BINARY_FUNCTOR(plus)
331  TIMPI_MPI_PAIR_BINARY_FUNCTOR(multiplies)
332 
333  public:
// Public accessors returning the MPI_Op for each callback
334  TIMPI_MPI_OPFUNCTION(max, pair_max)
335  TIMPI_MPI_OPFUNCTION(min, pair_min)
336  TIMPI_MPI_OPFUNCTION(sum, pair_plus)
337  TIMPI_MPI_OPFUNCTION(product, pair_multiplies)
338 
// Value-with-location reductions
339  TIMPI_MPI_OPFUNCTION(max_location, pair_max_location)
340  TIMPI_MPI_OPFUNCTION(min_location, pair_min_location)
341  };
342 # else // TIMPI_HAVE_MPI
// Without MPI the std::pair specialization is an empty class: the type
// counts as specialized, but no reduction handles are provided.
343  template<typename T, typename U>
344  class OpFunction<std::pair<T,U>> {};
345 #endif
346 
347 } // namespace TIMPI
348 
349 #endif // TIMPI_OP_FUNCTION_H
ManageOp(MPI_User_function *func, int commute, MPI_Op *op)
Definition: op_function.h:226
virtual ~ManageOp() override
Definition: op_function.h:232
TIMPI_PARALLEL_FLOAT_OPS(float)
The SemiPermanent "class" is basically just a place for a destructor vtable.
Definition: semipermanent.h:48
TIMPI_PARALLEL_INTEGER_OPS(char)
Templated class to provide the appropriate MPI reduction operations for use with built-in C types or ...
Definition: op_function.h:120