/**
 * Packs the flags of \p vec_in into the integer words of \p vec_out.
 *
 * Flag \c i is stored at bit position (i % B) of word (i / B), where
 * B = 8*sizeof(T) is the number of bits in one word.
 *
 * \param vec_in  The boolean flags to pack.
 * \param vec_out Receives the packed words.  Any previous contents are
 *                discarded; on return it holds exactly
 *                ceil(vec_in.size() / B) words, and the unused high bits
 *                of the last word are zero.
 */
template <typename T, typename A1, typename A2>
inline void pack_vector_bool(const std::vector<bool,A1> & vec_in,
                             std::vector<T,A2> & vec_out)
{
  const std::size_t data_bits = 8*sizeof(T);
  const std::size_t in_size = vec_in.size();
  const std::size_t out_size = in_size/data_bits + ((in_size%data_bits)?1:0);

  // Start from all-zero words.  resize() alone would keep whatever the
  // caller's vector already held, and the bits set below would then be
  // merged into that stale data.
  vec_out.assign(out_size, T(0));

  for (std::size_t i=0; i != in_size; ++i)
    if (vec_in[i])
      {
        const std::size_t index = i/data_bits;
        const std::size_t offset = i%data_bits;

        // The one being shifted must have type T: shifting a plain
        // unsigned int by 32 or more is undefined when T is wider
        // than unsigned int.
        vec_out[index] |= static_cast<T>(static_cast<T>(1) << offset);
      }
}
/**
 * Expands the packed words of \p vec_in back into individual flags.
 *
 * Flag \c i is read from bit position (i % B) of word (i / B), where
 * B = 8*sizeof(T).
 *
 * \param vec_in  The packed words.
 * \param vec_out Receives the flags.  It must already have the desired
 *                number of flags as its size; that size is asserted to
 *                need exactly vec_in.size() words.
 */
template <typename T, typename A1, typename A2>
inline void unpack_vector_bool(const std::vector<T,A1> & vec_in,
                               std::vector<bool,A2> & vec_out)
{
  const unsigned int data_bits = 8*sizeof(T);

  // The caller chooses how many flags to extract by pre-sizing vec_out
  const std::size_t out_size = vec_out.size();
  timpi_assert_equal_to
    (out_size/data_bits + (out_size%data_bits?1:0), vec_in.size());

  // Walk the flags in order, tracking the current word and the bit
  // position inside it instead of recomputing a division per flag.
  std::size_t word = 0;
  unsigned int shift = 0;
  for (std::size_t bit = 0; bit < out_size; ++bit)
    {
      vec_out[bit] = ((vec_in[word] >> shift) & 1) != 0;

      if (++shift == data_bits)
        {
          shift = 0;
          ++word;
        }
    }
}
// Helper that exchanges nested vectors: send_data goes to
// dest_processor_id and recv_data is filled from source_processor_id.
//
// NOTE(review): comm, req, send_tag and recv_tag are used below but are
// not declared in the lines visible here; presumably they are the
// remaining parameters and a local request object -- confirm against
// the full signature.
143 #ifdef TIMPI_HAVE_MPI 146 template <
typename T1,
typename T2,
typename A1,
typename A2,
typename A3,
typename A4>
147 inline void send_receive_vec_of_vec(
const unsigned int dest_processor_id,
148 const std::vector<std::vector<T1,A1>,A2> & send_data,
149 const unsigned int source_processor_id,
150 std::vector<std::vector<T2,A3>,A4> & recv_data,
155 TIMPI_LOG_SCOPE(
"send_receive()",
"Parallel");
// When this rank is both the destination and the source, the data is
// copied directly into recv_data.
// NOTE(review): the statements between this copy and the send/receive
// pair below are not visible, so whether this branch returns early
// cannot be confirmed from here.
157 if (dest_processor_id == comm.
rank() &&
158 source_processor_id == comm.
rank())
160 recv_data = send_data;
// Otherwise the send is issued with the request object req, followed
// by the receive into recv_data.
165 comm.
send (dest_processor_id, send_data, req, send_tag);
166 comm.
receive (source_processor_id, recv_data, recv_tag);
170 #endif // TIMPI_HAVE_MPI 179 #ifdef TIMPI_HAVE_MPI 198 template <
typename T>
200 std::pair<data_type, std::unique_ptr<StandardType<std::pair<T,int>>>>
203 std::pair<data_type, std::unique_ptr<StandardType<std::pair<T,int>>>> return_val;
204 return_val.first = dataplusint_type<T>();
205 if (return_val.first == MPI_DATATYPE_NULL)
207 return_val.second.reset(
new StandardType<std::pair<T,int>>());
208 return_val.first = *return_val.second;
217 # define TIMPI_COUNT_TYPE MPI_COUNT 218 # define TIMPI_PACK_SIZE MPI_Pack_size_c 219 # define TIMPI_SEND MPI_Send_c 220 # define TIMPI_SSEND MPI_Ssend_c 221 # define TIMPI_ALLREDUCE MPI_Allreduce_c 222 # define TIMPI_IALLREDUCE MPI_Iallreduce_c 223 # define TIMPI_ISEND MPI_Isend_c 224 # define TIMPI_ISSEND MPI_Issend_c 225 # define TIMPI_PACK MPI_Pack_c 226 # define TIMPI_UNPACK MPI_Unpack_c 227 # define TIMPI_RECV MPI_Recv_c 228 # define TIMPI_IRECV MPI_Irecv_c 229 # define TIMPI_SENDRECV MPI_Sendrecv_c 230 # define TIMPI_ALLGATHERV MPI_Allgatherv_c 231 # define TIMPI_ALLGATHER MPI_Allgather_c 232 # define TIMPI_BCAST MPI_Bcast_c 233 # define TIMPI_GATHER MPI_Gather_c 234 # define TIMPI_GATHERV MPI_Gatherv_c 235 # define TIMPI_SCATTER MPI_Scatter_c 236 # define TIMPI_SCATTERV MPI_Scatterv_c 237 # define TIMPI_ALLTOALL MPI_Alltoall_c 240 # define TIMPI_COUNT_TYPE MPI_INT 241 # define TIMPI_PACK_SIZE MPI_Pack_size 242 # define TIMPI_SEND MPI_Send 243 # define TIMPI_SSEND MPI_Ssend 244 # define TIMPI_ALLREDUCE MPI_Allreduce 245 # define TIMPI_IALLREDUCE MPI_Iallreduce 246 # define TIMPI_ISEND MPI_Isend 247 # define TIMPI_ISSEND MPI_Issend 248 # define TIMPI_PACK MPI_Pack 249 # define TIMPI_UNPACK MPI_Unpack 250 # define TIMPI_RECV MPI_Recv 251 # define TIMPI_IRECV MPI_Irecv 252 # define TIMPI_SENDRECV MPI_Sendrecv 253 # define TIMPI_ALLGATHERV MPI_Allgatherv 254 # define TIMPI_ALLGATHER MPI_Allgather 255 # define TIMPI_BCAST MPI_Bcast 256 # define TIMPI_GATHER MPI_Gather 257 # define TIMPI_GATHERV MPI_Gatherv 258 # define TIMPI_SCATTER MPI_Scatter 259 # define TIMPI_SCATTERV MPI_Scatterv 260 # define TIMPI_ALLTOALL MPI_Alltoall 265 template <
typename T,
typename A1,
typename A2>
275 (TIMPI_PACK_SIZE (1, TIMPI_COUNT_TYPE, this->
get(), &packedsize));
277 std::size_t sendsize = packedsize;
279 const std::size_t n_vecs = buf.size();
281 for (std::size_t i = 0; i != n_vecs; ++i)
285 (TIMPI_PACK_SIZE (1, TIMPI_COUNT_TYPE, this->
get(), &packedsize));
287 sendsize += packedsize;
291 (TIMPI_PACK_SIZE (cast_int<CountType>(buf[i].
size()),
292 type, this->
get(), &packedsize));
294 sendsize += packedsize;
297 timpi_assert (sendsize );
304 const std::basic_string<T> & buf,
307 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
309 T * dataptr = buf.empty() ? nullptr :
const_cast<T *
>(buf.data());
311 timpi_assert_less(dest_processor_id, this->
size());
315 TIMPI_SSEND : TIMPI_SEND)
316 (dataptr, cast_int<CountType>(buf.size()),
323 template <
typename T>
325 const std::basic_string<T> & buf,
329 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
331 T * dataptr = buf.empty() ? nullptr :
const_cast<T *
>(buf.data());
333 timpi_assert_less(dest_processor_id, this->
size());
337 TIMPI_ISSEND : TIMPI_ISEND)
338 (dataptr, cast_int<CountType>(buf.size()),
340 this->
get(), req.
get()));
349 template <
typename T>
354 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
356 T * dataptr =
const_cast<T*
> (&buf);
358 timpi_assert_less(dest_processor_id, this->
size());
362 TIMPI_SSEND : TIMPI_SEND)
364 tag.
value(), this->
get()));
369 template <
typename T>
375 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
377 T * dataptr =
const_cast<T*
>(&buf);
379 timpi_assert_less(dest_processor_id, this->
size());
383 TIMPI_ISSEND : TIMPI_ISEND)
385 tag.
value(), this->
get(), req.
get()));
394 template <
typename T,
typename C,
typename A>
396 const std::set<T,C,A> & buf,
399 this->
send(dest_processor_id, buf,
405 template <
typename T,
typename C,
typename A>
407 const std::set<T,C,A> & buf,
409 const MessageTag & tag)
const 411 this->
send(dest_processor_id, buf,
412 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
417 template <
typename T,
typename C,
typename A>
419 const std::set<T,C,A> & buf,
420 const DataType & type,
421 const MessageTag & tag)
const 423 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
425 std::vector<T> vecbuf(buf.begin(), buf.end());
426 this->
send(dest_processor_id, vecbuf, type, tag);
431 template <
typename T,
typename C,
typename A>
433 const std::set<T,C,A> & buf,
434 const DataType & type,
436 const MessageTag & tag)
const 438 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
442 std::vector<T> * vecbuf =
443 new std::vector<T,A>(buf.begin(), buf.end());
446 req.add_post_wait_work
447 (
new PostWaitDeleteBuffer<std::vector<T,A>>(vecbuf));
449 this->
send(dest_processor_id, *vecbuf, type, req, tag);
454 template <
typename T,
typename A>
456 const std::vector<T,A> & buf,
457 const MessageTag & tag)
const 459 this->
send(dest_processor_id, buf,
460 StandardType<T>(buf.empty() ? nullptr : &buf.front()), tag);
465 template <
typename T,
typename A,
466 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
468 const std::vector<T,A> & buf,
470 const MessageTag & tag)
const 472 this->
send(dest_processor_id, buf,
473 StandardType<T>(buf.empty() ? nullptr : &buf.front()), req, tag);
476 template <
typename T,
typename A,
477 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
479 const std::vector<T,A> & buf,
481 const MessageTag & tag)
const 492 template <
typename T,
typename A>
494 const std::vector<T,A> & buf,
495 const DataType & type,
496 const MessageTag & tag)
const 498 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
502 TIMPI_SSEND : TIMPI_SEND)
503 (buf.empty() ? nullptr :
const_cast<T*
>(buf.data()),
504 cast_int<CountType>(buf.size()), type, dest_processor_id,
505 tag.value(), this->
get()));
510 template <
typename T,
typename A,
typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
512 const std::vector<T,A> & buf,
513 const DataType & type,
515 const MessageTag & tag)
const 517 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
519 timpi_assert_less(dest_processor_id, this->
size());
523 TIMPI_ISSEND : TIMPI_ISEND)
524 (buf.empty() ? nullptr :
const_cast<T*
>(buf.data()),
525 cast_int<CountType>(buf.size()), type, dest_processor_id,
526 tag.value(), this->
get(), req.get()));
529 req.add_post_wait_work
530 (
new PostWaitDereferenceTag(tag));
533 template <
typename T,
typename A,
typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
535 const std::vector<T,A> & buf,
536 const NotADataType &,
538 const MessageTag & tag)
const 540 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
542 timpi_assert_less(dest_processor_id, this->
size());
554 template <
typename T,
typename A1,
typename A2>
556 const std::vector<std::vector<T,A1>,A2> & buf,
557 const MessageTag & tag)
const 559 this->
send(dest_processor_id, buf,
560 StandardType<T>((buf.empty() || buf.front().empty()) ?
561 nullptr : &(buf.front().front())), tag);
566 template <
typename T,
typename A1,
typename A2>
568 const std::vector<std::vector<T,A1>,A2> & buf,
570 const MessageTag & tag)
const 572 this->
send(dest_processor_id, buf,
573 StandardType<T>((buf.empty() || buf.front().empty()) ?
574 nullptr : &(buf.front().front())), req, tag);
579 template <
typename T,
typename A1,
typename A2>
581 const std::vector<std::vector<T,A1>,A2> & buf,
582 const DataType & type,
583 const MessageTag & tag)
const 588 this->
send(dest_processor_id, buf, type, req, tag);
594 template <
typename T,
typename A1,
typename A2>
596 const std::vector<std::vector<T,A1>,A2> & send_vecs,
597 const DataType & type,
599 const MessageTag & tag)
const 607 std::vector<char> * sendbuf =
new std::vector<char>(sendsize);
613 const std::size_t n_vecs = send_vecs.size();
614 const CountType mpi_n_vecs = cast_int<CountType>(n_vecs);
617 (TIMPI_PACK (&mpi_n_vecs, 1, TIMPI_COUNT_TYPE, sendbuf->data(),
618 sendsize, &pos, this->
get()));
620 for (std::size_t i = 0; i != n_vecs; ++i)
624 cast_int<CountType>(send_vecs[i].size());
627 (TIMPI_PACK (&subvec_size, 1, TIMPI_COUNT_TYPE,
628 sendbuf->data(), sendsize, &pos, this->
get()));
631 if (!send_vecs[i].empty())
633 (TIMPI_PACK (const_cast<T*>(send_vecs[i].data()),
634 subvec_size, type, sendbuf->data(), sendsize,
638 timpi_assert_equal_to (pos, sendsize);
640 req.add_post_wait_work
641 (
new PostWaitDeleteBuffer<std::vector<char>> (sendbuf));
643 this->
send (dest_processor_id, *sendbuf, MPI_PACKED, req, tag);
647 template <
typename Context,
typename Iter>
649 const Context * context,
651 const Iter range_end,
653 std::size_t approx_buffer_size)
const 657 typedef typename std::iterator_traits<Iter>::value_type T;
659 std::size_t total_buffer_size =
662 this->
send(dest_processor_id, total_buffer_size, tag);
665 std::size_t used_buffer_size = 0;
668 while (range_begin != range_end)
670 timpi_assert_greater (std::distance(range_begin, range_end), 0);
672 std::vector<typename Packing<T>::buffer_type> buffer;
675 (context, range_begin, range_end, buffer, approx_buffer_size);
677 timpi_assert_greater (std::distance(range_begin, next_range_begin), 0);
679 range_begin = next_range_begin;
682 used_buffer_size += buffer.size();
686 this->
send(dest_processor_id, buffer, tag);
690 timpi_assert_equal_to(used_buffer_size, total_buffer_size);
695 template <
typename Context,
typename Iter>
697 const Context * context,
699 const Iter range_end,
702 std::size_t approx_buffer_size)
const 706 typedef typename std::iterator_traits<Iter>::value_type T;
709 std::size_t total_buffer_size =
715 std::size_t * total_buffer_size_buffer =
new std::size_t;
716 *total_buffer_size_buffer = total_buffer_size;
722 this->
send(dest_processor_id, *total_buffer_size_buffer, intermediate_req, tag);
729 std::size_t used_buffer_size = 0;
732 while (range_begin != range_end)
734 timpi_assert_greater (std::distance(range_begin, range_end), 0);
736 std::vector<buffer_t> * buffer =
new std::vector<buffer_t>();
739 (context, range_begin, range_end, *buffer, approx_buffer_size);
741 timpi_assert_greater (std::distance(range_begin, next_range_begin), 0);
743 range_begin = next_range_begin;
746 used_buffer_size += buffer->size();
751 Request * my_req = (range_begin == range_end) ? &req : &next_intermediate_req;
759 this->
send(dest_processor_id, *buffer, *my_req, tag);
761 if (range_begin != range_end)
766 timpi_assert_equal_to(used_buffer_size, total_buffer_size);
776 template <
typename Context,
typename Iter>
778 const Context * context,
780 const Iter range_end,
786 typedef typename std::iterator_traits<Iter>::value_type T;
789 if (range_begin != range_end)
791 std::vector<buffer_t> * buffer =
new std::vector<buffer_t>();
801 std::numeric_limits<CountType>::max());
803 if (range_begin != range_end)
804 timpi_error_msg(
"Non-blocking packed range sends cannot exceed " << std::numeric_limits<CountType>::max() <<
"in size");
812 this->
send(dest_processor_id, *buffer, req, tag);
817 template <
typename T>
819 std::basic_string<T> & buf,
822 std::vector<T> tempbuf;
826 buf.assign(tempbuf.begin(), tempbuf.end());
832 template <
typename T>
834 std::basic_string<T> & buf,
841 std::vector<T> * tempbuf =
new std::vector<T>(buf.size());
849 std::back_insert_iterator<std::basic_string<T>>>
850 (*tempbuf, std::back_inserter(buf)));
856 this->
receive(src_processor_id, *tempbuf, req, tag);
861 template <
typename T>
866 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
872 timpi_assert(src_processor_id < this->
size() ||
877 tag.
value(), this->
get(), stat.
get()));
884 template <
typename T>
890 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
892 timpi_assert(src_processor_id < this->
size() ||
897 tag.
value(), this->
get(), req.
get()));
906 template <
typename T,
typename C,
typename A>
908 std::set<T,C,A> & buf,
912 (src_processor_id, buf,
923 template <
typename T,
typename C,
typename A>
925 std::set<T,C,A> & buf,
927 const MessageTag & tag)
const 929 this->
receive (src_processor_id, buf,
930 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
936 template <
typename T,
typename C,
typename A>
938 std::set<T,C,A> & buf,
939 const DataType & type,
940 const MessageTag & tag)
const 942 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
944 std::vector<T> vecbuf;
945 Status stat = this->
receive(src_processor_id, vecbuf, type, tag);
947 buf.insert(vecbuf.begin(), vecbuf.end());
959 template <
typename T,
typename C,
typename A>
961 std::set<T,C,A> & buf,
962 const DataType & type,
964 const MessageTag & tag)
const 966 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
970 std::vector<T> * vecbuf =
new std::vector<T>();
976 req.add_post_wait_work
977 (
new PostWaitCopyBuffer<std::vector<T>,
978 std::insert_iterator<std::set<T,C,A>>>
979 (*vecbuf, std::inserter(buf,buf.end())));
982 req.add_post_wait_work
983 (
new PostWaitDeleteBuffer<std::vector<T>>(vecbuf));
985 this->
receive(src_processor_id, *vecbuf, type, req, tag);
991 template <
typename T,
typename A>
993 std::vector<T,A> & buf,
994 const MessageTag & tag)
const 997 (src_processor_id, buf,
998 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), tag);
1003 template <
typename T,
typename A>
1005 std::vector<T,A> & buf,
1007 const MessageTag & tag)
const 1009 this->
receive (src_processor_id, buf,
1010 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
1015 template <
typename T,
typename A>
1017 std::vector<T,A> & buf,
1018 const DataType & type,
1019 const MessageTag & tag)
const 1021 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
1025 Status stat(this->
probe(src_processor_id, tag), type);
1027 buf.resize(stat.size());
1029 timpi_assert(src_processor_id < this->
size() ||
1036 (TIMPI_RECV (buf.empty() ? nullptr : buf.data(),
1037 cast_int<CountType>(buf.size()), type, stat.source(),
1038 stat.tag(), this->
get(), stat.get()));
1040 timpi_assert_equal_to (cast_int<std::size_t>(stat.size()),
1048 template <
typename T,
typename A,
1049 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
1051 std::vector<T,A> & buf,
1052 const DataType & type,
1053 const MessageTag & tag)
const 1058 stat = this->packed_range_probe<T>(src_processor_id, tag, flag);
1062 std::inserter(buf, buf.end()),
1063 type, req, stat, tag);
1070 template <
typename T,
typename A,
1071 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
1073 std::vector<T,A> & buf,
1074 const NotADataType &,
1075 const MessageTag & tag)
const 1080 stat = this->packed_range_probe<T>(src_processor_id, tag, flag);
1084 std::inserter(buf, buf.end()),
1085 buf.data(), req, stat, tag);
1092 template <
typename T,
typename A>
1094 std::vector<T,A> & buf,
1095 const DataType & type,
1097 const MessageTag & tag)
const 1099 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
1101 timpi_assert(src_processor_id < this->
size() ||
1105 (TIMPI_IRECV(buf.empty() ? nullptr : buf.data(),
1106 cast_int<CountType>(buf.size()), type, src_processor_id,
1107 tag.value(), this->
get(), req.get()));
1110 req.add_post_wait_work
1111 (
new PostWaitDereferenceTag(tag));
1116 template <
typename T,
typename A1,
typename A2>
1118 std::vector<std::vector<T,A1>,A2> & buf,
1119 const MessageTag & tag)
const 1122 (src_processor_id, buf,
1123 StandardType<T>((buf.empty() || buf.front().empty()) ?
1124 nullptr : &(buf.front().front())), tag);
1129 template <
typename T,
typename A1,
typename A2>
1131 std::vector<std::vector<T,A1>,A2> & buf,
1133 const MessageTag & tag)
const 1135 this->
receive (src_processor_id, buf,
1136 StandardType<T>((buf.empty() || buf.front().empty()) ?
1137 nullptr : &(buf.front().front())), req, tag);
1142 template <
typename T,
typename A1,
typename A2>
1144 std::vector<std::vector<T,A1>,A2> & recv,
1145 const DataType & type,
1146 const MessageTag & tag)
const 1150 std::vector<char> recvbuf;
1152 Status stat = this->
receive (src_processor_id, recvbuf, MPI_PACKED, tag);
1155 timpi_assert (!recvbuf.empty());
1158 CountType bufsize = cast_int<CountType>(recvbuf.size());
1161 (TIMPI_UNPACK(recvbuf.data(), bufsize, &pos, &recvsize, 1,
1162 TIMPI_COUNT_TYPE, this->
get()));
1165 recv.resize (recvsize);
1167 const std::size_t n_vecs = recvsize;
1168 for (std::size_t i = 0; i != n_vecs; ++i)
1173 (TIMPI_UNPACK (recvbuf.data(), bufsize, &pos, &subvec_size, 1,
1174 TIMPI_COUNT_TYPE, this->
get()));
1177 recv[i].resize (subvec_size);
1180 if (!recv[i].empty())
1182 (TIMPI_UNPACK(recvbuf.data(), bufsize, &pos, recv[i].data(),
1183 subvec_size, type, this->
get()));
1191 template <
typename T,
typename A1,
typename A2>
1193 std::vector<std::vector<T,A1>,A2> & buf,
1194 const DataType & type,
1196 const MessageTag & tag)
const 1205 std::vector<char> * recvbuf =
new std::vector<char>(sendsize);
1208 this->
receive (src_processor_id, *recvbuf, MPI_PACKED, req, tag);
1211 req.add_post_wait_work
1212 (
new PostWaitUnpackNestedBuffer<std::vector<std::vector<T,A1>,A2>>
1213 (*recvbuf, buf, type, *
this));
1216 req.add_post_wait_work
1217 (
new PostWaitDeleteBuffer<std::vector<char>>(recvbuf));
1220 req.add_post_wait_work
1221 (
new PostWaitDereferenceTag(tag));
1225 template <
typename Context,
typename OutputIter,
typename T>
1228 OutputIter out_iter,
1229 const T * output_type,
1235 std::size_t total_buffer_size = 0;
1236 Status stat = this->
receive(src_processor_id, total_buffer_size, tag);
1242 std::size_t received_buffer_size = 0;
1246 std::unique_ptr<OutputIter> next_out_iter =
1247 std::make_unique<OutputIter>(out_iter);
1249 while (received_buffer_size < total_buffer_size)
1251 std::vector<buffer_t> buffer;
1253 received_buffer_size += buffer.size();
1255 (buffer, context, *next_out_iter, output_type);
1256 next_out_iter = std::make_unique<OutputIter>(return_out_iter);
1289 template <
typename Context,
typename OutputIter,
typename T>
1304 std::vector<buffer_t> * buffer =
new std::vector<buffer_t>(stat.
size());
1305 this->
receive(src_processor_id, *buffer, req, tag);
1309 (
new PostWaitUnpackBuffer<std::vector<buffer_t>, Context, OutputIter, T>(*buffer, context, out));
1322 template <
typename T1,
typename T2,
typename A1,
typename A2>
1324 const std::vector<T1,A1> & sendvec,
1326 const unsigned int source_processor_id,
1327 std::vector<T2,A2> & recv,
1332 TIMPI_LOG_SCOPE(
"send_receive()",
"Parallel");
1334 if (dest_processor_id == this->
rank() &&
1335 source_processor_id == this->
rank())
1343 this->
send (dest_processor_id, sendvec, type1, req, send_tag);
1345 this->
receive (source_processor_id, recv, type2, recv_tag);
1351 template <
typename T1,
typename T2,
typename A1,
typename A2,
1352 typename std::enable_if<Has_buffer_type<Packing<T1>>::value &&
1353 Has_buffer_type<Packing<T2>>::value,
int>::type>
1357 const std::vector<T1,A1> & send_data,
1358 const unsigned int source_processor_id,
1359 std::vector<T2,A2> &recv_data,
1360 const MessageTag &send_tag,
1361 const MessageTag &recv_tag)
const 1364 send_data.begin(), send_data.end(),
1365 source_processor_id, (
void *)(
nullptr),
1366 std::back_inserter(recv_data),
1367 (
const T2 *)(
nullptr),
1368 send_tag, recv_tag);
1372 template <
typename T,
typename A,
1373 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
1377 const std::vector<T,A> & send_data,
1378 const unsigned int source_processor_id,
1379 std::vector<T,A> &recv_data,
1384 send_data.begin(), send_data.end(),
1385 source_processor_id, (
void *)(
nullptr),
1386 std::back_inserter(recv_data),
1387 (
const T *)(
nullptr),
1388 send_tag, recv_tag);
1393 template <
typename T1,
typename T2,
1394 typename std::enable_if<std::is_base_of<DataType, StandardType<T1>>::value &&
1395 std::is_base_of<DataType, StandardType<T2>>::value,
1399 const unsigned int source_processor_id,
1404 TIMPI_LOG_SCOPE(
"send_receive()",
"Parallel");
1406 if (dest_processor_id == this->
rank() &&
1407 source_processor_id == this->
rank())
1413 timpi_assert_less(dest_processor_id, this->
size());
1414 timpi_assert(source_processor_id < this->
size() ||
1422 dest_processor_id, send_tag.
value(), &recv, 1,
1424 recv_tag.
value(), this->
get(), MPI_STATUS_IGNORE));
1436 template <
typename T,
typename A,
1437 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
1440 const std::vector<T,A> & sendvec,
1441 const unsigned int source_processor_id,
1442 std::vector<T,A> & recv,
1446 if (dest_processor_id == this->
rank() &&
1447 source_processor_id == this->
rank())
1449 TIMPI_LOG_SCOPE(
"send_receive()",
"Parallel");
1454 const T* example = sendvec.empty() ?
1455 (recv.empty() ? nullptr : recv.data()) : sendvec.data();
1460 StandardType<T>(example),
1461 source_processor_id, recv,
1462 StandardType<T>(example),
1463 send_tag, recv_tag);
1467 template <
typename T1,
typename T2,
typename A1,
typename A2,
1468 typename std::enable_if<std::is_base_of<DataType, StandardType<T1>>::value &&
1469 std::is_base_of<DataType, StandardType<T2>>::value,
1472 const std::vector<T1,A1> & sendvec,
1473 const unsigned int source_processor_id,
1474 std::vector<T2,A2> & recv,
1475 const MessageTag & send_tag,
1476 const MessageTag & recv_tag)
const 1481 StandardType<T1>(sendvec.empty() ? nullptr : sendvec.data()),
1482 source_processor_id, recv,
1483 StandardType<T2>(recv.empty() ? nullptr : recv.data()),
1484 send_tag, recv_tag);
1490 template <
typename T1,
typename T2,
typename A1,
typename A2,
typename A3,
typename A4>
1492 const std::vector<std::vector<T1,A1>,A2> & sendvec,
1493 const unsigned int source_processor_id,
1494 std::vector<std::vector<T2,A3>,A4> & recv,
1495 const MessageTag & ,
1496 const MessageTag & )
const 1499 send_receive_vec_of_vec
1500 (dest_processor_id, sendvec, source_processor_id, recv,
1508 template <
typename T,
typename A1,
typename A2>
1510 const std::vector<std::vector<T,A1>,A2> & sendvec,
1511 const unsigned int source_processor_id,
1512 std::vector<std::vector<T,A1>,A2> & recv,
1516 send_receive_vec_of_vec
1517 (dest_processor_id, sendvec, source_processor_id, recv,
1518 send_tag, recv_tag, *
this);
1524 template <
typename Context1,
typename RangeIter,
typename Context2,
1525 typename OutputIter,
typename T>
1528 const Context1 * context1,
1529 RangeIter send_begin,
1530 const RangeIter send_end,
1531 const unsigned int source_processor_id,
1532 Context2 * context2,
1533 OutputIter out_iter,
1534 const T * output_type,
1537 std::size_t approx_buffer_size)
const 1539 TIMPI_LOG_SCOPE(
"send_receive()",
"Parallel");
1541 timpi_assert_equal_to
1542 ((dest_processor_id == this->
rank()),
1543 (source_processor_id == this->
rank()));
1545 if (dest_processor_id == this->
rank() &&
1546 source_processor_id == this->
rank())
1554 std::unique_ptr<OutputIter> next_out_iter =
1555 std::make_unique<OutputIter>(out_iter);
1558 while (send_begin != send_end)
1560 std::vector<buffer_t> buffer;
1562 (context1, send_begin, send_end, buffer, approx_buffer_size);
1564 (buffer, context2, *next_out_iter, output_type);
1565 next_out_iter = std::make_unique<OutputIter>(return_out_iter);
1573 req, send_tag, approx_buffer_size);
1576 output_type, recv_tag);
1583 template <
typename Context,
typename Iter>
1585 const Context * context,
1587 const Iter range_end,
1589 std::shared_ptr<std::vector<
typename Packing<
typename std::iterator_traits<Iter>::value_type>::buffer_type>> & buffer,
1594 typedef typename std::iterator_traits<Iter>::value_type T;
1597 if (range_begin != range_end)
1599 if (buffer ==
nullptr)
1600 buffer = std::make_shared<std::vector<buffer_t>>();
1612 std::numeric_limits<CountType>::max());
1614 if (range_begin != range_end)
1615 timpi_error_msg(
"Non-blocking packed range sends cannot exceed " << std::numeric_limits<CountType>::max() <<
"in size");
1622 this->
send(dest_processor_id, *buffer, req, tag);
1628 template <
typename T,
typename A>
1630 std::vector<std::basic_string<T>,A> & recv,
1631 const bool identical_buffer_sizes)
const 1633 TIMPI_LOG_SCOPE (
"allgather()",
"Parallel");
1635 timpi_assert(this->
size());
1636 recv.assign(this->
size(),
"");
1639 if (this->
size() < 2)
1646 std::vector<CountType>
1647 sendlengths (this->
size(), 0);
1648 std::vector<DispType>
1649 displacements(this->
size(), 0);
1651 const CountType mysize = cast_int<CountType>(sendval.size());
1653 if (identical_buffer_sizes)
1654 sendlengths.assign(this->
size(), mysize);
1662 for (
unsigned int i=0; i != this->
size(); ++i)
1664 displacements[i] = globalsize;
1665 globalsize += sendlengths[i];
1669 if (globalsize == 0)
1673 std::basic_string<T> r(globalsize, 0);
1677 (TIMPI_ALLGATHERV(const_cast<T*>(mysize ? sendval.data() :
nullptr),
1679 &r[0], sendlengths.data(), displacements.data(),
1683 for (
unsigned int i=0; i != this->
size(); ++i)
1684 recv[i] = r.substr(displacements[i], sendlengths[i]);
1690 const unsigned int root_id,
1693 if (this->
size() == 1)
1695 timpi_assert (!this->
rank());
1696 timpi_assert (!root_id);
1700 timpi_assert_less (root_id, this->
size());
1702 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
1706 char char_data = data;
1708 timpi_assert_less(root_id, this->
size());
1712 (TIMPI_BCAST (&char_data, 1, StandardType<char>(&char_data),
1713 root_id, this->
get()));
1719 template <
typename T>
1721 const unsigned int root_id,
1722 const bool identical_sizes)
const 1724 if (this->
size() == 1)
1726 timpi_assert (!this->
rank());
1727 timpi_assert (!root_id);
1731 timpi_assert_less (root_id, this->
size());
1732 timpi_assert (this->
verify(identical_sizes));
1734 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
1736 std::size_t data_size = data.size();
1738 if (identical_sizes)
1739 timpi_assert(this->
verify(data_size));
1743 std::vector<T> data_c(data_size);
1745 std::basic_string<T> orig(data);
1748 if (this->
rank() == root_id)
1749 for (std::size_t i=0; i<data.size(); i++)
1750 data_c[i] = data[i];
1754 data.assign(data_c.begin(), data_c.end());
1757 if (this->
rank() == root_id)
1758 timpi_assert_equal_to (data, orig);
1763 template <
typename T,
typename A>
1765 const unsigned int root_id,
1766 const bool identical_sizes)
const 1768 if (this->
size() == 1)
1770 timpi_assert (!this->
rank());
1771 timpi_assert (!root_id);
1775 timpi_assert_less (root_id, this->
size());
1776 timpi_assert (this->
verify(identical_sizes));
1778 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
1780 std::size_t bufsize=0;
1781 if (root_id == this->
rank() || identical_sizes)
1783 for (std::size_t i=0; i<data.size(); ++i)
1784 bufsize += data[i].
size() + 1;
1787 if (identical_sizes)
1788 timpi_assert(this->
verify(bufsize));
1793 std::vector<unsigned int> temp; temp.reserve(bufsize);
1795 if (root_id == this->
rank())
1797 for (std::size_t i=0; i<data.size(); ++i)
1799 temp.push_back(cast_int<unsigned int>(data[i].
size()));
1800 for (std::size_t j=0; j != data[i].size(); ++j)
1805 temp.push_back(data[i][j]);
1809 temp.resize(bufsize);
1815 if (root_id != this->
rank())
1818 typename std::vector<unsigned int>::const_iterator iter = temp.begin();
1819 while (iter != temp.end())
1821 std::size_t curr_len = *iter++;
1822 data.push_back(std::basic_string<T>(iter, iter+curr_len));
1830 template <
typename T,
typename A1,
typename A2>
1832 const unsigned int root_id,
1833 const bool identical_sizes)
const 1835 if (this->
size() == 1)
1837 timpi_assert (!this->
rank());
1838 timpi_assert (!root_id);
1842 timpi_assert_less (root_id, this->
size());
1843 timpi_assert (this->
verify(identical_sizes));
1845 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
1847 std::size_t size_sizes = data.size();
1848 if (identical_sizes)
1849 timpi_assert(this->
verify(size_sizes));
1852 std::vector<std::size_t> sizes(size_sizes);
1854 if (root_id == this->
rank() || identical_sizes)
1855 for (std::size_t i=0; i<size_sizes; ++i)
1856 sizes[i] = data[i].
size();
1858 if (identical_sizes)
1859 timpi_assert(this->
verify(sizes));
1863 std::size_t bufsize = 0;
1864 for (std::size_t i=0; i<size_sizes; ++i)
1865 bufsize += sizes[i];
1867 std::vector<T> temp; temp.reserve(bufsize);
1869 if (root_id == this->
rank())
1872 for (std::size_t i=0; i<size_sizes; ++i)
1873 temp.insert(temp.end(), data[i].begin(), data[i].end());
1876 temp.resize(bufsize);
1882 if (root_id != this->
rank())
1885 data.resize(size_sizes);
1886 typename std::vector<T>::const_iterator iter = temp.begin();
1887 for (std::size_t i=0; i<size_sizes; ++i)
1889 data[i].insert(data[i].end(), iter, iter+sizes[i]);
1898 template <
typename T,
typename C,
typename A>
1900 const unsigned int root_id,
1901 const bool identical_sizes)
const 1903 if (this->
size() == 1)
1905 timpi_assert (!this->
rank());
1906 timpi_assert (!root_id);
1910 timpi_assert_less (root_id, this->
size());
1911 timpi_assert (this->
verify(identical_sizes));
1913 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
1915 std::vector<T> vecdata;
1916 if (this->
rank() == root_id)
1917 vecdata.assign(data.begin(), data.end());
1919 std::size_t vecsize = vecdata.size();
1920 if (identical_sizes)
1921 timpi_assert(this->
verify(vecsize));
1924 if (this->
rank() != root_id)
1925 vecdata.resize(vecsize);
1928 if (this->
rank() != root_id)
1931 data.insert(vecdata.begin(), vecdata.end());
1936 template <
typename Context,
typename OutputIter,
typename T>
1947 if (buffer ==
nullptr)
1948 buffer = std::make_shared<std::vector<typename Packing<T>::buffer_type>>();
1956 buffer->resize(stat.
size());
1957 this->
receive(src_processor_id, *buffer, req, tag);
1970 template <
typename T,
typename A,
typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
1972 std::vector<T,A> & buf,
1977 TIMPI_LOG_SCOPE(
"possibly_receive()",
"Parallel");
1983 timpi_assert(src_processor_id < this->
size() ||
1986 timpi_call_mpi(MPI_Iprobe(
int(src_processor_id),
1994 buf.resize(stat.
size());
1996 src_processor_id = stat.
source();
1999 (TIMPI_IRECV(buf.data(), cast_int<CountType>(buf.size()), type,
2000 src_processor_id, tag.
value(), this->
get(),
2011 template <
typename T,
typename A,
typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
2013 std::vector<T,A> & buf,
2018 TIMPI_LOG_SCOPE(
"possibly_receive()",
"Parallel");
2022 std::inserter(buf, buf.end()),
2030 template <
typename T,
typename A1,
typename A2>
2032 std::vector<std::vector<T,A1>,A2> & buf,
2037 TIMPI_LOG_SCOPE(
"possibly_receive()",
"Parallel");
2043 timpi_assert(src_processor_id < this->
size() ||
2046 timpi_call_mpi(MPI_Iprobe(
int(src_processor_id),
2054 src_processor_id = stat.
source();
2056 std::vector<char> * recvbuf =
2059 this->
receive(src_processor_id, *recvbuf, MPI_PACKED, req, tag);
2064 (*recvbuf, buf, type, *
this));
2080 #endif // TIMPI_HAVE_MPI 2091 template <
typename T>
2096 T tempmin = r, tempmax = r;
2099 bool verified = (r == tempmin) &&
2101 this->
min(verified);
2106 "Tried to verify an unverifiable type");
2111 template <
typename T>
2118 tempmin = tempmax = *r;
2126 bool invalid = r && ((*r != tempmin) ||
2133 "Tried to semiverify an unverifiable type");
2140 template <
typename T,
typename A>
2145 std::size_t rsize = r ? r->size() : 0;
2146 std::size_t * psize = r ? &rsize :
nullptr;
2153 std::vector<T,A> tempmin, tempmax;
2156 tempmin = tempmax = *r;
2160 tempmin.resize(rsize);
2161 tempmax.resize(rsize);
2162 Attributes<std::vector<T,A>>::set_highest(tempmin);
2163 Attributes<std::vector<T,A>>::set_lowest(tempmax);
2167 bool invalid = r && ((*r != tempmin) ||
2174 "Tried to semiverify a vector of an unverifiable type");
2181 template <
typename A>
2184 if (this->
size() > 1 && !r.empty())
2186 TIMPI_LOG_SCOPE(
"min(vector<bool>)",
"Parallel");
2188 timpi_assert(this->
verify(r.size()));
2190 std::vector<unsigned int> ruint;
2191 pack_vector_bool(r, ruint);
2192 std::vector<unsigned int> temp(ruint.size());
2195 (ruint.data(), temp.data(),
2196 cast_int<CountType>(ruint.size()),
2198 unpack_vector_bool(temp, r);
// Excerpt of the scalar minloc (per the log-scope label).  With more than
// one rank, a single record data_in (declared outside this excerpt) is
// reduced in place using the datatype from dataplusint_type_acquire<T>().first
// and min_id is read from data_in.rank.  The final assignment sets min_id to
// this rank; the branch it belongs to is not visible.
2203 template <
typename T>
2205 unsigned int & min_id)
const 2207 if (this->
size() > 1)
2209 TIMPI_LOG_SCOPE(
"minloc(scalar)",
"Parallel");
2217 (TIMPI_ALLREDUCE (MPI_IN_PLACE, &data_in, 1,
2218 dataplusint_type_acquire<T>().first,
2221 min_id = data_in.
rank;
2224 min_id = this->
rank();
// Excerpt of the vector minloc (per the log-scope label).  With more than
// one rank and a non-empty r, every entry is paired with this rank in a
// DataPlusInt<T>, the pairs are reduced into data_out, and each r[i] /
// min_id[i] is overwritten from the result.  Otherwise, for a non-empty r,
// every min_id[i] is set to this rank.
// NOTE(review): min_id is indexed up to r.size(); any resize of min_id is
// not visible in this excerpt -- confirm it is sized before use.
2228 template <
typename T,
typename A1,
typename A2>
2230 std::vector<unsigned int,A2> & min_id)
const 2232 if (this->
size() > 1 && !r.empty())
2234 TIMPI_LOG_SCOPE(
"minloc(vector)",
"Parallel");
2236 timpi_assert(this->
verify(r.size()));
2238 std::vector<DataPlusInt<T>> data_in(r.size());
2239 for (std::size_t i=0; i != r.size(); ++i)
2241 data_in[i].val = r[i];
2242 data_in[i].rank = this->
rank();
2244 std::vector<DataPlusInt<T>> data_out(r.size());
2247 (TIMPI_ALLREDUCE (data_in.data(), data_out.data(),
2248 cast_int<CountType>(r.size()),
2249 dataplusint_type_acquire<T>().first,
2251 for (std::size_t i=0; i != r.size(); ++i)
2253 r[i] = data_out[i].val;
2254 min_id[i] = data_out[i].rank;
2257 else if (!r.empty())
2259 for (std::size_t i=0; i != r.size(); ++i)
2260 min_id[i] = this->
rank();
// Excerpt of the vector<bool> minloc (per the log-scope label).  Each bool
// is widened to the int `val` of a DataPlusInt<int> and paired with this
// rank; a reduction call (name and operator not visible here) fills
// data_out, from which r[i] and min_id[i] are rewritten.  Otherwise, for a
// non-empty r, every min_id[i] is set to this rank.
2265 template <
typename A1,
typename A2>
2267 std::vector<unsigned int,A2> & min_id)
const 2269 if (this->
size() > 1 && !r.empty())
2271 TIMPI_LOG_SCOPE(
"minloc(vector<bool>)",
"Parallel");
2273 timpi_assert(this->
verify(r.size()));
2275 std::vector<DataPlusInt<int>> data_in(r.size());
2276 for (std::size_t i=0; i != r.size(); ++i)
2278 data_in[i].val = r[i];
2279 data_in[i].rank = this->
rank();
2281 std::vector<DataPlusInt<int>> data_out(r.size());
2284 (data_in.data(), data_out.data(),
2287 for (std::size_t i=0; i != r.size(); ++i)
2289 r[i] = data_out[i].val;
2290 min_id[i] = data_out[i].rank;
2293 else if (!r.empty())
2295 for (std::size_t i=0; i != r.size(); ++i)
2296 min_id[i] = this->
rank();
// Excerpt of the vector<bool> max reduction (per the log-scope label).
// Work is done only with more than one rank and a non-empty r: r is packed
// into unsigned ints, reduced with TIMPI_ALLREDUCE into temp (the datatype
// and operator arguments are not visible here), and unpacked back into r.
2302 template <
typename A>
2305 if (this->
size() > 1 && !r.empty())
2307 TIMPI_LOG_SCOPE(
"max(vector<bool>)",
"Parallel");
2309 timpi_assert(this->
verify(r.size()));
2311 std::vector<unsigned int> ruint;
2312 pack_vector_bool(r, ruint);
2313 std::vector<unsigned int> temp(ruint.size());
2315 (TIMPI_ALLREDUCE (ruint.data(), temp.data(),
2316 cast_int<CountType>(ruint.size()),
2319 unpack_vector_bool(temp, r);
// Excerpt of the map max reduction (per the log-scope label) for maps whose
// key and mapped types both have a StandardType derived from DataType.
// The map's pairs are copied into vecdata; a step not visible here is
// expected to extend vecdata with other ranks' pairs.  Each pair is then
// inserted into data, and a later statement keeps the larger mapped value
// (the condition guarding it is not visible).
2325 template <
typename Map,
2326 typename std::enable_if<std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
2327 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value,
2331 if (this->
size() > 1)
2333 TIMPI_LOG_SCOPE(
"max(map)",
"Parallel");
2341 std::vector<std::pair<typename Map::key_type, typename Map::mapped_type>>
2342 vecdata(data.begin(), data.end());
2348 for (
const auto & pr : vecdata)
// insert() returns {iterator, inserted}; both halves are used below.
2353 auto result = data.insert(pr);
2355 bool inserted = result.second;
2359 auto it = result.first;
2360 it->second = std::max(it->second, pr.second);
// Excerpt of the map max reduction (per the log-scope label) for maps whose
// key or mapped type lacks a DataType-derived StandardType.  Keys and values
// are split into two parallel vectors, each is allgathered with `false` as
// the second argument, and every gathered pair is emplaced into data.  A
// later statement keeps the larger mapped value (its guarding condition and
// the declaration of `it` are not visible).
2368 template <
typename Map,
2369 typename std::enable_if<!(std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
2370 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value),
2374 if (this->size() > 1)
2376 TIMPI_LOG_SCOPE(
"max(map)",
"Parallel");
2384 std::vector<typename Map::key_type> keys;
2385 std::vector<typename Map::mapped_type> vals;
2387 auto data_size = data.size();
2388 keys.reserve(data_size);
2389 vals.reserve(data_size);
2391 for (
const auto & pr : data)
2393 keys.push_back(pr.first);
2394 vals.push_back(pr.second);
2397 this->allgather(keys,
false);
2398 this->allgather(vals,
false);
2402 for (std::size_t i=0; i<keys.size(); ++i)
2407 auto pr = data.emplace(keys[i], vals[i]);
2409 bool emplaced = pr.second;
2414 it->second = std::max(it->second, vals[i]);
2422 template <
typename K,
typename V,
typename C,
typename A>
2431 template <
typename K,
typename V,
typename H,
typename E,
typename A>
// Excerpt of the scalar maxloc (per the log-scope label).  With more than
// one rank, a single record data_in (declared outside this excerpt) is
// reduced in place using the datatype from dataplusint_type_acquire<T>().first
// and max_id is read from data_in.rank.  The final assignment sets max_id to
// this rank; the branch it belongs to is not visible.
2440 template <
typename T>
2442 unsigned int & max_id)
const 2444 if (this->
size() > 1)
2446 TIMPI_LOG_SCOPE(
"maxloc(scalar)",
"Parallel");
2454 (TIMPI_ALLREDUCE (MPI_IN_PLACE, &data_in, 1,
2455 dataplusint_type_acquire<T>().first,
2458 max_id = data_in.
rank;
2461 max_id = this->
rank();
// Excerpt of the vector maxloc (per the log-scope label).  With more than
// one rank and a non-empty r, every entry is paired with this rank in a
// DataPlusInt<T>, the pairs are reduced into data_out, and each r[i] /
// max_id[i] is overwritten from the result.  Otherwise, for a non-empty r,
// every max_id[i] is set to this rank.
// NOTE(review): max_id is indexed up to r.size(); any resize of max_id is
// not visible in this excerpt -- confirm it is sized before use.
2465 template <
typename T,
typename A1,
typename A2>
2467 std::vector<unsigned int,A2> & max_id)
const 2469 if (this->
size() > 1 && !r.empty())
2471 TIMPI_LOG_SCOPE(
"maxloc(vector)",
"Parallel");
2473 timpi_assert(this->
verify(r.size()));
2475 std::vector<DataPlusInt<T>> data_in(r.size());
2476 for (std::size_t i=0; i != r.size(); ++i)
2478 data_in[i].val = r[i];
2479 data_in[i].rank = this->
rank();
2481 std::vector<DataPlusInt<T>> data_out(r.size());
2484 (TIMPI_ALLREDUCE(data_in.data(), data_out.data(),
2485 cast_int<CountType>(r.size()),
2486 dataplusint_type_acquire<T>().first,
2489 for (std::size_t i=0; i != r.size(); ++i)
2491 r[i] = data_out[i].val;
2492 max_id[i] = data_out[i].rank;
2495 else if (!r.empty())
2497 for (std::size_t i=0; i != r.size(); ++i)
2498 max_id[i] = this->
rank();
// Excerpt of the vector<bool> maxloc (per the log-scope label).  Each bool
// is widened to the int `val` of a DataPlusInt<int> and paired with this
// rank; TIMPI_ALLREDUCE fills data_out (datatype and operator arguments are
// not visible here), from which r[i] and max_id[i] are rewritten.
// Otherwise, for a non-empty r, every max_id[i] is set to this rank.
2503 template <
typename A1,
typename A2>
2505 std::vector<unsigned int,A2> & max_id)
const 2507 if (this->
size() > 1 && !r.empty())
2509 TIMPI_LOG_SCOPE(
"maxloc(vector<bool>)",
"Parallel");
2511 timpi_assert(this->
verify(r.size()));
2513 std::vector<DataPlusInt<int>> data_in(r.size());
2514 for (std::size_t i=0; i != r.size(); ++i)
2516 data_in[i].val = r[i];
2517 data_in[i].rank = this->
rank();
2519 std::vector<DataPlusInt<int>> data_out(r.size());
2521 (TIMPI_ALLREDUCE(data_in.data(), data_out.data(),
2522 cast_int<CountType>(r.size()),
2526 for (std::size_t i=0; i != r.size(); ++i)
2528 r[i] = data_out[i].val;
2529 max_id[i] = data_out[i].rank;
2532 else if (!r.empty())
2534 for (std::size_t i=0; i != r.size(); ++i)
2535 max_id[i] = this->
rank();
// TIMPI_DEFINE_COMMUNICATOR_OPS(OPNAME) stamps out three Communicator
// members named OPNAME, each using OpFunction<T>::OPNAME() as the operator:
//  - OPNAME(T &): in-place TIMPI_ALLREDUCE of one value when size() > 1;
//  - OPNAME(std::vector<T,A> &): in-place TIMPI_ALLREDUCE over r.data() when
//    size() > 1 and r is non-empty, after asserting verify(r.size());
//  - OPNAME(const T &, T &, Request &): TIMPI_IALLREDUCE from r into o when
//    size() > 1; a path that assigns Request::null_request to req is also
//    present (its condition is not visible in this excerpt).
// The macro is then instantiated for the ten operations listed below.
2539 #define TIMPI_DEFINE_COMMUNICATOR_OPS(OPNAME) \ 2540 template <typename T> \ 2541 inline void Communicator::OPNAME(T &timpi_mpi_var(r)) const { \ 2542 if (this->size() > 1) { \ 2543 TIMPI_LOG_SCOPE(#OPNAME "(scalar, blocking)", "Parallel"); \ 2545 timpi_call_mpi(TIMPI_ALLREDUCE(MPI_IN_PLACE, &r, 1, StandardType<T>(&r), \ 2546 OpFunction<T>::OPNAME(), this->get())); \ 2550 template <typename T, typename A> \ 2551 inline void Communicator::OPNAME(std::vector<T, A> &r) const { \ 2552 if (this->size() > 1 && !r.empty()) { \ 2553 TIMPI_LOG_SCOPE(#OPNAME "(vector, blocking)", "Parallel"); \ 2555 timpi_assert(this->verify(r.size())); \ 2557 timpi_call_mpi(TIMPI_ALLREDUCE( \ 2558 MPI_IN_PLACE, r.data(), cast_int<CountType>(r.size()), \ 2559 StandardType<T>(r.data()), OpFunction<T>::OPNAME(), this->get())); \ 2562 template <typename T> \ 2563 inline void Communicator::OPNAME(const T &r, T &o, Request &req) const { \ 2564 if (this->size() > 1) { \ 2565 TIMPI_LOG_SCOPE(#OPNAME "(scalar, nonblocking)", "Parallel"); \ 2567 timpi_call_mpi(TIMPI_IALLREDUCE(&r, &o, 1, StandardType<T>(&r), \ 2568 OpFunction<T>::OPNAME(), this->get(), \ 2572 req = Request::null_request; \ 2576 TIMPI_DEFINE_COMMUNICATOR_OPS(sum)
2577 TIMPI_DEFINE_COMMUNICATOR_OPS(max)
2578 TIMPI_DEFINE_COMMUNICATOR_OPS(min)
2579 TIMPI_DEFINE_COMMUNICATOR_OPS(product)
2580 TIMPI_DEFINE_COMMUNICATOR_OPS(logical_and)
2581 TIMPI_DEFINE_COMMUNICATOR_OPS(bitwise_and)
2582 TIMPI_DEFINE_COMMUNICATOR_OPS(logical_or)
2583 TIMPI_DEFINE_COMMUNICATOR_OPS(bitwise_or)
2584 TIMPI_DEFINE_COMMUNICATOR_OPS(logical_xor)
2585 TIMPI_DEFINE_COMMUNICATOR_OPS(bitwise_xor)
// Excerpt of a sum() overload (per the log-scope label) that reduces in
// place with an element count of 2 starting at &r.
// NOTE(review): the count of 2 suggests r holds two scalars (for example a
// complex value); the parameter type is not visible here -- confirm.
2591 template <
typename T>
2594 if (this->
size() > 1)
2596 TIMPI_LOG_SCOPE(
"sum()",
"Parallel");
2599 (TIMPI_ALLREDUCE(MPI_IN_PLACE, &r, 2,
// Excerpt of a vector sum() overload (per the log-scope label).  With more
// than one rank and a non-empty r, it reduces in place over r.data() with a
// count of r.size() * 2, datatype StandardType<T>(nullptr) and operator
// OpFunction<T>::sum().
// NOTE(review): the doubled count implies each element of r spans two T
// values; the element type of r is not visible here -- confirm.
2607 template <
typename T,
typename A>
2610 if (this->
size() > 1 && !r.empty())
2612 TIMPI_LOG_SCOPE(
"sum()",
"Parallel");
2614 timpi_assert(this->
verify(r.size()));
2617 (TIMPI_ALLREDUCE(MPI_IN_PLACE, r.data(),
2618 cast_int<CountType>(r.size() * 2),
2619 StandardType<T>(
nullptr),
2620 OpFunction<T>::sum(), this->
get()));
// Excerpt of the map sum (per the log-scope label) for maps whose key and
// mapped types both have a DataType-derived StandardType.  The map's pairs
// are copied into vecdata, and after steps not visible here each pair's
// value is added into data[pr.first].
2628 template <
typename Map,
2629 typename std::enable_if<std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
2630 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value,
2634 if (this->
size() > 1)
2636 TIMPI_LOG_SCOPE(
"sum(map)",
"Parallel");
2645 std::vector<std::pair<typename Map::key_type, typename Map::mapped_type>>
2646 vecdata(data.begin(), data.end());
// operator[] value-initialises a missing key before the += is applied.
2651 for (
const auto & pr : vecdata)
2652 data[pr.first] += pr.second;
// Excerpt of the map sum (per the log-scope label) for maps whose key or
// mapped type lacks a DataType-derived StandardType.  Keys and values are
// split into two parallel vectors, each is allgathered with `false` as the
// second argument, and every gathered value is added into data[keys[i]].
// Statements between the allgathers and the final loop are not visible.
2660 template <
typename Map,
2661 typename std::enable_if<!(std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
2662 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value),
2666 if (this->size() > 1)
2668 TIMPI_LOG_SCOPE(
"sum(map)",
"Parallel");
2673 std::vector<typename Map::key_type> keys;
2674 std::vector<typename Map::mapped_type> vals;
2676 auto data_size = data.size();
2677 keys.reserve(data_size);
2678 vals.reserve(data_size);
2680 for (
const auto & pr : data)
2682 keys.push_back(pr.first);
2683 vals.push_back(pr.second);
2686 this->allgather(keys,
false);
2687 this->allgather(vals,
false);
2691 for (std::size_t i=0; i<keys.size(); ++i)
2692 data[keys[i]] += vals[i];
2698 template <
typename K,
typename V,
typename C,
typename A>
2706 template <
typename K,
typename V,
typename H,
typename E,
typename A>
// Excerpt of the allgather() overload (per the log-scope label) that
// collects one vector per rank into recv, for T with a DataType-derived
// StandardType.  Visible flow: recv is resized to size(); per-rank lengths
// are either all mysize (identical_buffer_sizes) or obtained by a step not
// visible here; displacements are the running sum of lengths; all data is
// gathered into one flat vector r with TIMPI_ALLGATHERV; finally each
// recv[i] is assigned its slice of r.
2714 template <
typename T,
typename A1,
typename A2,
2715 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
2717 std::vector<std::vector<T,A1>,A2> & recv,
2718 const bool identical_buffer_sizes)
const 2720 TIMPI_LOG_SCOPE (
"allgather()",
"Parallel");
2722 timpi_assert(this->
size());
// Fewer than two ranks is tested separately; what that branch does is not
// visible in this excerpt.
2725 if (this->
size() < 2)
2733 recv.resize(this->
size());
2735 std::vector<CountType>
2736 sendlengths (this->
size(), 0);
2737 std::vector<DispType>
2738 displacements(this->
size(), 0);
2740 const CountType mysize = cast_int<CountType>(sendval.size());
2742 if (identical_buffer_sizes)
2743 sendlengths.assign(this->
size(), mysize);
2751 for (
unsigned int i=0; i != this->
size(); ++i)
2753 displacements[i] = globalsize;
2754 globalsize += sendlengths[i];
2758 if (globalsize == 0)
2762 std::vector<T,A1> r(globalsize, 0);
// A null send pointer is passed when this rank contributes nothing.
2766 (TIMPI_ALLGATHERV(const_cast<T*>(mysize ? sendval.data() :
nullptr),
2767 mysize, StandardType<T>(),
2768 &r[0], sendlengths.data(), displacements.data(),
2769 StandardType<T>(), this->
get()));
2772 for (
unsigned int i=0; i != this->
size(); ++i)
2773 recv[i].
assign(r.begin()+displacements[i],
2774 r.begin()+displacements[i]+sendlengths[i]);
// Excerpt of the allgather() overload (per the log-scope label) that
// collects one vector per rank into recv, enabled when
// Has_buffer_type<Packing<T>>::value is true.  Visible flow: sendval is
// packed into a buffer of Packing<T>::buffer_type with pack_range, capped at
// std::numeric_limits<CountType>::max(); an error is raised if the packing
// stopped before the end of sendval; the packed buffers are allgathered
// into allbuffers; and a step only partly visible here back-inserts the
// results into recv[i].
2779 template <
typename T,
typename A1,
typename A2,
2780 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
2782 std::vector<std::vector<T,A1>,A2> & recv,
2785 TIMPI_LOG_SCOPE (
"allgather()",
"Parallel");
2787 typedef typename Packing<T>::buffer_type buffer_t;
2789 std::vector<buffer_t> buffer;
2790 auto next_iter =
pack_range ((
void *)
nullptr, sendval.begin(),
2791 sendval.end(), buffer,
2792 std::numeric_limits<CountType>::max());
// pack_range returned before consuming all of sendval: the packed data
// would not fit in a single CountType-sized message.
2794 if (next_iter != sendval.end())
2795 timpi_error_msg(
"Non-blocking packed range sends cannot exceed " << std::numeric_limits<CountType>::max() <<
" in size");
2797 std::vector<std::vector<buffer_t>> allbuffers;
2799 timpi_assert(this->
size());
2801 recv.resize(this->
size());
2805 this->
allgather(buffer, allbuffers,
false);
2809 std::back_inserter(recv[i]), (T*)
nullptr);
// Excerpt of a root-targeted container merge (the declarator line, and so
// the function name and container type, are not visible here).  With more
// than one rank, the container's elements are copied into a vector, gathered
// to root_id, and on the root inserted back into data.
2814 template <
typename T,
typename C,
typename A>
2816 const unsigned int root_id)
const 2818 if (this->
size() > 1)
2820 std::vector<T> vecdata(data.begin(), data.end());
2821 this->
gather(root_id, vecdata);
2822 if (this->
rank() == root_id)
2823 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a container merge without a root argument (declarator not
// visible).  With more than one rank, the elements are copied into vecdata
// and later inserted back into data; the collective call between those two
// steps is not visible in this excerpt.
2829 template <
typename T,
typename C,
typename A>
2832 if (this->
size() > 1)
2834 std::vector<T> vecdata(data.begin(), data.end());
2836 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a root-targeted container merge (declarator not visible).
// Elements are copied into vecdata and gathered to root_id; on the root,
// statements not visible here run before the gathered elements are inserted
// into data.
2842 template <
typename T,
typename C,
typename A>
2844 const unsigned int root_id)
const 2846 if (this->
size() > 1)
2848 std::vector<T> vecdata(data.begin(), data.end());
2849 this->
gather(root_id, vecdata);
2850 if (this->
rank() == root_id)
2854 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a container merge without a root argument (declarator not
// visible).  Elements are copied into vecdata and, after steps not visible
// here, inserted back into data.
2860 template <
typename T,
typename C,
typename A>
2863 if (this->
size() > 1)
2865 std::vector<T> vecdata(data.begin(), data.end());
2871 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a root-targeted merge for a container of (T1, T2) pairs
// (declarator not visible).  The pairs are copied into vecdata and gathered
// to root_id; on the root, statements not visible here run before the
// gathered pairs are inserted into data.
2877 template <
typename T1,
typename T2,
typename C,
typename A>
2879 const unsigned int root_id)
const 2881 if (this->
size() > 1)
2883 std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
2884 this->
gather(root_id, vecdata);
2886 if (this->
rank() == root_id)
2893 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a merge without a root argument for a container of (T1, T2)
// pairs (declarator not visible).  The pairs are copied into vecdata and,
// after steps not visible here, inserted back into data.
2900 template <
typename T1,
typename T2,
typename C,
typename A>
2903 if (this->
size() > 1)
2905 std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
2912 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a root-targeted merge for a container of (T1, T2) pairs
// (declarator not visible).  The pairs are copied into vecdata and gathered
// to root_id; on the root, statements not visible here run before the
// gathered pairs are inserted into data.
2918 template <
typename T1,
typename T2,
typename C,
typename A>
2920 const unsigned int root_id)
const 2922 if (this->
size() > 1)
2924 std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
2925 this->
gather(root_id, vecdata);
2927 if (this->
rank() == root_id)
2932 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a merge without a root argument for a container of (T1, T2)
// pairs (declarator not visible).  The pairs are copied into vecdata and,
// after steps not visible here, inserted back into data.
2939 template <
typename T1,
typename T2,
typename C,
typename A>
2942 if (this->
size() > 1)
2944 std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
2950 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a root-targeted merge for a container of keys K with template
// parameters (K, H, KE, A); the declarator is not visible.  With more than
// one rank, the keys are copied into vecdata, gathered to root_id, and on
// the root inserted back into data.
2956 template <
typename K,
typename H,
typename KE,
typename A>
2958 const unsigned int root_id)
const 2960 if (this->
size() > 1)
2962 std::vector<K> vecdata(data.begin(), data.end());
2963 this->
gather(root_id, vecdata);
2964 if (this->
rank() == root_id)
2965 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a merge without a root argument for a container of keys K
// (declarator not visible).  The keys are copied into vecdata and later
// inserted back into data; the collective call between those steps is not
// visible in this excerpt.
2971 template <
typename K,
typename H,
typename KE,
typename A>
2974 if (this->
size() > 1)
2976 std::vector<K> vecdata(data.begin(), data.end());
2978 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a root-targeted merge for a container of keys K (declarator
// not visible).  The keys are copied into vecdata and gathered to root_id;
// on the root, statements not visible here run before the gathered keys are
// inserted into data.
2984 template <
typename K,
typename H,
typename KE,
typename A>
2986 const unsigned int root_id)
const 2988 if (this->
size() > 1)
2990 std::vector<K> vecdata(data.begin(), data.end());
2991 this->
gather(root_id, vecdata);
2992 if (this->
rank() == root_id)
2997 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a merge without a root argument for a container of keys K
// (declarator not visible).  The keys are copied into vecdata and, after
// steps not visible here, inserted back into data.
3004 template <
typename K,
typename H,
typename KE,
typename A>
3007 if (this->
size() > 1)
3009 std::vector<K> vecdata(data.begin(), data.end());
3015 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a root-targeted merge for a container of (K, T) pairs with
// template parameters (K, T, H, KE, A); the declarator is not visible.  The
// pairs are copied into vecdata and gathered to root_id; on the root,
// statements not visible here run before the gathered pairs are inserted
// into data.
3021 template <
typename K,
typename T,
typename H,
typename KE,
typename A>
3023 const unsigned int root_id)
const 3025 if (this->
size() > 1)
3027 std::vector<std::pair<K,T>> vecdata(data.begin(), data.end());
3028 this->
gather(root_id, vecdata);
3030 if (this->
rank() == root_id)
3037 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a merge without a root argument for a container of (K, T)
// pairs (declarator not visible).  The pairs are copied into vecdata and,
// after steps not visible here, inserted back into data.
3044 template <
typename K,
typename T,
typename H,
typename KE,
typename A>
3047 if (this->
size() > 1)
3049 std::vector<std::pair<K,T>> vecdata(data.begin(), data.end());
3056 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a root-targeted merge for a container of (K, T) pairs
// (declarator not visible).  The pairs are copied into vecdata and gathered
// to root_id; on the root, statements not visible here run before the
// gathered pairs are inserted into data.
3062 template <
typename K,
typename T,
typename H,
typename KE,
typename A>
3064 const unsigned int root_id)
const 3066 if (this->
size() > 1)
3068 std::vector<std::pair<K,T>> vecdata(data.begin(), data.end());
3069 this->
gather(root_id, vecdata);
3071 if (this->
rank() == root_id)
3076 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of a merge without a root argument for a container of (K, T)
// pairs (declarator not visible).  The pairs are copied into vecdata and,
// after steps not visible here, inserted back into data.
3083 template <
typename K,
typename T,
typename H,
typename KE,
typename A>
3086 if (this->
size() > 1)
3088 std::vector<std::pair<K,T>> vecdata(data.begin(), data.end());
3094 data.insert(vecdata.begin(), vecdata.end());
// Excerpt of the gather() overload (per the log-scope label) that collects
// one value per rank into recv on root_id.  root_id must be less than
// size(); the root resizes recv to size(); with more than one rank,
// TIMPI_GATHER sends one element of send_type from &sendval and receives one
// element per rank into recv.data() (or nullptr when recv is empty).
3100 template <
typename T,
typename A>
3103 std::vector<T,A> & recv)
const 3105 timpi_assert_less (root_id, this->
size());
3107 if (this->
rank() == root_id)
3108 recv.resize(this->
size());
3110 if (this->
size() > 1)
3112 TIMPI_LOG_SCOPE(
"gather()",
"Parallel");
3114 StandardType<T> send_type(&sendval);
3116 timpi_assert_less(root_id, this->
size());
3119 (TIMPI_GATHER(const_cast<T*>(&sendval), 1, send_type,
3120 recv.empty() ? nullptr : recv.data(), 1, send_type,
3121 root_id, this->
get()));
// Excerpt of the in-place vector gather() (per the log-scope label) for T
// with a DataType-derived StandardType.  Visible flow: a single-rank branch
// asserts rank 0 and root 0; per-rank lengths (filled by a step not visible
// here) are turned into displacements by a running sum; the local data is
// copied to r_src; the root resizes r to the global size; TIMPI_GATHERV
// then moves r_src into r on the root.
3129 template <
typename T,
typename A,
3130 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
3132 std::vector<T,A> & r)
const 3134 if (this->
size() == 1)
3136 timpi_assert (!this->
rank());
3137 timpi_assert (!root_id);
3141 timpi_assert_less (root_id, this->
size());
3143 std::vector<CountType>
3144 sendlengths (this->
size(), 0);
3145 std::vector<DispType>
3146 displacements(this->
size(), 0);
3148 const CountType mysize = cast_int<CountType>(r.size());
3151 TIMPI_LOG_SCOPE(
"gather()",
"Parallel");
3156 for (
unsigned int i=0; i != this->
size(); ++i)
3158 displacements[i] = globalsize;
3159 globalsize += sendlengths[i];
3163 if (globalsize == 0)
// Keep a copy of the local contribution, since r is reused as the receive
// buffer on the root.
3167 std::vector<T,A> r_src(r);
3171 if (root_id == this->
rank())
3172 r.resize(globalsize);
3174 timpi_assert_less(root_id, this->
size());
3178 (TIMPI_GATHERV(r_src.empty() ? nullptr : r_src.data(), mysize,
3179 StandardType<T>(), r.empty() ? nullptr : r.data(),
3180 sendlengths.data(), displacements.data(),
3181 StandardType<T>(), root_id, this->
get()));
// Excerpt of a vector overload enabled when Has_buffer_type<Packing<T>>::value
// is true (declarator not visible).  A local vector `gathered` is filled
// through an inserter by a call not visible in this excerpt.
3185 template <
typename T,
typename A,
3186 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
3188 std::vector<T,A> & r)
const 3190 std::vector<T,A> gathered;
3193 std::inserter(gathered, gathered.end()));
// Excerpt of the string gather() (per the log-scope label): collects one
// std::basic_string<T> per rank into recv on root_id.  Visible flow: the
// root resizes recv to size(); with more than one rank, per-rank lengths are
// either all mysize (identical_buffer_sizes) or gathered from every rank;
// displacements are the running sum of lengths; the root allocates one
// string r of the global size; TIMPI_GATHERV fills it; and the root slices
// r into recv[i] with substr.
3200 template <
typename T,
typename A>
3202 const std::basic_string<T> & sendval,
3203 std::vector<std::basic_string<T>,A> & recv,
3204 const bool identical_buffer_sizes)
const 3206 timpi_assert_less (root_id, this->
size());
3208 if (this->
rank() == root_id)
3209 recv.resize(this->
size());
3211 if (this->
size() > 1)
3213 TIMPI_LOG_SCOPE (
"gather()",
"Parallel");
3215 std::vector<CountType>
3216 sendlengths (this->
size(), 0);
3217 std::vector<DispType>
3218 displacements(this->
size(), 0);
3220 const CountType mysize = cast_int<CountType>(sendval.size());
3222 if (identical_buffer_sizes)
3223 sendlengths.assign(this->
size(), mysize);
3226 this->
gather(root_id, mysize, sendlengths);
3231 for (
unsigned int i=0; i < this->
size(); ++i)
3233 displacements[i] = globalsize;
3234 globalsize += sendlengths[i];
3238 std::basic_string<T> r;
3239 if (this->
rank() == root_id)
3240 r.resize(globalsize, 0);
3242 timpi_assert_less(root_id, this->
size());
// Only the root passes a receive buffer; other ranks pass nullptr.
3246 (TIMPI_GATHERV(const_cast<T*>(sendval.data()),
3247 mysize, StandardType<T>(),
3248 this->
rank() == root_id ? &r[0] :
nullptr,
3249 sendlengths.data(), displacements.data(),
3250 StandardType<T>(), root_id, this->
get()));
3253 if (this->
rank() == root_id)
3254 for (
unsigned int i=0; i != this->
size(); ++i)
3255 recv[i] = r.substr(displacements[i], sendlengths[i]);
// Excerpt of the scalar allgather() (per the log-scope label) for T with a
// DataType-derived StandardType.  recv is resized to size(), and
// TIMPI_ALLGATHER exchanges one element of send_type per rank into
// recv.data().  The conditions selecting that call and the body of the
// trailing `else if (comm_size > 0)` branch are not visible here.
3263 template <
typename T,
typename A,
3264 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
3266 std::vector<T,A> & recv)
const 3268 TIMPI_LOG_SCOPE (
"allgather()",
"Parallel");
3270 timpi_assert(this->
size());
3271 recv.resize(this->
size());
3273 const unsigned int comm_size = this->
size();
3276 StandardType<T> send_type(&sendval);
3279 (TIMPI_ALLGATHER(const_cast<T*>(&sendval), 1, send_type, recv.data(), 1,
3280 send_type, this->
get()));
3282 else if (comm_size > 0)
// Excerpt of the scalar allgather() (per the log-scope label) enabled when
// Has_buffer_type<Packing<T>>::value is true.  recv is resized to size();
// a total buffer budget of 1e8 is divided by size() to give a per-rank
// figure; sendval is wrapped in a one-element vector `range`; and the
// per-rank figure is passed as the last argument of a call not visible
// here.  The body of the trailing `else if (comm_size > 0)` is not visible.
3286 template <
typename T,
typename A,
3287 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
3289 std::vector<T,A> & recv)
const 3291 TIMPI_LOG_SCOPE (
"allgather()",
"Parallel");
3293 timpi_assert(this->
size());
3294 recv.resize(this->
size());
3296 static const std::size_t approx_total_buffer_size = 1e8;
3297 const std::size_t approx_each_buffer_size =
3298 approx_total_buffer_size / this->
size();
3300 unsigned int comm_size = this->
size();
3303 std::vector<T> range = {sendval};
3306 approx_each_buffer_size);
3308 else if (comm_size > 0)
// Excerpt of the in-place vector allgather() (per the log-scope label) for
// T with a DataType-derived StandardType.  Fewer than two ranks is tested
// first (that branch's body is not visible).  With identical_buffer_sizes,
// a vector of r.size()*size() elements is created and TIMPI_ALLGATHER is
// used; otherwise per-rank lengths (filled by a step not visible here) give
// displacements and TIMPI_ALLGATHERV is used.
// NOTE(review): both collectives send from r_src and receive into r, so a
// statement exchanging r and r_src presumably sits on a line that is not
// part of this excerpt -- confirm against the full file.
3312 template <
typename T,
typename A,
3313 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
3315 const bool identical_buffer_sizes)
const 3317 if (this->
size() < 2)
3320 TIMPI_LOG_SCOPE(
"allgather()",
"Parallel");
3322 if (identical_buffer_sizes)
3324 timpi_assert(this->
verify(r.size()));
3328 std::vector<T,A> r_src(r.size()*this->
size());
3330 StandardType<T> send_type(r_src.data());
3333 (TIMPI_ALLGATHER(r_src.data(), cast_int<CountType>(r_src.size()),
3334 send_type, r.data(), cast_int<CountType>(r_src.size()),
3335 send_type, this->
get()));
3340 std::vector<CountType>
3341 sendlengths (this->
size(), 0);
3342 std::vector<DispType>
3343 displacements(this->
size(), 0);
3345 const CountType mysize = cast_int<CountType>(r.size());
3351 for (
unsigned int i=0; i != this->
size(); ++i)
3353 displacements[i] = globalsize;
3354 globalsize += sendlengths[i];
3358 if (globalsize == 0)
3362 std::vector<T,A> r_src(globalsize);
3365 StandardType<T> send_type(r.data());
3370 (TIMPI_ALLGATHERV(r_src.empty() ? nullptr : r_src.data(), mysize,
3371 send_type, r.data(), sendlengths.data(),
3372 displacements.data(), send_type, this->
get()));
// Excerpt of the in-place vector allgather() (per the log-scope label)
// enabled when Has_buffer_type<Packing<T>>::value is true.  Visible pieces:
// the fewer-than-two-ranks test, the identical_buffer_sizes branch that
// verifies r.size() and creates a vector of r.size()*size() elements, and a
// second path that sums per-rank lengths into globalsize and creates a
// vector of that size.  The calls that move data are not visible here.
3375 template <
typename T,
typename A,
3376 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
3378 const bool identical_buffer_sizes)
const 3380 if (this->
size() < 2)
3383 TIMPI_LOG_SCOPE(
"allgather()",
"Parallel");
3385 if (identical_buffer_sizes)
3387 timpi_assert(this->
verify(r.size()));
3392 std::vector<T,A> r_src(r.size()*this->
size());
3402 std::vector<CountType>
3403 sendlengths (this->
size(), 0);
3404 std::vector<DispType>
3405 displacements(this->
size(), 0);
3407 const CountType mysize = cast_int<CountType>(r.size());
3412 for (
unsigned int i=0; i != this->
size(); ++i)
3413 globalsize += sendlengths[i];
3416 if (globalsize == 0)
3420 std::vector<T,A> r_src(globalsize);
// Excerpt of an allgather() overload (per the log-scope label) whose
// elements r[i] expose size()/begin()/end()/assign(), i.e. string-like
// values with character type T.  Visible flow: record each local string's
// length and concatenate all characters into concat_src; allgather the
// lengths (r is resized to the total string count); allgather each rank's
// concatenated size; build displacements as the running sum; gather all
// characters into `concat` with TIMPI_ALLGATHERV; then rebuild each r[i]
// from consecutive slices of concat.
3429 template <
typename T,
typename A>
3431 const bool identical_buffer_sizes)
const 3433 if (this->
size() < 2)
3436 TIMPI_LOG_SCOPE(
"allgather()",
"Parallel");
3438 if (identical_buffer_sizes)
3440 timpi_assert(this->
verify(r.size()));
3450 std::vector<CountType> mystrlengths (r.size());
3451 std::vector<T> concat_src;
3454 for (std::size_t i=0; i != r.size(); ++i)
3456 CountType stringlen = cast_int<CountType>(r[i].size());
3457 mystrlengths[i] = stringlen;
3458 myconcatsize += stringlen;
3460 concat_src.reserve(myconcatsize);
3461 for (std::size_t i=0; i != r.size(); ++i)
3463 (concat_src.end(), r[i].begin(), r[i].end());
3466 std::vector<CountType> strlengths = mystrlengths;
3467 this->
allgather(strlengths, identical_buffer_sizes);
3470 r.resize(strlengths.size());
3473 std::vector<CountType> concat_sizes;
3474 this->
allgather(myconcatsize, concat_sizes);
3478 std::vector<DispType> displacements(this->
size(), 0);
3480 for (
unsigned int i=0; i != this->
size(); ++i)
3482 displacements[i] = globalsize;
3483 globalsize += concat_sizes[i];
3487 if (globalsize == 0)
3492 std::vector<T> concat(globalsize);
3496 StandardType<T> send_type(concat.data());
3499 (TIMPI_ALLGATHERV(concat_src.empty() ?
3500 nullptr : concat_src.data(), myconcatsize,
3501 send_type, concat.data(), concat_sizes.data(),
3502 displacements.data(), send_type, this->
get()));
// Walk concat in order; the statement advancing `begin` between iterations
// is not visible in this excerpt.
3505 const T * begin = concat.data();
3506 for (std::size_t i=0; i != r.size(); ++i)
3508 const T * end = begin + strlengths[i];
3509 r[i].assign(begin, end);
// Excerpt of the scatter() overload (per the log-scope label) that delivers
// one element of data to each rank's recv.  root_id must be less than
// size(), and on the root data.size() must equal size().  A single-rank
// branch asserts rank 0 and root 0 (the rest of that branch is not
// visible).  Otherwise TIMPI_SCATTER sends one element per rank from
// data_ptr, which is null when data is empty.
3516 template <
typename T,
typename A>
3519 const unsigned int root_id)
const 3522 timpi_assert_less (root_id, this->
size());
3526 timpi_assert (this->
rank() != root_id || this->
size() == data.size());
3528 if (this->
size() == 1)
3530 timpi_assert (!this->
rank());
3531 timpi_assert (!root_id);
3536 TIMPI_LOG_SCOPE(
"scatter()",
"Parallel");
3538 T * data_ptr =
const_cast<T*
>(data.empty() ? nullptr : data.data());
3541 timpi_assert_less(root_id, this->
size());
3544 (TIMPI_SCATTER(data_ptr, 1, StandardType<T>(data_ptr),
3545 &recv, 1, StandardType<T>(&recv), root_id, this->
get()));
// Excerpt of the scatter() overload (per the log-scope label) that splits
// data into equal chunks, one per rank, received into recv.  With a single
// rank recv simply becomes a copy of data.  Otherwise the root requires
// data.size() to be a multiple of size() and computes the chunk length;
// recv is resized to recv_buffer_size and TIMPI_SCATTER distributes the
// chunks.
// NOTE(review): recv_buffer_size is computed only on the root in the visible
// lines; how other ranks learn it is not visible here -- confirm.
3550 template <
typename T,
typename A>
3552 std::vector<T,A> & recv,
3553 const unsigned int root_id)
const 3555 timpi_assert_less (root_id, this->
size());
3557 if (this->
size() == 1)
3559 timpi_assert (!this->
rank());
3560 timpi_assert (!root_id);
3561 recv.assign(data.begin(), data.end());
3565 TIMPI_LOG_SCOPE(
"scatter()",
"Parallel");
3567 std::size_t recv_buffer_size = 0;
3568 if (this->
rank() == root_id)
3570 timpi_assert(data.size() % this->
size() == 0);
3571 recv_buffer_size = cast_int<std::size_t>(data.size() / this->
size());
3575 recv.resize(recv_buffer_size);
3577 T * data_ptr =
const_cast<T*
>(data.empty() ? nullptr : data.data());
3578 T * recv_ptr = recv.empty() ? nullptr : recv.data();
3579 ignore(data_ptr, recv_ptr);
3581 timpi_assert_less(root_id, this->
size());
3584 (TIMPI_SCATTER(data_ptr, recv_buffer_size, StandardType<T>(data_ptr),
3585 recv_ptr, recv_buffer_size, StandardType<T>(recv_ptr),
3586 root_id, this->
get()));
// Excerpt of the scatter() overload (per the log-scope label) that sends a
// different number of elements to each rank, given by counts.  With a
// single rank recv receives the first counts[0] elements of data.
// Otherwise the root builds displacements as the running sum of counts and
// asserts that they add up to data.size(); each rank receives its own count
// through the scalar scatter, resizes recv, and TIMPI_SCATTERV distributes
// the data.
// NOTE(review): counts is taken by value in the visible parameter list, so
// each call copies the vector; the matching declaration is not visible.
3591 template <
typename T,
typename A1,
typename A2>
3593 const std::vector<CountType,A2> counts,
3594 std::vector<T,A1> & recv,
3595 const unsigned int root_id)
const 3597 timpi_assert_less (root_id, this->
size());
3599 if (this->
size() == 1)
3601 timpi_assert (!this->
rank());
3602 timpi_assert (!root_id);
3603 timpi_assert (counts.size() == this->
size());
3604 recv.assign(data.begin(), data.begin() + counts[0]);
3608 std::vector<DispType> displacements(this->
size(), 0);
3609 if (root_id == this->
rank())
3611 timpi_assert(counts.size() == this->
size());
3614 std::size_t globalsize = 0;
3615 for (
unsigned int i=0; i < this->
size(); ++i)
3617 displacements[i] = globalsize;
3618 globalsize += counts[i];
3621 timpi_assert(data.size() == globalsize);
3624 TIMPI_LOG_SCOPE(
"scatter()",
"Parallel");
3628 this->
scatter(counts, recv_buffer_size, root_id);
3629 recv.resize(recv_buffer_size);
3631 T * data_ptr =
const_cast<T*
>(data.empty() ? nullptr : data.data());
3632 CountType * count_ptr =
const_cast<CountType*
>(counts.empty() ? nullptr : counts.data());
3633 T * recv_ptr = recv.empty() ? nullptr : recv.data();
3634 ignore(data_ptr, count_ptr, recv_ptr);
3636 timpi_assert_less(root_id, this->
size());
3640 (TIMPI_SCATTERV(data_ptr, count_ptr, displacements.data(), StandardType<T>(data_ptr),
3641 recv_ptr, recv_buffer_size, StandardType<T>(recv_ptr), root_id, this->
get()));
3645 #ifdef TIMPI_HAVE_MPI 3650 template <
typename T,
typename A1,
typename A2>
// Excerpt of a scatter overload taking per-rank counts as std::vector<int>:
// the counts are copied into a std::vector<CountType> and forwarded to the
// scatter call below.
3652 const std::vector<int,A2> counts,
3653 std::vector<T,A1> & recv,
3654 const unsigned int root_id)
const 3656 std::vector<CountType> full_counts(counts.begin(), counts.end());
3657 this->
scatter(data, full_counts, recv, root_id);
// Excerpt of a scatter overload whose input is indexed per rank (data[i] is
// the chunk for rank i; the parameter's declaration is not visible).  With
// a single rank recv becomes a copy of data[0].  Otherwise the root
// concatenates all chunks into stacked_data, recording each chunk's length
// in counts unless identical_buffer_sizes is set, and the work is delegated
// to the equal-chunk or per-count scatter accordingly.
3664 template <
typename T,
typename A1,
typename A2>
3666 std::vector<T,A1> & recv,
3667 const unsigned int root_id,
3668 const bool identical_buffer_sizes)
const 3670 timpi_assert_less (root_id, this->
size());
3672 if (this->
size() == 1)
3674 timpi_assert (!this->
rank());
3675 timpi_assert (!root_id);
3676 timpi_assert (data.size() == this->
size());
3677 recv.assign(data[0].begin(), data[0].end());
3681 std::vector<T,A1> stacked_data;
3682 std::vector<CountType> counts;
3684 if (root_id == this->
rank())
3686 timpi_assert (data.size() == this->
size());
3688 if (!identical_buffer_sizes)
3689 counts.resize(this->
size());
3691 for (std::size_t i=0; i < data.size(); ++i)
3693 if (!identical_buffer_sizes)
3694 counts[i] = cast_int<CountType>(data[i].size());
// Debug check that neighbouring chunks have equal length (the condition
// selecting this assert is not visible here).
3698 timpi_assert(!i || data[i-1].
size() == data[i].
size());
3700 std::copy(data[i].begin(), data[i].end(), std::back_inserter(stacked_data));
3704 if (identical_buffer_sizes)
3705 this->
scatter(stacked_data, recv, root_id);
3707 this->
scatter(stacked_data, counts, recv, root_id);
// Excerpt of alltoall() (per the log-scope label).  The first test covers
// fewer than two ranks or an empty buffer (that branch's body is not
// visible).  Otherwise buf.size() must be a multiple of size(); the
// per-rank chunk length is buf.size()/size(), verified via verify(); and
// TIMPI_ALLTOALL exchanges the chunks in place (MPI_IN_PLACE) through
// buf.data().
3712 template <
typename T,
typename A>
3715 if (this->
size() < 2 || buf.empty())
3718 TIMPI_LOG_SCOPE(
"alltoall()",
"Parallel");
3724 cast_int<CountType>(buf.size()/this->
size());
3727 timpi_assert_equal_to (buf.size()%this->
size(), 0);
3729 timpi_assert(this->
verify(size_per_proc));
3731 StandardType<T> send_type(buf.data());
3734 (TIMPI_ALLTOALL(MPI_IN_PLACE, size_per_proc, send_type, buf.data(),
3735 size_per_proc, send_type, this->
get()));
// Excerpt of the scalar broadcast() (per the log-scope label) for T with a
// DataType-derived StandardType.  A single-rank branch asserts rank 0 and
// root 0 (the rest of that branch is not visible).  Otherwise root_id must
// be less than size() and TIMPI_BCAST sends one element of
// StandardType<T>(&data) from root_id.
3740 template <
typename T
3741 #ifdef TIMPI_HAVE_MPI 3743 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type
3747 const unsigned int root_id,
3751 if (this->
size() == 1)
3753 timpi_assert (!this->
rank());
3754 timpi_assert (!root_id);
3758 timpi_assert_less (root_id, this->
size());
3760 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
3764 (TIMPI_BCAST(&data, 1, StandardType<T>(&data), root_id,
3768 #ifdef TIMPI_HAVE_MPI 3769 template <
typename T,
3770 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
// Excerpt of a scalar overload enabled when Has_buffer_type<Packing<T>>::value
// is true (declarator not visible).  A single-rank branch asserts rank 0 and
// root 0; otherwise root_id must be less than size() and data is wrapped in
// a one-element vector `range` for a call not visible in this excerpt.
3772 const unsigned int root_id,
3776 if (this->
size() == 1)
3778 timpi_assert (!this->
rank());
3779 timpi_assert (!root_id);
3783 timpi_assert_less (root_id, this->
size());
3791 std::vector<T> range = {data};
// Excerpt of the vector broadcast() (per the log-scope label) for T with a
// DataType-derived StandardType.  A single-rank branch asserts rank 0 and
// root 0.  Otherwise all ranks must agree on identical_sizes; when it is
// set, data.size() is verified across ranks (the alternative branch is not
// visible).  data is resized to data_size and TIMPI_BCAST sends
// data.size() elements from root_id, using a null pointer when data is
// empty.
3805 template <
typename T,
typename A,
3806 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
3808 const unsigned int root_id,
3809 const bool timpi_mpi_var(identical_sizes))
const 3812 if (this->
size() == 1)
3814 timpi_assert (!this->
rank());
3815 timpi_assert (!root_id);
3819 #ifdef TIMPI_HAVE_MPI 3821 timpi_assert_less (root_id, this->
size());
3822 timpi_assert (this->
verify(identical_sizes));
3824 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
3826 std::size_t data_size = data.size();
3828 if (identical_sizes)
3829 timpi_assert(this->
verify(data_size));
3833 data.resize(data_size);
3837 T * data_ptr = data.empty() ? nullptr : data.data();
3839 timpi_assert_less(root_id, this->
size());
3842 (TIMPI_BCAST(data_ptr, cast_int<CountType>(data.size()),
3843 StandardType<T>(data_ptr), root_id, this->
get()));
// Excerpt of the vector broadcast() (per the log-scope label) enabled when
// Has_buffer_type<Packing<T>>::value is true.  A single-rank branch asserts
// rank 0 and root 0.  Otherwise all ranks must agree on identical_sizes;
// when it is set, data.size() is verified across ranks (the alternative
// branch is not visible) and data is resized to data_size.  The call that
// transfers the elements is not visible in this excerpt.
3847 template <
typename T,
typename A,
3848 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
3850 const unsigned int root_id,
3851 const bool identical_sizes)
const 3853 if (this->
size() == 1)
3855 timpi_assert (!this->
rank());
3856 timpi_assert (!root_id);
3860 timpi_assert_less (root_id, this->
size());
3861 timpi_assert (this->
verify(identical_sizes));
3863 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
3865 std::size_t data_size = data.size();
3867 if (identical_sizes)
3868 timpi_assert(this->
verify(data_size));
3872 data.resize(data_size);
3874 timpi_assert_less(root_id, this->
size());
// Excerpt of the map broadcast (per the log-scope label) for maps whose key
// and mapped types both have a DataType-derived StandardType.  A
// single-rank branch asserts rank 0 and root 0.  Otherwise the root copies
// its pairs into comm_data while a separate statement sizes comm_data to
// data_size (presumably the non-root branch; the `else` is not visible),
// comm_data is broadcast with identical sizes, and non-root ranks insert
// the received pairs into data (statements before that insert are not
// visible).
3884 template <
typename Map,
3885 typename std::enable_if<std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
3886 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value,
3889 const unsigned int root_id,
3890 const bool timpi_mpi_var(identical_sizes))
const 3893 if (this->
size() == 1)
3895 timpi_assert (!this->
rank());
3896 timpi_assert (!root_id);
3900 #ifdef TIMPI_HAVE_MPI 3901 timpi_assert_less (root_id, this->
size());
3902 timpi_assert (this->
verify(identical_sizes));
3904 TIMPI_LOG_SCOPE(
"broadcast(map)",
"Parallel");
3906 std::size_t data_size=data.size();
3907 if (identical_sizes)
3908 timpi_assert(this->
verify(data_size));
3912 std::vector<std::pair<
typename Map::key_type,
3913 typename Map::mapped_type>> comm_data;
3915 if (root_id == this->
rank())
3916 comm_data.assign(data.begin(), data.end());
3918 comm_data.resize(data_size);
3920 this->
broadcast(comm_data, root_id,
true);
3922 if (this->
rank() != root_id)
3925 data.insert(comm_data.begin(), comm_data.end());
3930 template <
typename Map,
3931 typename std::enable_if<!(std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
3932 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value),
3935 const unsigned int root_id,
3936 const bool timpi_mpi_var(identical_sizes))
const 3939 if (this->size() == 1)
3941 timpi_assert (!this->rank());
3942 timpi_assert (!root_id);
3946 #ifdef TIMPI_HAVE_MPI 3947 timpi_assert_less (root_id, this->size());
3948 timpi_assert (this->verify(identical_sizes));
3950 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
3952 std::size_t data_size=data.size();
3953 if (identical_sizes)
3954 timpi_assert(this->verify(data_size));
3956 this->broadcast(data_size, root_id);
3958 std::vector<typename Map::key_type> pair_first; pair_first.reserve(data_size);
3959 std::vector<typename Map::mapped_type> pair_second; pair_first.reserve(data_size);
3961 if (root_id == this->rank())
3963 for (
const auto & pr : data)
3965 pair_first.push_back(pr.first);
3966 pair_second.push_back(pr.second);
3971 pair_first.resize(data_size);
3972 pair_second.resize(data_size);
3976 (pair_first, root_id,
3979 (pair_second, root_id,
3982 timpi_assert(pair_first.size() == pair_first.size());
3984 if (this->rank() != root_id)
3987 for (std::size_t i=0; i<pair_first.size(); ++i)
3988 data[pair_first[i]] = pair_second[i];
// Signature fragment of a broadcast overload with template parameters
// T1, T2, C, A.  Only the template header and the trailing root_id /
// identical_sizes parameters survive in this listing; the function name,
// first parameter and body are not visible.
// NOTE(review): presumably the std::map overload -- confirm.
3993 template <
typename T1,
typename T2,
typename C,
typename A>
3995 const unsigned int root_id,
3996 const bool identical_sizes)
// The leading `const` on the next line closes the signature above; the rest
// of that line starts a second overload with template parameters
// K, V, H, E, A, again visible only as a signature fragment.
// NOTE(review): presumably the std::unordered_map overload -- confirm.
const 4003 template <
typename K,
typename V,
typename H,
typename E,
typename A>
4005 const unsigned int root_id,
4006 const bool identical_sizes)
// The leading `const` on the next line closes the signature above; the rest
// of that line starts a packed-range broadcast.  The visible parameters
// match the broadcast_packed_range declaration shown later in the file
// (its name line and leading context1 / range_begin parameters are
// missing from this listing).
const 4011 template <
typename Context,
typename OutputContext,
4012 typename Iter,
typename OutputIter>
4015 const Iter range_end,
4016 OutputContext * context2,
4017 OutputIter out_iter,
4018 const unsigned int root_id,
4019 std::size_t approx_buffer_size)
// T is the element type of the input range; buffer_t is the buffer element
// type that Packing<T> declares for it.
const 4021 typedef typename std::iterator_traits<Iter>::value_type T;
4022 typedef typename Packing<T>::buffer_type buffer_t;
// Single-processor case: sanity-check that we are rank 0 and that root_id
// is 0.
4024 if (this->
size() == 1)
4026 timpi_assert (!this->
rank());
4027 timpi_assert (!root_id);
4035 std::vector<buffer_t> buffer;
// On the root, fill `buffer` from [range_begin, range_end).  The callee name
// is on a line missing from this listing; the argument list matches the
// pack_range helper declared later in the file -- confirm.
4037 if (this->
rank() == root_id)
4039 (context1, range_begin, range_end, buffer, approx_buffer_size);
// Record the buffer length and resize the buffer to it.
// NOTE(review): the statements between these two lines are not visible;
// presumably buffer_size is broadcast from the root so that other ranks can
// allocate matching storage -- confirm.
4043 std::size_t buffer_size = buffer.size();
4050 buffer.resize(buffer_size);
// The output iterator is held through a unique_ptr and re-created from
// return_out_iter after unpacking (see the last line of this block).
4057 std::unique_ptr<OutputIter> next_out_iter =
4058 std::make_unique<OutputIter>(out_iter);
// Ranks other than the root unpack the received buffer through context2.
// The callee name is missing here; the argument list matches the
// unpack_range helper declared later in the file -- confirm.
4060 if (this->
rank() != root_id)
4063 (buffer, context2, *next_out_iter, (T*)
nullptr);
4064 next_out_iter = std::make_unique<OutputIter>(return_out_iter);
// Packed-range gather: packs the local range into buffers, gathers each
// buffer on root_id, and unpacks the result through out_iter.
// NOTE(review): the name line and the leading root_id / context /
// range_begin parameters are missing from this listing; the visible
// parameters and the gather(root_id, buffer) call match the
// gather_packed_range declaration shown later in the file -- confirm.
4070 template <
typename Context,
typename Iter,
typename OutputIter>
4074 const Iter range_end,
4075 OutputIter out_iter,
4076 std::size_t approx_buffer_size)
// T is the element type of the input range; buffer_t is the buffer element
// type that Packing<T> declares for it.
const 4078 typedef typename std::iterator_traits<Iter>::value_type T;
4079 typedef typename Packing<T>::buffer_type buffer_t;
// Start from "do I still have data?" and pass the flag through this->max().
// NOTE(review): presumably this makes the flag true on every rank while any
// rank has data left, so all ranks run the same number of loop
// iterations -- confirm.
4081 bool nonempty_range = (range_begin != range_end);
4082 this->
max(nonempty_range);
// The output iterator is held through a unique_ptr and re-created from
// return_out_iter after each unpack.
4086 std::unique_ptr<OutputIter> next_out_iter =
4087 std::make_unique<OutputIter>(out_iter);
4089 while (nonempty_range)
4093 std::vector<buffer_t> buffer;
// Pack up to approx_buffer_size worth of the remaining range into `buffer`.
// The callee name is missing here; the argument list matches the pack_range
// helper declared later in the file.  range_begin is re-tested after the
// loop body, so it is presumably advanced by an assignment on the missing
// line -- confirm.
4096 (context, range_begin, range_end, buffer, approx_buffer_size);
// Collect this round's buffers on root_id.
4098 this->
gather(root_id, buffer);
// Unpack whatever the buffer now holds.  The callee name is missing here;
// the argument list matches the unpack_range helper declared later in the
// file.
4101 (buffer, context, *next_out_iter, (T*)(
nullptr));
4102 next_out_iter = std::make_unique<OutputIter>(return_out_iter);
// Recompute the loop condition the same way it was initialized.
4104 nonempty_range = (range_begin != range_end);
4105 this->
max(nonempty_range);
// Packed-range collective with no root_id parameter: packs the local range
// into buffers and unpacks the exchanged buffers through out_iter.
// NOTE(review): the name line, the leading parameters and the communication
// call itself are missing from this listing; the visible parameters match
// the allgather_packed_range declaration shown later in the file -- confirm.
4110 template <
typename Context,
typename Iter,
typename OutputIter>
4113 const Iter range_end,
4114 OutputIter out_iter,
4115 std::size_t approx_buffer_size)
// T is the element type of the input range; buffer_t is the buffer element
// type that Packing<T> declares for it.
const 4117 typedef typename std::iterator_traits<Iter>::value_type T;
4118 typedef typename Packing<T>::buffer_type buffer_t;
// Start from "do I still have data?" and pass the flag through this->max().
// NOTE(review): presumably this makes the flag true on every rank while any
// rank has data left, so all ranks run the same number of loop
// iterations -- confirm.
4120 bool nonempty_range = (range_begin != range_end);
4121 this->
max(nonempty_range);
// The output iterator is held through a unique_ptr and re-created from
// return_out_iter after each unpack.
4125 std::unique_ptr<OutputIter> next_out_iter =
4126 std::make_unique<OutputIter>(out_iter);
4128 while (nonempty_range)
4132 std::vector<buffer_t> buffer;
// Pack up to approx_buffer_size worth of the remaining range into `buffer`.
// The callee name is missing here; the argument list matches the pack_range
// helper declared later in the file.
4135 (context, range_begin, range_end, buffer, approx_buffer_size);
// After the (not visible) exchange step the buffer must be non-empty.
4139 timpi_assert(buffer.size());
// Unpack the exchanged buffer.  The callee name is missing here; the
// argument list matches the unpack_range helper declared later in the file.
4142 (buffer, context, *next_out_iter, (T*)
nullptr);
4143 next_out_iter = std::make_unique<OutputIter>(return_out_iter);
// Recompute the loop condition the same way it was initialized.
4145 nonempty_range = (range_begin != range_end);
4146 this->
max(nonempty_range);
// Probe for a pending packed-range message whose buffer element type is
// Packing<T>::buffer_type; the function is identified by its log-scope
// string below.  The name line, the src_processor_id and flag parameters
// and the end of the body are missing from this listing.
4152 template<
typename T>
4154 const MessageTag & tag,
4157 TIMPI_LOG_SCOPE(
"packed_range_probe()",
"Parallel");
// Mark the parameters as used.
// NOTE(review): presumably this silences unused-parameter warnings in
// builds where the MPI code below is compiled out -- confirm.
4159 ignore(src_processor_id, tag);
// Status object constructed from the StandardType of the packed buffer
// element type.
4161 Status stat((StandardType<
typename Packing<T>::buffer_type>()));
// The source must be a valid rank, or satisfy the alternative condition on
// the continuation line (not visible in this listing).
4165 timpi_assert(src_processor_id < this->
size() ||
// Probe via MPI_Iprobe; the remaining arguments are on lines missing from
// this listing.
4168 timpi_call_mpi(MPI_Iprobe(
int(src_processor_id),
// possibly_receive overload for std::vector<T,A> where StandardType<T>
// derives from DataType (see the enable_if condition).  It builds the
// StandardType from the vector's storage and forwards to the
// possibly_receive overload that takes an explicit type argument.
// NOTE(review): the name line and the src_processor_id / req parameters are
// missing from this listing.
4181 template <
typename T,
typename A,
4182 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
4184 std::vector<T,A> & buf,
4186 const MessageTag & tag)
// Pass a pointer to the first element as the StandardType example, or
// nullptr when the vector is empty.
const 4188 T * dataptr = buf.empty() ? nullptr : buf.data();
4190 return this->
possibly_receive(src_processor_id, buf, StandardType<T>(dataptr), req, tag);
// Signature fragment of the possibly_receive overload selected when
// Packing<T> has a buffer_type (see the enable_if condition).  Its name
// line, remaining parameters and body are missing from this listing.
4193 template <
typename T,
typename A,
4194 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
4196 std::vector<T,A> & buf,
4198 const MessageTag & tag)
// The leading `const` on the next line closes the signature above; the rest
// of that line starts the overload for a vector of vectors.
const 4210 template <
typename T,
typename A1,
typename A2>
4212 std::vector<std::vector<T,A1>,A2> & buf,
4214 const MessageTag & tag)
// Use the first element of the first inner vector as the StandardType
// example, or nullptr when the outer vector or its first inner vector is
// empty; then forward to the overload taking an explicit type argument.
const 4216 T * dataptr = buf.empty() ? nullptr : (buf[0].empty() ? nullptr : buf[0].data());
4218 return this->
possibly_receive(src_processor_id, buf, StandardType<T>(dataptr), req, tag);
// Packed-range counterpart of possibly_receive; the function is identified
// by its log-scope string below.  The name line, most parameters and
// several body lines (including the end of the function) are missing from
// this listing.
4222 template <
typename Context,
typename OutputIter,
typename T>
4228 const MessageTag & tag)
const 4230 TIMPI_LOG_SCOPE(
"possibly_receive_packed_range()",
"Parallel");
// Probe for a matching packed-range message; int_flag is passed as the
// probe's flag argument.
4234 auto stat = packed_range_probe<T>(src_processor_id, tag, int_flag);
// Overwrite src_processor_id with the source reported by the probe status.
// NOTE(review): presumably guarded by a condition on a line missing from
// this listing -- confirm.
4238 src_processor_id = stat.source();
// Attach post-wait work to the request: a PostWaitDereferenceTag built
// from `tag`.
4249 req.add_post_wait_work
4250 (
new PostWaitDereferenceTag(tag));
// Consistency check: if the flag is set, src_processor_id must be a valid
// rank (further conditions continue on lines not visible here).
4253 timpi_assert(!int_flag || (int_flag &&
4254 src_processor_id < this->
size() &&
4263 #endif // TIMPI_PARALLEL_IMPLEMENTATION_H void gather_packed_range(const unsigned int root_id, Context *context, Iter range_begin, const Iter range_end, OutputIter out, std::size_t approx_buffer_size=1000000) const
Take a range of local variables, combine it with ranges from all processors, and write the output to ...
void add_prior_request(const Request &req)
data_type dataplusint_type< short int >()
MPI_Request request
Request object for non-blocking I/O.
void allgather(const T &send_data, std::vector< T, A > &recv_data) const
Take a vector of length this->size(), and fill in recv[processor_id] = the value of send on that proc...
void nonblocking_send_packed_range(const unsigned int dest_processor_id, const Context *context, Iter range_begin, const Iter range_end, Request &req, const MessageTag &tag=no_tag) const
Similar to the above Nonblocking send_packed_range with a few important differences: ...
void broadcast_packed_range(const Context *context1, Iter range_begin, const Iter range_end, OutputContext *context2, OutputIter out, const unsigned int root_id=0, std::size_t approx_buffer_size=1000000) const
Blocking-broadcast range-of-pointers to one processor.
OutputIter unpack_range(const std::vector< buffertype > &buffer, Context *context, OutputIter out_iter, const T *)
Helper function for range unpacking.
Status packed_range_probe(const unsigned int src_processor_id, const MessageTag &tag, bool &flag) const
Non-Blocking message probe for a packed range message.
void scatter(const std::vector< T, A > &data, T &recv, const unsigned int root_id=0) const
Take a vector of local variables and scatter the ith item to the ith processor in the communicator...
void map_max(Map &data) const
Private implementation function called by the map-based max() specializations.
void minloc(T &r, unsigned int &min_id) const
Take a local variable and replace it with the minimum of its values on all processors, returning the minimum rank of a processor which originally held the minimum value.
Types combined with an int.
void gather(const unsigned int root_id, const T &send_data, std::vector< T, A > &recv) const
Take a vector of length comm.size(), and on processor root_id fill in recv[processor_id] = the value ...
const MessageTag any_tag
Default message tag ids.
void sum(T &r) const
Take a local variable and replace it with the sum of its values on all processors.
void alltoall(std::vector< T, A > &r) const
Effectively transposes the input vector across all processors.
processor_id_type rank() const
Encapsulates the MPI_Datatype.
void allgather_packed_range(Context *context, Iter range_begin, const Iter range_end, OutputIter out, std::size_t approx_buffer_size=1000000) const
Take a range of local variables, combine it with ranges from all processors, and write the output to ...
void map_broadcast(Map &data, const unsigned int root_id, const bool identical_sizes) const
Private implementation function called by the map-based broadcast() specializations.
Templated class to provide the appropriate MPI datatype for use with built-in C types or simple C++ c...
void assign(const communicator &comm)
Utility function for setting our member variables from an MPI communicator.
data_type dataplusint_type< float >()
void ignore(const Args &...)
Define data types and (un)serialization functions for use when encoding a potentially-variable-size o...
data_type dataplusint_type()
Templated function to return the appropriate MPI datatype for use with built-in C types when combined...
void send_packed_range(const unsigned int dest_processor_id, const Context *context, Iter range_begin, const Iter range_end, const MessageTag &tag=no_tag, std::size_t approx_buffer_size=1000000) const
Blocking-send range-of-pointers to one processor.
void nonblocking_receive_packed_range(const unsigned int src_processor_id, Context *context, OutputIter out, const T *output_type, Request &req, Status &stat, const MessageTag &tag=any_tag) const
Non-Blocking-receive range-of-pointers from one processor.
const unsigned int any_source
Processor id meaning "Accept from any source".
bool possibly_receive(unsigned int &src_processor_id, std::vector< T, A > &buf, Request &req, const MessageTag &tag) const
Nonblocking-receive from one processor with user-defined type.
Encapsulates the MPI_Comm object.
processor_id_type size() const
std::size_t packed_size_of(const std::vector< std::vector< T, A1 >, A2 > &buf, const DataType &type) const
uint8_t processor_id_type
Status receive(const unsigned int dest_processor_id, T &buf, const MessageTag &tag=any_tag) const
Blocking-receive from one processor with data-defined type.
Encapsulates the MPI tag integers.
void min(const T &r, T &o, Request &req) const
Non-blocking minimum of the local value r into o with the request req.
void receive_packed_range(const unsigned int dest_processor_id, Context *context, OutputIter out, const T *output_type, const MessageTag &tag=any_tag) const
Blocking-receive range-of-pointers from one processor.
status probe(const unsigned int src_processor_id, const MessageTag &tag=any_tag) const
Blocking message probe.
data_type dataplusint_type< int >()
Iter pack_range(const Context *context, Iter range_begin, const Iter range_end, std::vector< buffertype > &buffer, std::size_t approx_buffer_size)
Helper function for range packing.
static const bool has_min_max
void maxloc(T &r, unsigned int &max_id) const
Take a local variable and replace it with the maximum of its values on all processors, returning the minimum rank of a processor which originally held the maximum value.
void send_receive(const unsigned int dest_processor_id, const T1 &send_data, const unsigned int source_processor_id, T2 &recv_data, const MessageTag &send_tag=no_tag, const MessageTag &recv_tag=any_tag) const
Send data send to one processor while simultaneously receiving other data recv from a (potentially di...
bool possibly_receive_packed_range(unsigned int &src_processor_id, Context *context, OutputIter out, const T *output_type, Request &req, const MessageTag &tag) const
Nonblocking packed range receive from one processor with user-defined type.
timpi_pure bool semiverify(const T *r) const
Check whether a local pointer points to the same value on all processors where it is not null...
static const bool is_fixed_type
data_type dataplusint_type< long >()
void broadcast(T &data, const unsigned int root_id=0, const bool identical_sizes=false) const
Take a local value and broadcast it to all processors.
void add_post_wait_work(PostWaitWork *work)
StandardType<T>'s which do not define a way to MPI_Type T should inherit from this class...
data_type dataplusint_type< long double >()
CountType size(const data_type &type) const
void map_sum(Map &data) const
Private implementation function called by the map-based sum() specializations.
Encapsulates the MPI_Request.
timpi_pure bool verify(const T &r) const
Check whether a local variable has the same value on all processors, returning true if it does or fal...
void max(const T &r, T &o, Request &req) const
Non-blocking maximum of the local value r into o with the request req.
void send(const unsigned int dest_processor_id, const T &buf, const MessageTag &tag=no_tag) const
Blocking-send to one processor with data-defined type.
data_type dataplusint_type< double >()
SendMode send_mode() const
Gets the user-requested SendMode.
void send_receive_packed_range(const unsigned int dest_processor_id, const Context1 *context1, RangeIter send_begin, const RangeIter send_end, const unsigned int source_processor_id, Context2 *context2, OutputIter out, const T *output_type, const MessageTag &send_tag=no_tag, const MessageTag &recv_tag=any_tag, std::size_t approx_buffer_size=1000000) const
Send a range-of-pointers to one processor while simultaneously receiving another range from a (potent...
Encapsulates the MPI_Status struct.
std::size_t packed_range_size(const Context *context, Iter range_begin, const Iter range_end)
Helper function for range packing.
std::pair< data_type, std::unique_ptr< StandardType< std::pair< T, int > > > > dataplusint_type_acquire()
void set_union(T &data, const unsigned int root_id) const
Take a container (set, map, unordered_set, multimap, etc) of local variables on each processor...
Templated class to provide the appropriate MPI reduction operations for use with built-in C types or ...