19 #ifndef TIMPI_PARALLEL_IMPLEMENTATION_H 20 #define TIMPI_PARALLEL_IMPLEMENTATION_H 43 #ifndef TIMPI_HAVE_MPI 48 #ifdef TIMPI_DEFAULT_QUADRUPLE_PRECISION 49 # include <boost/multiprecision/float128.hpp> 54 #define TIMPI_LOG_SCOPE(f,c) 67 #include <type_traits> 83 #endif // TIMPI_HAVE_MPI 104 template <
typename T, typename A1, typename A2>
/**
 * Packs the bits of \p vec_in into words of integer type \p T, written
 * to \p vec_out.  Bit i of the input ends up at bit position
 * (i % bits-per-T) of word (i / bits-per-T); unused high bits of the
 * last word are zero.  Any previous contents of \p vec_out are
 * discarded.
 */
inline void pack_vector_bool(const std::vector<bool,A1> & vec_in,
                             std::vector<T,A2> & vec_out)
{
  const unsigned int data_bits = 8*sizeof(T);
  const std::size_t in_size = vec_in.size();
  const std::size_t out_size = in_size/data_bits + ((in_size%data_bits)?1:0);

  // Start from all-zero words even when the caller hands us a vector
  // that was already in use: resize() alone keeps stale words, and the
  // bits set below would then be merged into them.
  vec_out.assign(out_size, T(0));

  for (std::size_t i=0; i != in_size; ++i)
    if (vec_in[i])
      {
        const std::size_t index = i/data_bits;
        const std::size_t offset = i%data_bits;
        // Shift a T-typed one rather than 1u, so offsets of 32 and
        // above stay well defined when T is wider than unsigned int.
        vec_out[index] |= static_cast<T>(T(1) << offset);
      }
}
/**
 * Inverse of pack_vector_bool(): reads bits out of the words of
 * \p vec_in and stores them in \p vec_out.  The caller must size
 * \p vec_out beforehand; its size decides how many bits are read, and
 * \p vec_in must hold exactly the number of words needed for them.
 */
template <typename T, typename A1, typename A2>
inline void unpack_vector_bool(const std::vector<T,A1> & vec_in,
                               std::vector<bool,A2> & vec_out)
{
  const unsigned int data_bits = 8*sizeof(T);
  const std::size_t out_size = vec_out.size();

  timpi_assert_equal_to
    (out_size/data_bits + (out_size%data_bits?1:0), vec_in.size());

  // Walk the input word by word, keeping an explicit (word, bit)
  // cursor instead of dividing on every iteration.
  std::size_t word = 0;
  unsigned int bit = 0;
  for (std::size_t i=0; i != out_size; ++i)
    {
      vec_out[i] = (vec_in[word] >> bit) & 1;
      if (++bit == data_bits)
        {
          bit = 0;
          ++word;
        }
    }
}
143 #ifdef TIMPI_HAVE_MPI 146 template <
typename T1,
typename T2,
typename A1,
typename A2,
typename A3,
typename A4>
// Helper exchanging nested vectors through `comm`: send_data goes to
// dest_processor_id and recv_data is filled from source_processor_id.
// When both ids equal comm.rank() the data is simply copied with an
// assignment.  Otherwise a send using `req` and `send_tag` is issued,
// followed by a receive using `recv_tag`.
// NOTE(review): the remaining parameters and the statements between the
// self-copy and the send/receive pair are not visible in this excerpt.
147 inline void send_receive_vec_of_vec(
const unsigned int dest_processor_id,
148 const std::vector<std::vector<T1,A1>,A2> & send_data,
149 const unsigned int source_processor_id,
150 std::vector<std::vector<T2,A3>,A4> & recv_data,
155 TIMPI_LOG_SCOPE(
"send_receive()",
"Parallel");
157 if (dest_processor_id == comm.
rank() &&
158 source_processor_id == comm.
rank())
160 recv_data = send_data;
165 comm.
send (dest_processor_id, send_data, req, send_tag);
166 comm.
receive (source_processor_id, recv_data, recv_tag);
170 #endif // TIMPI_HAVE_MPI 179 #ifdef TIMPI_HAVE_MPI 198 template <
typename T>
// Produces a (data_type, owner) pair describing std::pair<T,int>.
// dataplusint_type<T>() is tried first; only if it yields
// MPI_DATATYPE_NULL is a StandardType<std::pair<T,int>> allocated, kept
// alive through the unique_ptr in `.second`, and converted into `.first`.
// NOTE(review): the function name line is not visible in this excerpt.
200 std::pair<data_type, std::unique_ptr<StandardType<std::pair<T,int>>>>
203 std::pair<data_type, std::unique_ptr<StandardType<std::pair<T,int>>>> return_val;
204 return_val.first = dataplusint_type<T>();
205 if (return_val.first == MPI_DATATYPE_NULL)
207 return_val.second.reset(
new StandardType<std::pair<T,int>>());
208 return_val.first = *return_val.second;
217 # define TIMPI_COUNT_TYPE MPI_COUNT 218 # define TIMPI_PACK_SIZE MPI_Pack_size_c 219 # define TIMPI_SEND MPI_Send_c 220 # define TIMPI_SSEND MPI_Ssend_c 221 # define TIMPI_ALLREDUCE MPI_Allreduce_c 222 # define TIMPI_IALLREDUCE MPI_Iallreduce_c 223 # define TIMPI_ISEND MPI_Isend_c 224 # define TIMPI_ISSEND MPI_Issend_c 225 # define TIMPI_PACK MPI_Pack_c 226 # define TIMPI_UNPACK MPI_Unpack_c 227 # define TIMPI_RECV MPI_Recv_c 228 # define TIMPI_IRECV MPI_Irecv_c 229 # define TIMPI_SENDRECV MPI_Sendrecv_c 230 # define TIMPI_ALLGATHERV MPI_Allgatherv_c 231 # define TIMPI_ALLGATHER MPI_Allgather_c 232 # define TIMPI_BCAST MPI_Bcast_c 233 # define TIMPI_GATHER MPI_Gather_c 234 # define TIMPI_GATHERV MPI_Gatherv_c 235 # define TIMPI_SCATTER MPI_Scatter_c 236 # define TIMPI_SCATTERV MPI_Scatterv_c 237 # define TIMPI_ALLTOALL MPI_Alltoall_c 240 # define TIMPI_COUNT_TYPE MPI_INT 241 # define TIMPI_PACK_SIZE MPI_Pack_size 242 # define TIMPI_SEND MPI_Send 243 # define TIMPI_SSEND MPI_Ssend 244 # define TIMPI_ALLREDUCE MPI_Allreduce 245 # define TIMPI_IALLREDUCE MPI_Iallreduce 246 # define TIMPI_ISEND MPI_Isend 247 # define TIMPI_ISSEND MPI_Issend 248 # define TIMPI_PACK MPI_Pack 249 # define TIMPI_UNPACK MPI_Unpack 250 # define TIMPI_RECV MPI_Recv 251 # define TIMPI_IRECV MPI_Irecv 252 # define TIMPI_SENDRECV MPI_Sendrecv 253 # define TIMPI_ALLGATHERV MPI_Allgatherv 254 # define TIMPI_ALLGATHER MPI_Allgather 255 # define TIMPI_BCAST MPI_Bcast 256 # define TIMPI_GATHER MPI_Gather 257 # define TIMPI_GATHERV MPI_Gatherv 258 # define TIMPI_SCATTER MPI_Scatter 259 # define TIMPI_SCATTERV MPI_Scatterv 260 # define TIMPI_ALLTOALL MPI_Alltoall 265 template <
typename T,
typename A1,
typename A2>
// The macro lines above define each TIMPI_* name twice: once as the
// MPI_*_c routine with TIMPI_COUNT_TYPE = MPI_COUNT, once as the plain
// MPI_* routine with TIMPI_COUNT_TYPE = MPI_INT.  The preprocessor
// condition choosing between the two sets is not visible here.
//
// The code below adds up, via TIMPI_PACK_SIZE on this communicator, the
// packed size of a nested vector `buf`: one count entry up front, then
// for every sub-vector one more count entry plus buf[i].size() entries
// of `type`.  The total must be non-zero.
275 (TIMPI_PACK_SIZE (1, TIMPI_COUNT_TYPE, this->
get(), &packedsize));
277 std::size_t sendsize = packedsize;
279 const std::size_t n_vecs = buf.size();
281 for (std::size_t i = 0; i != n_vecs; ++i)
285 (TIMPI_PACK_SIZE (1, TIMPI_COUNT_TYPE, this->
get(), &packedsize));
287 sendsize += packedsize;
291 (TIMPI_PACK_SIZE (cast_int<CountType>(buf[i].
size()),
292 type, this->
get(), &packedsize));
294 sendsize += packedsize;
297 timpi_assert (sendsize );
// send() of a std::basic_string<T>.  An empty string is passed as a
// null pointer, otherwise buf.data() is used with constness cast away
// for the MPI call.  The destination rank must be below this->size().
// Either TIMPI_SSEND or TIMPI_SEND is invoked with buf.size() as the
// count; the condition selecting between them is not visible here.
304 const std::basic_string<T> & buf,
307 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
309 T * dataptr = buf.empty() ? nullptr :
const_cast<T *
>(buf.data());
311 timpi_assert_less(dest_processor_id, this->
size());
315 TIMPI_SSEND : TIMPI_SEND)
316 (dataptr, cast_int<CountType>(buf.size()),
// send() of a std::basic_string<T> through TIMPI_ISSEND / TIMPI_ISEND,
// handing req.get() to the MPI call.  As in the TIMPI_SSEND/TIMPI_SEND
// variant, an empty string is passed as a null pointer and the
// destination rank is asserted to be below this->size().
323 template <
typename T>
325 const std::basic_string<T> & buf,
329 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
331 T * dataptr = buf.empty() ? nullptr :
const_cast<T *
>(buf.data());
333 timpi_assert_less(dest_processor_id, this->
size());
337 TIMPI_ISSEND : TIMPI_ISEND)
338 (dataptr, cast_int<CountType>(buf.size()),
340 this->
get(), req.
get()));
// send() of a single object `buf`: its address (constness cast away) is
// given to TIMPI_SSEND or TIMPI_SEND together with tag.value() and this
// communicator.  The destination rank must be below this->size().
349 template <
typename T>
354 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
356 T * dataptr =
const_cast<T*
> (&buf);
358 timpi_assert_less(dest_processor_id, this->
size());
362 TIMPI_SSEND : TIMPI_SEND)
364 tag.
value(), this->
get()));
// send() of a single object `buf` through TIMPI_ISSEND / TIMPI_ISEND;
// identical in shape to the TIMPI_SSEND/TIMPI_SEND variant but also
// passes req.get() to the MPI call.
369 template <
typename T>
375 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
377 T * dataptr =
const_cast<T*
>(&buf);
379 timpi_assert_less(dest_processor_id, this->
size());
383 TIMPI_ISSEND : TIMPI_ISEND)
385 tag.
value(), this->
get(), req.
get()));
// std::set overload that forwards to another this->send() overload.
// NOTE(review): the remaining call arguments are not visible here.
394 template <
typename T,
typename C,
typename A>
396 const std::set<T,C,A> & buf,
399 this->
send(dest_processor_id, buf,
// std::set overload taking a request: builds a StandardType<T> from a
// pointer to the first element (or nullptr for an empty set) and
// forwards to the send() overload taking an explicit type, req and tag.
405 template <
typename T,
typename C,
typename A>
407 const std::set<T,C,A> & buf,
409 const MessageTag & tag)
const 411 this->
send(dest_processor_id, buf,
412 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
// std::set overload with an explicit DataType: the set is copied into a
// temporary std::vector<T>, which is then sent through the vector
// overload with the same type and tag.
417 template <
typename T,
typename C,
typename A>
419 const std::set<T,C,A> & buf,
420 const DataType & type,
421 const MessageTag & tag)
const 423 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
425 std::vector<T> vecbuf(buf.begin(), buf.end());
426 this->
send(dest_processor_id, vecbuf, type, tag);
// std::set overload with an explicit DataType and a request.  The set
// is copied into a heap-allocated vector, a PostWaitDeleteBuffer for
// that vector is attached to `req`, and the vector is then sent with
// `req`.
// NOTE(review): the pointer is declared as std::vector<T>* but the
// object is created as std::vector<T,A>; these are the same type only
// when A is std::allocator<T> -- confirm custom allocators are not
// expected here.
431 template <
typename T,
typename C,
typename A>
433 const std::set<T,C,A> & buf,
434 const DataType & type,
436 const MessageTag & tag)
const 438 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
442 std::vector<T> * vecbuf =
443 new std::vector<T,A>(buf.begin(), buf.end());
446 req.add_post_wait_work
447 (
new PostWaitDeleteBuffer<std::vector<T,A>>(vecbuf));
449 this->
send(dest_processor_id, *vecbuf, type, req, tag);
// std::vector overload: builds a StandardType<T> from &buf.front() (or
// nullptr when the vector is empty) and forwards to the overload taking
// an explicit type and tag.
454 template <
typename T,
typename A>
456 const std::vector<T,A> & buf,
457 const MessageTag & tag)
const 459 this->
send(dest_processor_id, buf,
460 StandardType<T>(buf.empty() ? nullptr : &buf.front()), tag);
// std::vector overload with a request, enabled only when StandardType<T>
// derives from DataType.  Forwards to the overload taking an explicit
// type, using a StandardType<T> built from &buf.front() (nullptr when
// the vector is empty).
465 template <
typename T,
typename A,
466 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
468 const std::vector<T,A> & buf,
470 const MessageTag & tag)
const 472 this->
send(dest_processor_id, buf,
473 StandardType<T>(buf.empty() ? nullptr : &buf.front()), req, tag);
// std::vector overload enabled when Has_buffer_type<Packing<T>> holds.
// NOTE(review): only the parameter list is visible in this excerpt; the
// body is not shown.
476 template <
typename T,
typename A,
477 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
479 const std::vector<T,A> & buf,
481 const MessageTag & tag)
const 492 template <
typename T,
typename A>
// std::vector overload with an explicit DataType.  Calls TIMPI_SSEND or
// TIMPI_SEND (selection condition not visible) on buf.data() -- nullptr
// for an empty vector -- with buf.size() elements of `type`, addressed
// to dest_processor_id with tag.value() on this communicator.
494 const std::vector<T,A> & buf,
495 const DataType & type,
496 const MessageTag & tag)
const 498 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
502 TIMPI_SSEND : TIMPI_SEND)
503 (buf.empty() ? nullptr :
const_cast<T*
>(buf.data()),
504 cast_int<CountType>(buf.size()), type, dest_processor_id,
505 tag.value(), this->
get()));
// std::vector overload with an explicit DataType and a request, enabled
// when StandardType<T> derives from DataType.  Issues TIMPI_ISSEND or
// TIMPI_ISEND on the vector's storage (nullptr when empty) with
// req.get(), then attaches a PostWaitDereferenceTag for `tag` to the
// request.
510 template <
typename T,
typename A,
typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
512 const std::vector<T,A> & buf,
513 const DataType & type,
515 const MessageTag & tag)
const 517 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
519 timpi_assert_less(dest_processor_id, this->
size());
523 TIMPI_ISSEND : TIMPI_ISEND)
524 (buf.empty() ? nullptr :
const_cast<T*
>(buf.data()),
525 cast_int<CountType>(buf.size()), type, dest_processor_id,
526 tag.value(), this->
get(), req.get()));
529 req.add_post_wait_work
530 (
new PostWaitDereferenceTag(tag));
// std::vector overload taking a NotADataType placeholder, enabled when
// Has_buffer_type<Packing<T>> holds.  The visible part logs the scope
// and asserts dest_processor_id is below this->size().
// NOTE(review): the statements performing the send are not visible.
533 template <
typename T,
typename A,
typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
535 const std::vector<T,A> & buf,
536 const NotADataType &,
538 const MessageTag & tag)
const 540 TIMPI_LOG_SCOPE(
"send()",
"Parallel");
542 timpi_assert_less(dest_processor_id, this->
size());
// Vector-of-vectors overload: forwards to the overload taking an
// explicit type.  The StandardType<T> is built from the first element of
// the first sub-vector, or from nullptr when the outer vector or its
// first sub-vector is empty.
554 template <
typename T,
typename A1,
typename A2>
556 const std::vector<std::vector<T,A1>,A2> & buf,
557 const MessageTag & tag)
const 559 this->
send(dest_processor_id, buf,
560 StandardType<T>((buf.empty() || buf.front().empty()) ?
561 nullptr : &(buf.front().front())), tag);
// Vector-of-vectors overload with a request: same forwarding as the
// tag-only variant, additionally passing `req` through.
566 template <
typename T,
typename A1,
typename A2>
568 const std::vector<std::vector<T,A1>,A2> & buf,
570 const MessageTag & tag)
const 572 this->
send(dest_processor_id, buf,
573 StandardType<T>((buf.empty() || buf.front().empty()) ?
574 nullptr : &(buf.front().front())), req, tag);
// Vector-of-vectors overload with an explicit DataType and no request
// parameter: delegates to the request-taking overload with a `req`
// object.
// NOTE(review): the declaration of `req` and any wait on it are not
// visible in this excerpt.
579 template <
typename T,
typename A1,
typename A2>
581 const std::vector<std::vector<T,A1>,A2> & buf,
582 const DataType & type,
583 const MessageTag & tag)
const 588 this->
send(dest_processor_id, buf, type, req, tag);
// Vector-of-vectors send with an explicit DataType and a request.
// Everything is serialized with TIMPI_PACK into one heap-allocated char
// buffer of `sendsize` bytes: first the number of sub-vectors, then for
// each sub-vector its length followed (if non-empty) by its elements.
// The pack position must end exactly at sendsize.  The buffer's deletion
// is attached to `req`, and the buffer is sent as MPI_PACKED.
// NOTE(review): how `sendsize` and `pos` are initialized is not visible.
594 template <
typename T,
typename A1,
typename A2>
596 const std::vector<std::vector<T,A1>,A2> & send_vecs,
597 const DataType & type,
599 const MessageTag & tag)
const 607 std::vector<char> * sendbuf =
new std::vector<char>(sendsize);
613 const std::size_t n_vecs = send_vecs.size();
614 const CountType mpi_n_vecs = cast_int<CountType>(n_vecs);
617 (TIMPI_PACK (&mpi_n_vecs, 1, TIMPI_COUNT_TYPE, sendbuf->data(),
618 sendsize, &pos, this->
get()));
620 for (std::size_t i = 0; i != n_vecs; ++i)
624 cast_int<CountType>(send_vecs[i].size());
627 (TIMPI_PACK (&subvec_size, 1, TIMPI_COUNT_TYPE,
628 sendbuf->data(), sendsize, &pos, this->
get()));
631 if (!send_vecs[i].empty())
633 (TIMPI_PACK (const_cast<T*>(send_vecs[i].data()),
634 subvec_size, type, sendbuf->data(), sendsize,
638 timpi_assert_equal_to (pos, sendsize);
640 req.add_post_wait_work
641 (
new PostWaitDeleteBuffer<std::vector<char>> (sendbuf));
643 this->
send (dest_processor_id, *sendbuf, MPI_PACKED, req, tag);
// Packed-range send without a request.  total_buffer_size (its
// computation is not visible here) is sent first.  Then, while the
// range is non-empty, a buffer of Packing<T>::buffer_type is filled from
// [range_begin, range_end) with approx_buffer_size passed along,
// range_begin advances to next_range_begin, and the buffer is sent with
// the same tag.  Each pass must make progress, and the sizes of all
// buffers sent must add up to total_buffer_size.
647 template <
typename Context,
typename Iter>
649 const Context * context,
651 const Iter range_end,
653 std::size_t approx_buffer_size)
const 657 typedef typename std::iterator_traits<Iter>::value_type T;
659 std::size_t total_buffer_size =
662 this->
send(dest_processor_id, total_buffer_size, tag);
665 std::size_t used_buffer_size = 0;
668 while (range_begin != range_end)
670 timpi_assert_greater (std::distance(range_begin, range_end), 0);
672 std::vector<typename Packing<T>::buffer_type> buffer;
675 (context, range_begin, range_end, buffer, approx_buffer_size);
677 timpi_assert_greater (std::distance(range_begin, next_range_begin), 0);
679 range_begin = next_range_begin;
682 used_buffer_size += buffer.size();
686 this->
send(dest_processor_id, buffer, tag);
690 timpi_assert_equal_to(used_buffer_size, total_buffer_size);
// Packed-range send using requests.  The total size is copied into a
// heap-allocated std::size_t and sent with `intermediate_req`.  Each
// pass of the loop fills a heap-allocated buffer and sends it; the send
// uses the caller's `req` once range_begin has reached range_end (i.e.
// for the last buffer) and `next_intermediate_req` otherwise.
// NOTE(review): the cleanup of the heap buffers and the chaining of the
// intermediate requests are not visible in this excerpt.
695 template <
typename Context,
typename Iter>
697 const Context * context,
699 const Iter range_end,
702 std::size_t approx_buffer_size)
const 706 typedef typename std::iterator_traits<Iter>::value_type T;
709 std::size_t total_buffer_size =
715 std::size_t * total_buffer_size_buffer =
new std::size_t;
716 *total_buffer_size_buffer = total_buffer_size;
722 this->
send(dest_processor_id, *total_buffer_size_buffer, intermediate_req, tag);
729 std::size_t used_buffer_size = 0;
732 while (range_begin != range_end)
734 timpi_assert_greater (std::distance(range_begin, range_end), 0);
736 std::vector<buffer_t> * buffer =
new std::vector<buffer_t>();
739 (context, range_begin, range_end, *buffer, approx_buffer_size);
741 timpi_assert_greater (std::distance(range_begin, next_range_begin), 0);
743 range_begin = next_range_begin;
746 used_buffer_size += buffer->size();
751 Request * my_req = (range_begin == range_end) ? &req : &next_intermediate_req;
759 this->
send(dest_processor_id, *buffer, *my_req, tag);
761 if (range_begin != range_end)
// Single-buffer packed-range send with a request.  A non-empty range is
// packed into one heap-allocated buffer, with
// std::numeric_limits<CountType>::max() passed as the last visible
// packing argument.  If the range was not fully consumed an error is
// raised; otherwise the buffer is sent with `req`.
// NOTE(review): the error text has no space between the streamed number
// and "in size", so it prints e.g. "...2147483647in size".
772 template <
typename Context,
typename Iter>
774 const Context * context,
776 const Iter range_end,
782 typedef typename std::iterator_traits<Iter>::value_type T;
785 if (range_begin != range_end)
787 std::vector<buffer_t> * buffer =
new std::vector<buffer_t>();
797 std::numeric_limits<CountType>::max());
799 if (range_begin != range_end)
800 timpi_error_msg(
"Non-blocking packed range sends cannot exceed " << std::numeric_limits<CountType>::max() <<
"in size");
808 this->
send(dest_processor_id, *buffer, req, tag);
// std::basic_string receive: data is collected in a temporary
// std::vector<T> and then assigned into `buf`.
// NOTE(review): the receive call filling tempbuf is not visible here.
813 template <
typename T>
815 std::basic_string<T> & buf,
818 std::vector<T> tempbuf;
822 buf.assign(tempbuf.begin(), tempbuf.end());
// std::basic_string receive with a request: a heap-allocated
// std::vector<T> serves as the landing buffer, and work involving
// std::back_inserter(buf) is constructed from it (the enclosing call is
// only partly visible).
// NOTE(review): `tempbuf` is a pointer, yet it is passed to receive()
// without dereferencing; the std::set overload passes `*vecbuf` in the
// same position -- confirm this is intended.
828 template <
typename T>
830 std::basic_string<T> & buf,
837 std::vector<T> * tempbuf =
new std::vector<T>();
845 std::back_insert_iterator<std::basic_string<T>>>
846 (tempbuf, std::back_inserter(buf)));
852 this->
receive(src_processor_id, tempbuf, req, tag);
// receive() fragment whose MPI call is given tag.value(), this
// communicator and stat.get().  The source rank must be below
// this->size() unless the second (not visible) half of the assertion
// holds.
857 template <
typename T>
862 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
868 timpi_assert(src_processor_id < this->
size() ||
873 tag.
value(), this->
get(), stat.
get()));
// receive() fragment whose MPI call is given tag.value(), this
// communicator and req.get(); the same source-rank assertion as the
// stat.get() variant precedes it.
880 template <
typename T>
886 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
888 timpi_assert(src_processor_id < this->
size() ||
893 tag.
value(), this->
get(), req.
get()));
// std::set receive overload that forwards (src_processor_id, buf, ...)
// to another call.
// NOTE(review): the callee and remaining arguments are not visible.
902 template <
typename T,
typename C,
typename A>
904 std::set<T,C,A> & buf,
908 (src_processor_id, buf,
// std::set receive with a request: forwards to the overload taking an
// explicit type, using a StandardType<T> built from the set's first
// element (nullptr when the set is empty).
919 template <
typename T,
typename C,
typename A>
921 std::set<T,C,A> & buf,
923 const MessageTag & tag)
const 925 this->
receive (src_processor_id, buf,
926 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
// std::set receive with an explicit DataType: receives into a temporary
// std::vector<T> through the vector overload, then inserts the received
// elements into `buf`.  Elements already in `buf` are kept, since
// insert() is used rather than assignment.
932 template <
typename T,
typename C,
typename A>
934 std::set<T,C,A> & buf,
935 const DataType & type,
936 const MessageTag & tag)
const 938 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
940 std::vector<T> vecbuf;
941 Status stat = this->
receive(src_processor_id, vecbuf, type, tag);
943 buf.insert(vecbuf.begin(), vecbuf.end());
// std::set receive with an explicit DataType and a request.  A
// heap-allocated std::vector<T> is the landing buffer.  Two pieces of
// post-wait work are attached to `req`, in this order: copying the
// vector into `buf` through an insert iterator, then deleting the
// vector.  Finally the receive into the vector is started with `req`.
955 template <
typename T,
typename C,
typename A>
957 std::set<T,C,A> & buf,
958 const DataType & type,
960 const MessageTag & tag)
const 962 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
966 std::vector<T> * vecbuf =
new std::vector<T>();
972 req.add_post_wait_work
973 (
new PostWaitCopyBuffer<std::vector<T>,
974 std::insert_iterator<std::set<T,C,A>>>
975 (*vecbuf, std::inserter(buf,buf.end())));
978 req.add_post_wait_work
979 (
new PostWaitDeleteBuffer<std::vector<T>>(vecbuf));
981 this->
receive(src_processor_id, *vecbuf, type, req, tag);
// std::vector receive: forwards (src_processor_id, buf, type, tag) with
// a StandardType<T> built from the first element of `buf`, or nullptr
// when `buf` is empty.
// NOTE(review): the callee name is not visible in this excerpt.
987 template <
typename T,
typename A>
989 std::vector<T,A> & buf,
990 const MessageTag & tag)
const 993 (src_processor_id, buf,
994 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), tag);
// std::vector receive with a request: forwards to the overload taking
// an explicit type, using a StandardType<T> built from the first
// element of `buf` (nullptr when empty).
999 template <
typename T,
typename A>
1001 std::vector<T,A> & buf,
1003 const MessageTag & tag)
const 1005 this->
receive (src_processor_id, buf,
1006 StandardType<T>(buf.empty() ? nullptr : &(*buf.begin())), req, tag);
// std::vector receive with an explicit DataType.  The message is probed
// first; the resulting Status (constructed with `type`) gives the
// element count, and `buf` is resized to it.  TIMPI_RECV is then called
// with the source and tag reported by the probe rather than the ones
// passed in.  A final assertion involving stat.size() follows; its
// second operand is not visible here.
1011 template <
typename T,
typename A>
1013 std::vector<T,A> & buf,
1014 const DataType & type,
1015 const MessageTag & tag)
const 1017 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
1021 Status stat(this->
probe(src_processor_id, tag), type);
1023 buf.resize(stat.size());
1025 timpi_assert(src_processor_id < this->
size() ||
1032 (TIMPI_RECV (buf.empty() ? nullptr : buf.data(),
1033 cast_int<CountType>(buf.size()), type, stat.source(),
1034 stat.tag(), this->
get(), stat.get()));
1036 timpi_assert_equal_to (cast_int<std::size_t>(stat.size()),
// std::vector receive for Packing-enabled T with a `type` argument.
// `stat` is obtained from packed_range_probe<T>(), and a call (its name
// is not visible) is made with an inserter at the end of `buf`, plus
// type, req, stat and tag.
1044 template <
typename T,
typename A,
1045 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
1047 std::vector<T,A> & buf,
1048 const DataType & type,
1049 const MessageTag & tag)
const 1054 stat = this->packed_range_probe<T>(src_processor_id, tag, flag);
1058 std::inserter(buf, buf.end()),
1059 type, req, stat, tag);
// std::vector receive for Packing-enabled T taking a NotADataType
// placeholder.  Same shape as the DataType variant, except that
// buf.data() is passed where that variant passes `type`.
1066 template <
typename T,
typename A,
1067 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
1069 std::vector<T,A> & buf,
1070 const NotADataType &,
1071 const MessageTag & tag)
const 1076 stat = this->packed_range_probe<T>(src_processor_id, tag, flag);
1080 std::inserter(buf, buf.end()),
1081 buf.data(), req, stat, tag);
// std::vector receive with an explicit DataType and a request.
// TIMPI_IRECV is posted for buf.size() elements, so the caller's current
// vector size decides how much is received (no resize is visible here).
// A PostWaitDereferenceTag for `tag` is then attached to the request.
1088 template <
typename T,
typename A>
1090 std::vector<T,A> & buf,
1091 const DataType & type,
1093 const MessageTag & tag)
const 1095 TIMPI_LOG_SCOPE(
"receive()",
"Parallel");
1097 timpi_assert(src_processor_id < this->
size() ||
1101 (TIMPI_IRECV(buf.empty() ? nullptr : buf.data(),
1102 cast_int<CountType>(buf.size()), type, src_processor_id,
1103 tag.value(), this->
get(), req.get()));
1106 req.add_post_wait_work
1107 (
new PostWaitDereferenceTag(tag));
// Vector-of-vectors receive: forwards (src_processor_id, buf, type, tag)
// with a StandardType<T> built from the first element of the first
// sub-vector, or nullptr when the outer vector or its first sub-vector
// is empty.
// NOTE(review): the callee name is not visible in this excerpt.
1112 template <
typename T,
typename A1,
typename A2>
1114 std::vector<std::vector<T,A1>,A2> & buf,
1115 const MessageTag & tag)
const 1118 (src_processor_id, buf,
1119 StandardType<T>((buf.empty() || buf.front().empty()) ?
1120 nullptr : &(buf.front().front())), tag);
// Vector-of-vectors receive with a request: same forwarding as the
// tag-only variant, additionally passing `req` through.
1125 template <
typename T,
typename A1,
typename A2>
1127 std::vector<std::vector<T,A1>,A2> & buf,
1129 const MessageTag & tag)
const 1131 this->
receive (src_processor_id, buf,
1132 StandardType<T>((buf.empty() || buf.front().empty()) ?
1133 nullptr : &(buf.front().front())), req, tag);
// Vector-of-vectors receive with an explicit DataType.  The whole
// message is received as MPI_PACKED bytes into `recvbuf`, which must not
// be empty.  TIMPI_UNPACK then extracts the number of sub-vectors, and
// for each one its length followed (if non-zero) by its elements, with
// `recv` and each recv[i] resized accordingly.
// NOTE(review): the declarations of pos, recvsize and subvec_size are
// not visible in this excerpt.
1138 template <
typename T,
typename A1,
typename A2>
1140 std::vector<std::vector<T,A1>,A2> & recv,
1141 const DataType & type,
1142 const MessageTag & tag)
const 1146 std::vector<char> recvbuf;
1148 Status stat = this->
receive (src_processor_id, recvbuf, MPI_PACKED, tag);
1151 timpi_assert (!recvbuf.empty());
1154 CountType bufsize = cast_int<CountType>(recvbuf.size());
1157 (TIMPI_UNPACK(recvbuf.data(), bufsize, &pos, &recvsize, 1,
1158 TIMPI_COUNT_TYPE, this->
get()));
1161 recv.resize (recvsize);
1163 const std::size_t n_vecs = recvsize;
1164 for (std::size_t i = 0; i != n_vecs; ++i)
1169 (TIMPI_UNPACK (recvbuf.data(), bufsize, &pos, &subvec_size, 1,
1170 TIMPI_COUNT_TYPE, this->
get()));
1173 recv[i].resize (subvec_size);
1176 if (!recv[i].empty())
1178 (TIMPI_UNPACK(recvbuf.data(), bufsize, &pos, recv[i].data(),
1179 subvec_size, type, this->
get()));
// Vector-of-vectors receive with an explicit DataType and a request.  A
// heap-allocated char buffer of `sendsize` bytes receives the MPI_PACKED
// message.  Three pieces of post-wait work are attached to `req`, in
// this order: unpacking the buffer into `buf`, deleting the buffer, and
// dereferencing the tag.
// NOTE(review): how `sendsize` is computed is not visible here.
1187 template <
typename T,
typename A1,
typename A2>
1189 std::vector<std::vector<T,A1>,A2> & buf,
1190 const DataType & type,
1192 const MessageTag & tag)
const 1201 std::vector<char> * recvbuf =
new std::vector<char>(sendsize);
1204 this->
receive (src_processor_id, *recvbuf, MPI_PACKED, req, tag);
1207 req.add_post_wait_work
1208 (
new PostWaitUnpackNestedBuffer<std::vector<std::vector<T,A1>,A2>>
1209 (*recvbuf, buf, type, *
this));
1212 req.add_post_wait_work
1213 (
new PostWaitDeleteBuffer<std::vector<char>>(recvbuf));
1216 req.add_post_wait_work
1217 (
new PostWaitDereferenceTag(tag));
// Packed-range receive.  The total buffer size is received first; then
// buffers are processed until received_buffer_size reaches that total.
// Each buffer is handed, together with the context, the current output
// iterator and output_type, to an unpacking call whose result replaces
// the stored output iterator.
// NOTE(review): the per-buffer receive call and the name of the
// unpacking call are not visible in this excerpt.
1221 template <
typename Context,
typename OutputIter,
typename T>
1224 OutputIter out_iter,
1225 const T * output_type,
1231 std::size_t total_buffer_size = 0;
1232 Status stat = this->
receive(src_processor_id, total_buffer_size, tag);
1238 std::size_t received_buffer_size = 0;
1242 std::unique_ptr<OutputIter> next_out_iter =
1243 std::make_unique<OutputIter>(out_iter);
1245 while (received_buffer_size < total_buffer_size)
1247 std::vector<buffer_t> buffer;
1249 received_buffer_size += buffer.size();
1251 (buffer, context, *next_out_iter, output_type);
1252 next_out_iter = std::make_unique<OutputIter>(return_out_iter);
// Packed-range receive with a request: a heap-allocated buffer sized
// from stat.size() receives the message, and a PostWaitUnpackBuffer
// over (*buffer, context, out) is created for it.
// NOTE(review): the call receiving the PostWaitUnpackBuffer and the
// buffer's cleanup are not visible in this excerpt.
1285 template <
typename Context,
typename OutputIter,
typename T>
1300 std::vector<buffer_t> * buffer =
new std::vector<buffer_t>(stat.
size());
1301 this->
receive(src_processor_id, *buffer, req, tag);
1305 (
new PostWaitUnpackBuffer<std::vector<buffer_t>, Context, OutputIter, T>(*buffer, context, out));
// Vector send_receive with explicit types.  The case where both the
// destination and the source are this rank is tested first (its body is
// not visible here).  Otherwise a send with `req` using type1 and
// send_tag is issued, followed by a receive using type2 and recv_tag.
1318 template <
typename T1,
typename T2,
typename A1,
typename A2>
1320 const std::vector<T1,A1> & sendvec,
1322 const unsigned int source_processor_id,
1323 std::vector<T2,A2> & recv,
1328 TIMPI_LOG_SCOPE(
"send_receive()",
"Parallel");
1330 if (dest_processor_id == this->
rank() &&
1331 source_processor_id == this->
rank())
1339 this->
send (dest_processor_id, sendvec, type1, req, send_tag);
1341 this->
receive (source_processor_id, recv, type2, recv_tag);
// send_receive for vectors whose element types T1 and T2 both have a
// Packing buffer_type.  Forwards the full send_data range, null context
// pointers, a back_inserter on recv_data, a null `const T2 *` used only
// for its type, and both tags.
// NOTE(review): the callee name is not visible in this excerpt.
1347 template <
typename T1,
typename T2,
typename A1,
typename A2,
1348 typename std::enable_if<Has_buffer_type<Packing<T1>>::value &&
1349 Has_buffer_type<Packing<T2>>::value,
int>::type>
1353 const std::vector<T1,A1> & send_data,
1354 const unsigned int source_processor_id,
1355 std::vector<T2,A2> &recv_data,
1356 const MessageTag &send_tag,
1357 const MessageTag &recv_tag)
const 1360 send_data.begin(), send_data.end(),
1361 source_processor_id, (
void *)(
nullptr),
1362 std::back_inserter(recv_data),
1363 (
const T2 *)(
nullptr),
1364 send_tag, recv_tag);
// Same-type counterpart of the Packing-based send_receive: send and
// receive vectors share T and A, and the call receives the send_data
// range, null contexts, a back_inserter on recv_data, a null
// `const T *` and both tags.
1368 template <
typename T,
typename A,
1369 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
1373 const std::vector<T,A> & send_data,
1374 const unsigned int source_processor_id,
1375 std::vector<T,A> &recv_data,
1380 send_data.begin(), send_data.end(),
1381 source_processor_id, (
void *)(
nullptr),
1382 std::back_inserter(recv_data),
1383 (
const T *)(
nullptr),
1384 send_tag, recv_tag);
// send_receive for single objects of types with DataType-derived
// StandardType.  After the self-exchange test (body not visible) the
// destination rank is asserted to be below this->size() and a matching
// assertion is made for the source.  The visible part of the MPI call
// receives one element into `recv` and ignores the status
// (MPI_STATUS_IGNORE).
1389 template <
typename T1,
typename T2,
1390 typename std::enable_if<std::is_base_of<DataType, StandardType<T1>>::value &&
1391 std::is_base_of<DataType, StandardType<T2>>::value,
1395 const unsigned int source_processor_id,
1400 TIMPI_LOG_SCOPE(
"send_receive()",
"Parallel");
1402 if (dest_processor_id == this->
rank() &&
1403 source_processor_id == this->
rank())
1409 timpi_assert_less(dest_processor_id, this->
size());
1410 timpi_assert(source_processor_id < this->
size() ||
1418 dest_processor_id, send_tag.
value(), &recv, 1,
1420 recv_tag.
value(), this->
get(), MPI_STATUS_IGNORE));
// Same-type vector send_receive.  An example element pointer is picked
// for building both StandardType<T> arguments: sendvec.data() if
// sendvec is non-empty, else recv.data() if recv is non-empty, else
// nullptr.
// NOTE(review): the body of the self-exchange branch and the callee
// name are not visible in this excerpt.
1432 template <
typename T,
typename A,
1433 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
1436 const std::vector<T,A> & sendvec,
1437 const unsigned int source_processor_id,
1438 std::vector<T,A> & recv,
1442 if (dest_processor_id == this->
rank() &&
1443 source_processor_id == this->
rank())
1445 TIMPI_LOG_SCOPE(
"send_receive()",
"Parallel");
1450 const T* example = sendvec.empty() ?
1451 (recv.empty() ? nullptr : recv.data()) : sendvec.data();
1456 StandardType<T>(example),
1457 source_processor_id, recv,
1458 StandardType<T>(example),
1459 send_tag, recv_tag);
// Mixed-type vector send_receive: forwards with a StandardType<T1>
// built from sendvec.data() and a StandardType<T2> built from
// recv.data(), each using nullptr when the respective vector is empty.
1463 template <
typename T1,
typename T2,
typename A1,
typename A2,
1464 typename std::enable_if<std::is_base_of<DataType, StandardType<T1>>::value &&
1465 std::is_base_of<DataType, StandardType<T2>>::value,
1468 const std::vector<T1,A1> & sendvec,
1469 const unsigned int source_processor_id,
1470 std::vector<T2,A2> & recv,
1471 const MessageTag & send_tag,
1472 const MessageTag & recv_tag)
const 1477 StandardType<T1>(sendvec.empty() ? nullptr : sendvec.data()),
1478 source_processor_id, recv,
1479 StandardType<T2>(recv.empty() ? nullptr : recv.data()),
1480 send_tag, recv_tag);
// Mixed-type vector-of-vectors send_receive: delegates to
// send_receive_vec_of_vec.  Both MessageTag parameters are unnamed in
// this signature, so the tags passed in by the caller cannot be the ones
// forwarded.
// NOTE(review): the remaining call arguments are not visible here.
1486 template <
typename T1,
typename T2,
typename A1,
typename A2,
typename A3,
typename A4>
1488 const std::vector<std::vector<T1,A1>,A2> & sendvec,
1489 const unsigned int source_processor_id,
1490 std::vector<std::vector<T2,A3>,A4> & recv,
1491 const MessageTag & ,
1492 const MessageTag & )
const 1495 send_receive_vec_of_vec
1496 (dest_processor_id, sendvec, source_processor_id, recv,
// Same-type vector-of-vectors send_receive: delegates to
// send_receive_vec_of_vec, passing send_tag, recv_tag and this
// communicator.
1504 template <
typename T,
typename A1,
typename A2>
1506 const std::vector<std::vector<T,A1>,A2> & sendvec,
1507 const unsigned int source_processor_id,
1508 std::vector<std::vector<T,A1>,A2> & recv,
1512 send_receive_vec_of_vec
1513 (dest_processor_id, sendvec, source_processor_id, recv,
1514 send_tag, recv_tag, *
this);
// Packed-range send_receive.  Sending to oneself and receiving from
// oneself must coincide (asserted).  In that self-exchange case the
// range is packed chunk by chunk with context1 and each chunk is
// immediately unpacked with context2 through the current output
// iterator, which is replaced by the iterator the unpacking call
// returns.  The trailing argument lists belong to the non-self path,
// which passes `req`, send_tag and approx_buffer_size on one call and
// output_type and recv_tag on another.
// NOTE(review): the names of the pack/unpack calls and of the calls on
// the non-self path are not visible in this excerpt.
1520 template <
typename Context1,
typename RangeIter,
typename Context2,
1521 typename OutputIter,
typename T>
1524 const Context1 * context1,
1525 RangeIter send_begin,
1526 const RangeIter send_end,
1527 const unsigned int source_processor_id,
1528 Context2 * context2,
1529 OutputIter out_iter,
1530 const T * output_type,
1533 std::size_t approx_buffer_size)
const 1535 TIMPI_LOG_SCOPE(
"send_receive()",
"Parallel");
1537 timpi_assert_equal_to
1538 ((dest_processor_id == this->
rank()),
1539 (source_processor_id == this->
rank()));
1541 if (dest_processor_id == this->
rank() &&
1542 source_processor_id == this->
rank())
1550 std::unique_ptr<OutputIter> next_out_iter =
1551 std::make_unique<OutputIter>(out_iter);
1554 while (send_begin != send_end)
1556 std::vector<buffer_t> buffer;
1558 (context1, send_begin, send_end, buffer, approx_buffer_size);
1560 (buffer, context2, *next_out_iter, output_type);
1561 next_out_iter = std::make_unique<OutputIter>(return_out_iter);
1569 req, send_tag, approx_buffer_size);
1572 output_type, recv_tag);
// Single-buffer packed-range send using a caller-visible shared_ptr
// buffer.  For a non-empty range the buffer is created with make_shared
// if the pointer is null; packing is given
// std::numeric_limits<CountType>::max() as its last visible argument,
// and an error is raised if the range was not fully consumed.  The
// buffer is then sent with `req`.
// NOTE(review): the error text has no space between the streamed number
// and "in size".
1579 template <
typename Context,
typename Iter>
1581 const Context * context,
1583 const Iter range_end,
1585 std::shared_ptr<std::vector<
typename Packing<
typename std::iterator_traits<Iter>::value_type>::buffer_type>> & buffer,
1590 typedef typename std::iterator_traits<Iter>::value_type T;
1593 if (range_begin != range_end)
1595 if (buffer ==
nullptr)
1596 buffer = std::make_shared<std::vector<buffer_t>>();
1608 std::numeric_limits<CountType>::max());
1610 if (range_begin != range_end)
1611 timpi_error_msg(
"Non-blocking packed range sends cannot exceed " << std::numeric_limits<CountType>::max() <<
"in size");
1618 this->
send(dest_processor_id, *buffer, req, tag);
// allgather of one string per rank into `recv`.  `recv` is reset to
// this->size() empty strings.  Per-rank lengths go into `sendlengths`
// (filled with this rank's own length everywhere when
// identical_buffer_sizes is set) and are turned into running-sum
// displacements.  All strings are gathered with TIMPI_ALLGATHERV into
// one contiguous string `r`, which is finally split back into recv[i]
// using the displacements and lengths.
// NOTE(review): the early-exit bodies for size() < 2 and
// globalsize == 0, and the length exchange for the non-identical case,
// are not visible in this excerpt.
1624 template <
typename T,
typename A>
1626 std::vector<std::basic_string<T>,A> & recv,
1627 const bool identical_buffer_sizes)
const 1629 TIMPI_LOG_SCOPE (
"allgather()",
"Parallel");
1631 timpi_assert(this->
size());
1632 recv.assign(this->
size(),
"");
1635 if (this->
size() < 2)
1642 std::vector<CountType>
1643 sendlengths (this->
size(), 0);
1644 std::vector<DispType>
1645 displacements(this->
size(), 0);
1647 const CountType mysize = cast_int<CountType>(sendval.size());
1649 if (identical_buffer_sizes)
1650 sendlengths.assign(this->
size(), mysize);
1658 for (
unsigned int i=0; i != this->
size(); ++i)
1660 displacements[i] = globalsize;
1661 globalsize += sendlengths[i];
1665 if (globalsize == 0)
1669 std::basic_string<T> r(globalsize, 0);
1673 (TIMPI_ALLGATHERV(const_cast<T*>(mysize ? sendval.data() :
nullptr),
1675 &r[0], sendlengths.data(), displacements.data(),
1679 for (
unsigned int i=0; i != this->
size(); ++i)
1680 recv[i] = r.substr(displacements[i], sendlengths[i]);
// broadcast() fragment that copies `data` into a local char and
// broadcasts that single char from root_id with TIMPI_BCAST.  On a
// one-rank communicator both this rank and root_id are asserted to be
// zero.
// NOTE(review): the copy back from char_data into `data` is not visible
// in this excerpt.
1686 const unsigned int root_id,
1689 if (this->
size() == 1)
1691 timpi_assert (!this->
rank());
1692 timpi_assert (!root_id);
1696 timpi_assert_less (root_id, this->
size());
1698 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
1702 char char_data = data;
1704 timpi_assert_less(root_id, this->
size());
1708 (TIMPI_BCAST (&char_data, 1, StandardType<char>(&char_data),
1709 root_id, this->
get()));
// broadcast() of a string-like `data` from root_id.  The characters are
// staged in a std::vector<T> of data_size elements, filled on the root
// from `data`, and `data` is re-assigned from that vector afterwards.
// When identical_sizes is set, data_size is checked with verify().  On
// the root the result is asserted to equal the original string.
// NOTE(review): the size exchange and the broadcast of data_c itself are
// not visible in this excerpt.
1715 template <
typename T>
1717 const unsigned int root_id,
1718 const bool identical_sizes)
const 1720 if (this->
size() == 1)
1722 timpi_assert (!this->
rank());
1723 timpi_assert (!root_id);
1727 timpi_assert_less (root_id, this->
size());
1728 timpi_assert (this->
verify(identical_sizes));
1730 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
1732 std::size_t data_size = data.size();
1734 if (identical_sizes)
1735 timpi_assert(this->
verify(data_size));
1739 std::vector<T> data_c(data_size);
1741 std::basic_string<T> orig(data);
1744 if (this->
rank() == root_id)
1745 for (std::size_t i=0; i<data.size(); i++)
1746 data_c[i] = data[i];
1750 data.assign(data_c.begin(), data_c.end());
1753 if (this->
rank() == root_id)
1754 timpi_assert_equal_to (data, orig);
// broadcast() of a container of strings from root_id.  The strings are
// flattened into a std::vector<unsigned int>: for each string its length
// followed by its characters, so bufsize is the sum of (size + 1) over
// all strings.  bufsize is computed on the root, and on every rank when
// identical_sizes is set.  Non-root ranks rebuild `data` by reading a
// length and then constructing a string from that many following
// entries.
// NOTE(review): the broadcasts of bufsize and temp, the iterator advance
// in the decode loop, and any clearing of `data` on non-root ranks are
// not visible in this excerpt.
1759 template <
typename T,
typename A>
1761 const unsigned int root_id,
1762 const bool identical_sizes)
const 1764 if (this->
size() == 1)
1766 timpi_assert (!this->
rank());
1767 timpi_assert (!root_id);
1771 timpi_assert_less (root_id, this->
size());
1772 timpi_assert (this->
verify(identical_sizes));
1774 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
1776 std::size_t bufsize=0;
1777 if (root_id == this->
rank() || identical_sizes)
1779 for (std::size_t i=0; i<data.size(); ++i)
1780 bufsize += data[i].
size() + 1;
1783 if (identical_sizes)
1784 timpi_assert(this->
verify(bufsize));
1789 std::vector<unsigned int> temp; temp.reserve(bufsize);
1791 if (root_id == this->
rank())
1793 for (std::size_t i=0; i<data.size(); ++i)
1795 temp.push_back(cast_int<unsigned int>(data[i].
size()));
1796 for (std::size_t j=0; j != data[i].size(); ++j)
1801 temp.push_back(data[i][j]);
1805 temp.resize(bufsize);
1811 if (root_id != this->
rank())
1814 typename std::vector<unsigned int>::const_iterator iter = temp.begin();
1815 while (iter != temp.end())
1817 std::size_t curr_len = *iter++;
1818 data.push_back(std::basic_string<T>(iter, iter+curr_len));
// broadcast() of a nested container from root_id.  The per-entry sizes
// are collected into `sizes` (on the root, and on every rank when
// identical_sizes is set) and summed into bufsize.  The root
// concatenates all entries into the flat vector `temp`.  Non-root ranks
// resize `data` to size_sizes and append sizes[i] elements from `temp`
// to each data[i].
// NOTE(review): the broadcasts of size_sizes, sizes and temp, the
// iterator advance in the final loop, and any clearing of data[i] before
// the insert are not visible in this excerpt.
1826 template <
typename T,
typename A1,
typename A2>
1828 const unsigned int root_id,
1829 const bool identical_sizes)
const 1831 if (this->
size() == 1)
1833 timpi_assert (!this->
rank());
1834 timpi_assert (!root_id);
1838 timpi_assert_less (root_id, this->
size());
1839 timpi_assert (this->
verify(identical_sizes));
1841 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
1843 std::size_t size_sizes = data.size();
1844 if (identical_sizes)
1845 timpi_assert(this->
verify(size_sizes));
1848 std::vector<std::size_t> sizes(size_sizes);
1850 if (root_id == this->
rank() || identical_sizes)
1851 for (std::size_t i=0; i<size_sizes; ++i)
1852 sizes[i] = data[i].
size();
1854 if (identical_sizes)
1855 timpi_assert(this->
verify(sizes));
1859 std::size_t bufsize = 0;
1860 for (std::size_t i=0; i<size_sizes; ++i)
1861 bufsize += sizes[i];
1863 std::vector<T> temp; temp.reserve(bufsize);
1865 if (root_id == this->
rank())
1868 for (std::size_t i=0; i<size_sizes; ++i)
1869 temp.insert(temp.end(), data[i].begin(), data[i].end());
1872 temp.resize(bufsize);
1878 if (root_id != this->
rank())
1881 data.resize(size_sizes);
1882 typename std::vector<T>::const_iterator iter = temp.begin();
1883 for (std::size_t i=0; i<size_sizes; ++i)
1885 data[i].insert(data[i].end(), iter, iter+sizes[i]);
// broadcast() of a set-like `data` from root_id via a staging vector.
// The root copies its elements into `vecdata`; other ranks resize
// `vecdata` to vecsize and, after the transfer, insert its contents into
// `data`.
// NOTE(review): the broadcasts of vecsize and vecdata and any clearing
// of `data` on non-root ranks are not visible in this excerpt.
1894 template <
typename T,
typename C,
typename A>
1896 const unsigned int root_id,
1897 const bool identical_sizes)
const 1899 if (this->
size() == 1)
1901 timpi_assert (!this->
rank());
1902 timpi_assert (!root_id);
1906 timpi_assert_less (root_id, this->
size());
1907 timpi_assert (this->
verify(identical_sizes));
1909 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
1911 std::vector<T> vecdata;
1912 if (this->
rank() == root_id)
1913 vecdata.assign(data.begin(), data.end());
1915 std::size_t vecsize = vecdata.size();
1916 if (identical_sizes)
1917 timpi_assert(this->
verify(vecsize));
1920 if (this->
rank() != root_id)
1921 vecdata.resize(vecsize);
1924 if (this->
rank() != root_id)
1927 data.insert(vecdata.begin(), vecdata.end());
// Receive into a shared packing buffer: `buffer` is allocated here when the
// caller passed a null pointer, sized from `stat.size()`, and then handed to
// receive() together with the request `req`.
// NOTE(review): presumably `stat` comes from a preceding probe -- TODO confirm.
1932 template <
typename Context,
typename OutputIter,
typename T>
1943 if (buffer ==
nullptr)
1944 buffer = std::make_shared<std::vector<typename Packing<T>::buffer_type>>();
1952 buffer->resize(stat.
size());
1953 this->
receive(src_processor_id, *buffer, req, tag);
// Probe-then-receive (logged as "possibly_receive()") for vectors whose
// element type has a StandardType. MPI_Iprobe is called for the source;
// `buf` is resized to the probed size, `src_processor_id` is overwritten with
// the probed sender, and a TIMPI_IRECV is issued into `buf`.
// NOTE(review): presumably the resize and receive only run when the probe
// reports a pending message -- TODO confirm.
1966 template <
typename T,
typename A,
typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
1968 std::vector<T,A> & buf,
1973 TIMPI_LOG_SCOPE(
"possibly_receive()",
"Parallel");
1979 timpi_assert(src_processor_id < this->
size() ||
1982 timpi_call_mpi(MPI_Iprobe(
int(src_processor_id),
1990 buf.resize(stat.
size());
1992 src_processor_id = stat.
source();
1995 (TIMPI_IRECV(buf.data(), cast_int<CountType>(buf.size()), type,
1996 src_processor_id, tag.
value(), this->
get(),
// possibly_receive() overload for element types that are sent through a
// Packing<T> buffer. An insert iterator at the end of `buf` is passed on as
// the output destination (presumably to a packed-range receive -- TODO
// confirm).
2007 template <
typename T,
typename A,
typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
2009 std::vector<T,A> & buf,
2014 TIMPI_LOG_SCOPE(
"possibly_receive()",
"Parallel");
2018 std::inserter(buf, buf.end()),
// possibly_receive() overload for a vector of vectors. After MPI_Iprobe the
// probed sender is stored in `src_processor_id`, a raw char buffer `recvbuf`
// is received as MPI_PACKED, and (*recvbuf, buf, type, *this) are passed on
// (presumably to a helper that unpacks into `buf` once the request completes
// -- TODO confirm).
2026 template <
typename T,
typename A1,
typename A2>
2028 std::vector<std::vector<T,A1>,A2> & buf,
2033 TIMPI_LOG_SCOPE(
"possibly_receive()",
"Parallel");
2039 timpi_assert(src_processor_id < this->
size() ||
2042 timpi_call_mpi(MPI_Iprobe(
int(src_processor_id),
2050 src_processor_id = stat.
source();
2052 std::vector<char> * recvbuf =
2055 this->
receive(src_processor_id, *recvbuf, MPI_PACKED, req, tag);
2060 (*recvbuf, buf, type, *
this));
// Cross-rank consistency check of a value `r`: two copies (tempmin, tempmax)
// are taken, the local verdict compares r against them, and the verdict is
// combined across ranks with min().
// NOTE(review): presumably tempmin/tempmax are reduced with min()/max()
// before the comparison -- TODO confirm.
2076 #endif // TIMPI_HAVE_MPI 2087 template <
typename T>
2092 T tempmin = r, tempmax = r;
2095 bool verified = (r == tempmin) &&
2097 this->
min(verified);
2102 "Tried to verify an unverifiable type");
// Pointer-based consistency check: `r` may be null. A rank with a null `r`
// is never flagged invalid; a rank with a value is flagged when *r differs
// from tempmin (and presumably tempmax -- TODO confirm).
2107 template <
typename T>
2114 tempmin = tempmax = *r;
2122 bool invalid = r && ((*r != tempmin) ||
2129 "Tried to semiverify an unverifiable type");
// Vector flavour of the pointer-based consistency check. `rsize` is the
// local length (0 when `r` is null) and `psize` is a pointer to it only when
// `r` is non-null.
// tempmin/tempmax start as copies of *r; otherwise they are resized to
// `rsize` and filled via set_highest / set_lowest (presumably so that ranks
// without data cannot influence a min/max reduction -- TODO confirm).
2136 template <
typename T,
typename A>
2141 std::size_t rsize = r ? r->size() : 0;
2142 std::size_t * psize = r ? &rsize :
nullptr;
2149 std::vector<T,A> tempmin, tempmax;
2152 tempmin = tempmax = *r;
2156 tempmin.resize(rsize);
2157 tempmax.resize(rsize);
2158 Attributes<std::vector<T,A>>::set_highest(tempmin);
2159 Attributes<std::vector<T,A>>::set_lowest(tempmax);
2163 bool invalid = r && ((*r != tempmin) ||
2170 "Tried to semiverify a vector of an unverifiable type");
// Reduction logged as "min()"; communication is only attempted when the
// communicator holds more than one rank.
2178 template <
typename T>
2183 if (this->
size() > 1)
2185 TIMPI_LOG_SCOPE(
"min()",
"Parallel");
// Scalar reduction logged as "min(scalar)": on multi-rank communicators a
// single element at &r is reduced in place (MPI_IN_PLACE) with
// TIMPI_ALLREDUCE, so every rank ends up with the reduced value in `r`.
2201 template <
typename T>
2204 if (this->
size() > 1)
2206 TIMPI_LOG_SCOPE(
"min(scalar)",
"Parallel");
2209 (TIMPI_ALLREDUCE(MPI_IN_PLACE, &r, 1,
// Vector reduction logged as "min(vector)": skipped for single-rank
// communicators and for empty vectors. All ranks must pass vectors of the
// same length (asserted via verify); the reduction is done in place on
// r.data().
2217 template <
typename T,
typename A>
2220 if (this->
size() > 1 && !r.empty())
2222 TIMPI_LOG_SCOPE(
"min(vector)",
"Parallel");
2224 timpi_assert(this->
verify(r.size()));
2228 (MPI_IN_PLACE, r.data(), cast_int<CountType>(r.size()),
// vector<bool> reduction logged as "min(vector<bool>)". The bits are packed
// into unsigned ints, reduced from `ruint` into the separate buffer `temp`,
// and unpacked back into `r`.
// NOTE(review): the reduction operator is not among the visible arguments;
// presumably a bitwise AND -- TODO confirm.
2235 template <
typename A>
2238 if (this->
size() > 1 && !r.empty())
2240 TIMPI_LOG_SCOPE(
"min(vector<bool>)",
"Parallel");
2242 timpi_assert(this->
verify(r.size()));
2244 std::vector<unsigned int> ruint;
2245 pack_vector_bool(r, ruint);
2246 std::vector<unsigned int> temp(ruint.size());
2249 (ruint.data(), temp.data(),
2250 cast_int<CountType>(ruint.size()),
2252 unpack_vector_bool(temp, r);
// Scalar reduction logged as "minloc(scalar)". A value/rank pair `data_in`
// is reduced in place using the MPI datatype from
// dataplusint_type_acquire<T>(), and `min_id` receives the rank stored in
// the reduced pair.
// NOTE(review): presumably the final assignment of this->rank() is the
// single-rank fallback branch -- TODO confirm.
2257 template <
typename T>
2259 unsigned int & min_id)
const 2261 if (this->
size() > 1)
2263 TIMPI_LOG_SCOPE(
"minloc(scalar)",
"Parallel");
2271 (TIMPI_ALLREDUCE (MPI_IN_PLACE, &data_in, 1,
2272 dataplusint_type_acquire<T>().first,
2275 min_id = data_in.
rank;
2278 min_id = this->
rank();
// Element-wise reduction logged as "minloc(vector)". Each entry of `r` is
// paired with this rank's id, the pairs are reduced from `data_in` into
// `data_out`, and both `r` and `min_id` are overwritten from the result.
// With a single rank (and non-empty `r`) every min_id entry is set to
// this->rank().
// NOTE(review): min_id is indexed up to r.size() with no visible resize;
// presumably callers must pre-size it -- TODO confirm.
2282 template <
typename T,
typename A1,
typename A2>
2284 std::vector<unsigned int,A2> & min_id)
const 2286 if (this->
size() > 1 && !r.empty())
2288 TIMPI_LOG_SCOPE(
"minloc(vector)",
"Parallel");
2290 timpi_assert(this->
verify(r.size()));
2292 std::vector<DataPlusInt<T>> data_in(r.size());
2293 for (std::size_t i=0; i != r.size(); ++i)
2295 data_in[i].val = r[i];
2296 data_in[i].rank = this->
rank();
2298 std::vector<DataPlusInt<T>> data_out(r.size());
2301 (TIMPI_ALLREDUCE (data_in.data(), data_out.data(),
2302 cast_int<CountType>(r.size()),
2303 dataplusint_type_acquire<T>().first,
2305 for (std::size_t i=0; i != r.size(); ++i)
2307 r[i] = data_out[i].val;
2308 min_id[i] = data_out[i].rank;
2311 else if (!r.empty())
2313 for (std::size_t i=0; i != r.size(); ++i)
2314 min_id[i] = this->
rank();
// vector<bool> variant logged as "minloc(vector<bool>)". Each bool is stored
// as the int member of a DataPlusInt<int> together with this rank's id; the
// reduced pairs are written back into `r` and `min_id`.
// With a single rank (and non-empty `r`) every min_id entry is set to
// this->rank().
// NOTE(review): min_id is indexed up to r.size() with no visible resize;
// presumably callers must pre-size it -- TODO confirm.
2319 template <
typename A1,
typename A2>
2321 std::vector<unsigned int,A2> & min_id)
const 2323 if (this->
size() > 1 && !r.empty())
2325 TIMPI_LOG_SCOPE(
"minloc(vector<bool>)",
"Parallel");
2327 timpi_assert(this->
verify(r.size()));
2329 std::vector<DataPlusInt<int>> data_in(r.size());
2330 for (std::size_t i=0; i != r.size(); ++i)
2332 data_in[i].val = r[i];
2333 data_in[i].rank = this->
rank();
2335 std::vector<DataPlusInt<int>> data_out(r.size());
2338 (data_in.data(), data_out.data(),
2341 for (std::size_t i=0; i != r.size(); ++i)
2343 r[i] = data_out[i].val;
2344 min_id[i] = data_out[i].rank;
2347 else if (!r.empty())
2349 for (std::size_t i=0; i != r.size(); ++i)
2350 min_id[i] = this->
rank();
// Reduction logged as "max()"; communication is only attempted when the
// communicator holds more than one rank.
2355 template <
typename T>
2360 if (this->
size() > 1)
2362 TIMPI_LOG_SCOPE(
"max()",
"Parallel");
// Scalar reduction logged as "max(scalar)"; a no-op on single-rank
// communicators.
2377 template <
typename T>
2380 if (this->
size() > 1)
2382 TIMPI_LOG_SCOPE(
"max(scalar)",
"Parallel");
// Vector reduction logged as "max(vector)": skipped for single-rank
// communicators and for empty vectors. All ranks must pass vectors of the
// same length (asserted via verify); the reduction is done in place on
// r.data().
2391 template <
typename T,
typename A>
2394 if (this->
size() > 1 && !r.empty())
2396 TIMPI_LOG_SCOPE(
"max(vector)",
"Parallel");
2398 timpi_assert(this->
verify(r.size()));
2401 (TIMPI_ALLREDUCE (MPI_IN_PLACE, r.data(),
2402 cast_int<CountType>(r.size()),
// vector<bool> reduction logged as "max(vector<bool>)". The bits are packed
// into unsigned ints, reduced from `ruint` into the separate buffer `temp`,
// and unpacked back into `r`.
// NOTE(review): the reduction operator is not among the visible arguments;
// presumably a bitwise OR -- TODO confirm.
2409 template <
typename A>
2412 if (this->
size() > 1 && !r.empty())
2414 TIMPI_LOG_SCOPE(
"max(vector<bool>)",
"Parallel");
2416 timpi_assert(this->
verify(r.size()));
2418 std::vector<unsigned int> ruint;
2419 pack_vector_bool(r, ruint);
2420 std::vector<unsigned int> temp(ruint.size());
2422 (TIMPI_ALLREDUCE (ruint.data(), temp.data(),
2423 cast_int<CountType>(ruint.size()),
2426 unpack_vector_bool(temp, r);
// Map reduction logged as "max(map)", selected when both key_type and
// mapped_type have a StandardType. The local entries are copied into the
// pair vector `vecdata`; each pair is then inserted into `data`, and the
// mapped value at that key is set to the larger of the stored and incoming
// values.
// NOTE(review): presumably `vecdata` is allgathered before the merge loop
// and the std::max update only runs when the insert did not take place
// -- TODO confirm.
2432 template <
typename Map,
2433 typename std::enable_if<std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
2434 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value,
2438 if (this->
size() > 1)
2440 TIMPI_LOG_SCOPE(
"max(map)",
"Parallel");
2448 std::vector<std::pair<typename Map::key_type, typename Map::mapped_type>>
2449 vecdata(data.begin(), data.end());
2455 for (
const auto & pr : vecdata)
2460 auto result = data.insert(pr);
2462 bool inserted = result.second;
2466 auto it = result.first;
2467 it->second = std::max(it->second, pr.second);
// Map reduction logged as "max(map)" for maps whose key or mapped type lacks
// a StandardType. Keys and values are split into two parallel vectors, each
// allgathered with identical_buffer_sizes == false, and then merged back:
// every gathered (key, value) is emplaced into `data`, and the mapped value
// is set to the larger of the stored and gathered values.
// NOTE(review): presumably the std::max update only runs when the emplace
// did not take place -- TODO confirm.
2475 template <
typename Map,
2476 typename std::enable_if<!(std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
2477 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value),
2481 if (this->size() > 1)
2483 TIMPI_LOG_SCOPE(
"max(map)",
"Parallel");
2491 std::vector<typename Map::key_type> keys;
2492 std::vector<typename Map::mapped_type> vals;
2494 auto data_size = data.size();
2495 keys.reserve(data_size);
2496 vals.reserve(data_size);
2498 for (
const auto & pr : data)
2500 keys.push_back(pr.first);
2501 vals.push_back(pr.second);
2504 this->allgather(keys,
false);
2505 this->allgather(vals,
false);
2509 for (std::size_t i=0; i<keys.size(); ++i)
2514 auto pr = data.emplace(keys[i], vals[i]);
2516 bool emplaced = pr.second;
2521 it->second = std::max(it->second, vals[i]);
// NOTE(review): K/V/C/A presumably mirror std::map's key, mapped, compare
// and allocator parameters for a map overload -- TODO confirm.
2529 template <
typename K,
typename V,
typename C,
typename A>
// NOTE(review): K/V/H/E/A presumably mirror std::unordered_map's key,
// mapped, hash, key-equal and allocator parameters for a map overload
// -- TODO confirm.
2538 template <
typename K,
typename V,
typename H,
typename E,
typename A>
// Scalar reduction logged as "maxloc(scalar)". A value/rank pair `data_in`
// is reduced in place using the MPI datatype from
// dataplusint_type_acquire<T>(), and `max_id` receives the rank stored in
// the reduced pair.
// NOTE(review): presumably the final assignment of this->rank() is the
// single-rank fallback branch -- TODO confirm.
2547 template <
typename T>
2549 unsigned int & max_id)
const 2551 if (this->
size() > 1)
2553 TIMPI_LOG_SCOPE(
"maxloc(scalar)",
"Parallel");
2561 (TIMPI_ALLREDUCE (MPI_IN_PLACE, &data_in, 1,
2562 dataplusint_type_acquire<T>().first,
2565 max_id = data_in.
rank;
2568 max_id = this->
rank();
// Element-wise reduction logged as "maxloc(vector)". Each entry of `r` is
// paired with this rank's id, the pairs are reduced from `data_in` into
// `data_out`, and both `r` and `max_id` are overwritten from the result.
// With a single rank (and non-empty `r`) every max_id entry is set to
// this->rank().
// NOTE(review): max_id is indexed up to r.size() with no visible resize;
// presumably callers must pre-size it -- TODO confirm.
2572 template <
typename T,
typename A1,
typename A2>
2574 std::vector<unsigned int,A2> & max_id)
const 2576 if (this->
size() > 1 && !r.empty())
2578 TIMPI_LOG_SCOPE(
"maxloc(vector)",
"Parallel");
2580 timpi_assert(this->
verify(r.size()));
2582 std::vector<DataPlusInt<T>> data_in(r.size());
2583 for (std::size_t i=0; i != r.size(); ++i)
2585 data_in[i].val = r[i];
2586 data_in[i].rank = this->
rank();
2588 std::vector<DataPlusInt<T>> data_out(r.size());
2591 (TIMPI_ALLREDUCE(data_in.data(), data_out.data(),
2592 cast_int<CountType>(r.size()),
2593 dataplusint_type_acquire<T>().first,
2596 for (std::size_t i=0; i != r.size(); ++i)
2598 r[i] = data_out[i].val;
2599 max_id[i] = data_out[i].rank;
2602 else if (!r.empty())
2604 for (std::size_t i=0; i != r.size(); ++i)
2605 max_id[i] = this->
rank();
// vector<bool> variant logged as "maxloc(vector<bool>)". Each bool is stored
// as the int member of a DataPlusInt<int> together with this rank's id; the
// reduced pairs are written back into `r` and `max_id`.
// With a single rank (and non-empty `r`) every max_id entry is set to
// this->rank().
// NOTE(review): max_id is indexed up to r.size() with no visible resize;
// presumably callers must pre-size it -- TODO confirm.
2610 template <
typename A1,
typename A2>
2612 std::vector<unsigned int,A2> & max_id)
const 2614 if (this->
size() > 1 && !r.empty())
2616 TIMPI_LOG_SCOPE(
"maxloc(vector<bool>)",
"Parallel");
2618 timpi_assert(this->
verify(r.size()));
2620 std::vector<DataPlusInt<int>> data_in(r.size());
2621 for (std::size_t i=0; i != r.size(); ++i)
2623 data_in[i].val = r[i];
2624 data_in[i].rank = this->
rank();
2626 std::vector<DataPlusInt<int>> data_out(r.size());
2628 (TIMPI_ALLREDUCE(data_in.data(), data_out.data(),
2629 cast_int<CountType>(r.size()),
2633 for (std::size_t i=0; i != r.size(); ++i)
2635 r[i] = data_out[i].val;
2636 max_id[i] = data_out[i].rank;
2639 else if (!r.empty())
2641 for (std::size_t i=0; i != r.size(); ++i)
2642 max_id[i] = this->
rank();
// Reduction logged as "sum()". The multi-rank path is compiled only when
// TIMPI_HAVE_MPI is defined and is taken only when the communicator holds
// more than one rank.
2647 template <
typename T>
2652 #ifdef TIMPI_HAVE_MPI 2653 if (this->
size() > 1)
2655 TIMPI_LOG_SCOPE(
"sum()",
"Parallel");
// Scalar reduction logged as "sum()": on multi-rank communicators a single
// element at &r is reduced in place (MPI_IN_PLACE) with TIMPI_ALLREDUCE.
2671 template <
typename T>
2674 if (this->
size() > 1)
2676 TIMPI_LOG_SCOPE(
"sum()",
"Parallel");
2679 (TIMPI_ALLREDUCE(MPI_IN_PLACE, &r, 1,
// Vector reduction logged as "sum()": skipped for single-rank communicators
// and empty vectors. All ranks must pass vectors of the same length
// (asserted via verify); r.size() elements are reduced in place on r.data().
2687 template <
typename T,
typename A>
2690 if (this->
size() > 1 && !r.empty())
2692 TIMPI_LOG_SCOPE(
"sum()",
"Parallel");
2694 timpi_assert(this->
verify(r.size()));
2697 (TIMPI_ALLREDUCE(MPI_IN_PLACE, r.data(),
2698 cast_int<CountType>(r.size()),
// Scalar reduction logged as "sum()" that reduces TWO elements starting at
// &r in place (presumably the real and imaginary parts of a
// std::complex<T> -- TODO confirm).
2708 template <
typename T>
2711 if (this->
size() > 1)
2713 TIMPI_LOG_SCOPE(
"sum()",
"Parallel");
2716 (TIMPI_ALLREDUCE(MPI_IN_PLACE, &r, 2,
// Vector reduction logged as "sum()" that reduces r.size() * 2 elements in
// place (presumably a vector of std::complex<T> treated as interleaved
// real/imaginary scalars -- TODO confirm). All ranks must pass vectors of
// the same length.
2724 template <
typename T,
typename A>
2727 if (this->
size() > 1 && !r.empty())
2729 TIMPI_LOG_SCOPE(
"sum()",
"Parallel");
2731 timpi_assert(this->
verify(r.size()));
2734 (TIMPI_ALLREDUCE(MPI_IN_PLACE, r.data(),
2735 cast_int<CountType>(r.size() * 2),
// Map reduction logged as "sum(map)", selected when both key_type and
// mapped_type have a StandardType. The local entries are copied into the
// pair vector `vecdata`, and every pair's value is then accumulated into
// data[key] (operator[] creates missing keys).
// NOTE(review): presumably `vecdata` is allgathered and `data` is cleared
// before the accumulation loop, otherwise local values would be counted
// twice -- TODO confirm.
2745 template <
typename Map,
2746 typename std::enable_if<std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
2747 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value,
2751 if (this->
size() > 1)
2753 TIMPI_LOG_SCOPE(
"sum(map)",
"Parallel");
2762 std::vector<std::pair<typename Map::key_type, typename Map::mapped_type>>
2763 vecdata(data.begin(), data.end());
2768 for (
const auto & pr : vecdata)
2769 data[pr.first] += pr.second;
// Map reduction logged as "sum(map)" for maps whose key or mapped type lacks
// a StandardType. Keys and values are split into two parallel vectors, each
// allgathered with identical_buffer_sizes == false, and every gathered value
// is accumulated into data[key].
// NOTE(review): presumably `data` is cleared before the accumulation loop,
// otherwise local values would be counted twice -- TODO confirm.
2777 template <
typename Map,
2778 typename std::enable_if<!(std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
2779 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value),
2783 if (this->size() > 1)
2785 TIMPI_LOG_SCOPE(
"sum(map)",
"Parallel");
2790 std::vector<typename Map::key_type> keys;
2791 std::vector<typename Map::mapped_type> vals;
2793 auto data_size = data.size();
2794 keys.reserve(data_size);
2795 vals.reserve(data_size);
2797 for (
const auto & pr : data)
2799 keys.push_back(pr.first);
2800 vals.push_back(pr.second);
2803 this->allgather(keys,
false);
2804 this->allgather(vals,
false);
2808 for (std::size_t i=0; i<keys.size(); ++i)
2809 data[keys[i]] += vals[i];
// NOTE(review): K/V/C/A presumably mirror std::map's key, mapped, compare
// and allocator parameters for a map overload -- TODO confirm.
2815 template <
typename K,
typename V,
typename C,
typename A>
// NOTE(review): K/V/H/E/A presumably mirror std::unordered_map's key,
// mapped, hash, key-equal and allocator parameters for a map overload
// -- TODO confirm.
2823 template <
typename K,
typename V,
typename H,
typename E,
typename A>
// allgather() of one vector per rank into a vector of vectors, for element
// types with a StandardType. `recv` ends up with this->size() rows; row i is
// the slice of the gathered buffer `r` that starts at displacements[i] and
// has sendlengths[i] entries.
// NOTE(review): presumably the `size() < 2` and `globalsize == 0` tests are
// early exits, and per-rank lengths are allgathered when
// identical_buffer_sizes is false -- TODO confirm.
2831 template <
typename T,
typename A1,
typename A2,
2832 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
2834 std::vector<std::vector<T,A1>,A2> & recv,
2835 const bool identical_buffer_sizes)
const 2837 TIMPI_LOG_SCOPE (
"allgather()",
"Parallel");
2839 timpi_assert(this->
size());
2842 if (this->
size() < 2)
2850 recv.resize(this->
size());
2852 std::vector<CountType>
2853 sendlengths (this->
size(), 0);
2854 std::vector<DispType>
2855 displacements(this->
size(), 0);
2857 const CountType mysize = cast_int<CountType>(sendval.size());
// With identical sizes every rank contributes exactly `mysize` entries.
2859 if (identical_buffer_sizes)
2860 sendlengths.assign(this->
size(), mysize);
// Running offset: rank i's data starts where rank i-1's data ended.
2868 for (
unsigned int i=0; i != this->
size(); ++i)
2870 displacements[i] = globalsize;
2871 globalsize += sendlengths[i];
2875 if (globalsize == 0)
2879 std::vector<T,A1> r(globalsize, 0);
// A null send pointer is passed when this rank contributes nothing.
2883 (TIMPI_ALLGATHERV(const_cast<T*>(mysize ? sendval.data() :
nullptr),
2885 &r[0], sendlengths.data(), displacements.data(),
2889 for (
unsigned int i=0; i != this->
size(); ++i)
2890 recv[i].
assign(r.begin()+displacements[i],
2891 r.begin()+displacements[i]+sendlengths[i]);
// allgather() of one vector per rank for element types sent through a
// Packing<T> buffer. The local range is packed into `buffer` (bounded by the
// largest CountType value), the packed buffers are allgathered into
// `allbuffers` with identical_buffer_sizes == false, and entries are
// appended to recv[i] through a back_inserter (presumably by unpacking
// allbuffers[i] -- TODO confirm).
// NOTE(review): the error text is emitted without a space before "in size",
// and it speaks of "Non-blocking packed range sends" although this routine
// is logged as "allgather()" -- confirm whether the message was copied from
// a send routine.
2896 template <
typename T,
typename A1,
typename A2,
2897 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
2899 std::vector<std::vector<T,A1>,A2> & recv,
2902 TIMPI_LOG_SCOPE (
"allgather()",
"Parallel");
2906 std::vector<buffer_t> buffer;
2907 auto next_iter =
pack_range ((
void *)
nullptr, sendval.begin(),
2908 sendval.end(), buffer,
2909 std::numeric_limits<CountType>::max());
// pack_range stopping short of the end means the data did not fit.
2911 if (next_iter != sendval.end())
2912 timpi_error_msg(
"Non-blocking packed range sends cannot exceed " << std::numeric_limits<CountType>::max() <<
"in size");
2914 std::vector<std::vector<buffer_t>> allbuffers;
2916 timpi_assert(this->
size());
2918 recv.resize(this->
size());
2922 this->
allgather(buffer, allbuffers,
false);
2926 std::back_inserter(recv[i]), (T*)
nullptr);
// Merge-to-root for a container with template parameters T/C/A: on
// multi-rank communicators each rank copies its entries into `vecdata`,
// gather() collects them on root_id, and the root inserts the gathered
// entries into `data`.
2931 template <
typename T,
typename C,
typename A>
2933 const unsigned int root_id)
const 2935 if (this->
size() > 1)
2937 std::vector<T> vecdata(data.begin(), data.end());
2938 this->
gather(root_id, vecdata);
2939 if (this->
rank() == root_id)
2940 data.insert(vecdata.begin(), vecdata.end());
// All-ranks merge for a container with template parameters T/C/A: entries
// are copied into `vecdata` and the vector's entries are inserted into
// `data`.
// NOTE(review): presumably `vecdata` is allgathered in between -- TODO
// confirm.
2946 template <
typename T,
typename C,
typename A>
2949 if (this->
size() > 1)
2951 std::vector<T> vecdata(data.begin(), data.end());
2953 data.insert(vecdata.begin(), vecdata.end());
// Merge-to-root for a second container flavour with template parameters
// T/C/A: entries are gathered to root_id through `vecdata` and inserted into
// `data` on the root.
// NOTE(review): presumably this is the duplicate-allowing container and
// `data` is cleared on the root before re-insertion -- TODO confirm.
2959 template <
typename T,
typename C,
typename A>
2961 const unsigned int root_id)
const 2963 if (this->
size() > 1)
2965 std::vector<T> vecdata(data.begin(), data.end());
2966 this->
gather(root_id, vecdata);
2967 if (this->
rank() == root_id)
2971 data.insert(vecdata.begin(), vecdata.end());
// All-ranks merge for a second container flavour with template parameters
// T/C/A: entries are copied into `vecdata` and inserted back into `data`.
// NOTE(review): presumably `vecdata` is allgathered and `data` cleared in
// between -- TODO confirm.
2977 template <
typename T,
typename C,
typename A>
2980 if (this->
size() > 1)
2982 std::vector<T> vecdata(data.begin(), data.end());
2988 data.insert(vecdata.begin(), vecdata.end());
// Merge-to-root for a key/value container (T1 -> T2): entries are copied
// into a vector of pairs, gathered to root_id, and inserted into `data` on
// the root.
// NOTE(review): statements between the root test and the insert are not
// reflected here; presumably they decide which rank's value wins for
// duplicate keys -- TODO confirm.
2994 template <
typename T1,
typename T2,
typename C,
typename A>
2996 const unsigned int root_id)
const 2998 if (this->
size() > 1)
3000 std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
3001 this->
gather(root_id, vecdata);
3003 if (this->
rank() == root_id)
3010 data.insert(vecdata.begin(), vecdata.end());
// All-ranks merge for a key/value container (T1 -> T2): entries are copied
// into a vector of pairs and the vector's entries are inserted into `data`.
// NOTE(review): presumably `vecdata` is allgathered in between -- TODO
// confirm.
3017 template <
typename T1,
typename T2,
typename C,
typename A>
3020 if (this->
size() > 1)
3022 std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
3029 data.insert(vecdata.begin(), vecdata.end());
// Merge-to-root for a second key/value container flavour (T1 -> T2):
// entries are gathered to root_id as a vector of pairs and inserted into
// `data` on the root.
// NOTE(review): presumably this is the duplicate-allowing container and
// `data` is cleared on the root before re-insertion -- TODO confirm.
3035 template <
typename T1,
typename T2,
typename C,
typename A>
3037 const unsigned int root_id)
const 3039 if (this->
size() > 1)
3041 std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
3042 this->
gather(root_id, vecdata);
3044 if (this->
rank() == root_id)
3049 data.insert(vecdata.begin(), vecdata.end());
// All-ranks merge for a second key/value container flavour (T1 -> T2):
// entries are copied into a vector of pairs and inserted back into `data`.
// NOTE(review): presumably `vecdata` is allgathered and `data` cleared in
// between -- TODO confirm.
3056 template <
typename T1,
typename T2,
typename C,
typename A>
3059 if (this->
size() > 1)
3061 std::vector<std::pair<T1,T2>> vecdata(data.begin(), data.end());
3067 data.insert(vecdata.begin(), vecdata.end());
// Merge-to-root for a hashed key container (template parameters K/H/KE/A):
// on multi-rank communicators each rank copies its keys into `vecdata`,
// gather() collects them on root_id, and the root inserts the gathered keys
// into `data`.
3073 template <
typename K,
typename H,
typename KE,
typename A>
3075 const unsigned int root_id)
const 3077 if (this->
size() > 1)
3079 std::vector<K> vecdata(data.begin(), data.end());
3080 this->
gather(root_id, vecdata);
3081 if (this->
rank() == root_id)
3082 data.insert(vecdata.begin(), vecdata.end());
// All-ranks merge for a hashed key container (K/H/KE/A): keys are copied
// into `vecdata` and the vector's entries are inserted into `data`.
// NOTE(review): presumably `vecdata` is allgathered in between -- TODO
// confirm.
3088 template <
typename K,
typename H,
typename KE,
typename A>
3091 if (this->
size() > 1)
3093 std::vector<K> vecdata(data.begin(), data.end());
3095 data.insert(vecdata.begin(), vecdata.end());
// Merge-to-root for a second hashed key container flavour (K/H/KE/A): keys
// are gathered to root_id through `vecdata` and inserted into `data` on the
// root.
// NOTE(review): presumably this is the duplicate-allowing container and
// `data` is cleared on the root before re-insertion -- TODO confirm.
3101 template <
typename K,
typename H,
typename KE,
typename A>
3103 const unsigned int root_id)
const 3105 if (this->
size() > 1)
3107 std::vector<K> vecdata(data.begin(), data.end());
3108 this->
gather(root_id, vecdata);
3109 if (this->
rank() == root_id)
3114 data.insert(vecdata.begin(), vecdata.end());
// All-ranks merge for a second hashed key container flavour (K/H/KE/A): keys
// are copied into `vecdata` and inserted back into `data`.
// NOTE(review): presumably `vecdata` is allgathered and `data` cleared in
// between -- TODO confirm.
3121 template <
typename K,
typename H,
typename KE,
typename A>
3124 if (this->
size() > 1)
3126 std::vector<K> vecdata(data.begin(), data.end());
3132 data.insert(vecdata.begin(), vecdata.end());
// Merge-to-root for a hashed key/value container (K -> T, template
// parameters K/T/H/KE/A): entries are copied into a vector of pairs,
// gathered to root_id, and inserted into `data` on the root.
// NOTE(review): statements between the root test and the insert are not
// reflected here; presumably they decide which rank's value wins for
// duplicate keys -- TODO confirm.
3138 template <
typename K,
typename T,
typename H,
typename KE,
typename A>
3140 const unsigned int root_id)
const 3142 if (this->
size() > 1)
3144 std::vector<std::pair<K,T>> vecdata(data.begin(), data.end());
3145 this->
gather(root_id, vecdata);
3147 if (this->
rank() == root_id)
3154 data.insert(vecdata.begin(), vecdata.end());
// All-ranks merge for a hashed key/value container (K -> T): entries are
// copied into a vector of pairs and the vector's entries are inserted into
// `data`.
// NOTE(review): presumably `vecdata` is allgathered in between -- TODO
// confirm.
3161 template <
typename K,
typename T,
typename H,
typename KE,
typename A>
3164 if (this->
size() > 1)
3166 std::vector<std::pair<K,T>> vecdata(data.begin(), data.end());
3173 data.insert(vecdata.begin(), vecdata.end());
// Merge-to-root for a second hashed key/value container flavour (K -> T):
// entries are gathered to root_id as a vector of pairs and inserted into
// `data` on the root.
// NOTE(review): presumably this is the duplicate-allowing container and
// `data` is cleared on the root before re-insertion -- TODO confirm.
3179 template <
typename K,
typename T,
typename H,
typename KE,
typename A>
3181 const unsigned int root_id)
const 3183 if (this->
size() > 1)
3185 std::vector<std::pair<K,T>> vecdata(data.begin(), data.end());
3186 this->
gather(root_id, vecdata);
3188 if (this->
rank() == root_id)
3193 data.insert(vecdata.begin(), vecdata.end());
// All-ranks merge for a second hashed key/value container flavour (K -> T):
// entries are copied into a vector of pairs and inserted back into `data`.
// NOTE(review): presumably `vecdata` is allgathered and `data` cleared in
// between -- TODO confirm.
3200 template <
typename K,
typename T,
typename H,
typename KE,
typename A>
3203 if (this->
size() > 1)
3205 std::vector<std::pair<K,T>> vecdata(data.begin(), data.end());
3211 data.insert(vecdata.begin(), vecdata.end());
// gather() of one scalar per rank into `recv` on root_id. The root sizes
// `recv` to this->size(); on multi-rank communicators TIMPI_GATHER sends one
// element from each rank. Non-root ranks pass a null receive pointer when
// their `recv` is empty.
// NOTE(review): root_id < size() is asserted twice in this routine; the
// second check looks redundant -- confirm.
3217 template <
typename T,
typename A>
3220 std::vector<T,A> & recv)
const 3222 timpi_assert_less (root_id, this->
size());
3224 if (this->
rank() == root_id)
3225 recv.resize(this->
size());
3227 if (this->
size() > 1)
3229 TIMPI_LOG_SCOPE(
"gather()",
"Parallel");
3233 timpi_assert_less(root_id, this->
size());
3236 (TIMPI_GATHER(const_cast<T*>(&sendval), 1, send_type,
3237 recv.empty() ? nullptr : recv.data(), 1, send_type,
3238 root_id, this->
get()));
// In-place gather() of variable-length vectors to root_id, for element types
// with a StandardType. Per-rank lengths go into `sendlengths`, running
// offsets into `displacements`; the local data is copied to `r_src`, the
// root resizes `r` to the global size, and TIMPI_GATHERV is called with
// `r_src` as the send buffer.
// NOTE(review): presumably `sendlengths` is filled by gathering `mysize` to
// the root and `globalsize == 0` is an early exit -- TODO confirm.
3246 template <
typename T,
typename A,
3247 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
3249 std::vector<T,A> & r)
const 3251 if (this->
size() == 1)
// Serial communicator: the only legal rank and root are both 0.
3253 timpi_assert (!this->
rank());
3254 timpi_assert (!root_id);
3258 timpi_assert_less (root_id, this->
size());
3260 std::vector<CountType>
3261 sendlengths (this->
size(), 0);
3262 std::vector<DispType>
3263 displacements(this->
size(), 0);
3265 const CountType mysize = cast_int<CountType>(r.size());
3268 TIMPI_LOG_SCOPE(
"gather()",
"Parallel");
// Running offset: rank i's data starts where rank i-1's data ended.
3273 for (
unsigned int i=0; i != this->
size(); ++i)
3275 displacements[i] = globalsize;
3276 globalsize += sendlengths[i];
3280 if (globalsize == 0)
// Keep a copy of the local data as the send buffer, since `r` is resized on
// the root just below.
3284 std::vector<T,A> r_src(r);
3288 if (root_id == this->
rank())
3289 r.resize(globalsize);
3291 timpi_assert_less(root_id, this->
size());
3295 (TIMPI_GATHERV(r_src.empty() ? nullptr : r_src.data(), mysize,
3297 sendlengths.data(), displacements.data(),
// gather() overload for element types sent through a Packing<T> buffer.
// Results are collected into the local vector `gathered` through an insert
// iterator (presumably by a packed-range gather, after which `gathered`
// replaces `r` -- TODO confirm).
3302 template <
typename T,
typename A,
3303 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
3305 std::vector<T,A> & r)
const 3307 std::vector<T,A> gathered;
3310 std::inserter(gathered, gathered.end()));
// gather() of one string per rank into `recv` on root_id. The root sizes
// `recv` to this->size(). On multi-rank communicators the per-rank lengths
// are either assumed equal (identical_buffer_sizes) or gathered to the root;
// all characters are then gathered into one string `r` on the root and split
// into recv[i] with substr(displacements[i], sendlengths[i]).
// NOTE(review): presumably the length gather() is the else-branch of the
// identical_buffer_sizes test -- TODO confirm.
3317 template <
typename T,
typename A>
3319 const std::basic_string<T> & sendval,
3320 std::vector<std::basic_string<T>,A> & recv,
3321 const bool identical_buffer_sizes)
const 3323 timpi_assert_less (root_id, this->
size());
3325 if (this->
rank() == root_id)
3326 recv.resize(this->
size());
3328 if (this->
size() > 1)
3330 TIMPI_LOG_SCOPE (
"gather()",
"Parallel");
3332 std::vector<CountType>
3333 sendlengths (this->
size(), 0);
3334 std::vector<DispType>
3335 displacements(this->
size(), 0);
3337 const CountType mysize = cast_int<CountType>(sendval.size());
3339 if (identical_buffer_sizes)
3340 sendlengths.assign(this->
size(), mysize);
3343 this->
gather(root_id, mysize, sendlengths);
// Running offset: rank i's characters start where rank i-1's ended.
3348 for (
unsigned int i=0; i < this->
size(); ++i)
3350 displacements[i] = globalsize;
3351 globalsize += sendlengths[i];
// Only the root allocates the concatenated receive string.
3355 std::basic_string<T> r;
3356 if (this->
rank() == root_id)
3357 r.resize(globalsize, 0);
3359 timpi_assert_less(root_id, this->
size());
3363 (TIMPI_GATHERV(const_cast<T*>(sendval.data()),
3365 this->
rank() == root_id ? &r[0] :
nullptr,
3366 sendlengths.data(), displacements.data(),
3370 if (this->
rank() == root_id)
3371 for (
unsigned int i=0; i != this->
size(); ++i)
3372 recv[i] = r.substr(displacements[i], sendlengths[i]);
// allgather() of one scalar per rank, for types with a StandardType. `recv`
// is sized to this->size() and TIMPI_ALLGATHER collects one element from
// every rank into it.
// NOTE(review): presumably the `comm_size > 0` branch is the single-rank
// fallback that stores sendval locally -- TODO confirm.
3380 template <
typename T,
typename A,
3381 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
3383 std::vector<T,A> & recv)
const 3385 TIMPI_LOG_SCOPE (
"allgather()",
"Parallel");
3387 timpi_assert(this->
size());
3388 recv.resize(this->
size());
3390 const unsigned int comm_size = this->
size();
3396 (TIMPI_ALLGATHER(const_cast<T*>(&sendval), 1, send_type, recv.data(), 1,
3397 send_type, this->
get()));
3399 else if (comm_size > 0)
// allgather() of one value per rank for types sent through a Packing<T>
// buffer. The value is wrapped in a one-element vector `range`, and a
// per-rank buffer budget (approx_total_buffer_size divided by the number of
// ranks) is passed on (presumably to a packed-range allgather -- TODO
// confirm).
// NOTE(review): approx_total_buffer_size is initialised from the
// floating-point literal 1e8, relying on an implicit conversion to
// std::size_t.
3403 template <
typename T,
typename A,
3404 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
3406 std::vector<T,A> & recv)
const 3408 TIMPI_LOG_SCOPE (
"allgather()",
"Parallel");
3410 timpi_assert(this->
size());
3411 recv.resize(this->
size());
3413 static const std::size_t approx_total_buffer_size = 1e8;
3414 const std::size_t approx_each_buffer_size =
3415 approx_total_buffer_size / this->
size();
3417 unsigned int comm_size = this->
size();
3420 std::vector<T> range = {sendval};
3423 approx_each_buffer_size);
3425 else if (comm_size > 0)
// In-place allgather() of a vector, for element types with a StandardType.
// Identical-size path: a buffer of r.size()*size() entries is created and
// TIMPI_ALLGATHER is called with `r_src` as send buffer and `r` as receive
// buffer, both with count r_src.size().
// General path: per-rank lengths and offsets are computed and
// TIMPI_ALLGATHERV gathers into `r`.
// NOTE(review): presumably `r_src` and `r` are swapped after each r_src
// construction so that r_src holds the local data and r the enlarged
// buffer; the counts and buffers only make sense under that assumption
// -- TODO confirm.
3429 template <
typename T,
typename A,
3430 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
3432 const bool identical_buffer_sizes)
const 3434 if (this->
size() < 2)
3437 TIMPI_LOG_SCOPE(
"allgather()",
"Parallel");
3439 if (identical_buffer_sizes)
3441 timpi_assert(this->
verify(r.size()));
3445 std::vector<T,A> r_src(r.size()*this->
size());
3450 (TIMPI_ALLGATHER(r_src.data(), cast_int<CountType>(r_src.size()),
3451 send_type, r.data(), cast_int<CountType>(r_src.size()),
3452 send_type, this->
get()));
3457 std::vector<CountType>
3458 sendlengths (this->
size(), 0);
3459 std::vector<DispType>
3460 displacements(this->
size(), 0);
3462 const CountType mysize = cast_int<CountType>(r.size());
// Running offset: rank i's data starts where rank i-1's data ended.
3468 for (
unsigned int i=0; i != this->
size(); ++i)
3470 displacements[i] = globalsize;
3471 globalsize += sendlengths[i];
3475 if (globalsize == 0)
3479 std::vector<T,A> r_src(globalsize);
3487 (TIMPI_ALLGATHERV(r_src.empty() ? nullptr : r_src.data(), mysize,
3488 send_type, r.data(), sendlengths.data(),
3489 displacements.data(), send_type, this->
get()));
// In-place allgather() of a vector for element types sent through a
// Packing<T> buffer. Mirrors the two paths of the StandardType version:
// with identical sizes a buffer of r.size()*size() entries is created;
// otherwise per-rank lengths are summed into `globalsize` and a buffer of
// that size is created.
// NOTE(review): the communication calls themselves are not reflected here
// -- TODO confirm how the buffers are filled.
3492 template <
typename T,
typename A,
3493 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
3495 const bool identical_buffer_sizes)
const 3497 if (this->
size() < 2)
3500 TIMPI_LOG_SCOPE(
"allgather()",
"Parallel");
3502 if (identical_buffer_sizes)
3504 timpi_assert(this->
verify(r.size()));
3509 std::vector<T,A> r_src(r.size()*this->
size());
3519 std::vector<CountType>
3520 sendlengths (this->
size(), 0);
3521 std::vector<DispType>
3522 displacements(this->
size(), 0);
3524 const CountType mysize = cast_int<CountType>(r.size());
3529 for (
unsigned int i=0; i != this->
size(); ++i)
3530 globalsize += sendlengths[i];
3533 if (globalsize == 0)
3537 std::vector<T,A> r_src(globalsize);
// In-place allgather() of a vector of strings. Local string lengths are
// recorded in `mystrlengths` and the characters concatenated into
// `concat_src`. The lengths are allgathered (giving one entry per global
// string, which also sets the new size of `r`), the per-rank character
// totals are allgathered into `concat_sizes`, and all characters are
// gathered into `concat` with TIMPI_ALLGATHERV. Each r[i] is then assigned
// strlengths[i] characters from `concat`.
// NOTE(review): presumably `begin` is advanced to `end` after each
// assignment and `globalsize == 0` is an early exit -- TODO confirm.
3546 template <
typename T,
typename A>
3548 const bool identical_buffer_sizes)
const 3550 if (this->
size() < 2)
3553 TIMPI_LOG_SCOPE(
"allgather()",
"Parallel");
3555 if (identical_buffer_sizes)
3557 timpi_assert(this->
verify(r.size()));
3567 std::vector<CountType> mystrlengths (r.size());
3568 std::vector<T> concat_src;
// First pass: record each string's length and the total character count.
3571 for (std::size_t i=0; i != r.size(); ++i)
3573 CountType stringlen = cast_int<CountType>(r[i].size());
3574 mystrlengths[i] = stringlen;
3575 myconcatsize += stringlen;
// Second pass: concatenate all local characters into one send buffer.
3577 concat_src.reserve(myconcatsize);
3578 for (std::size_t i=0; i != r.size(); ++i)
3580 (concat_src.end(), r[i].begin(), r[i].end());
3583 std::vector<CountType> strlengths = mystrlengths;
3584 this->
allgather(strlengths, identical_buffer_sizes);
3587 r.resize(strlengths.size());
3590 std::vector<CountType> concat_sizes;
3591 this->
allgather(myconcatsize, concat_sizes);
3595 std::vector<DispType> displacements(this->
size(), 0);
3597 for (
unsigned int i=0; i != this->
size(); ++i)
3599 displacements[i] = globalsize;
3600 globalsize += concat_sizes[i];
3604 if (globalsize == 0)
3609 std::vector<T> concat(globalsize);
3616 (TIMPI_ALLGATHERV(concat_src.empty() ?
3617 nullptr : concat_src.data(), myconcatsize,
3618 send_type, concat.data(), concat_sizes.data(),
3619 displacements.data(), send_type, this->
get()));
3622 const T * begin = concat.data();
3623 for (std::size_t i=0; i != r.size(); ++i)
3625 const T * end = begin + strlengths[i];
3626 r[i].assign(begin, end);
// scatter() of one element per rank from `data` on root_id. On the root,
// `data` must hold exactly this->size() entries (asserted). A null send
// pointer is used when `data` is empty.
// NOTE(review): the scatter call itself and the single-rank result
// assignment are not reflected here -- TODO confirm.
3633 template <
typename T,
typename A>
3636 const unsigned int root_id)
const 3639 timpi_assert_less (root_id, this->
size());
3643 timpi_assert (this->
rank() != root_id || this->
size() == data.size());
3645 if (this->
size() == 1)
3647 timpi_assert (!this->
rank());
3648 timpi_assert (!root_id);
3653 TIMPI_LOG_SCOPE(
"scatter()",
"Parallel");
3655 T * data_ptr =
const_cast<T*
>(data.empty() ? nullptr : data.data());
3658 timpi_assert_less(root_id, this->
size());
// scatter() of equal-sized chunks of `data` from root_id into `recv`. With a
// single rank `recv` simply becomes a copy of `data`. Otherwise the root
// requires data.size() to be a multiple of the rank count and computes the
// chunk length; `recv` is resized to `recv_buffer_size` on every rank.
// NOTE(review): presumably `recv_buffer_size` is broadcast from the root
// before the resize -- TODO confirm.
3667 template <
typename T,
typename A>
3669 std::vector<T,A> & recv,
3670 const unsigned int root_id)
const 3672 timpi_assert_less (root_id, this->
size());
3674 if (this->
size() == 1)
3676 timpi_assert (!this->
rank());
3677 timpi_assert (!root_id);
3678 recv.assign(data.begin(), data.end());
3682 TIMPI_LOG_SCOPE(
"scatter()",
"Parallel");
3684 std::size_t recv_buffer_size = 0;
3685 if (this->
rank() == root_id)
3687 timpi_assert(data.size() % this->
size() == 0);
3688 recv_buffer_size = cast_int<std::size_t>(data.size() / this->
size());
3692 recv.resize(recv_buffer_size);
// Null pointers stand in for empty buffers.
3694 T * data_ptr =
const_cast<T*
>(data.empty() ? nullptr : data.data());
3695 T * recv_ptr = recv.empty() ? nullptr : recv.data();
3696 ignore(data_ptr, recv_ptr);
3698 timpi_assert_less(root_id, this->
size());
3703 root_id, this->
get()));
// scatter() of variable-sized chunks: counts[i] entries of `data` go to rank
// i. With a single rank `recv` receives the first counts[0] entries. On the
// root, offsets are accumulated into `displacements` and the counts must sum
// to data.size(). Each rank learns its own chunk length through a scalar
// scatter of `counts`, resizes `recv`, and TIMPI_SCATTERV moves the data.
// NOTE(review): `counts` is taken by const value, so every call copies the
// vector; a const reference looks sufficient -- confirm before changing the
// declaration.
3708 template <
typename T,
typename A1,
typename A2>
3710 const std::vector<CountType,A2> counts,
3711 std::vector<T,A1> & recv,
3712 const unsigned int root_id)
const 3714 timpi_assert_less (root_id, this->
size());
3716 if (this->
size() == 1)
3718 timpi_assert (!this->
rank());
3719 timpi_assert (!root_id);
3720 timpi_assert (counts.size() == this->
size());
3721 recv.assign(data.begin(), data.begin() + counts[0]);
3725 std::vector<DispType> displacements(this->
size(), 0);
3726 if (root_id == this->
rank())
3728 timpi_assert(counts.size() == this->
size());
// Running offset: rank i's chunk starts where rank i-1's chunk ended.
3731 std::size_t globalsize = 0;
3732 for (
unsigned int i=0; i < this->
size(); ++i)
3734 displacements[i] = globalsize;
3735 globalsize += counts[i];
3738 timpi_assert(data.size() == globalsize);
3741 TIMPI_LOG_SCOPE(
"scatter()",
"Parallel");
3745 this->
scatter(counts, recv_buffer_size, root_id);
3746 recv.resize(recv_buffer_size);
// Null pointers stand in for empty buffers.
3748 T * data_ptr =
const_cast<T*
>(data.empty() ? nullptr : data.data());
3749 CountType * count_ptr =
const_cast<CountType*
>(counts.empty() ? nullptr : counts.data());
3750 T * recv_ptr = recv.empty() ? nullptr : recv.data();
3751 ignore(data_ptr, count_ptr, recv_ptr);
3753 timpi_assert_less(root_id, this->
size());
3757 (TIMPI_SCATTERV(data_ptr, count_ptr, displacements.data(),
StandardType<T>(data_ptr),
3758 recv_ptr, recv_buffer_size,
StandardType<T>(recv_ptr), root_id, this->
get()));
// Convenience scatter() overload for `int` counts, compiled only with
// TIMPI_HAVE_MPI: the counts are converted element-wise into a CountType
// vector and forwarded to the scatter() that takes (data, counts, recv,
// root_id).
// NOTE(review): `counts` is taken by const value, so every call copies the
// vector before converting it -- confirm whether a const reference was
// intended.
3762 #ifdef TIMPI_HAVE_MPI 3767 template <
typename T,
typename A1,
typename A2>
3769 const std::vector<int,A2> counts,
3770 std::vector<T,A1> & recv,
3771 const unsigned int root_id)
const 3773 std::vector<CountType> full_counts(counts.begin(), counts.end());
3774 this->
scatter(data, full_counts, recv, root_id);
// scatter() of a vector of vectors: data[i] on the root is destined for rank
// i. With a single rank `recv` becomes a copy of data[0]. Otherwise the root
// concatenates all rows into `stacked_data` (recording per-row lengths in
// `counts` unless identical_buffer_sizes is set, in which case consecutive
// rows are asserted to have equal length) and the work is delegated to the
// fixed-size or the counts-based vector scatter().
// NOTE(review): presumably the counts-based call is the else-branch of the
// final identical_buffer_sizes test -- TODO confirm.
3781 template <
typename T,
typename A1,
typename A2>
3783 std::vector<T,A1> & recv,
3784 const unsigned int root_id,
3785 const bool identical_buffer_sizes)
const 3787 timpi_assert_less (root_id, this->
size());
3789 if (this->
size() == 1)
3791 timpi_assert (!this->
rank());
3792 timpi_assert (!root_id);
3793 timpi_assert (data.size() == this->
size());
3794 recv.assign(data[0].begin(), data[0].end());
3798 std::vector<T,A1> stacked_data;
3799 std::vector<CountType> counts;
3801 if (root_id == this->
rank())
3803 timpi_assert (data.size() == this->
size());
3805 if (!identical_buffer_sizes)
3806 counts.resize(this->
size());
3808 for (std::size_t i=0; i < data.size(); ++i)
3810 if (!identical_buffer_sizes)
3811 counts[i] = cast_int<CountType>(data[i].size());
3815 timpi_assert(!i || data[i-1].
size() == data[i].
size());
3817 std::copy(data[i].begin(), data[i].end(), std::back_inserter(stacked_data));
3821 if (identical_buffer_sizes)
3822 this->
scatter(stacked_data, recv, root_id);
3824 this->
scatter(stacked_data, counts, recv, root_id);
// In-place all-to-all exchange logged as "alltoall()". `buf` is treated as
// this->size() equal chunks of buf.size()/size() entries; the length must
// divide evenly and the chunk size must agree on all ranks (both asserted).
// TIMPI_ALLTOALL is called with MPI_IN_PLACE as the send buffer.
// NOTE(review): presumably the `size() < 2 || buf.empty()` test is an early
// exit -- TODO confirm.
3829 template <
typename T,
typename A>
3832 if (this->
size() < 2 || buf.empty())
3835 TIMPI_LOG_SCOPE(
"alltoall()",
"Parallel");
3841 cast_int<CountType>(buf.size()/this->
size());
3844 timpi_assert_equal_to (buf.size()%this->
size(), 0);
3846 timpi_assert(this->
verify(size_per_proc));
3851 (TIMPI_ALLTOALL(MPI_IN_PLACE, size_per_proc, send_type, buf.data(),
3852 size_per_proc, send_type, this->
get()));
// Scalar broadcast logged as "broadcast()". The enable_if restriction to
// types with a StandardType is only part of the template header when
// TIMPI_HAVE_MPI is defined. On a single-rank communicator only rank 0 and
// root 0 are legal; otherwise root_id must be a valid rank.
3857 template <
typename T
3858 #ifdef TIMPI_HAVE_MPI 3860 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type
3864 const unsigned int root_id,
3868 if (this->
size() == 1)
3870 timpi_assert (!this->
rank());
3871 timpi_assert (!root_id);
3875 timpi_assert_less (root_id, this->
size());
3877 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
// Scalar broadcast for types sent through a Packing<T> buffer (compiled only
// with TIMPI_HAVE_MPI). The value is wrapped in a one-element vector `range`
// (presumably so it can be sent through a packed-range broadcast -- TODO
// confirm).
3885 #ifdef TIMPI_HAVE_MPI 3886 template <
typename T,
3887 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
3889 const unsigned int root_id,
3893 if (this->
size() == 1)
3895 timpi_assert (!this->
rank());
3896 timpi_assert (!root_id);
3900 timpi_assert_less (root_id, this->
size());
3908 std::vector<T> range = {data};
// Vector broadcast logged as "broadcast()", for element types with a
// StandardType. With identical_sizes the length is asserted to agree on all
// ranks. `data` is resized to `data_size` and TIMPI_BCAST sends data.size()
// entries from root_id; a null pointer is used for an empty vector.
// NOTE(review): presumably `data_size` is broadcast from the root before the
// resize when identical_sizes is false -- TODO confirm.
3922 template <
typename T,
typename A,
3923 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
3925 const unsigned int root_id,
3926 const bool timpi_mpi_var(identical_sizes))
const 3929 if (this->
size() == 1)
3931 timpi_assert (!this->
rank());
3932 timpi_assert (!root_id);
3936 #ifdef TIMPI_HAVE_MPI 3938 timpi_assert_less (root_id, this->
size());
3939 timpi_assert (this->
verify(identical_sizes));
3941 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
3943 std::size_t data_size = data.size();
3945 if (identical_sizes)
3946 timpi_assert(this->
verify(data_size));
3950 data.resize(data_size);
3954 T * data_ptr = data.empty() ? nullptr : data.data();
3956 timpi_assert_less(root_id, this->
size());
3959 (TIMPI_BCAST(data_ptr, cast_int<CountType>(data.size()),
3960 StandardType<T>(data_ptr), root_id, this->
get()));
// Vector broadcast logged as "broadcast()" for element types sent through a
// Packing<T> buffer. With identical_sizes the length is asserted to agree on
// all ranks; `data` is then resized to `data_size`.
// NOTE(review): presumably `data_size` is broadcast from the root before the
// resize and the entries are sent through a packed-range broadcast -- TODO
// confirm.
3964 template <
typename T,
typename A,
3965 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
3967 const unsigned int root_id,
3968 const bool identical_sizes)
const 3970 if (this->
size() == 1)
3972 timpi_assert (!this->
rank());
3973 timpi_assert (!root_id);
3977 timpi_assert_less (root_id, this->
size());
3978 timpi_assert (this->
verify(identical_sizes));
3980 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
3982 std::size_t data_size = data.size();
3984 if (identical_sizes)
3985 timpi_assert(this->
verify(data_size));
3989 data.resize(data_size);
3991 timpi_assert_less(root_id, this->
size());
// Map broadcast implementation selected when both Map::key_type and
// Map::mapped_type have a StandardType deriving from DataType.  The map is
// flattened into a vector of (key, value) pairs, which is broadcast from
// root_id; ranks other than root_id then insert the received pairs into
// `data`.
4001 template <
typename Map,
4002 typename std::enable_if<std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
4003 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value,
4006 const unsigned int root_id,
4007 const bool timpi_mpi_var(identical_sizes))
const 4010 if (this->
size() == 1)
// Single-rank communicator: this rank and root_id must both be 0.
4012 timpi_assert (!this->
rank());
4013 timpi_assert (!root_id);
// root_id must be a valid rank, and identical_sizes must have the same value
// on every rank.
4017 #ifdef TIMPI_HAVE_MPI 4018 timpi_assert_less (root_id, this->
size());
4019 timpi_assert (this->
verify(identical_sizes));
4021 TIMPI_LOG_SCOPE(
"broadcast(map)",
"Parallel");
4023 std::size_t data_size=data.size();
// With identical sizes promised, just check the promise holds.
4024 if (identical_sizes)
4025 timpi_assert(this->
verify(data_size));
// Staging vector of (key, value) pairs used for the actual communication.
4029 std::vector<std::pair<
typename Map::key_type,
4030 typename Map::mapped_type>> comm_data;
// The root copies its map entries into the staging vector...
4032 if (root_id == this->
rank())
4033 comm_data.assign(data.begin(), data.end());
// ...and the staging vector is sized to data_size entries.
// NOTE(review): presumably this resize sits in an else branch whose
// keyword line is not visible here.
4035 comm_data.resize(data_size);
// The final `true` is the identical_sizes argument of the vector broadcast.
4037 this->
broadcast(comm_data, root_id,
true);
// Ranks other than root_id insert the received pairs into their map.
4039 if (this->
rank() != root_id)
4042 data.insert(comm_data.begin(), comm_data.end());
4047 template <
typename Map,
4048 typename std::enable_if<!(std::is_base_of<DataType, StandardType<typename Map::key_type>>::value &&
4049 std::is_base_of<DataType, StandardType<typename Map::mapped_type>>::value),
4052 const unsigned int root_id,
4053 const bool timpi_mpi_var(identical_sizes))
const 4056 if (this->size() == 1)
4058 timpi_assert (!this->rank());
4059 timpi_assert (!root_id);
4063 #ifdef TIMPI_HAVE_MPI 4064 timpi_assert_less (root_id, this->size());
4065 timpi_assert (this->verify(identical_sizes));
4067 TIMPI_LOG_SCOPE(
"broadcast()",
"Parallel");
4069 std::size_t data_size=data.size();
4070 if (identical_sizes)
4071 timpi_assert(this->verify(data_size));
4073 this->broadcast(data_size, root_id);
4075 std::vector<typename Map::key_type> pair_first; pair_first.reserve(data_size);
4076 std::vector<typename Map::mapped_type> pair_second; pair_first.reserve(data_size);
4078 if (root_id == this->rank())
4080 for (
const auto & pr : data)
4082 pair_first.push_back(pr.first);
4083 pair_second.push_back(pr.second);
4088 pair_first.resize(data_size);
4089 pair_second.resize(data_size);
4093 (pair_first, root_id,
4096 (pair_second, root_id,
4099 timpi_assert(pair_first.size() == pair_first.size());
4101 if (this->rank() != root_id)
4104 for (std::size_t i=0; i<pair_first.size(); ++i)
4105 data[pair_first[i]] = pair_second[i];
4110 template <
typename T1,
typename T2,
typename C,
typename A>
4112 const unsigned int root_id,
4113 const bool identical_sizes)
const 4120 template <
typename K,
typename V,
typename H,
typename E,
typename A>
4122 const unsigned int root_id,
4123 const bool identical_sizes)
const 4128 template <
typename Context,
typename OutputContext,
4129 typename Iter,
typename OutputIter>
4132 const Iter range_end,
4133 OutputContext * context2,
4134 OutputIter out_iter,
4135 const unsigned int root_id,
4136 std::size_t approx_buffer_size)
const 4138 typedef typename std::iterator_traits<Iter>::value_type T;
4141 if (this->
size() == 1)
4143 timpi_assert (!this->
rank());
4144 timpi_assert (!root_id);
4152 std::vector<buffer_t> buffer;
4154 if (this->
rank() == root_id)
4156 (context1, range_begin, range_end, buffer, approx_buffer_size);
4160 std::size_t buffer_size = buffer.size();
4167 buffer.resize(buffer_size);
4174 std::unique_ptr<OutputIter> next_out_iter =
4175 std::make_unique<OutputIter>(out_iter);
4177 if (this->
rank() != root_id)
4180 (buffer, context2, *next_out_iter, (T*)
nullptr);
4181 next_out_iter = std::make_unique<OutputIter>(return_out_iter);
// Gathers packed chunks of [range_begin, range_end) onto root_id, looping
// until no rank has any of its range left.
// NOTE(review): the signature line and the pack/unpack callee names are not
// visible here; the parameters match gather_packed_range -- confirm.
4187 template <
typename Context,
typename Iter,
typename OutputIter>
4191 const Iter range_end,
4192 OutputIter out_iter,
4193 std::size_t approx_buffer_size)
const 4195 typedef typename std::iterator_traits<Iter>::value_type T;
// Combine nonempty_range across ranks with max(), so every rank runs the
// same number of loop iterations.
4198 bool nonempty_range = (range_begin != range_end);
4199 this->
max(nonempty_range);
// The output iterator is kept behind a unique_ptr and replaced by a freshly
// constructed copy each iteration, rather than being assigned to.
4203 std::unique_ptr<OutputIter> next_out_iter =
4204 std::make_unique<OutputIter>(out_iter);
4206 while (nonempty_range)
// Fresh buffer for each chunk.
4210 std::vector<buffer_t> buffer;
4213 (context, range_begin, range_end, buffer, approx_buffer_size);
// Collect every rank's buffer onto root_id.
4215 this->
gather(root_id, buffer);
// The null T* argument carries only a type.
4218 (buffer, context, *next_out_iter, (T*)(
nullptr));
4219 next_out_iter = std::make_unique<OutputIter>(return_out_iter);
// Re-evaluate the loop condition collectively.
4221 nonempty_range = (range_begin != range_end);
4222 this->
max(nonempty_range);
// Chunked packed-range loop with no root_id parameter, running until no rank
// has any of its range left.
// NOTE(review): the signature line, the pack/unpack callee names and the
// collective call between packing and the buffer assertion are not visible
// here; this is presumably allgather_packed_range -- confirm.
4227 template <
typename Context,
typename Iter,
typename OutputIter>
4230 const Iter range_end,
4231 OutputIter out_iter,
4232 std::size_t approx_buffer_size)
const 4234 typedef typename std::iterator_traits<Iter>::value_type T;
// Combine nonempty_range across ranks with max(), so every rank runs the
// same number of loop iterations.
4237 bool nonempty_range = (range_begin != range_end);
4238 this->
max(nonempty_range);
// The output iterator is kept behind a unique_ptr and replaced by a freshly
// constructed copy each iteration, rather than being assigned to.
4242 std::unique_ptr<OutputIter> next_out_iter =
4243 std::make_unique<OutputIter>(out_iter);
4245 while (nonempty_range)
// Fresh buffer for each chunk.
4249 std::vector<buffer_t> buffer;
4252 (context, range_begin, range_end, buffer, approx_buffer_size);
// The buffer must be non-empty at this point in every iteration.
4256 timpi_assert(buffer.size());
// The null T* argument carries only a type.
4259 (buffer, context, *next_out_iter, (T*)
nullptr);
4260 next_out_iter = std::make_unique<OutputIter>(return_out_iter);
// Re-evaluate the loop condition collectively.
4262 nonempty_range = (range_begin != range_end);
4263 this->
max(nonempty_range);
4269 template<
typename T>
4274 TIMPI_LOG_SCOPE(
"packed_range_probe()",
"Parallel");
4276 ignore(src_processor_id, tag);
4282 timpi_assert(src_processor_id < this->
size() ||
4285 timpi_call_mpi(MPI_Iprobe(
int(src_processor_id),
4298 template <
typename T,
typename A,
4299 typename std::enable_if<std::is_base_of<DataType, StandardType<T>>::value,
int>::type>
4301 std::vector<T,A> & buf,
4305 T * dataptr = buf.empty() ? nullptr : buf.data();
4310 template <
typename T,
typename A,
4311 typename std::enable_if<Has_buffer_type<Packing<T>>::value,
int>::type>
4313 std::vector<T,A> & buf,
4327 template <
typename T,
typename A1,
typename A2>
4329 std::vector<std::vector<T,A1>,A2> & buf,
4333 T * dataptr = buf.empty() ? nullptr : (buf[0].empty() ? nullptr : buf[0].data());
4339 template <
typename Context,
typename OutputIter,
typename T>
4347 TIMPI_LOG_SCOPE(
"possibly_receive_packed_range()",
"Parallel");
4351 auto stat = packed_range_probe<T>(src_processor_id, tag, int_flag);
4355 src_processor_id = stat.source();
4370 timpi_assert(!int_flag || (int_flag &&
4371 src_processor_id < this->
size() &&
4380 #endif // TIMPI_PARALLEL_IMPLEMENTATION_H void gather_packed_range(const unsigned int root_id, Context *context, Iter range_begin, const Iter range_end, OutputIter out, std::size_t approx_buffer_size=1000000) const
Take a range of local variables, combine it with ranges from all processors, and write the output to ...
void add_prior_request(const Request &req)
data_type dataplusint_type< short int >()
MPI_Request request
Request object for non-blocking I/O.
void allgather(const T &send_data, std::vector< T, A > &recv_data) const
Take a vector of length this->size(), and fill in recv[processor_id] = the value of send on that proc...
void nonblocking_send_packed_range(const unsigned int dest_processor_id, const Context *context, Iter range_begin, const Iter range_end, Request &req, const MessageTag &tag=no_tag) const
Similar to the above Nonblocking send_packed_range with a few important differences: ...
void broadcast_packed_range(const Context *context1, Iter range_begin, const Iter range_end, OutputContext *context2, OutputIter out, const unsigned int root_id=0, std::size_t approx_buffer_size=1000000) const
Blocking-broadcast range-of-pointers to one processor.
OutputIter unpack_range(const std::vector< buffertype > &buffer, Context *context, OutputIter out_iter, const T *)
Helper function for range unpacking.
Status packed_range_probe(const unsigned int src_processor_id, const MessageTag &tag, bool &flag) const
Non-Blocking message probe for a packed range message.
void scatter(const std::vector< T, A > &data, T &recv, const unsigned int root_id=0) const
Take a vector of local variables and scatter the ith item to the ith processor in the communicator...
void map_max(Map &data) const
Private implementation function called by the map-based max() specializations.
void minloc(T &r, unsigned int &min_id) const
Take a local variable and replace it with the minimum of its values on all processors, returning the minimum rank of a processor which originally held the minimum value.
static const request null_request
Types combined with an int.
void gather(const unsigned int root_id, const T &send_data, std::vector< T, A > &recv) const
Take a vector of length comm.size(), and on processor root_id fill in recv[processor_id] = the value ...
const MessageTag any_tag
Default message tag ids.
void sum(T &r) const
Take a local variable and replace it with the sum of its values on all processors.
void alltoall(std::vector< T, A > &r) const
Effectively transposes the input vector across all processors.
processor_id_type rank() const
Encapsulates the MPI_Datatype.
void allgather_packed_range(Context *context, Iter range_begin, const Iter range_end, OutputIter out, std::size_t approx_buffer_size=1000000) const
Take a range of local variables, combine it with ranges from all processors, and write the output to ...
void map_broadcast(Map &data, const unsigned int root_id, const bool identical_sizes) const
Private implementation function called by the map-based broadcast() specializations.
Templated class to provide the appropriate MPI datatype for use with built-in C types or simple C++ c...
void assign(const communicator &comm)
Utility function for setting our member variables from an MPI communicator.
data_type dataplusint_type< float >()
void ignore(const Args &...)
Define data types and (un)serialization functions for use when encoding a potentially-variable-size o...
data_type dataplusint_type()
Templated function to return the appropriate MPI datatype for use with built-in C types when combined...
void send_packed_range(const unsigned int dest_processor_id, const Context *context, Iter range_begin, const Iter range_end, const MessageTag &tag=no_tag, std::size_t approx_buffer_size=1000000) const
Blocking-send range-of-pointers to one processor.
void nonblocking_receive_packed_range(const unsigned int src_processor_id, Context *context, OutputIter out, const T *output_type, Request &req, Status &stat, const MessageTag &tag=any_tag) const
Non-Blocking-receive range-of-pointers from one processor.
const unsigned int any_source
Processor id meaning "Accept from any source".
bool possibly_receive(unsigned int &src_processor_id, std::vector< T, A > &buf, Request &req, const MessageTag &tag) const
Nonblocking-receive from one processor with user-defined type.
Encapsulates the MPI_Comm object.
processor_id_type size() const
std::size_t packed_size_of(const std::vector< std::vector< T, A1 >, A2 > &buf, const DataType &type) const
uint8_t processor_id_type
Status receive(const unsigned int dest_processor_id, T &buf, const MessageTag &tag=any_tag) const
Blocking-receive from one processor with data-defined type.
Encapsulates the MPI tag integers.
void min(const T &r, T &o, Request &req) const
Non-blocking minimum of the local value r into o with the request req.
void receive_packed_range(const unsigned int dest_processor_id, Context *context, OutputIter out, const T *output_type, const MessageTag &tag=any_tag) const
Blocking-receive range-of-pointers from one processor.
status probe(const unsigned int src_processor_id, const MessageTag &tag=any_tag) const
Blocking message probe.
data_type dataplusint_type< int >()
Iter pack_range(const Context *context, Iter range_begin, const Iter range_end, std::vector< buffertype > &buffer, std::size_t approx_buffer_size)
Helper function for range packing.
static const bool has_min_max
void maxloc(T &r, unsigned int &max_id) const
Take a local variable and replace it with the maximum of its values on all processors, returning the minimum rank of a processor which originally held the maximum value.
void send_receive(const unsigned int dest_processor_id, const T1 &send_data, const unsigned int source_processor_id, T2 &recv_data, const MessageTag &send_tag=no_tag, const MessageTag &recv_tag=any_tag) const
Send data send to one processor while simultaneously receiving other data recv from a (potentially di...
bool possibly_receive_packed_range(unsigned int &src_processor_id, Context *context, OutputIter out, const T *output_type, Request &req, const MessageTag &tag) const
Nonblocking packed range receive from one processor with user-defined type.
timpi_pure bool semiverify(const T *r) const
Check whether a local pointer points to the same value on all processors where it is not null...
static const bool is_fixed_type
data_type dataplusint_type< long >()
void broadcast(T &data, const unsigned int root_id=0, const bool identical_sizes=false) const
Take a local value and broadcast it to all processors.
void add_post_wait_work(PostWaitWork *work)
StandardType<T>'s which do not define a way to MPI_Type T should inherit from this class...
data_type dataplusint_type< long double >()
CountType size(const data_type &type) const
void map_sum(Map &data) const
Private implementation function called by the map-based sum() specializations.
Encapsulates the MPI_Request.
timpi_pure bool verify(const T &r) const
Check whether a local variable has the same value on all processors, returning true if it does or fal...
void max(const T &r, T &o, Request &req) const
Non-blocking maximum of the local value r into o with the request req.
void send(const unsigned int dest_processor_id, const T &buf, const MessageTag &tag=no_tag) const
Blocking-send to one processor with data-defined type.
data_type dataplusint_type< double >()
SendMode send_mode() const
Gets the user-requested SendMode.
void send_receive_packed_range(const unsigned int dest_processor_id, const Context1 *context1, RangeIter send_begin, const RangeIter send_end, const unsigned int source_processor_id, Context2 *context2, OutputIter out, const T *output_type, const MessageTag &send_tag=no_tag, const MessageTag &recv_tag=any_tag, std::size_t approx_buffer_size=1000000) const
Send a range-of-pointers to one processor while simultaneously receiving another range from a (potent...
Encapsulates the MPI_Status struct.
std::size_t packed_range_size(const Context *context, Iter range_begin, const Iter range_end)
Helper function for range packing.
std::pair< data_type, std::unique_ptr< StandardType< std::pair< T, int > > > > dataplusint_type_acquire()
void set_union(T &data, const unsigned int root_id) const
Take a container (set, map, unordered_set, multimap, etc) of local variables on each processor...
Templated class to provide the appropriate MPI reduction operations for use with built-in C types or ...