TIMPI::detail Namespace Reference

Functions

void empty_send_assertion (const Communicator &comm, processor_id_type empty_target_pid)
 
template<typename MapToContainers , typename SendFunctor , typename PossiblyReceiveFunctor , typename ActionFunctor >
void push_parallel_nbx_helper (const Communicator &comm, MapToContainers &&data, const SendFunctor &send_functor, const PossiblyReceiveFunctor &possibly_receive_functor, const ActionFunctor &act_on_data)
 
template<typename MapToContainers , typename SendFunctor , typename ReceiveFunctor , typename ActionFunctor >
void push_parallel_alltoall_helper (const Communicator &comm, MapToContainers &&data, const SendFunctor &send_functor, const ReceiveFunctor &receive_functor, const ActionFunctor &act_on_data)
 
template<typename MapToContainers , typename SendReceiveFunctor , typename ActionFunctor >
void push_parallel_roundrobin_helper (const Communicator &comm, MapToContainers &&data, const SendReceiveFunctor &sendreceive_functor, const ActionFunctor &act_on_data)
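
These detail helpers are not meant to be called directly; they are reached through the public entry points TIMPI::push_parallel_vector_data() and TIMPI::push_parallel_packed_range() (see the "Referenced by" notes below). A minimal, hedged usage sketch follows; the include paths and the exact act_on_data signature are assumptions inferred from the listings on this page, not a verbatim copy of TIMPI's documentation.

// Hedged usage sketch: reach the detail helpers through the public
// push_parallel_vector_data() entry point. Include paths and the functor
// signature are assumptions inferred from the act_on_data(pid, std::move(...))
// calls shown in the listings below.
#include "timpi/communicator.h"
#include "timpi/parallel_sync.h"

#include <map>
#include <vector>

void example_push(const TIMPI::Communicator & comm)
{
  // Containers we want specific destination ranks to receive from us
  std::map<TIMPI::processor_id_type, std::vector<int>> data_to_push;
  data_to_push[(comm.rank() + 1) % comm.size()] = {1, 2, 3};

  // Called once for each rank that pushed data to us
  auto act_on_data =
    [](TIMPI::processor_id_type src_pid, const std::vector<int> & received)
    {
      // ... consume 'received', which arrived from rank 'src_pid' ...
      (void) src_pid;
      (void) received;
    };

  TIMPI::push_parallel_vector_data(comm, data_to_push, act_on_data);
}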
 

Function Documentation

◆ empty_send_assertion()

void TIMPI::detail::empty_send_assertion ( const Communicator &  comm,
                                           processor_id_type     empty_target_pid 
                                         )
inline

Definition at line 218 of file parallel_sync.h.

References TIMPI::Communicator::max(), and TIMPI::Communicator::rank().

Referenced by push_parallel_alltoall_helper(), push_parallel_nbx_helper(), and push_parallel_roundrobin_helper().

220 {
221  bool someone_found_empty_send = (empty_target_pid != processor_id_type(-1));
222  comm.max(someone_found_empty_send);
223  std::stringstream err_msg;
224  if (empty_target_pid != processor_id_type(-1))
225  err_msg << " [" << comm.rank() << "] sent an empty to [" <<
226  empty_target_pid << "]";
227  timpi_assert_msg(!someone_found_empty_send,
228  "Some rank(s) sent empty data!" + err_msg.str());
229 }
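
The assertion above is deliberately collective: every rank participates in the max() reduction before any rank raises the error, so the failure is reported consistently everywhere rather than hanging the ranks that never saw an empty send. Below is a minimal, hypothetical illustration of that pattern in plain MPI (not TIMPI's API); the "problem on rank 1" and the abort-on-failure behavior are invented for the example.

// Hypothetical stand-alone illustration of the collective-assertion idea:
// every rank reaches the reduction, so the error is raised consistently
// on all ranks instead of only on the rank that saw the problem.
#include <mpi.h>
#include <cstdio>

int main(int argc, char ** argv)
{
  MPI_Init(&argc, &argv);

  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  // Pretend rank 1 detected a local problem (an "empty send" in TIMPI's case)
  int found_problem_locally = (rank == 1) ? 1 : 0;

  // Collective max: afterwards every rank agrees whether *anyone* saw a problem
  int someone_found_problem = 0;
  MPI_Allreduce(&found_problem_locally, &someone_found_problem, 1,
                MPI_INT, MPI_MAX, MPI_COMM_WORLD);

  if (someone_found_problem)
    {
      if (found_problem_locally)
        std::fprintf(stderr, "[%d] saw the problem locally\n", rank);
      MPI_Abort(MPI_COMM_WORLD, 1);  // fail everywhere, consistently
    }

  MPI_Finalize();
  return 0;
}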

◆ push_parallel_alltoall_helper()

template<typename MapToContainers , typename SendFunctor , typename ReceiveFunctor , typename ActionFunctor >
void TIMPI::detail::push_parallel_alltoall_helper ( const Communicator &    comm,
                                                    MapToContainers &&      data,
                                                    const SendFunctor &     send_functor,
                                                    const ReceiveFunctor &  receive_functor,
                                                    const ActionFunctor &   act_on_data 
                                                  )

Definition at line 449 of file parallel_sync.h.

References TIMPI::Communicator::alltoall(), TIMPI::any_source, empty_send_assertion(), TIMPI::Communicator::get_unique_tag(), TIMPI::Communicator::probe(), TIMPI::Communicator::rank(), and TIMPI::Communicator::size().

Referenced by TIMPI::push_parallel_packed_range(), and TIMPI::push_parallel_vector_data().

454 {
455  typedef typename std::remove_reference<MapToContainers>::type::value_type::second_type
456  container_type;
457 
458  // This function must be run on all processors at once
459  timpi_parallel_only(comm);
460 
461  // This function implements a simpler asynchronous protocol than
462  // NBX. Every processor will know exactly how many receives to
463  // post.
464 
465  processor_id_type num_procs = comm.size();
466 
467  // Don't give us empty vectors to send. We'll yell at you (after
468  // the sync is done, so all ranks get out of it and we can throw an
469  // assertion failure everywhere) if we're debugging and we catch
470  // one.
471  //
472  // But in opt mode we'll just skip empty vectors.
473 #ifndef NDEBUG
474  processor_id_type empty_target_pid = processor_id_type(-1);
475 #endif
476 
477  // Number of vectors to send to each processor
478  std::vector<std::size_t> will_send_to(num_procs, 0);
479  for (auto & datapair : data)
480  {
481  // In the case of data partitioned into more processors than we
482  // have ranks, we "wrap around"
483  processor_id_type dest_pid = datapair.first % num_procs;
484 
485  // But in opt mode we'll just try to stay consistent with what
486  // we can do in the other backends
487  if (datapair.second.empty())
488  {
489 #ifndef NDEBUG
490  empty_target_pid = dest_pid;
491 #endif
492  continue;
493  }
494 
495  will_send_to[dest_pid]++;
496  }
497 
498  // Tell everyone about where everyone will send to
499  comm.alltoall(will_send_to);
500 
501  // will_send_to now represents how many vectors we'll receive from
502  // each processor; give it a better name.
503  auto & will_receive_from = will_send_to;
504 
505  processor_id_type n_receives = 0;
506  for (processor_id_type proc_id = 0; proc_id < num_procs; proc_id++)
507  n_receives += will_receive_from[proc_id];
508 
509  // We'll grab a tag so we can overlap request sends and receives
510  // without confusing one for the other
511  MessageTag tag = comm.get_unique_tag();
512 
513  // The send requests
514  std::list<Request> requests;
515 
516  // Post all of the non-empty sends, non-blocking
517  for (auto & datapair : data)
518  {
519  processor_id_type destid = datapair.first % num_procs;
520  auto & datum = datapair.second;
521 
522  if (datum.empty())
523  continue;
524 
525  // Just act on data if the user requested a send-to-self
526  if (destid == comm.rank())
527  {
528  act_on_data(destid, std::move(datum));
529  n_receives--;
530  }
531  else
532  {
533  requests.emplace_back();
534  send_functor(destid, datum, requests.back(), tag);
535  }
536  }
537 
538  // In serial we've now acted on all our data.
539  if (num_procs == 1)
540  return;
541 
542  // Post all of the receives.
543  for (processor_id_type i = 0; i != n_receives; ++i)
544  {
545  Status stat(comm.probe(any_source, tag));
546  const processor_id_type
547  proc_id = cast_int<processor_id_type>(stat.source());
548 
549  container_type received_data;
550  receive_functor(proc_id, received_data, tag);
551  act_on_data(proc_id, std::move(received_data));
552  }
553 
554  // Wait on all the sends to complete
555  for (auto & req : requests)
556  req.wait();
557 
558  // So, *did* we see any empty containers being sent?
559 #ifndef NDEBUG
560  empty_send_assertion(comm, empty_target_pid);
561 #endif // NDEBUG
562 }
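
The key step of this backend is the count exchange at line 499: after the alltoall, every processor knows exactly how many receives it must post, so no termination detection is needed. A minimal, hypothetical sketch of that step in plain MPI (not TIMPI's API) follows; the neighbor-to-neighbor payload is an invented stand-in for the real data map.

// Hedged illustration of the count-exchange step: each rank announces how
// many containers it will send to every other rank, and after the all-to-all
// the same buffer tells each rank exactly how many receives to post.
#include <mpi.h>
#include <cstdio>
#include <numeric>
#include <vector>

int main(int argc, char ** argv)
{
  MPI_Init(&argc, &argv);

  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // will_send_to[p] = how many containers this rank intends to send to rank p
  // (here: one message to our right neighbor, as a stand-in for real data)
  std::vector<int> will_send_to(size, 0);
  will_send_to[(rank + 1) % size] = 1;

  // After the all-to-all, element p holds how many containers rank p will
  // send to *us*
  std::vector<int> will_receive_from(size, 0);
  MPI_Alltoall(will_send_to.data(), 1, MPI_INT,
               will_receive_from.data(), 1, MPI_INT, MPI_COMM_WORLD);

  const int n_receives =
    std::accumulate(will_receive_from.begin(), will_receive_from.end(), 0);

  std::printf("[%d] will post %d receive(s)\n", rank, n_receives);

  MPI_Finalize();
  return 0;
}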

◆ push_parallel_nbx_helper()

template<typename MapToContainers , typename SendFunctor , typename PossiblyReceiveFunctor , typename ActionFunctor >
void TIMPI::detail::push_parallel_nbx_helper ( const Communicator &            comm,
                                               MapToContainers &&              data,
                                               const SendFunctor &             send_functor,
                                               const PossiblyReceiveFunctor &  possibly_receive_functor,
                                               const ActionFunctor &           act_on_data 
                                             )

Definition at line 237 of file parallel_sync.h.

References TIMPI::any_source, empty_send_assertion(), TIMPI::Communicator::get_unique_tag(), TIMPI::Communicator::nonblocking_barrier(), TIMPI::Communicator::rank(), TIMPI::Communicator::send_mode(), TIMPI::Communicator::size(), TIMPI::Communicator::SYNCHRONOUS, TIMPI::Request::test(), and TIMPI::Request::wait().

Referenced by TIMPI::push_parallel_packed_range(), and TIMPI::push_parallel_vector_data().

242 {
243  typedef typename std::remove_reference<MapToContainers>::type::value_type::second_type
244  container_type;
245 
246  // This function must be run on all processors at once
247  timpi_parallel_only(comm);
248 
249  // This function implements the "NBX" algorithm from
250  // https://htor.inf.ethz.ch/publications/img/hoefler-dsde-protocols.pdf
251 
252  // We'll grab a tag so we can overlap request sends and receives
253  // without confusing one for the other
254  const auto tag = comm.get_unique_tag();
255 
256  // Save off the old send_mode so we can restore it after this
257  const auto old_send_mode = comm.send_mode();
258 
259  // Set the sending to synchronous - this is so that we can know when
260  // the sends are complete
261  const_cast<Communicator &>(comm).send_mode(Communicator::SYNCHRONOUS);
262 
263  // The send requests
264  std::list<Request> send_requests;
265 
266  const processor_id_type num_procs = comm.size();
267 
268  // Don't give us empty vectors to send. We'll yell at you (after
269  // the sync is done, so all ranks get out of it and we can throw an
270  // assertion failure everywhere) if we're debugging and we catch
271  // one.
272  //
273  // But in opt mode we'll just skip empty vectors.
274 #ifndef NDEBUG
275  processor_id_type empty_target_pid = processor_id_type(-1);
276 #endif
277 
278  for (auto & datapair : data)
279  {
280  // In the case of data partitioned into more processors than we
281  // have ranks, we "wrap around"
282  processor_id_type dest_pid = datapair.first % num_procs;
283  auto & datum = datapair.second;
284 
285  if (datum.empty())
286  {
287 #ifndef NDEBUG
288  empty_target_pid = dest_pid;
289 #endif
290  continue;
291  }
292 
293  // Just act on data if the user requested a send-to-self
294  if (dest_pid == comm.rank())
295  act_on_data(dest_pid, std::move(datum));
296  else
297  {
298  send_requests.emplace_back();
299  send_functor(dest_pid, datum, send_requests.back(), tag);
300  }
301  }
302 
303  // In serial we've now acted on all our data.
304  if (num_procs == 1)
305  return;
306 
307  // Whether or not the nonblocking barrier has started
308  bool started_barrier = false;
309  // Request for the nonblocking barrier
310  Request barrier_request;
311 
312  struct IncomingInfo
313  {
314  unsigned int src_pid = any_source;
315  Request request;
316  container_type data;
317  };
318 
319  // Storage for the incoming requests and data
320  // The last entry in this list will _always_ be an invalid entry
321  // that is available for use for processing the next incoming
322  // request. That is, its size will always be >= 1
323  std::list<IncomingInfo> incoming;
324  incoming.emplace_back(); // add the first invalid entry for receives
325 
326  // Helper for checking and processing receives if there are any; we
327  // need to check this in multiple places
328  auto possibly_receive = [&incoming, &tag, &possibly_receive_functor]() {
329  auto & next_incoming = incoming.back();
330  timpi_assert_equal_to(next_incoming.src_pid, any_source);
331  if (possibly_receive_functor(next_incoming.src_pid,
332  next_incoming.data,
333  next_incoming.request, tag))
334  {
335  timpi_assert(next_incoming.src_pid != any_source);
336 
337  // Insert another entry so that the next poll has something
338  // to fill into if needed
339  incoming.emplace_back();
340 
341  // We received something
342  return true;
343  }
344 
345  // We didn't receive anything
346  return false;
347  };
348 
349  // Keep looking for receives
350  while (true)
351  {
352  timpi_assert(incoming.size() > 0);
353 
354  // Check if there is a message and start receiving it
355  possibly_receive();
356 
357  // Work through the incoming requests and act on them if they're ready
358  incoming.remove_if
359  ([&act_on_data
360 #ifndef NDEBUG
361  ,&incoming
362 #endif
363  ](IncomingInfo & info)
364  {
365  // The last entry (marked by an invalid src pid) should be skipped;
366  // it needs to remain in the list for potential filling in the next poll
367  const bool is_invalid_entry = info.src_pid == any_source;
368  timpi_assert_equal_to(is_invalid_entry, &info == &incoming.back());
369 
370  if (is_invalid_entry)
371  return false;
372 
373  // If it's finished - let's act on it
374  if (info.request.test())
375  {
376  // Do any post-wait work
377  info.request.wait();
378 
379  // Act on the data
380  act_on_data(info.src_pid, std::move(info.data));
381 
382  // This removes it from the list
383  return true;
384  }
385 
386  // Not finished yet
387  return false;
388  });
389 
390  // Remove any sends that have completed in user space
391  send_requests.remove_if
392  ([](Request & req)
393  {
394  if (req.test())
395  {
396  // Do Post-Wait work
397  req.wait();
398  return true;
399  }
400 
401  // Not finished yet
402  return false;
403  });
404 
405  // If all of the sends are complete, we can start the barrier.
406  // We strongly believe that the MPI standard guarantees
407  // if a synchronous send is marked as completed, then there
408  // is a corresponding user-posted request for said send.
409  // Therefore, send_requests being empty is enough
410  // to state that our sends are done and everyone that we have
411  // sent data is expecting it. Double therefore, this condition
412  being satisfied on all processors in addition to all
413  receive requests being complete is a sufficient stopping
414  criterion
415  if (send_requests.empty() && !started_barrier)
416  {
417  started_barrier = true;
418  comm.nonblocking_barrier(barrier_request);
419  }
420 
421  // There is no data to act on (we reserve a single value in
422  // \p incoming for filling within the next poll loop)
423  if (incoming.size() == 1)
424  // We've started the barrier
425  if (started_barrier)
426  // The barrier is complete (everyone is done)
427  if (barrier_request.test())
428  // Profit
429  break;
430  }
431 
432  // There better not be anything left at this point
433  timpi_assert(!possibly_receive());
434 
435  // Reset the send mode
436  const_cast<Communicator &>(comm).send_mode(old_send_mode);
437 
438  // So, *did* we see any empty containers being sent?
439 #ifndef NDEBUG
440  empty_send_assertion(comm, empty_target_pid);
441 #endif // NDEBUG
442 }
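
For reference, below is a minimal, hypothetical skeleton of the NBX termination scheme cited above (Hoefler et al.), written in plain MPI-3 rather than TIMPI's API: synchronous nonblocking sends, a probe-and-receive poll loop, and a nonblocking barrier entered once a rank's own sends have completed. The ring pattern and single-int payload are invented for illustration; this is a sketch of the idea, not TIMPI's implementation.

// Hedged NBX skeleton: Issend + Iprobe/Recv polling + Ibarrier termination.
#include <mpi.h>
#include <cstdio>

int main(int argc, char ** argv)
{
  MPI_Init(&argc, &argv);

  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  const int tag = 42;

  // Each rank "pushes" one int to its right neighbor (stand-in for 'data')
  int payload = rank;
  const int dest = (rank + 1) % size;
  MPI_Request send_request;
  // Synchronous send: its completion implies the receiver has matched it
  MPI_Issend(&payload, 1, MPI_INT, dest, tag, MPI_COMM_WORLD, &send_request);

  bool started_barrier = false;
  MPI_Request barrier_request;

  while (true)
    {
      // Poll for an incoming message and receive it if one is pending
      int message_waiting = 0;
      MPI_Status status;
      MPI_Iprobe(MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &message_waiting, &status);
      if (message_waiting)
        {
          int incoming;
          MPI_Recv(&incoming, 1, MPI_INT, status.MPI_SOURCE, tag,
                   MPI_COMM_WORLD, MPI_STATUS_IGNORE);
          std::printf("[%d] received %d from %d\n", rank, incoming, status.MPI_SOURCE);
        }

      if (!started_barrier)
        {
          // Once our own synchronous send has completed, everyone we sent to
          // has posted a matching receive, so it is safe to enter the barrier
          int send_done = 0;
          MPI_Test(&send_request, &send_done, MPI_STATUS_IGNORE);
          if (send_done)
            {
              MPI_Ibarrier(MPI_COMM_WORLD, &barrier_request);
              started_barrier = true;
            }
        }
      else
        {
          // The barrier completes only after every rank has entered it, i.e.
          // after every send everywhere has been matched, so we can stop polling
          int barrier_done = 0;
          MPI_Test(&barrier_request, &barrier_done, MPI_STATUS_IGNORE);
          if (barrier_done)
            break;
        }
    }

  MPI_Finalize();
  return 0;
}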

◆ push_parallel_roundrobin_helper()

template<typename MapToContainers , typename SendReceiveFunctor , typename ActionFunctor >
void TIMPI::detail::push_parallel_roundrobin_helper ( const Communicator &        comm,
                                                      MapToContainers &&          data,
                                                      const SendReceiveFunctor &  sendreceive_functor,
                                                      const ActionFunctor &       act_on_data 
                                                    )

Definition at line 568 of file parallel_sync.h.

References empty_send_assertion(), TIMPI::Communicator::get_unique_tag(), TIMPI::Communicator::max(), TIMPI::Communicator::rank(), and TIMPI::Communicator::size().

Referenced by TIMPI::push_parallel_packed_range(), and TIMPI::push_parallel_vector_data().

572 {
573  typedef typename std::remove_reference<MapToContainers>::type::value_type::second_type
574  container_type;
575 
576  // This function must be run on all processors at once
577  timpi_parallel_only(comm);
578 
579  // This function implements the simplest protocol possible, fully
580  // synchronous. Every processor talks to every other. Only use this for
581  // debugging, and only when you're desperate.
582 
583  unsigned int num_procs = comm.size();
584 
585  // Don't give us empty vectors to send. We'll yell at you (after
586  // the sync is done, so all ranks get out of it and we can throw an
587  // assertion failure everywhere) if we're debugging and we catch
588  // one.
589  //
590  // But in opt mode we'll just skip empty vectors.
591 #ifndef NDEBUG
592  processor_id_type empty_target_pid = processor_id_type(-1);
593 #endif
594 
595  // Do multiple exchanges if we have an oversized data map
596  processor_id_type n_exchanges = 1;
597  for (auto & datapair : data)
598  {
599  const unsigned int dest_pid = datapair.first;
600  n_exchanges = std::max(n_exchanges, dest_pid/num_procs+1);
601 
602  if (datapair.second.empty())
603  {
604 #ifndef NDEBUG
605  empty_target_pid = dest_pid;
606 #endif
607  continue;
608  }
609  }
610 
611  comm.max(n_exchanges);
612 
613  // We'll grab a tag so responses and queries won't be confused when
614  // this is used within a pull
615  auto tag = comm.get_unique_tag();
616 
617  // Do the send_receives, blocking
618  for (processor_id_type e=0; e != n_exchanges; ++e)
619  for (processor_id_type p=0; p != num_procs; ++p)
620  {
621  const processor_id_type procup =
622  cast_int<processor_id_type>((comm.rank() + p) %
623  num_procs);
624  const processor_id_type procdown =
625  cast_int<processor_id_type>((comm.rank() + num_procs - p) %
626  num_procs);
627 
628  container_type empty_container;
629  auto data_it = data.find(procup + e*num_procs);
630  auto * const data_to_send =
631  (data_it == data.end()) ?
632  &empty_container : &data_it->second;
633 
634  container_type received_data;
635  sendreceive_functor(procup, *data_to_send,
636  procdown, received_data, tag);
637 
638  // Empty containers aren't *real* data, they're an artifact of
639  // doing send_receive with everyone. Just skip them.
640  if (!received_data.empty())
641  act_on_data(procdown, std::move(received_data));
642  }
643 
644  // So, *did* we see any empty containers being sent?
645 #ifndef NDEBUG
646  empty_send_assertion(comm, empty_target_pid);
647 #endif // NDEBUG
648 }
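
A minimal, hypothetical plain-MPI illustration of the pairing used above: on step p each rank sends to (rank + p) % size while receiving from (rank + size - p) % size, so every ordered pair of ranks is matched exactly once per sweep. The integer payload stands in for the real containers, and the fully blocking exchange is exactly why this backend is recommended only for debugging.

// Hedged round-robin sketch: one blocking MPI_Sendrecv per step, every rank
// paired with every other rank exactly once per sweep.
#include <mpi.h>
#include <cstdio>

int main(int argc, char ** argv)
{
  MPI_Init(&argc, &argv);

  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  const int tag = 7;

  for (int p = 0; p < size; ++p)
    {
      const int procup   = (rank + p) % size;         // we send to this rank
      const int procdown = (rank + size - p) % size;  // we receive from this rank

      int to_send = rank * 1000 + procup;  // stand-in for the real payload
      int received = -1;

      // Blocking, fully synchronous exchange; simple but slow
      MPI_Sendrecv(&to_send, 1, MPI_INT, procup, tag,
                   &received, 1, MPI_INT, procdown, tag,
                   MPI_COMM_WORLD, MPI_STATUS_IGNORE);

      std::printf("[%d] step %d: received %d from %d\n",
                  rank, p, received, procdown);
    }

  MPI_Finalize();
  return 0;
}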