https://mooseframework.inl.gov
RankMap.C
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://mooseframework.inl.gov
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
10 #include "RankMap.h"
11 #include "PerfGraphInterface.h"
12 
13 #include "MooseApp.h"
14 
15 #include "libmesh/parallel.h"
16 
18  : ParallelObject(comm), PerfGraphInterface(perf_graph, "RankMap")
19 {
20  TIME_SECTION("construct", 2, "Constructing RankMap");
21 
22  auto num_procs = n_processors();
23  _rank_to_hardware_id.resize(num_procs); // one hardware_id entry per rank
24 
25  Parallel::Communicator shmem_comm;
26  _communicator.split_by_type(MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, shmem_comm); // shmem_comm receives the sub-communicator produced by the split
27 
28  // Start with this process's own rank. After the broadcast below it holds the
29  // rank of the root (rank 0) of the shared memory communicator created above, so
30  // each process on the same node will end up with the same world_rank
31  processor_id_type world_rank = processor_id();
32 
33  // Broadcast the world rank of the sub group root to all processes within this communicator
34  shmem_comm.broadcast(world_rank, 0);
35 
36  // Gather every process's world_rank so each process holds the full list; world_ranks[pid] is read per rank below
37  std::vector<processor_id_type> world_ranks(num_procs);
38  _communicator.allgather(world_rank, world_ranks);
39 
40  // Map of world_rank (the group root's rank) to the hardware_id assigned to that group
41  std::map<unsigned int, unsigned int> world_rank_to_hardware_id;
42 
43  // Assign a contiguous unique numerical id to each shared memory group, in order of first appearance
44  unsigned int next_id = 0;
45 
46  for (MooseIndex(world_ranks) pid = 0; pid < world_ranks.size(); pid++)
47  {
48  auto world_rank = world_ranks[pid]; // note: shadows the outer world_rank for the rest of this iteration
49 
50  auto it = world_rank_to_hardware_id.lower_bound(world_rank); // doubles as the insertion hint if the key is absent
51 
52  unsigned int current_id = 0;
53 
54  // If we've seen this world_rank before then use its already given ID
55  if (it != world_rank_to_hardware_id.end() && it->first == world_rank)
56  current_id = it->second;
57  else // Create the new ID
58  {
59  current_id = next_id++;
60 
61  world_rank_to_hardware_id.emplace_hint(it, world_rank, current_id);
62  }
63 
64  _rank_to_hardware_id[pid] = current_id;
65 
66  // operator[] inserts an empty vector the first time a hardware_id is seen; pid is then appended to it
67  _hardware_id_to_ranks[current_id].emplace_back(pid);
68  }
69 }
void allgather(const T &send_data, std::vector< T, A > &recv_data) const
std::vector< unsigned int > _rank_to_hardware_id
Each entry corresponds to the hardware_id for that PID.
Definition: RankMap.h:63
std::unordered_map< unsigned int, std::vector< processor_id_type > > _hardware_id_to_ranks
Map of hardware_id -> ranks on that node.
Definition: RankMap.h:60
const Parallel::Communicator & _communicator
uint8_t processor_id_type
processor_id_type n_processors() const
Interface for objects interacting with the PerfGraph.
void broadcast(T &data, const unsigned int root_id=0, const bool identical_sizes=false) const
void split_by_type(int split_type, int key, info i, Communicator &target) const
RankMap(const libMesh::Parallel::Communicator &comm, PerfGraph &perf_graph)
Constructs and fills the map.
Definition: RankMap.C:17
processor_id_type processor_id() const
The PerfGraph will hold the master list of all registered performance segments and the head PerfNode...
Definition: PerfGraph.h:43