WorkBalance.C
//* This file is part of the MOOSE framework
//* https://www.mooseframework.org
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#include "WorkBalance.h"

// MOOSE includes
#include "MooseVariable.h"
#include "ThreadedElementLoopBase.h"
#include "ThreadedNodeLoop.h"

#include "libmesh/quadrature.h"

#include <numeric>

registerMooseObject("MooseApp", WorkBalance);

template <>
InputParameters
validParams<WorkBalance>()
{
  InputParameters params = validParams<GeneralVectorPostprocessor>();
  params.addClassDescription("Computes several metrics for workload balance per processor");

  // These are numbered this way because NL is always system 0 and Aux is system 1
  MooseEnum system_enum("ALL=-1 NL AUX", "ALL");
  params.addParam<MooseEnum>(
      "system",
      system_enum,
      "The system(s) to retrieve the number of DOFs from (NL, AUX, ALL). Default == ALL");

  params.addParam<bool>("sync_to_all_procs",
                        false,
                        "Whether or not to sync the vectors to all processors. By default we only "
                        "sync them to processor 0 so they can be written out. Setting this to "
                        "true will use more communication, but is necessary if you expect these "
                        "vectors to be available on all processors");

  return params;
}
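
// A minimal input-file sketch of how this VectorPostprocessor might be used
// (the block/object name is illustrative, not taken from this file; the
// parameters mirror the ones declared above):
//
//   [VectorPostprocessors]
//     [./work_balance]
//       type = WorkBalance
//       system = ALL              # or NL / AUX
//       sync_to_all_procs = false
//     [../]
//   []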

WorkBalance::WorkBalance(const InputParameters & parameters)
  : GeneralVectorPostprocessor(parameters),
    _system(getParam<MooseEnum>("system")),
    _rank_map(_app.rankMap()),
    _my_hardware_id(_rank_map.hardwareID(processor_id())),
    _sync_to_all_procs(getParam<bool>("sync_to_all_procs")),
    _local_num_elems(0),
    _local_num_nodes(0),
    _local_num_dofs(0),
    _local_num_partition_sides(0),
    _local_partition_surface_area(0),
    _local_num_partition_hardware_id_sides(0),
    _local_partition_hardware_id_surface_area(0),
    _pid(declareVector("pid")),
    _num_elems(declareVector("num_elems")),
    _num_nodes(declareVector("num_nodes")),
    _num_dofs(declareVector("num_dofs")),
    _num_partition_sides(declareVector("num_partition_sides")),
    _partition_surface_area(declareVector("partition_surface_area")),
    _num_partition_hardware_id_sides(declareVector("num_partition_hardware_id_sides")),
    _partition_hardware_id_surface_area(declareVector("partition_hardware_id_surface_area"))
{
}

void
WorkBalance::initialize()
{
  _local_num_elems = 0;
  _local_num_nodes = 0;
  _local_num_dofs = 0;
  _local_num_partition_sides = 0;
  _local_partition_surface_area = 0;
  _local_num_partition_hardware_id_sides = 0;
  _local_partition_hardware_id_surface_area = 0;
}

namespace
{

// Helper Threaded Loop for Elements
class WBElementLoop : public ThreadedElementLoopBase<ConstElemRange>
{
public:
  WBElementLoop(MooseMesh & mesh, int system, const RankMap & rank_map)
    : ThreadedElementLoopBase<ConstElemRange>(mesh),
      _system(system),
      _rank_map(rank_map),
      _my_hardware_id(rank_map.hardwareID(mesh.processor_id())),
      _local_num_elems(0),
      _local_num_dofs(0),
      _local_num_partition_sides(0),
      _local_partition_surface_area(0),
      _local_num_partition_hardware_id_sides(0),
      _local_partition_hardware_id_surface_area(0),
      _this_pid(_mesh.processor_id()) // Get this once because it is expensive
  {
  }

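  // Splitting constructor: Threads::parallel_reduce creates per-thread copies
  // through this constructor (each with zeroed tallies) and later merges them
  // back together via join().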
  WBElementLoop(WBElementLoop & x, Threads::split split)
    : ThreadedElementLoopBase<ConstElemRange>(x, split),
      _system(x._system),
      _rank_map(x._rank_map),
      _my_hardware_id(x._my_hardware_id),
      _local_num_elems(0),
      _local_num_dofs(0),
      _local_num_partition_sides(0),
      _local_partition_surface_area(0),
      _local_num_partition_hardware_id_sides(0),
      _local_partition_hardware_id_surface_area(0),
      _this_pid(x._this_pid)
  {
  }

  virtual ~WBElementLoop() {}

  virtual void pre() override
  {
    _local_num_elems = 0;
    _local_num_dofs = 0;
    _local_num_partition_sides = 0;
    _local_partition_surface_area = 0;
    _local_num_partition_hardware_id_sides = 0;
    _local_partition_hardware_id_surface_area = 0;
  }

  virtual void onElement(const Elem * elem) override
  {
    _local_num_elems++;

    // Find out how many dofs there are on this element
    if (_system == WorkBalance::ALL) // All systems
    {
      auto n_sys = elem->n_systems();
      for (decltype(n_sys) sys = 0; sys < n_sys; sys++)
      {
        auto n_vars = elem->n_vars(sys);

        for (decltype(n_vars) var = 0; var < n_vars; var++)
          _local_num_dofs += elem->n_dofs(sys, var);
      }
    }
    else // Particular system
    {
      auto n_vars = elem->n_vars(static_cast<unsigned int>(_system));

      for (decltype(n_vars) var = 0; var < n_vars; var++)
        _local_num_dofs += elem->n_dofs(static_cast<unsigned int>(_system), var);
    }
  }

  virtual void onInternalSide(const Elem * elem, unsigned int side) override
  {
    if (elem->neighbor_ptr(side)->processor_id() != _this_pid)
    {
      _local_num_partition_sides++;

      // Build the side so we can compute its volume
      auto side_elem = elem->build_side_ptr(side);
      auto volume = side_elem->volume();
      _local_partition_surface_area += volume;

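      // If the neighbor lives on a rank mapped to a different physical
      // compute node (see RankMap), this side also contributes to the
      // inter-node ("hardware id") metrics.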
      if (_my_hardware_id != _rank_map.hardwareID(elem->neighbor_ptr(side)->processor_id()))
      {
        _local_num_partition_hardware_id_sides++;
        _local_partition_hardware_id_surface_area += volume;
      }
    }
  }

  void join(const WBElementLoop & y)
  {
    _local_num_elems += y._local_num_elems;
    _local_num_dofs += y._local_num_dofs;
    _local_num_partition_sides += y._local_num_partition_sides;
    _local_partition_surface_area += y._local_partition_surface_area;
    _local_num_partition_hardware_id_sides += y._local_num_partition_hardware_id_sides;
    _local_partition_hardware_id_surface_area += y._local_partition_hardware_id_surface_area;
  }

  int _system;

  const RankMap & _rank_map;

  unsigned int _my_hardware_id;

  dof_id_type _local_num_elems;
  dof_id_type _local_num_dofs;
  dof_id_type _local_num_partition_sides;
  Real _local_partition_surface_area;
  dof_id_type _local_num_partition_hardware_id_sides;
  Real _local_partition_hardware_id_surface_area;

  processor_id_type _this_pid;
};

class WBNodeLoop : public ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>
{
public:
  WBNodeLoop(FEProblemBase & fe_problem, int system)
    : ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>(fe_problem),
      _system(system),
      _local_num_nodes(0),
      _local_num_dofs(0)
  {
  }

  WBNodeLoop(WBNodeLoop & x, Threads::split split)
    : ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>(x, split),
      _system(x._system),
      _local_num_nodes(0),
      _local_num_dofs(0)
  {
  }

  virtual void onNode(ConstNodeRange::const_iterator & node_it)
  {
    auto & node = *(*node_it);

    _local_num_nodes++;

    // Find out how many dofs there are on this node
    if (_system == WorkBalance::ALL) // All systems
    {
      auto n_sys = node.n_systems();
      for (decltype(n_sys) sys = 0; sys < n_sys; sys++)
      {
        auto n_vars = node.n_vars(sys);

        for (decltype(n_vars) var = 0; var < n_vars; var++)
          _local_num_dofs += node.n_dofs(sys, var);
      }
    }
    else // Particular system
    {
      auto n_vars = node.n_vars(static_cast<unsigned int>(_system));

      for (decltype(n_vars) var = 0; var < n_vars; var++)
        _local_num_dofs += node.n_dofs(static_cast<unsigned int>(_system), var);
    }
  }

  void join(WBNodeLoop & y)
  {
    _local_num_nodes += y._local_num_nodes;
    _local_num_dofs += y._local_num_dofs;
  }

  int _system;

  dof_id_type _local_num_nodes;
  dof_id_type _local_num_dofs;
};

} // End of anonymous namespace

void
WorkBalance::execute()
{
  auto & mesh = _fe_problem.mesh();

  // Get all of the Elem info first
  auto wb_el = WBElementLoop(mesh, _system, _rank_map);

  Threads::parallel_reduce(*mesh.getActiveLocalElementRange(), wb_el);

  _local_num_elems = wb_el._local_num_elems;
  _local_num_dofs = wb_el._local_num_dofs;
  _local_num_partition_sides = wb_el._local_num_partition_sides;
  _local_partition_surface_area = wb_el._local_partition_surface_area;
  _local_num_partition_hardware_id_sides = wb_el._local_num_partition_hardware_id_sides;
  _local_partition_hardware_id_surface_area = wb_el._local_partition_hardware_id_surface_area;

  // Now Node info
  auto wb_nl = WBNodeLoop(_fe_problem, _system);

  Threads::parallel_reduce(*mesh.getLocalNodeRange(), wb_nl);

  _local_num_nodes = wb_nl._local_num_nodes;
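  // Note the "+=": node-associated DOFs are added on top of the
  // element-associated DOFs counted above, so _local_num_dofs ends up with
  // the combined total for this processor.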
  _local_num_dofs += wb_nl._local_num_dofs;
}

void
WorkBalance::finalize()
{
  if (!_sync_to_all_procs)
  {
    // Gather the results down to processor 0
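    // Each rank contributes a single scalar; on processor 0 the gathered
    // vectors end up with one entry per rank, indexed by processor id.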
    _communicator.gather(0, static_cast<Real>(_local_num_elems), _num_elems);
    _communicator.gather(0, static_cast<Real>(_local_num_nodes), _num_nodes);
    _communicator.gather(0, static_cast<Real>(_local_num_dofs), _num_dofs);
    _communicator.gather(0, static_cast<Real>(_local_num_partition_sides), _num_partition_sides);
    _communicator.gather(0, _local_partition_surface_area, _partition_surface_area);
    _communicator.gather(0,
                         static_cast<Real>(_local_num_partition_hardware_id_sides),
                         _num_partition_hardware_id_sides);
    _communicator.gather(
        0, _local_partition_hardware_id_surface_area, _partition_hardware_id_surface_area);
  }
  else
  {
    // Gather the results down to all procs
    _communicator.allgather(static_cast<Real>(_local_num_elems), _num_elems);
    _communicator.allgather(static_cast<Real>(_local_num_nodes), _num_nodes);
    _communicator.allgather(static_cast<Real>(_local_num_dofs), _num_dofs);
    _communicator.allgather(static_cast<Real>(_local_num_partition_sides), _num_partition_sides);
    _communicator.allgather(_local_partition_surface_area, _partition_surface_area);
    _communicator.allgather(static_cast<Real>(_local_num_partition_hardware_id_sides),
                            _num_partition_hardware_id_sides);
    _communicator.allgather(_local_partition_hardware_id_surface_area,
                            _partition_hardware_id_surface_area);
  }

  // Fill in the PID column - this just makes plotting easier
  _pid.resize(_num_elems.size());
  std::iota(_pid.begin(), _pid.end(), 0);
}