WorkBalance.C
//* This file is part of the MOOSE framework
//* https://mooseframework.inl.gov
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#include "WorkBalance.h"

// MOOSE includes
#include "MooseVariable.h"
#include "ThreadedElementLoopBase.h"
#include "ThreadedNodeLoop.h"
#include "PetscExternalPartitioner.h"
#include "CastUniquePointer.h"

#include "libmesh/quadrature.h"
#include "libmesh/elem_side_builder.h"

#include <numeric>

registerMooseObject("MooseApp", WorkBalance);

InputParameters
WorkBalance::validParams()
{
  InputParameters params = GeneralVectorPostprocessor::validParams();
  params.addClassDescription("Computes several metrics for workload balance per processor");

  // These are numbered this way because NL is always system 0 and Aux is system 1
  MooseEnum system_enum("ALL=-1 NL AUX", "ALL");
  params.addParam<MooseEnum>(
      "system",
      system_enum,
      "The system(s) to retrieve the number of DOFs from (NL, AUX, ALL). Default == ALL");

  params.addParam<bool>("sync_to_all_procs",
                        false,
                        "Whether or not to sync the vectors to all processors. By default we only "
                        "sync them to processor 0 so they can be written out. Setting this to "
                        "true will use more communication, but is necessary if you expect these "
                        "vectors to be available on all processors");

  MultiMooseEnum balances(
      "num_elems=0 num_nodes=1 num_dofs=2 num_partition_sides=3 partition_surface_area=4 "
      "num_partition_hardware_id_sides=5 partition_hardware_id_surface_area=6",
      "num_elems num_nodes num_dofs num_partition_sides partition_surface_area "
      "num_partition_hardware_id_sides partition_hardware_id_surface_area");
  params.addParam<MultiMooseEnum>(
      "balances", balances, "Which metrics do you want to use to represent work balance");
  return params;
}
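
// For illustration only (not part of this file): a minimal input-file block using this
// object might look like the following, where the block name 'work_balance' is arbitrary
// and every parameter shown is optional:
//
//   [VectorPostprocessors]
//     [work_balance]
//       type = WorkBalance
//       system = ALL
//       balances = 'num_elems num_dofs num_partition_sides'
//       sync_to_all_procs = false
//     []
//   []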

WorkBalance::WorkBalance(const InputParameters & parameters)
  : GeneralVectorPostprocessor(parameters),
    _system(getParam<MooseEnum>("system")),
    _rank_map(_app.rankMap()),
    _my_hardware_id(_rank_map.hardwareID(processor_id())),
    _sync_to_all_procs(getParam<bool>("sync_to_all_procs")),
    _local_num_elems(0),
    _local_num_nodes(0),
    _local_num_dofs(0),
    _local_num_partition_sides(0),
    _local_partition_surface_area(0),
    _local_num_partition_hardware_id_sides(0),
    _local_partition_hardware_id_surface_area(0),
    _pid(declareVector("pid")),
    _balances(getParam<MultiMooseEnum>("balances"))
{
  for (auto & balance : _balances)
    _balance_vectors.emplace(balance,
                             &declareVector(MooseUtils::toLower(
                                 balance))); // Use 'toLower' to make names consistent with the
                                             // original interface
}
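
// Each requested balance metric gets its own VPP vector, stored in _balance_vectors under
// the metric's name; finalize() later fills each vector with one entry per MPI rank.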

void
WorkBalance::initialize()
{
  _local_num_elems = 0;
  _local_num_nodes = 0;
  _local_num_dofs = 0;
  _local_num_partition_sides = 0;
  _local_partition_surface_area = 0;
  _local_num_partition_hardware_id_sides = 0;
  _local_partition_hardware_id_surface_area = 0;
}

namespace
{

// Helper Threaded Loop for Elements
class WBElementLoop : public ThreadedElementLoopBase<ConstElemRange>
{
public:
  WBElementLoop(MooseMesh & mesh, int system, const RankMap & rank_map)
    : ThreadedElementLoopBase(mesh),
      _system(system),
      _rank_map(rank_map),
      _my_hardware_id(rank_map.hardwareID(mesh.processor_id())),
      _local_num_elems(0),
      _local_num_dofs(0),
      _local_num_partition_sides(0),
      _local_partition_surface_area(0),
      _local_num_partition_hardware_id_sides(0),
      _local_partition_hardware_id_surface_area(0),
      _this_pid(_mesh.processor_id()) // Get this once because it is expensive
  {
    // This is required because dynamic_pointer_cast() requires an l-value
    auto partitioner = mesh.getMesh().partitioner()->clone();
    _petsc_partitioner = dynamic_pointer_cast<PetscExternalPartitioner>(partitioner);
  }
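
  // Note: Partitioner::clone() returns a std::unique_ptr, so detecting a PETSc-backed
  // partitioner requires the unique_ptr-aware dynamic_pointer_cast() from
  // CastUniquePointer.h; when the cast fails, _petsc_partitioner is simply null and the
  // unweighted counting paths below are taken.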

  WBElementLoop(WBElementLoop & x, Threads::split split)
    : ThreadedElementLoopBase(x, split),
      _system(x._system),
      _rank_map(x._rank_map),
      _my_hardware_id(x._my_hardware_id),
      _local_num_elems(0),
      _local_num_dofs(0),
      _local_num_partition_sides(0),
      _local_partition_surface_area(0),
      _local_num_partition_hardware_id_sides(0),
      _local_partition_hardware_id_surface_area(0),
      _this_pid(x._this_pid)
  {
    if (x._petsc_partitioner)
    {
      // This is required because dynamic_pointer_cast() requires an l-value
      auto partitioner = x._petsc_partitioner->clone();
      _petsc_partitioner = dynamic_pointer_cast<PetscExternalPartitioner>(partitioner);
    }
  }

  virtual ~WBElementLoop() {}

  virtual void pre() override
  {
    _local_num_elems = 0;
    _local_num_dofs = 0;
    _local_num_partition_sides = 0;
    _local_partition_surface_area = 0;
    _local_num_partition_hardware_id_sides = 0;
    _local_partition_hardware_id_surface_area = 0;
  }

  virtual void onElement(const Elem * elem) override
  {
    if (_petsc_partitioner && _petsc_partitioner->applyElementEeight())
    {
      // We should change partitioner interface to take const
      // But at this point let us keep API intact
      _local_num_elems += _petsc_partitioner->computeElementWeight(const_cast<Elem &>(*elem));
    }
    else
      _local_num_elems++;

    // Find out how many dofs there are on this element
    if (_system == WorkBalance::ALL) // All systems
    {
      auto n_sys = elem->n_systems();
      for (decltype(n_sys) sys = 0; sys < n_sys; sys++)
      {
        auto n_vars = elem->n_vars(sys);

        for (decltype(n_vars) var = 0; var < n_vars; var++)
          _local_num_dofs += elem->n_dofs(sys, var);
      }
    }
    else // Particular system
    {
      auto n_vars = elem->n_vars(static_cast<unsigned int>(_system));

      for (decltype(n_vars) var = 0; var < n_vars; var++)
        _local_num_dofs += elem->n_dofs(static_cast<unsigned int>(_system), var);
    }
  }
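
  // Note: Elem::n_dofs() only counts degrees of freedom stored on the element itself
  // (e.g. for elemental variables such as constant monomials); nodal degrees of freedom
  // are tallied separately by WBNodeLoop below, so the two loops together produce the
  // complete local dof count.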

  virtual void onInternalSide(const Elem * elem, unsigned int side) override
  {
    if (elem->neighbor_ptr(side)->processor_id() != _this_pid)
    {
      if (_petsc_partitioner && _petsc_partitioner->applySideWeight())
      {
        // We should change partitioner interface to take const
        // But at this point let us keep API intact
        _local_num_partition_sides +=
            _petsc_partitioner->computeSideWeight(const_cast<Elem &>(*elem), side);
      }
      else
        _local_num_partition_sides++;

      // NOTE: we do not want to account for different coordinate systems here, so
      // using volume from libmesh elem is fine here
      auto volume = _elem_side_builder(*elem, side).volume();
      _local_partition_surface_area += volume;

      if (_my_hardware_id != _rank_map.hardwareID(elem->neighbor_ptr(side)->processor_id()))
      {
        _local_num_partition_hardware_id_sides++;
        _local_partition_hardware_id_surface_area += volume;
      }
    }
  }
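
  // The hardware_id variants only count sides whose neighboring element lives on a
  // different physical compute node: off-node communication is typically more expensive
  // than communication between ranks sharing a node, so these metrics isolate the
  // off-node portion of the partition boundary.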

  void join(const WBElementLoop & y)
  {
    _local_num_elems += y._local_num_elems;
    _local_num_dofs += y._local_num_dofs;
    _local_num_partition_sides += y._local_num_partition_sides;
    _local_partition_surface_area += y._local_partition_surface_area;
    _local_num_partition_hardware_id_sides += y._local_num_partition_hardware_id_sides;
    _local_partition_hardware_id_surface_area += y._local_partition_hardware_id_surface_area;
  }

  int _system;

  const RankMap & _rank_map;

  unsigned int _my_hardware_id;

  dof_id_type _local_num_elems;
  dof_id_type _local_num_dofs;
  dof_id_type _local_num_partition_sides;
  Real _local_partition_surface_area;
  dof_id_type _local_num_partition_hardware_id_sides;
  Real _local_partition_hardware_id_surface_area;

  processor_id_type _this_pid;

  libMesh::ElemSideBuilder _elem_side_builder;

  std::unique_ptr<PetscExternalPartitioner> _petsc_partitioner;

private:
  bool shouldComputeInternalSide(const Elem & /*elem*/, const Elem & /*neighbor*/) const override
  {
    return true;
  }
};

class WBNodeLoop : public ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>
{
public:
  WBNodeLoop(FEProblemBase & fe_problem, int system)
    : ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>(fe_problem),
      _system(system),
      _local_num_nodes(0),
      _local_num_dofs(0)
  {
  }

  WBNodeLoop(WBNodeLoop & x, Threads::split split)
    : ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>(x, split),
      _system(x._system),
      _local_num_nodes(0),
      _local_num_dofs(0)
  {
  }

  virtual void onNode(ConstNodeRange::const_iterator & node_it)
  {
    auto & node = *(*node_it);

    _local_num_nodes++;

    // Find out how many dofs there are on this node
    if (_system == WorkBalance::ALL) // All systems
    {
      auto n_sys = node.n_systems();
      for (decltype(n_sys) sys = 0; sys < n_sys; sys++)
      {
        auto n_vars = node.n_vars(sys);

        for (decltype(n_vars) var = 0; var < n_vars; var++)
          _local_num_dofs += node.n_dofs(sys, var);
      }
    }
    else // Particular system
    {
      auto n_vars = node.n_vars(static_cast<unsigned int>(_system));

      for (decltype(n_vars) var = 0; var < n_vars; var++)
        _local_num_dofs += node.n_dofs(static_cast<unsigned int>(_system), var);
    }
  }

  void join(WBNodeLoop & y)
  {
    _local_num_nodes += y._local_num_nodes;
    _local_num_dofs += y._local_num_dofs;
  }

  int _system;

  dof_id_type _local_num_nodes;
  dof_id_type _local_num_dofs;
};

} // End of anonymous namespace

void
WorkBalance::execute()
{
  auto & mesh = _fe_problem.mesh();

  // Get all of the Elem info first
  WBElementLoop wb_el(mesh, _system, _rank_map);

  Threads::parallel_reduce(*mesh.getActiveLocalElementRange(), wb_el);

  _local_num_elems = wb_el._local_num_elems;
  _local_num_dofs = wb_el._local_num_dofs;
  _local_num_partition_sides = wb_el._local_num_partition_sides;
  _local_partition_surface_area = wb_el._local_partition_surface_area;
  _local_num_partition_hardware_id_sides = wb_el._local_num_partition_hardware_id_sides;
  _local_partition_hardware_id_surface_area = wb_el._local_partition_hardware_id_surface_area;

  // Now Node info
  WBNodeLoop wb_nl(_fe_problem, _system);

  Threads::parallel_reduce(*mesh.getLocalNodeRange(), wb_nl);

  _local_num_nodes = wb_nl._local_num_nodes;
  _local_num_dofs += wb_nl._local_num_dofs;
}
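
// Threads::parallel_reduce() implements a TBB-style reduction: worker copies of the loop
// objects are created through their (x, Threads::split) constructors, each copy
// accumulates over its chunk of the range, and the partial tallies are combined via
// join(), so wb_el and wb_nl end up holding totals over all local threads.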

void
WorkBalance::gather(int balance_id, VectorPostprocessorValue & vppv)
{
  if (!_sync_to_all_procs)
  {
    switch (balance_id)
    {
      case 0: // num_elems
        _communicator.gather(0, static_cast<Real>(_local_num_elems), vppv);
        break;
      case 1: // num_nodes
        _communicator.gather(0, static_cast<Real>(_local_num_nodes), vppv);
        break;
      case 2: // num_dofs
        _communicator.gather(0, static_cast<Real>(_local_num_dofs), vppv);
        break;
      case 3: // num_partition_sides
        _communicator.gather(0, static_cast<Real>(_local_num_partition_sides), vppv);
        break;
      case 4: // partition_surface_area
        _communicator.gather(0, _local_partition_surface_area, vppv);
        break;
      case 5: // num_partition_hardware_id_sides
        _communicator.gather(0, static_cast<Real>(_local_num_partition_hardware_id_sides), vppv);
        break;
      case 6: // partition_hardware_id_surface_area
        _communicator.gather(0, _local_partition_hardware_id_surface_area, vppv);
        break;
      default:
        mooseError("Unknown balance type: ", balance_id);
    }
  }
  else
  {
    switch (balance_id)
    {
      case 0: // num_elems
        _communicator.allgather(static_cast<Real>(_local_num_elems), vppv);
        break;
      case 1: // num_nodes
        _communicator.allgather(static_cast<Real>(_local_num_nodes), vppv);
        break;
      case 2: // num_dofs
        _communicator.allgather(static_cast<Real>(_local_num_dofs), vppv);
        break;
      case 3: // num_partition_sides
        _communicator.allgather(static_cast<Real>(_local_num_partition_sides), vppv);
        break;
      case 4: // partition_surface_area
        _communicator.allgather(_local_partition_surface_area, vppv);
        break;
      case 5: // num_partition_hardware_id_sides
        _communicator.allgather(static_cast<Real>(_local_num_partition_hardware_id_sides), vppv);
        break;
      case 6: // partition_hardware_id_surface_area
        _communicator.allgather(_local_partition_hardware_id_surface_area, vppv);
        break;
      default:
        mooseError("Unknown balance type: ", balance_id);
    }
  }
}
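
// gather(0, ...) collects one value per rank onto processor 0 only, while allgather(...)
// leaves a complete copy of the vector on every rank at the cost of extra communication;
// that is exactly the trade-off selected by sync_to_all_procs. The integer counts are
// cast to Real because VectorPostprocessorValue is a std::vector<Real>.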

void
WorkBalance::finalize()
{
  for (auto & balance : _balances)
  {
    auto balance_id = balance.id();

    auto & balance_vector = *_balance_vectors.at(balance);

    gather(balance_id, balance_vector);
  }
  // Fill in the PID column - this just makes plotting easier
  _pid.resize(_communicator.size());
  std::iota(_pid.begin(), _pid.end(), 0);
}
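
// For illustration only: with balances = 'num_elems num_dofs' on four ranks, a CSV output
// of this VectorPostprocessor would hold columns pid, num_elems and num_dofs with one row
// per MPI rank (pid 0 through 3); the actual counts depend on the mesh and partitioner.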