//* This file is part of the MOOSE framework
//* https://mooseframework.inl.gov
//*
//* All rights reserved, see COPYRIGHT for full restrictions
//* https://github.com/idaholab/moose/blob/master/COPYRIGHT
//*
//* Licensed under LGPL 2.1, please see LICENSE for details
//* https://www.gnu.org/licenses/lgpl-2.1.html

#include "WorkBalance.h"

// MOOSE includes
#include "MooseVariable.h"
#include "ThreadedElementLoopBase.h"
#include "ThreadedNodeLoop.h"
#include "PetscExternalPartitioner.h"
#include "CastUniquePointer.h"

#include "libmesh/quadrature.h"
#include "libmesh/elem_side_builder.h"

#include <numeric>

registerMooseObject("MooseApp", WorkBalance);

InputParameters
WorkBalance::validParams()
{
  InputParameters params = GeneralVectorPostprocessor::validParams();
  params.addClassDescription("Computes several metrics for workload balance per processor");

  // These are numbered this way because NL is always system 0 and Aux is system 1
  MooseEnum system_enum("ALL=-1 NL AUX", "ALL");
  params.addParam<MooseEnum>(
      "system",
      system_enum,
      "The system(s) to retrieve the number of DOFs from (NL, AUX, ALL). Default == ALL");

  params.addParam<bool>("sync_to_all_procs",
                        false,
                        "Whether or not to sync the vectors to all processors. By default we only "
                        "sync them to processor 0 so they can be written out. Setting this to "
                        "true will use more communication, but is necessary if you expect these "
                        "vectors to be available on all processors");

  MultiMooseEnum balances(
      "num_elems=0 num_nodes=1 num_dofs=2 num_partition_sides=3 partition_surface_area=4 "
      "num_partition_hardware_id_sides=5 partition_hardware_id_surface_area=6",
      "num_elems num_nodes num_dofs num_partition_sides partition_surface_area "
      "num_partition_hardware_id_sides partition_hardware_id_surface_area");
  params.addParam<MultiMooseEnum>(
      "balances", balances, "Which metrics do you want to use to represent work balance");
  return params;
}
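
// A representative input-file block for this object. The block name and the
// particular metric choices below are illustrative only; the parameter names
// all come from validParams() above:
//
//   [VectorPostprocessors]
//     [balance]
//       type = WorkBalance
//       system = ALL
//       balances = 'num_elems num_dofs num_partition_sides'
//       sync_to_all_procs = false
//     []
//   []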

WorkBalance::WorkBalance(const InputParameters & parameters)
  : GeneralVectorPostprocessor(parameters),
    _system(getParam<MooseEnum>("system")),
    _rank_map(_app.rankMap()),
    _my_hardware_id(_rank_map.hardwareID(processor_id())),
    _sync_to_all_procs(getParam<bool>("sync_to_all_procs")),
    _local_num_elems(0),
    _local_num_nodes(0),
    _local_num_dofs(0),
    _local_num_partition_sides(0),
    _local_partition_surface_area(0),
    _local_num_partition_hardware_id_sides(0),
    _local_partition_hardware_id_surface_area(0),
    _pid(declareVector("pid")),
    _balances(getParam<MultiMooseEnum>("balances"))
{
  for (auto & balance : _balances)
    // Use 'toLower' to make the names consistent with the original interface
    _balance_vectors[balance] = &declareVector(MooseUtils::toLower(balance));
}

void
WorkBalance::initialize()
{
  _local_num_elems = 0;
  _local_num_nodes = 0;
  _local_num_dofs = 0;
  _local_num_partition_sides = 0;
  _local_partition_surface_area = 0;
  _local_num_partition_hardware_id_sides = 0;
  _local_partition_hardware_id_surface_area = 0;
}

namespace
{

// Helper Threaded Loop for Elements
class WBElementLoop : public ThreadedElementLoopBase<ConstElemRange>
{
public:
  WBElementLoop(MooseMesh & mesh, int system, const RankMap & rank_map)
    : ThreadedElementLoopBase(mesh),
      _system(system),
      _rank_map(rank_map),
      _my_hardware_id(rank_map.hardwareID(mesh.processor_id())),
      _local_num_elems(0),
      _local_num_dofs(0),
      _local_num_partition_sides(0),
      _local_partition_surface_area(0),
      _local_num_partition_hardware_id_sides(0),
      _local_partition_hardware_id_surface_area(0),
      _this_pid(_mesh.processor_id()) // Get this once because it is expensive
  {
    // This is required because dynamic_pointer_cast() requires an l-value
    auto partitioner = mesh.getMesh().partitioner()->clone();
    _petsc_partitioner = dynamic_pointer_cast<PetscExternalPartitioner>(partitioner);
  }

  WBElementLoop(WBElementLoop & x, Threads::split split)
    : ThreadedElementLoopBase(x, split),
      _system(x._system),
      _rank_map(x._rank_map),
      _my_hardware_id(x._my_hardware_id),
      _local_num_elems(0),
      _local_num_dofs(0),
      _local_num_partition_sides(0),
      _local_partition_surface_area(0),
      _local_num_partition_hardware_id_sides(0),
      _local_partition_hardware_id_surface_area(0),
      _this_pid(x._this_pid)
  {
    if (x._petsc_partitioner)
    {
      // This is required because dynamic_pointer_cast() requires an l-value
      auto partitioner = x._petsc_partitioner->clone();
      _petsc_partitioner = dynamic_pointer_cast<PetscExternalPartitioner>(partitioner);
    }
  }

  virtual ~WBElementLoop() {}

  virtual void pre() override
  {
    _local_num_elems = 0;
    _local_num_dofs = 0;
    _local_num_partition_sides = 0;
    _local_partition_surface_area = 0;
    _local_num_partition_hardware_id_sides = 0;
    _local_partition_hardware_id_surface_area = 0;
  }

  virtual void onElement(const Elem * elem) override
  {
    if (_petsc_partitioner && _petsc_partitioner->applyElementWeight())
    {
      // The partitioner interface should ideally take a const Elem &, but we
      // keep the existing API intact for now
      _local_num_elems += _petsc_partitioner->computeElementWeight(const_cast<Elem &>(*elem));
    }
    else
      _local_num_elems++;

    // Find out how many dofs there are on this element
    if (_system == WorkBalance::ALL) // All systems
    {
      auto n_sys = elem->n_systems();
      for (decltype(n_sys) sys = 0; sys < n_sys; sys++)
      {
        auto n_vars = elem->n_vars(sys);

        for (decltype(n_vars) var = 0; var < n_vars; var++)
          _local_num_dofs += elem->n_dofs(sys, var);
      }
    }
    else // Particular system
    {
      auto n_vars = elem->n_vars(static_cast<unsigned int>(_system));

      for (decltype(n_vars) var = 0; var < n_vars; var++)
        _local_num_dofs += elem->n_dofs(static_cast<unsigned int>(_system), var);
    }
  }

  virtual void onInternalSide(const Elem * elem, unsigned int side) override
  {
    if (elem->neighbor_ptr(side)->processor_id() != _this_pid)
    {
      if (_petsc_partitioner && _petsc_partitioner->applySideWeight())
      {
        // The partitioner interface should ideally take a const Elem &, but we
        // keep the existing API intact for now
        _local_num_partition_sides +=
            _petsc_partitioner->computeSideWeight(const_cast<Elem &>(*elem), side);
      }
      else
        _local_num_partition_sides++;

      // NOTE: we do not want to account for different coordinate systems here, so
      // using volume from the libMesh elem is fine here
      auto volume = _elem_side_builder(*elem, side).volume();
      _local_partition_surface_area += volume;

      if (_my_hardware_id != _rank_map.hardwareID(elem->neighbor_ptr(side)->processor_id()))
      {
        _local_num_partition_hardware_id_sides++;
        _local_partition_hardware_id_surface_area += volume;
      }
    }
  }

  void join(const WBElementLoop & y)
  {
    _local_num_elems += y._local_num_elems;
    _local_num_dofs += y._local_num_dofs;
    _local_num_partition_sides += y._local_num_partition_sides;
    _local_partition_surface_area += y._local_partition_surface_area;
    _local_num_partition_hardware_id_sides += y._local_num_partition_hardware_id_sides;
    _local_partition_hardware_id_surface_area += y._local_partition_hardware_id_surface_area;
  }

  int _system;

  const RankMap & _rank_map;

  unsigned int _my_hardware_id;

  dof_id_type _local_num_elems;
  dof_id_type _local_num_dofs;
  dof_id_type _local_num_partition_sides;
  Real _local_partition_surface_area;
  dof_id_type _local_num_partition_hardware_id_sides;
  Real _local_partition_hardware_id_surface_area;

  processor_id_type _this_pid;

  libMesh::ElemSideBuilder _elem_side_builder;

  std::unique_ptr<PetscExternalPartitioner> _petsc_partitioner;

private:
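  // Visit every internal side the loop encounters; the cross-processor
  // filtering we actually care about happens in onInternalSide() above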
  bool shouldComputeInternalSide(const Elem & /*elem*/, const Elem & /*neighbor*/) const override
  {
    return true;
  }
};

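// Helper Threaded Loop for Nodes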
class WBNodeLoop : public ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>
{
public:
  WBNodeLoop(FEProblemBase & fe_problem, int system)
    : ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>(fe_problem),
      _system(system),
      _local_num_nodes(0),
      _local_num_dofs(0)
  {
  }

  WBNodeLoop(WBNodeLoop & x, Threads::split split)
    : ThreadedNodeLoop<ConstNodeRange, ConstNodeRange::const_iterator>(x, split),
      _system(x._system),
      _local_num_nodes(0),
      _local_num_dofs(0)
  {
  }

  virtual void onNode(ConstNodeRange::const_iterator & node_it)
  {
    auto & node = *(*node_it);

    _local_num_nodes++;

    // Find out how many dofs there are on this node
    if (_system == WorkBalance::ALL) // All systems
    {
      auto n_sys = node.n_systems();
      for (decltype(n_sys) sys = 0; sys < n_sys; sys++)
      {
        auto n_vars = node.n_vars(sys);

        for (decltype(n_vars) var = 0; var < n_vars; var++)
          _local_num_dofs += node.n_dofs(sys, var);
      }
    }
    else // Particular system
    {
      auto n_vars = node.n_vars(static_cast<unsigned int>(_system));

      for (decltype(n_vars) var = 0; var < n_vars; var++)
        _local_num_dofs += node.n_dofs(static_cast<unsigned int>(_system), var);
    }
  }

  void join(WBNodeLoop & y)
  {
    _local_num_nodes += y._local_num_nodes;
    _local_num_dofs += y._local_num_dofs;
  }

  int _system;

  dof_id_type _local_num_nodes;
  dof_id_type _local_num_dofs;
};

} // End of anonymous namespace

void
WorkBalance::execute()
{
  auto & mesh = _fe_problem.mesh();

  // Get all of the Elem info first
  WBElementLoop wb_el(mesh, _system, _rank_map);

  Threads::parallel_reduce(*mesh.getActiveLocalElementRange(), wb_el);

  _local_num_elems = wb_el._local_num_elems;
  _local_num_dofs = wb_el._local_num_dofs;
  _local_num_partition_sides = wb_el._local_num_partition_sides;
  _local_partition_surface_area = wb_el._local_partition_surface_area;
  _local_num_partition_hardware_id_sides = wb_el._local_num_partition_hardware_id_sides;
  _local_partition_hardware_id_surface_area = wb_el._local_partition_hardware_id_surface_area;

  // Now Node info
  WBNodeLoop wb_nl(_fe_problem, _system);

  Threads::parallel_reduce(*mesh.getLocalNodeRange(), wb_nl);

  _local_num_nodes = wb_nl._local_num_nodes;
  _local_num_dofs += wb_nl._local_num_dofs;
}

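// Gather a single balance metric onto processor 0 by default, or onto all
// processors when sync_to_all_procs = true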
void
WorkBalance::gather(int balance_id, VectorPostprocessorValue & vppv)
{
  if (!_sync_to_all_procs)
  {
    switch (balance_id)
    {
      case 0: // num_elems
        _communicator.gather(0, static_cast<Real>(_local_num_elems), vppv);
        break;
      case 1: // num_nodes
        _communicator.gather(0, static_cast<Real>(_local_num_nodes), vppv);
        break;
      case 2: // num_dofs
        _communicator.gather(0, static_cast<Real>(_local_num_dofs), vppv);
        break;
      case 3: // num_partition_sides
        _communicator.gather(0, static_cast<Real>(_local_num_partition_sides), vppv);
        break;
      case 4: // partition_surface_area
        _communicator.gather(0, _local_partition_surface_area, vppv);
        break;
      case 5: // num_partition_hardware_id_sides
        _communicator.gather(0, static_cast<Real>(_local_num_partition_hardware_id_sides), vppv);
        break;
      case 6: // partition_hardware_id_surface_area
        _communicator.gather(0, _local_partition_hardware_id_surface_area, vppv);
        break;
      default:
        mooseError("Unknown balance type: ", balance_id);
    }
  }
  else
  {
    switch (balance_id)
    {
      case 0: // num_elems
        _communicator.allgather(static_cast<Real>(_local_num_elems), vppv);
        break;
      case 1: // num_nodes
        _communicator.allgather(static_cast<Real>(_local_num_nodes), vppv);
        break;
      case 2: // num_dofs
        _communicator.allgather(static_cast<Real>(_local_num_dofs), vppv);
        break;
      case 3: // num_partition_sides
        _communicator.allgather(static_cast<Real>(_local_num_partition_sides), vppv);
        break;
      case 4: // partition_surface_area
        _communicator.allgather(_local_partition_surface_area, vppv);
        break;
      case 5: // num_partition_hardware_id_sides
        _communicator.allgather(static_cast<Real>(_local_num_partition_hardware_id_sides), vppv);
        break;
      case 6: // partition_hardware_id_surface_area
        _communicator.allgather(_local_partition_hardware_id_surface_area, vppv);
        break;
      default:
        mooseError("Unknown balance type: ", balance_id);
    }
  }
}

void
WorkBalance::finalize()
{
  for (auto & balance : _balances)
  {
    auto balance_id = balance.id();

    auto & balance_vector = *_balance_vectors.at(balance);

    gather(balance_id, balance_vector);
  }
  // Fill in the PID column - this just makes plotting easier
  _pid.resize(_communicator.size());
  std::iota(_pid.begin(), _pid.end(), 0);
}
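
// Downstream objects can read these vectors through the
// VectorPostprocessorInterface; a sketch, with an illustrative object name:
//
//   const VectorPostprocessorValue & num_elems =
//       getVectorPostprocessorValueByName("balance", "num_elems");
//
// If such a consumer needs the values on every rank, set sync_to_all_procs =
// true; otherwise the gathered vectors are only complete on processor 0.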