Line data Source code
1 : //* This file is part of the MOOSE framework
2 : //* https://mooseframework.inl.gov
3 : //*
4 : //* All rights reserved, see COPYRIGHT for full restrictions
5 : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 : //*
7 : //* Licensed under LGPL 2.1, please see LICENSE for details
8 : //* https://www.gnu.org/licenses/lgpl-2.1.html
9 :
10 : #pragma once
11 :
12 : // MOOSE Includes
13 : #include "MooseTypes.h"
14 : #include "PerfNode.h"
15 : #include "IndirectSort.h"
16 : #include "ConsoleStream.h"
17 : #include "ConsoleStreamInterface.h"
18 : #include "MooseError.h"
19 : #include "MemoryUtils.h"
20 : #include "PerfGraphRegistry.h"
21 :
22 : // System Includes
23 : #include <array>
24 : #include <atomic>
25 : #include <thread>
26 : #include <future>
27 : #include <mutex>
28 :
29 : // Forward Declarations
30 : class PerfGuard;
31 : class PerfGraphLivePrint;
32 :
33 : template <class... Ts>
34 : class VariadicTable;
35 :
36 : #define MOOSE_MAX_STACK_SIZE 100
37 : #define MAX_EXECUTION_LIST_SIZE 10000
38 :
39 : /**
40 : * The PerfGraph will hold the master list of all registered performance segments and
41 : * the head PerfNode
42 : */
43 : class PerfGraph : protected ConsoleStreamInterface
44 : {
45 : public:
46 : using PerfGraphRegistry = moose::internal::PerfGraphRegistry;
47 :
48 : /**
49 : * For retrieving values
50 : */
51 : enum DataType
52 : {
53 : SELF,
54 : CHILDREN,
55 : TOTAL,
56 : SELF_AVG,
57 : CHILDREN_AVG,
58 : TOTAL_AVG,
59 : SELF_PERCENT,
60 : CHILDREN_PERCENT,
61 : TOTAL_PERCENT,
62 : SELF_MEMORY,
63 : CHILDREN_MEMORY,
64 : TOTAL_MEMORY,
65 : CALLS
66 : };
67 :
68 : /**
69 : * DataType in a MooseEnum for use in InputParameters in objects that query
70 : * the PerfGraph with sectionData.
71 : */
72 15161 : static MooseEnum dataTypeEnum()
73 : {
74 : return MooseEnum(
75 : "SELF CHILDREN TOTAL SELF_AVG CHILDREN_AVG TOTAL_AVG SELF_PERCENT CHILDREN_PERCENT "
76 15161 : "TOTAL_PERCENT SELF_MEMORY CHILDREN_MEMORY TOTAL_MEMORY CALLS");
77 : }
78 :
79 : /**
80 : * Create a new PerfGraph
81 : *
82 : * @param root_name The name of the root node
83 : * @param app The MooseApp this PerfGraph is for
84 : * @param live_all Whether every message should be printed
85 : * @param perf_graph_live Enable/disable PerfGraphLive (permanently)
86 : */
87 : PerfGraph(const std::string & root_name,
88 : MooseApp & app,
89 : const bool live_all,
90 : const bool perf_graph_live);
91 :
92 : /**
93 : * Destructor
94 : */
95 : ~PerfGraph();
96 :
97 : /**
98 : * Print the tree out
99 : *
100 : * @param console The output stream to output to
101 : * @param level The log level, the higher the number the more output you get
102 : */
103 : void print(const ConsoleStream & console, unsigned int level);
104 :
105 : /**
106 : * Print out the heaviest branch through the tree
107 : *
108 : * @param console The output stream to output to
109 : */
110 : void printHeaviestBranch(const ConsoleStream & console);
111 :
112 : /**
113 : * Print out the heaviest sections that were timed
114 : *
115 : * @param console The output stream to output to
116 : */
117 : void printHeaviestSections(const ConsoleStream & console, const unsigned int num_sections);
118 :
119 : /**
120 : * Whether or not timing is active
121 : *
122 : * When not active no timing information will be kept
123 : */
124 : bool active() const { return _active; }
125 :
126 : /**
127 : * Turn on or off timing
128 : */
129 10 : void setActive(bool active) { _active = active; }
130 :
131 : /**
132 : * Enables Live Print
133 : */
134 : void enableLivePrint();
135 :
136 : /**
137 : * Completely disables Live Print (cannot be restarted)
138 : */
139 : void disableLivePrint();
140 :
141 : /**
142 : * Forces all sections to be output live
143 : */
144 : void setLivePrintAll(bool active) { _live_print_all = active; }
145 :
146 : /**
147 : * Set the time limit before a message prints
148 : */
149 62078 : void setLiveTimeLimit(Real time_limit)
150 : {
151 62078 : _live_print_time_limit.store(time_limit, std::memory_order_relaxed);
152 62078 : }
153 :
154 : /**
155 : * Sert the memory limit before a message prints
156 : */
157 62078 : void setLiveMemoryLimit(unsigned int mem_limit)
158 : {
159 62078 : _live_print_mem_limit.store(mem_limit, std::memory_order_relaxed);
160 62078 : }
161 :
162 : /**
163 : * Gets a PerfGraph result pertaining to a section
164 : * @param type The result type to retrieve
165 : * @param section_name The name of the section
166 : * @param must_exist Whether not the section must exist; if false and the
167 : * section does not exist, returns 0, if true and the section does not exist,
168 : * exit with an error
169 : */
170 : Real
171 : sectionData(const DataType type, const std::string & section_name, const bool must_exist = true);
172 :
173 : /**
174 : * Updates the time section_time and time for all currently running nodes
175 : */
176 : void update();
177 :
178 : /**
179 : * @returns The MooseApp
180 : */
181 3439252 : MooseApp & mooseApp() { return _moose_app; }
182 :
183 : /**
184 : * @returns A constant reference to the root node
185 : */
186 2014466 : const PerfNode & rootNode() const { return *_root_node; }
187 :
188 : template <typename Functor>
189 : void treeRecurse(const Functor & act,
190 : const unsigned int level = MOOSE_MAX_STACK_SIZE,
191 : const bool heaviest = false) const;
192 :
193 : protected:
194 : typedef VariadicTable<std::string,
195 : unsigned long int,
196 : Real,
197 : Real,
198 : Real,
199 : long int,
200 : Real,
201 : Real,
202 : Real,
203 : long int>
204 : FullTable;
205 :
206 : typedef VariadicTable<std::string, unsigned long int, Real, Real, Real, long int> HeaviestTable;
207 :
208 : /**
209 : * Use to hold the cumulative time and memory for each section, which comes
210 : * from all of the PerfNodes that contribute to said section
211 : *
212 : * These will be filled by update()
213 : */
214 : struct CumulativeSectionInfo
215 : {
216 : /// Amount of time used within this section (without children)
217 : Real _self = 0.;
218 :
219 : /// Amount of time used by children
220 : Real _children = 0.;
221 :
222 : /// Total amount of time used
223 : Real _total = 0.;
224 :
225 : /// Number of times this section has been called
226 : unsigned long int _num_calls = 0;
227 :
228 : /// Amount of memory gained within this section (without children)
229 : long int _self_memory = 0;
230 :
231 : /// Amount of memory gained by children
232 : long int _children_memory = 0;
233 :
234 : /// Total memory gain for this section
235 : long int _total_memory = 0;
236 : };
237 :
238 : /**
239 : * The execution state of an increment.
240 : */
241 : enum IncrementState
242 : {
243 : /// Section just started running
244 : STARTED,
245 :
246 : /// This section has already started printing
247 : PRINTED,
248 :
249 : /// The section is complete
250 : FINISHED
251 : };
252 :
253 : /**
254 : * Use to hold an increment of time and memory for a section
255 : * This is used in the LivePrint capability.
256 : */
257 : class SectionIncrement
258 : {
259 : public:
260 635522300 : SectionIncrement()
261 635522300 : : _state(IncrementState::FINISHED),
262 635522300 : _print_stack_level(0),
263 635522300 : _num_dots(0),
264 635522300 : _time(std::chrono::seconds(0)),
265 635522300 : _memory(0),
266 635522300 : _beginning_num_printed(0)
267 : {
268 635522300 : }
269 :
270 : PerfID _id;
271 :
272 : /// Whether or not this increment is the start of an increment or
273 : /// the finishing of an increment.
274 : IncrementState _state;
275 :
276 : /// How much to indent this section
277 : unsigned int _print_stack_level;
278 :
279 : /// How many dots have been printed for this section
280 : unsigned int _num_dots;
281 :
282 : /// Either the starting time or final time depending on _state
283 : std::chrono::time_point<std::chrono::steady_clock> _time;
284 :
285 : /// Either the starting memory or final memory depending on _state
286 : long int _memory;
287 :
288 : /// The _console numPrinted() at the time this section was created
289 : unsigned long long int _beginning_num_printed;
290 : };
291 :
292 : /**
293 : * Add the information to the execution list
294 : *
295 : * Should only be called by push() and pop()
296 : */
297 : inline void addToExecutionList(const PerfID id,
298 : const IncrementState state,
299 : const std::chrono::time_point<std::chrono::steady_clock> time,
300 : const long int memory);
301 :
302 : /**
303 : * Add a Node onto the end of the end of the current callstack
304 : *
305 : * Note: only accessible by using PerfGuard!
306 : */
307 : void push(const PerfID id);
308 :
309 : /**
310 : * Remove a Node from the end of the current scope
311 : *
312 : * Note: only accessible by using PerfGuard!
313 : */
314 : void pop();
315 :
316 : /**
317 : * Updates the cumulative self/children/total time and memory for each section
318 : * across all nodes that contribute to said section in _cumulative_section_info
319 : *
320 : * Note: requires that the contents in each CumulativeSectionInfo in
321 : * _cumulative_section_info be initially resized and zeroed
322 : *
323 : * @param current_node The current node to work on
324 : */
325 : void recursivelyUpdate(const PerfNode & current_node);
326 :
327 : /// The MooseApp
328 : MooseApp & _moose_app;
329 :
330 : /// Whether or not to put everything in the perf graph
331 : bool _live_print_all;
332 :
333 : /// Whether or not live print is disabled (cannot be turned on again)
334 : bool _disable_live_print;
335 :
336 : /// The PerfGraphRegistry
337 : PerfGraphRegistry & _perf_graph_registry;
338 :
339 : /// This processor id
340 : const libMesh::processor_id_type _pid;
341 :
342 : /// Name of the root node
343 : const std::string _root_name;
344 :
345 : /// The id for the root node
346 : const PerfID _root_node_id;
347 :
348 : /// The root node of the graph
349 : const std::unique_ptr<PerfNode> _root_node;
350 :
351 : /// The current node position in the stack
352 : int _current_position;
353 :
354 : /// The full callstack. Currently capped at a depth of 100
355 : std::array<PerfNode *, MOOSE_MAX_STACK_SIZE> _stack;
356 :
357 : /// A circular buffer for holding the execution list, this is read by the printing loop
358 : std::array<SectionIncrement, MAX_EXECUTION_LIST_SIZE> _execution_list;
359 :
360 : /// Where the print thread should start reading the execution list
361 : std::atomic<unsigned int> _execution_list_begin;
362 :
363 : /// Where the print thread should stop reading the execution list
364 : std::atomic<unsigned int> _execution_list_end;
365 :
366 : /// The cumulative time and memory for each section. This is updated on update()
367 : /// Note that this is _total_ cumulative time/memory across every place
368 : /// that section is in the graph
369 : ///
370 : /// I'm making this a map so that we can give out references to the values
371 : /// The three values are: self, children
372 : /// The map is on std::string because we might need to be able to retrieve
373 : /// timing values in a "late binding" situation _before_ the section
374 : /// has been registered.
375 : std::unordered_map<std::string, CumulativeSectionInfo> _cumulative_section_info;
376 :
377 : /// Pointers into _cumulative_section_info indexed on PerfID
378 : /// This is here for convenience and speed so we don't need
379 : /// to iterate over the above map much - and it makes it
380 : /// easier to sort
381 : std::vector<CumulativeSectionInfo *> _cumulative_section_info_ptrs;
382 :
383 : /// Whether or not timing is active
384 : bool _active;
385 :
386 : /// The promise to the print thread that will signal when to stop
387 : std::promise<bool> _done;
388 :
389 : /// Tell the print thread to teardown
390 : bool _destructing;
391 :
392 : /// The mutex to use with a condition_variable predicate to guard _destructing
393 : std::mutex _destructing_mutex;
394 :
395 : /// The condition_variable to wake the print thread
396 : std::condition_variable _finished_section;
397 :
398 : /// The time limit before a message is printed (in seconds)
399 : std::atomic<Real> _live_print_time_limit;
400 :
401 : /// The memory limit before a message is printed (in MB)
402 : std::atomic<unsigned int> _live_print_mem_limit;
403 :
404 : /// The object that is doing live printing
405 : const std::unique_ptr<PerfGraphLivePrint> _live_print;
406 :
407 : /// The thread for printing sections as they execute
408 : std::thread _print_thread;
409 :
410 : // Here so PerfGuard is the only thing that can call push/pop
411 : friend class PerfGuard;
412 : friend class PerfGraphLivePrint;
413 : friend void dataStore(std::ostream &, PerfGraph &, void *);
414 : friend void dataLoad(std::istream &, PerfGraph &, void *);
415 :
416 : private:
417 : /**
418 : * Helper for building a VariadicTable that represents the tree.
419 : *
420 : * @param level The level to print out below (<=)
421 : * @param heaviest Show only the heaviest branch
422 : */
423 : FullTable treeTable(const unsigned int level, const bool heaviest = false);
424 :
425 : template <typename Functor>
426 : void treeRecurseInternal(const PerfNode & node,
427 : const Functor & act,
428 : const unsigned int level,
429 : const bool heaviest,
430 : unsigned int current_depth) const;
431 : };
432 :
433 : template <typename Functor>
434 : void
435 8027458 : PerfGraph::treeRecurseInternal(const PerfNode & node,
436 : const Functor & act,
437 : const unsigned int level,
438 : const bool heaviest,
439 : unsigned int current_depth) const
440 : {
441 : mooseAssert(_perf_graph_registry.sectionExists(node.id()), "Unable to find section name!");
442 :
443 8027458 : const auto & current_section_info = _perf_graph_registry.readSectionInfo(node.id());
444 8027458 : if (current_section_info._level <= level)
445 : {
446 : mooseAssert(!_cumulative_section_info_ptrs.empty(), "update() must be run before treeRecurse!");
447 809389 : act(node, current_section_info, current_depth++);
448 : }
449 :
450 8027458 : if (heaviest)
451 : {
452 188 : const PerfNode * heaviest_child = nullptr;
453 1118 : for (const auto & child_it : node.children())
454 : {
455 930 : const auto & current_child = *child_it.second;
456 :
457 930 : if (!heaviest_child || (current_child.totalTime() > heaviest_child->totalTime()))
458 343 : heaviest_child = ¤t_child;
459 : }
460 :
461 188 : if (heaviest_child)
462 167 : treeRecurseInternal(*heaviest_child, act, level, true, current_depth);
463 : }
464 : else
465 : {
466 16006735 : for (const auto & child_it : node.children())
467 7979465 : treeRecurseInternal(*child_it.second, act, level, false, current_depth);
468 : }
469 8027458 : }
470 :
471 : template <typename Functor>
472 : void
473 47826 : PerfGraph::treeRecurse(const Functor & act,
474 : const unsigned int level /* = MOOSE_MAX_STACK_SIZE */,
475 : const bool heaviest /* = false */) const
476 : {
477 : mooseAssert(_root_node, "Root node does not exist; calling this too early");
478 47826 : treeRecurseInternal(*_root_node, act, level, heaviest, 0);
479 47826 : }
480 :
481 : void dataStore(std::ostream & stream, PerfGraph & perf_graph, void * context);
482 : void dataLoad(std::istream & stream, PerfGraph & perf_graph, void * context);
|