https://mooseframework.inl.gov
PerfGraph.h
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://mooseframework.inl.gov
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
10 #pragma once
11 
12 // MOOSE Includes
13 #include "MooseTypes.h"
14 #include "PerfNode.h"
15 #include "IndirectSort.h"
16 #include "ConsoleStream.h"
17 #include "ConsoleStreamInterface.h"
18 #include "MooseError.h"
19 #include "MemoryUtils.h"
20 #include "PerfGraphRegistry.h"
21 
22 // System Includes
23 #include <array>
24 #include <atomic>
25 #include <thread>
26 #include <future>
27 #include <mutex>
28 
29 // Forward Declarations
30 class PerfGuard;
31 class PerfGraphLivePrint;
32 
33 template <class... Ts>
35 
36 #define MOOSE_MAX_STACK_SIZE 100
37 #define MAX_EXECUTION_LIST_SIZE 10000
38 
44 {
45 public:
47 
51  enum DataType
52  {
66  };
67 
73  {
74  return MooseEnum(
75  "SELF CHILDREN TOTAL SELF_AVG CHILDREN_AVG TOTAL_AVG SELF_PERCENT CHILDREN_PERCENT "
76  "TOTAL_PERCENT SELF_MEMORY CHILDREN_MEMORY TOTAL_MEMORY CALLS");
77  }
78 
87  PerfGraph(const std::string & root_name,
88  MooseApp & app,
89  const bool live_all,
90  const bool perf_graph_live);
91 
95  ~PerfGraph();
96 
103  void print(const ConsoleStream & console, unsigned int level);
104 
110  void printHeaviestBranch(const ConsoleStream & console);
111 
117  void printHeaviestSections(const ConsoleStream & console, const unsigned int num_sections);
118 
/// Whether or not timing is active; returns the current value of the _active flag.
124  bool active() const { return _active; }
125 
/// Turn timing on or off by overwriting the _active flag with \p active.
129  void setActive(bool active) { _active = active; }
130 
134  void enableLivePrint();
135 
139  void disableLivePrint();
140 
145 
/// Set the time limit before a live-print message is printed.
/// @param time_limit The new limit, stored into the atomic _live_print_time_limit
///                   (presumably in seconds -- confirm against the member's documentation)
149  void setLiveTimeLimit(Real time_limit)
150  {
      // Relaxed ordering: only the value itself is stored atomically; no ordering with other memory operations is requested
151  _live_print_time_limit.store(time_limit, std::memory_order_relaxed);
152  }
153 
/// Set the memory limit before a live-print message is printed.
/// @param mem_limit The new limit, stored into the atomic _live_print_mem_limit
///                  (presumably in MB -- confirm against the member's documentation)
157  void setLiveMemoryLimit(unsigned int mem_limit)
158  {
      // Relaxed ordering: only the value itself is stored atomically; no ordering with other memory operations is requested
159  _live_print_mem_limit.store(mem_limit, std::memory_order_relaxed);
160  }
161 
170  Real
171  sectionData(const DataType type, const std::string & section_name, const bool must_exist = true);
172 
176  void update();
177 
/// Get the maximum memory recorded so far, i.e. the current value of the atomic _max_memory
/// (presumably in MB -- confirm against the code that updates _max_memory).
183  std::size_t getMaxMemory() const { return _max_memory; }
184 
/// Get a mutable reference to the MooseApp held in _moose_app.
188  MooseApp & mooseApp() { return _moose_app; }
189 
/// Get a const reference to the root node of the graph (the node owned by _root_node).
/// NOTE(review): _root_node is dereferenced without a null check here -- confirm it is always set at construction.
193  const PerfNode & rootNode() const { return *_root_node; }
194 
195  template <typename Functor>
196  void treeRecurse(const Functor & act,
197  const unsigned int level = MOOSE_MAX_STACK_SIZE,
198  const bool heaviest = false) const;
199 
200 protected:
201  typedef VariadicTable<std::string,
202  unsigned long int,
203  Real,
204  Real,
205  Real,
206  long int,
207  Real,
208  Real,
209  Real,
210  long int>
212 
214 
222  {
224  Real _self = 0.;
225 
228 
230  Real _total = 0.;
231 
233  unsigned long int _num_calls = 0;
234 
236  long int _self_memory = 0;
237 
239  long int _children_memory = 0;
240 
242  long int _total_memory = 0;
243  };
244 
249  {
252 
255 
258  };
259 
265  {
266  public:
270  _num_dots(0),
271  _time(std::chrono::seconds(0)),
272  _memory(0),
274  {
275  }
276 
278 
282 
284  unsigned int _print_stack_level;
285 
287  unsigned int _num_dots;
288 
290  std::chrono::time_point<std::chrono::steady_clock> _time;
291 
293  long int _memory;
294 
296  unsigned long long int _beginning_num_printed;
297  };
298 
304  inline void addToExecutionList(const PerfID id,
305  const IncrementState state,
306  const std::chrono::time_point<std::chrono::steady_clock> time,
307  const long int memory);
308 
314  void push(const PerfID id);
315 
321  void pop();
322 
332  void recursivelyUpdate(const PerfNode & current_node);
333 
336 
339 
342 
345 
348 
350  const std::string _root_name;
351 
354 
356  const std::unique_ptr<PerfNode> _root_node;
357 
360 
362  std::array<PerfNode *, MOOSE_MAX_STACK_SIZE> _stack;
363 
365  std::array<SectionIncrement, MAX_EXECUTION_LIST_SIZE> _execution_list;
366 
368  std::atomic<unsigned int> _execution_list_begin;
369 
371  std::atomic<unsigned int> _execution_list_end;
372 
382  std::unordered_map<std::string, CumulativeSectionInfo> _cumulative_section_info;
383 
388  std::vector<CumulativeSectionInfo *> _cumulative_section_info_ptrs;
389 
391  std::atomic<std::size_t> _max_memory;
392 
394  bool _active;
395 
397  std::promise<bool> _done;
398 
401 
403  std::mutex _destructing_mutex;
404 
406  std::condition_variable _finished_section;
407 
409  std::atomic<Real> _live_print_time_limit;
410 
412  std::atomic<unsigned int> _live_print_mem_limit;
413 
415  const std::unique_ptr<PerfGraphLivePrint> _live_print;
416 
418  std::thread _print_thread;
419 
420  // Here so PerfGuard is the only thing that can call push/pop
421  friend class PerfGuard;
422  friend class PerfGraphLivePrint;
423  friend void dataStore(std::ostream &, PerfGraph &, void *);
424  friend void dataLoad(std::istream &, PerfGraph &, void *);
425 
426 private:
433  FullTable treeTable(const unsigned int level, const bool heaviest = false);
434 
435  template <typename Functor>
436  void treeRecurseInternal(const PerfNode & node,
437  const Functor & act,
438  const unsigned int level,
439  const bool heaviest,
440  unsigned int current_depth) const;
441 
445  void updateMaxMemory(const std::size_t current_memory);
446 };
447 
448 template <typename Functor>
449 void
451  const Functor & act,
452  const unsigned int level,
453  const bool heaviest,
454  unsigned int current_depth) const
455 {
456  mooseAssert(_perf_graph_registry.sectionExists(node.id()), "Unable to find section name!");
457 
458  const auto & current_section_info = _perf_graph_registry.readSectionInfo(node.id());
459  if (current_section_info._level <= level)
460  {
461  mooseAssert(!_cumulative_section_info_ptrs.empty(), "update() must be run before treeRecurse!");
462  act(node, current_section_info, current_depth++);
463  }
464 
465  if (heaviest)
466  {
467  const PerfNode * heaviest_child = nullptr;
468  for (const auto & child_it : node.children())
469  {
470  const auto & current_child = *child_it.second;
471 
472  if (!heaviest_child || (current_child.totalTime() > heaviest_child->totalTime()))
473  heaviest_child = &current_child;
474  }
475 
476  if (heaviest_child)
477  treeRecurseInternal(*heaviest_child, act, level, true, current_depth);
478  }
479  else
480  {
481  for (const auto & child_it : node.children())
482  treeRecurseInternal(*child_it.second, act, level, false, current_depth);
483  }
484 }
485 
486 template <typename Functor>
487 void
489  const unsigned int level /* = MOOSE_MAX_STACK_SIZE */,
490  const bool heaviest /* = false */) const
491 {
492  mooseAssert(_root_node, "Root node does not exist; calling this too early");
493  treeRecurseInternal(*_root_node, act, level, heaviest, 0);
494 }
495 
496 void dataStore(std::ostream & stream, PerfGraph & perf_graph, void * context);
497 void dataLoad(std::istream & stream, PerfGraph & perf_graph, void * context);
std::size_t getMaxMemory() const
Get the maximum memory allocation in MB.
Definition: PerfGraph.h:183
void recursivelyUpdate(const PerfNode &current_node)
Updates the cumulative self/children/total time and memory for each section across all nodes that con...
Definition: PerfGraph.C:332
long int _memory
Either the starting memory or final memory depending on _state.
Definition: PerfGraph.h:293
PerfGraphRegistry & _perf_graph_registry
The PerfGraphRegistry.
Definition: PerfGraph.h:344
A helper class for re-directing output streams to Console output objects from MooseObjects.
Definition: ConsoleStream.h:30
long int _total_memory
Total memory gain for this section.
Definition: PerfGraph.h:242
long int _self_memory
Amount of memory gained within this section (without children)
Definition: PerfGraph.h:236
unsigned long int _num_calls
Number of times this section has been called.
Definition: PerfGraph.h:233
static MooseEnum dataTypeEnum()
DataType in a MooseEnum for use in InputParameters in objects that query the PerfGraph with sectionDa...
Definition: PerfGraph.h:72
The place where all timed sections will be stored.
The section is complete.
Definition: PerfGraph.h:257
~PerfGraph()
Destructor.
Definition: PerfGraph.C:55
PerfGraph(const std::string &root_name, MooseApp &app, const bool live_all, const bool perf_graph_live)
Create a new PerfGraph.
Definition: PerfGraph.C:28
bool sectionExists(const std::string &section_name) const
Whether or not a section with that name has been registered. Parameter section_name: the name of the section.
Real sectionData(const DataType type, const std::string &section_name, const bool must_exist=true)
Gets a PerfGraph result pertaining to a section.
Definition: PerfGraph.C:94
std::promise< bool > _done
The promise to the print thread that will signal when to stop.
Definition: PerfGraph.h:397
std::thread _print_thread
The thread for printing sections as they execute.
Definition: PerfGraph.h:418
friend void dataStore(std::ostream &, PerfGraph &, void *)
Definition: PerfGraph.C:511
std::atomic< unsigned int > _live_print_mem_limit
The memory limit before a message is printed (in MB)
Definition: PerfGraph.h:412
DataType
For retrieving values.
Definition: PerfGraph.h:51
A class for "pretty printing" a table of data.
Definition: PerfGraph.h:34
VariadicTable< std::string, unsigned long int, Real, Real, Real, long int, Real, Real, Real, long int > FullTable
Definition: PerfGraph.h:211
std::atomic< unsigned int > _execution_list_begin
Where the print thread should start reading the execution list.
Definition: PerfGraph.h:368
const std::unique_ptr< PerfGraphLivePrint > _live_print
The object that is doing live printing.
Definition: PerfGraph.h:415
This section has already started printing.
Definition: PerfGraph.h:254
Real _total
Total amount of time used.
Definition: PerfGraph.h:230
Base class for MOOSE-based applications.
Definition: MooseApp.h:108
Use to hold an increment of time and memory for a section This is used in the LivePrint capability...
Definition: PerfGraph.h:264
std::atomic< unsigned int > _execution_list_end
Where the print thread should stop reading the execution list.
Definition: PerfGraph.h:371
unsigned int _print_stack_level
How much to indent this section.
Definition: PerfGraph.h:284
IncrementState
The execution state of an increment.
Definition: PerfGraph.h:248
bool _live_print_all
Whether or not to put everything in the perf graph.
Definition: PerfGraph.h:338
Real _self
Amount of time used within this section (without children)
Definition: PerfGraph.h:224
std::condition_variable _finished_section
The condition_variable to wake the print thread.
Definition: PerfGraph.h:406
void update()
Updates the time section_time and time for all currently running nodes.
Definition: PerfGraph.C:285
uint8_t processor_id_type
IncrementState _state
Whether or not this increment is the start of an increment or the finishing of an increment...
Definition: PerfGraph.h:281
unsigned int PerfID
Definition: MooseTypes.h:240
void addToExecutionList(const PerfID id, const IncrementState state, const std::chrono::time_point< std::chrono::steady_clock > time, const long int memory)
Add the information to the execution list.
Definition: PerfGraph.C:155
std::vector< CumulativeSectionInfo * > _cumulative_section_info_ptrs
Pointers into _cumulative_section_info indexed on PerfID This is here for convenience and speed so we...
Definition: PerfGraph.h:388
Scope guard for starting and stopping timing for a node.
Definition: PerfGuard.h:25
void treeRecurse(const Functor &act, const unsigned int level=MOOSE_MAX_STACK_SIZE, const bool heaviest=false) const
Definition: PerfGraph.h:488
const std::unique_ptr< PerfNode > _root_node
The root node of the graph.
Definition: PerfGraph.h:356
const std::map< PerfID, std::unique_ptr< PerfNode > > & children() const
Get the children.
Definition: PerfNode.h:107
FunctorEnvelope< T > Functor
PerfID id() const
Get the ID of this Node.
Definition: PerfNode.h:36
unsigned long long int _beginning_num_printed
The _console numPrinted() at the time this section was created.
Definition: PerfGraph.h:296
void setActive(bool active)
Turn on or off timing.
Definition: PerfGraph.h:129
unsigned int _num_dots
How many dots have been printed for this section.
Definition: PerfGraph.h:287
std::mutex _destructing_mutex
The mutex to use with a condition_variable predicate to guard _destructing.
Definition: PerfGraph.h:403
std::unordered_map< std::string, CumulativeSectionInfo > _cumulative_section_info
The cumulative time and memory for each section.
Definition: PerfGraph.h:382
void setLiveMemoryLimit(unsigned int mem_limit)
Set the memory limit before a message prints.
Definition: PerfGraph.h:157
An interface for the _console for outputting to the Console object.
void dataStore(std::ostream &stream, PerfGraph &perf_graph, void *context)
Definition: PerfGraph.C:511
bool active() const
Whether or not timing is active.
Definition: PerfGraph.h:124
bool _active
Whether or not timing is active.
Definition: PerfGraph.h:394
This is effectively a functor that runs on a separate thread and watches the state of the call stack ...
void disableLivePrint()
Completely disables Live Print (cannot be restarted)
Definition: PerfGraph.C:68
MooseApp & _moose_app
The MooseApp.
Definition: PerfGraph.h:335
This is a "smart" enum class intended to replace many of the shortcomings in the C++ enum type It sho...
Definition: MooseEnum.h:54
int _current_position
The current node position in the stack.
Definition: PerfGraph.h:359
void updateMaxMemory(const std::size_t current_memory)
Update _max_memory if current_memory > _max_memory.
Definition: PerfGraph.C:500
Real _children
Amount of time used by children.
Definition: PerfGraph.h:227
std::chrono::steady_clock::duration totalTime() const
The time this Node plus all of its children took.
Definition: PerfNode.C:22
void treeRecurseInternal(const PerfNode &node, const Functor &act, const unsigned int level, const bool heaviest, unsigned int current_depth) const
Definition: PerfGraph.h:450
std::atomic< std::size_t > _max_memory
Maximum memory encountered during push and pop.
Definition: PerfGraph.h:391
void pop()
Remove a Node from the end of the current scope.
Definition: PerfGraph.C:236
MooseApp & mooseApp()
Definition: PerfGraph.h:188
bool _disable_live_print
Whether or not live print is disabled (cannot be turned on again)
Definition: PerfGraph.h:341
void print(const ConsoleStream &console, unsigned int level)
Print the tree out.
Definition: PerfGraph.C:416
std::chrono::time_point< std::chrono::steady_clock > _time
Either the starting time or final time depending on _state.
Definition: PerfGraph.h:290
friend void dataLoad(std::istream &, PerfGraph &, void *)
Definition: PerfGraph.C:525
std::array< PerfNode *, MOOSE_MAX_STACK_SIZE > _stack
The full callstack. Currently capped at a depth of 100.
Definition: PerfGraph.h:362
void printHeaviestBranch(const ConsoleStream &console)
Print out the heaviest branch through the tree.
Definition: PerfGraph.C:423
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
const std::string _root_name
Name of the root node.
Definition: PerfGraph.h:350
Use to hold the cumulative time and memory for each section, which comes from all of the PerfNodes th...
Definition: PerfGraph.h:221
VariadicTable< std::string, unsigned long int, Real, Real, Real, long int > HeaviestTable
Definition: PerfGraph.h:213
const libMesh::processor_id_type _pid
This processor id.
Definition: PerfGraph.h:347
long int _children_memory
Amount of memory gained by children.
Definition: PerfGraph.h:239
bool _destructing
Tell the print thread to teardown.
Definition: PerfGraph.h:400
void printHeaviestSections(const ConsoleStream &console, const unsigned int num_sections)
Print out the heaviest sections that were timed.
Definition: PerfGraph.C:430
void setLiveTimeLimit(Real time_limit)
Set the time limit before a message prints.
Definition: PerfGraph.h:149
void push(const PerfID id)
Add a Node onto the end of the current callstack.
Definition: PerfGraph.C:186
FullTable treeTable(const unsigned int level, const bool heaviest=false)
Helper for building a VariadicTable that represents the tree.
Definition: PerfGraph.C:353
Section just started running.
Definition: PerfGraph.h:251
The PerfGraph will hold the master list of all registered performance segments and the head PerfNode...
Definition: PerfGraph.h:43
void setLivePrintAll(bool active)
Forces all sections to be output live.
Definition: PerfGraph.h:144
const PerfGraphSectionInfo & readSectionInfo(PerfID section_id) const
Special accessor just for PerfGraph so that no locking is needed in PerfGraph.
std::atomic< Real > _live_print_time_limit
The time limit before a message is printed (in seconds)
Definition: PerfGraph.h:409
void ErrorVector unsigned int
std::array< SectionIncrement, MAX_EXECUTION_LIST_SIZE > _execution_list
A circular buffer for holding the execution list, this is read by the printing loop.
Definition: PerfGraph.h:365
void dataLoad(std::istream &stream, PerfGraph &perf_graph, void *context)
Definition: PerfGraph.C:525
void enableLivePrint()
Enables Live Print.
Definition: PerfGraph.C:58
const PerfNode & rootNode() const
Definition: PerfGraph.h:193
const PerfID _root_node_id
The id for the root node.
Definition: PerfGraph.h:353
A node in the PerfGraph.
Definition: PerfNode.h:25