https://mooseframework.inl.gov
PerfGraph.h
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://mooseframework.inl.gov
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
10 #pragma once
11 
12 // MOOSE Includes
13 #include "MooseTypes.h"
14 #include "PerfNode.h"
15 #include "IndirectSort.h"
16 #include "ConsoleStream.h"
17 #include "ConsoleStreamInterface.h"
18 #include "MooseError.h"
19 #include "MemoryUtils.h"
20 #include "PerfGraphRegistry.h"
21 
22 // System Includes
23 #include <array>
24 #include <atomic>
25 #include <thread>
26 #include <future>
27 #include <mutex>
28 
29 // Forward Declarations
30 class PerfGuard;
31 class PerfGraphLivePrint;
32 
33 template <class... Ts>
35 
36 #define MOOSE_MAX_STACK_SIZE 100
37 #define MAX_EXECUTION_LIST_SIZE 10000
38 
44 {
45 public:
47 
51  enum DataType
52  {
66  };
67 
73  {
74  return MooseEnum(
75  "SELF CHILDREN TOTAL SELF_AVG CHILDREN_AVG TOTAL_AVG SELF_PERCENT CHILDREN_PERCENT "
76  "TOTAL_PERCENT SELF_MEMORY CHILDREN_MEMORY TOTAL_MEMORY CALLS");
77  }
78 
87  PerfGraph(const std::string & root_name,
88  MooseApp & app,
89  const bool live_all,
90  const bool perf_graph_live);
91 
95  ~PerfGraph();
96 
103  void print(const ConsoleStream & console, unsigned int level);
104 
110  void printHeaviestBranch(const ConsoleStream & console);
111 
117  void printHeaviestSections(const ConsoleStream & console, const unsigned int num_sections);
118 
/** Whether or not timing is active; returns the current value of _active. */
124  bool active() const { return _active; }
125 
/**
 * Turn timing on or off.
 * @param active The new value assigned to _active
 */
129  void setActive(bool active) { _active = active; }
130 
134  void enableLivePrint();
135 
139  void disableLivePrint();
140 
145 
/**
 * Set the time limit before a live-print message prints.
 * @param time_limit The new limit, stored into _live_print_time_limit (in seconds)
 */
149  void setLiveTimeLimit(Real time_limit)
150  {
// _live_print_time_limit is a std::atomic<Real>; the relaxed store makes the write itself
// atomic but imposes no ordering on surrounding memory operations.
// NOTE(review): presumably read by the live-print thread - confirm against PerfGraphLivePrint.
151  _live_print_time_limit.store(time_limit, std::memory_order_relaxed);
152  }
153 
/**
 * Set the memory limit before a live-print message prints.
 * @param mem_limit The new limit, stored into _live_print_mem_limit (in MB)
 */
157  void setLiveMemoryLimit(unsigned int mem_limit)
158  {
// _live_print_mem_limit is a std::atomic<unsigned int>; the relaxed store makes the write
// itself atomic but imposes no ordering on surrounding memory operations.
159  _live_print_mem_limit.store(mem_limit, std::memory_order_relaxed);
160  }
161 
170  Real
171  sectionData(const DataType type, const std::string & section_name, const bool must_exist = true);
172 
176  void update();
177 
/** @return A non-const reference to the MooseApp held in _moose_app. */
181  MooseApp & mooseApp() { return _moose_app; }
182 
/**
 * @return A const reference to the root node of the graph.
 * Dereferences _root_node without a null check.
 */
186  const PerfNode & rootNode() const { return *_root_node; }
187 
188  template <typename Functor>
189  void treeRecurse(const Functor & act,
190  const unsigned int level = MOOSE_MAX_STACK_SIZE,
191  const bool heaviest = false) const;
192 
193 protected:
194  typedef VariadicTable<std::string,
195  unsigned long int,
196  Real,
197  Real,
198  Real,
199  long int,
200  Real,
201  Real,
202  Real,
203  long int>
205 
207 
215  {
217  Real _self = 0.;
218 
221 
223  Real _total = 0.;
224 
226  unsigned long int _num_calls = 0;
227 
229  long int _self_memory = 0;
230 
232  long int _children_memory = 0;
233 
235  long int _total_memory = 0;
236  };
237 
242  {
245 
248 
251  };
252 
258  {
259  public:
263  _num_dots(0),
264  _time(std::chrono::seconds(0)),
265  _memory(0),
267  {
268  }
269 
271 
275 
277  unsigned int _print_stack_level;
278 
280  unsigned int _num_dots;
281 
283  std::chrono::time_point<std::chrono::steady_clock> _time;
284 
286  long int _memory;
287 
289  unsigned long long int _beginning_num_printed;
290  };
291 
297  inline void addToExecutionList(const PerfID id,
298  const IncrementState state,
299  const std::chrono::time_point<std::chrono::steady_clock> time,
300  const long int memory);
301 
307  void push(const PerfID id);
308 
314  void pop();
315 
325  void recursivelyUpdate(const PerfNode & current_node);
326 
329 
332 
335 
338 
341 
343  const std::string _root_name;
344 
347 
349  const std::unique_ptr<PerfNode> _root_node;
350 
353 
355  std::array<PerfNode *, MOOSE_MAX_STACK_SIZE> _stack;
356 
358  std::array<SectionIncrement, MAX_EXECUTION_LIST_SIZE> _execution_list;
359 
361  std::atomic<unsigned int> _execution_list_begin;
362 
364  std::atomic<unsigned int> _execution_list_end;
365 
375  std::unordered_map<std::string, CumulativeSectionInfo> _cumulative_section_info;
376 
381  std::vector<CumulativeSectionInfo *> _cumulative_section_info_ptrs;
382 
384  bool _active;
385 
387  std::promise<bool> _done;
388 
391 
393  std::mutex _destructing_mutex;
394 
396  std::condition_variable _finished_section;
397 
399  std::atomic<Real> _live_print_time_limit;
400 
402  std::atomic<unsigned int> _live_print_mem_limit;
403 
405  const std::unique_ptr<PerfGraphLivePrint> _live_print;
406 
408  std::thread _print_thread;
409 
410  // Here so PerfGuard is the only thing that can call push/pop
411  friend class PerfGuard;
412  friend class PerfGraphLivePrint;
413  friend void dataStore(std::ostream &, PerfGraph &, void *);
414  friend void dataLoad(std::istream &, PerfGraph &, void *);
415 
416 private:
423  FullTable treeTable(const unsigned int level, const bool heaviest = false);
424 
425  template <typename Functor>
426  void treeRecurseInternal(const PerfNode & node,
427  const Functor & act,
428  const unsigned int level,
429  const bool heaviest,
430  unsigned int current_depth) const;
431 };
432 
433 template <typename Functor>
434 void
436  const Functor & act,
437  const unsigned int level,
438  const bool heaviest,
439  unsigned int current_depth) const
440 {
441  mooseAssert(_perf_graph_registry.sectionExists(node.id()), "Unable to find section name!");
442 
443  const auto & current_section_info = _perf_graph_registry.readSectionInfo(node.id());
444  if (current_section_info._level <= level)
445  {
446  mooseAssert(!_cumulative_section_info_ptrs.empty(), "update() must be run before treeRecurse!");
447  act(node, current_section_info, current_depth++);
448  }
449 
450  if (heaviest)
451  {
452  const PerfNode * heaviest_child = nullptr;
453  for (const auto & child_it : node.children())
454  {
455  const auto & current_child = *child_it.second;
456 
457  if (!heaviest_child || (current_child.totalTime() > heaviest_child->totalTime()))
458  heaviest_child = &current_child;
459  }
460 
461  if (heaviest_child)
462  treeRecurseInternal(*heaviest_child, act, level, true, current_depth);
463  }
464  else
465  {
466  for (const auto & child_it : node.children())
467  treeRecurseInternal(*child_it.second, act, level, false, current_depth);
468  }
469 }
470 
471 template <typename Functor>
472 void
474  const unsigned int level /* = MOOSE_MAX_STACK_SIZE */,
475  const bool heaviest /* = false */) const
476 {
477  mooseAssert(_root_node, "Root node does not exist; calling this too early");
478  treeRecurseInternal(*_root_node, act, level, heaviest, 0);
479 }
480 
481 void dataStore(std::ostream & stream, PerfGraph & perf_graph, void * context);
482 void dataLoad(std::istream & stream, PerfGraph & perf_graph, void * context);
void recursivelyUpdate(const PerfNode &current_node)
Updates the cumulative self/children/total time and memory for each section across all nodes that con...
Definition: PerfGraph.C:322
long int _memory
Either the starting memory or final memory depending on _state.
Definition: PerfGraph.h:286
PerfGraphRegistry & _perf_graph_registry
The PerfGraphRegistry.
Definition: PerfGraph.h:337
A helper class for re-directing output streams to Console output objects from MooseObjects.
Definition: ConsoleStream.h:30
long int _total_memory
Total memory gain for this section.
Definition: PerfGraph.h:235
long int _self_memory
Amount of memory gained within this section (without children)
Definition: PerfGraph.h:229
unsigned long int _num_calls
Number of times this section has been called.
Definition: PerfGraph.h:226
static MooseEnum dataTypeEnum()
DataType in a MooseEnum for use in InputParameters in objects that query the PerfGraph with sectionDa...
Definition: PerfGraph.h:72
The place where all timed sections will be stored.
The section is complete.
Definition: PerfGraph.h:250
~PerfGraph()
Destructor.
Definition: PerfGraph.C:54
PerfGraph(const std::string &root_name, MooseApp &app, const bool live_all, const bool perf_graph_live)
Create a new PerfGraph.
Definition: PerfGraph.C:28
bool sectionExists(const std::string &section_name) const
Whether or not a section with that name has been registered. Parameter section_name: the name of the section.
Real sectionData(const DataType type, const std::string &section_name, const bool must_exist=true)
Gets a PerfGraph result pertaining to a section.
Definition: PerfGraph.C:93
std::promise< bool > _done
The promise to the print thread that will signal when to stop.
Definition: PerfGraph.h:387
std::thread _print_thread
The thread for printing sections as they execute.
Definition: PerfGraph.h:408
friend void dataStore(std::ostream &, PerfGraph &, void *)
Definition: PerfGraph.C:490
std::atomic< unsigned int > _live_print_mem_limit
The memory limit before a message is printed (in MB)
Definition: PerfGraph.h:402
DataType
For retrieving values.
Definition: PerfGraph.h:51
A class for "pretty printing" a table of data.
Definition: PerfGraph.h:34
VariadicTable< std::string, unsigned long int, Real, Real, Real, long int, Real, Real, Real, long int > FullTable
Definition: PerfGraph.h:204
std::atomic< unsigned int > _execution_list_begin
Where the print thread should start reading the execution list.
Definition: PerfGraph.h:361
const std::unique_ptr< PerfGraphLivePrint > _live_print
The object that is doing live printing.
Definition: PerfGraph.h:405
This section has already started printing.
Definition: PerfGraph.h:247
Real _total
Total amount of time used.
Definition: PerfGraph.h:223
Base class for MOOSE-based applications.
Definition: MooseApp.h:96
Used to hold an increment of time and memory for a section. This is used in the LivePrint capability...
Definition: PerfGraph.h:257
std::atomic< unsigned int > _execution_list_end
Where the print thread should stop reading the execution list.
Definition: PerfGraph.h:364
unsigned int _print_stack_level
How much to indent this section.
Definition: PerfGraph.h:277
IncrementState
The execution state of an increment.
Definition: PerfGraph.h:241
bool _live_print_all
Whether or not to put everything in the perf graph.
Definition: PerfGraph.h:331
Real _self
Amount of time used within this section (without children)
Definition: PerfGraph.h:217
std::condition_variable _finished_section
The condition_variable to wake the print thread.
Definition: PerfGraph.h:396
void update()
Updates the time section_time and time for all currently running nodes.
Definition: PerfGraph.C:275
uint8_t processor_id_type
IncrementState _state
Whether or not this increment is the start of an increment or the finishing of an increment...
Definition: PerfGraph.h:274
unsigned int PerfID
Definition: MooseTypes.h:212
void addToExecutionList(const PerfID id, const IncrementState state, const std::chrono::time_point< std::chrono::steady_clock > time, const long int memory)
Add the information to the execution list.
Definition: PerfGraph.C:154
std::vector< CumulativeSectionInfo * > _cumulative_section_info_ptrs
Pointers into _cumulative_section_info, indexed on PerfID. This is here for convenience and speed so we...
Definition: PerfGraph.h:381
Scope guard for starting and stopping timing for a node.
Definition: PerfGuard.h:24
void treeRecurse(const Functor &act, const unsigned int level=MOOSE_MAX_STACK_SIZE, const bool heaviest=false) const
Definition: PerfGraph.h:473
const std::unique_ptr< PerfNode > _root_node
The root node of the graph.
Definition: PerfGraph.h:349
const std::map< PerfID, std::unique_ptr< PerfNode > > & children() const
Get the children.
Definition: PerfNode.h:106
FunctorEnvelope< T > Functor
PerfID id() const
Get the ID of this Node.
Definition: PerfNode.h:35
unsigned long long int _beginning_num_printed
The _console numPrinted() at the time this section was created.
Definition: PerfGraph.h:289
void setActive(bool active)
Turn on or off timing.
Definition: PerfGraph.h:129
unsigned int _num_dots
How many dots have been printed for this section.
Definition: PerfGraph.h:280
std::mutex _destructing_mutex
The mutex to use with a condition_variable predicate to guard _destructing.
Definition: PerfGraph.h:393
std::unordered_map< std::string, CumulativeSectionInfo > _cumulative_section_info
The cumulative time and memory for each section.
Definition: PerfGraph.h:375
void setLiveMemoryLimit(unsigned int mem_limit)
Set the memory limit before a message prints.
Definition: PerfGraph.h:157
An interface for the _console for outputting to the Console object.
void dataStore(std::ostream &stream, PerfGraph &perf_graph, void *context)
Definition: PerfGraph.C:490
bool active() const
Whether or not timing is active.
Definition: PerfGraph.h:124
bool _active
Whether or not timing is active.
Definition: PerfGraph.h:384
This is effectively a functor that runs on a separate thread and watches the state of the call stack ...
void disableLivePrint()
Completely disables Live Print (cannot be restarted)
Definition: PerfGraph.C:67
MooseApp & _moose_app
The MooseApp.
Definition: PerfGraph.h:328
This is a "smart" enum class intended to address many of the shortcomings in the C++ enum type. It sho...
Definition: MooseEnum.h:33
int _current_position
The current node position in the stack.
Definition: PerfGraph.h:352
Real _children
Amount of time used by children.
Definition: PerfGraph.h:220
std::chrono::steady_clock::duration totalTime() const
The time this Node plus all of its children took.
Definition: PerfNode.C:22
void treeRecurseInternal(const PerfNode &node, const Functor &act, const unsigned int level, const bool heaviest, unsigned int current_depth) const
Definition: PerfGraph.h:435
void pop()
Remove a Node from the end of the current scope.
Definition: PerfGraph.C:231
MooseApp & mooseApp()
Definition: PerfGraph.h:181
bool _disable_live_print
Whether or not live print is disabled (cannot be turned on again)
Definition: PerfGraph.h:334
void print(const ConsoleStream &console, unsigned int level)
Print the tree out.
Definition: PerfGraph.C:406
std::chrono::time_point< std::chrono::steady_clock > _time
Either the starting time or final time depending on _state.
Definition: PerfGraph.h:283
friend void dataLoad(std::istream &, PerfGraph &, void *)
Definition: PerfGraph.C:504
std::array< PerfNode *, MOOSE_MAX_STACK_SIZE > _stack
The full callstack. Currently capped at a depth of 100.
Definition: PerfGraph.h:355
void printHeaviestBranch(const ConsoleStream &console)
Print out the heaviest branch through the tree.
Definition: PerfGraph.C:413
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
const std::string _root_name
Name of the root node.
Definition: PerfGraph.h:343
Used to hold the cumulative time and memory for each section, which comes from all of the PerfNodes th...
Definition: PerfGraph.h:214
VariadicTable< std::string, unsigned long int, Real, Real, Real, long int > HeaviestTable
Definition: PerfGraph.h:206
const libMesh::processor_id_type _pid
This processor id.
Definition: PerfGraph.h:340
long int _children_memory
Amount of memory gained by children.
Definition: PerfGraph.h:232
bool _destructing
Tell the print thread to teardown.
Definition: PerfGraph.h:390
void printHeaviestSections(const ConsoleStream &console, const unsigned int num_sections)
Print out the heaviest sections that were timed.
Definition: PerfGraph.C:420
void setLiveTimeLimit(Real time_limit)
Set the time limit before a message prints.
Definition: PerfGraph.h:149
void push(const PerfID id)
Add a Node onto the end of the current callstack.
Definition: PerfGraph.C:185
FullTable treeTable(const unsigned int level, const bool heaviest=false)
Helper for building a VariadicTable that represents the tree.
Definition: PerfGraph.C:343
Section just started running.
Definition: PerfGraph.h:244
The PerfGraph will hold the master list of all registered performance segments and the head PerfNode...
Definition: PerfGraph.h:43
void setLivePrintAll(bool active)
Forces all sections to be output live.
Definition: PerfGraph.h:144
const PerfGraphSectionInfo & readSectionInfo(PerfID section_id) const
Special accessor just for PerfGraph so that no locking is needed in PerfGraph.
std::atomic< Real > _live_print_time_limit
The time limit before a message is printed (in seconds)
Definition: PerfGraph.h:399
void ErrorVector unsigned int
std::array< SectionIncrement, MAX_EXECUTION_LIST_SIZE > _execution_list
A circular buffer for holding the execution list, this is read by the printing loop.
Definition: PerfGraph.h:358
void dataLoad(std::istream &stream, PerfGraph &perf_graph, void *context)
Definition: PerfGraph.C:504
void enableLivePrint()
Enables Live Print.
Definition: PerfGraph.C:57
const PerfNode & rootNode() const
Definition: PerfGraph.h:186
const PerfID _root_node_id
The id for the root node.
Definition: PerfGraph.h:346
A node in the PerfGraph.
Definition: PerfNode.h:24