https://mooseframework.inl.gov
PerfGraph.C
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://mooseframework.inl.gov
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
10 #include "PerfGraph.h"
11 
12 // MOOSE Includes
13 #include "PerfGuard.h"
14 #include "MooseError.h"
15 #include "PerfGraphLivePrint.h"
16 #include "PerfGraphRegistry.h"
17 #include "MooseApp.h"
18 
19 // Note: do everything we can to make sure this only gets #included
20 // in the .C file... this is a heavily templated header that we
21 // don't want to expose to EVERY file in MOOSE...
22 #include "VariadicTable.h"
23 
24 // System Includes
25 #include <chrono>
26 #include <memory>
27 
// Constructs the PerfGraph. All work visible here happens in the member
// initializer list: the root section is registered with the global
// PerfGraphRegistry, the root PerfNode is created from the returned ID, and the
// PerfGraphLivePrint helper is built.
//
// root_name:        name under which the root section is registered
// app:              the owning MooseApp (also used to obtain this process's rank)
// live_all:         stored in _live_print_all
// perf_graph_live:  live printing is disabled when this is false
//
// NOTE(review): listing lines 32 and 52 (start of the initializer list and the
// constructor body) are not present in this excerpt.
28 PerfGraph::PerfGraph(const std::string & root_name,
29  MooseApp & app,
30  const bool live_all,
31  const bool perf_graph_live)
33  _moose_app(app),
34  _live_print_all(live_all),
35  _disable_live_print(!perf_graph_live), // inverted: the member stores "disabled"
36  _perf_graph_registry(moose::internal::getPerfGraphRegistry()),
37  _pid(app.comm().rank()),
38  _root_name(root_name),
39  _root_node_id(_perf_graph_registry.registerSection(root_name, 0)), // root is registered at level 0
40  _root_node(std::make_unique<PerfNode>(_root_node_id)),
41  _current_position(-1), // -1: nothing has been pushed onto _stack yet
42  _stack(),
43  _execution_list_begin(0),
44  _execution_list_end(0),
45  _max_memory(0),
46  _active(true),
47  _destructing(false),
48  _live_print_time_limit(5.0),
49  _live_print_mem_limit(100),
50  _live_print(std::make_unique<PerfGraphLivePrint>(*this, app))
51 {
53 }
54 
56 
// PerfGraph::enableLivePrint() -- the signature (listing line 58) is not present
// in this excerpt.
//
// Starts the live-print thread. This only happens on rank 0 and only while live
// printing has not been disabled; on every other rank the call does nothing.
57 void
59 {
60  if (_pid == 0 && !_disable_live_print)
61  {
62  // Start the printing thread; it runs start() on the PerfGraphLivePrint object
63  _print_thread = std::thread([this] { this->_live_print->start(); });
64  }
65 }
66 
// PerfGraph::disableLivePrint() -- the signature (listing line 68) is not present
// in this excerpt.
//
// Shuts the live-print thread down on rank 0: sets _destructing under its mutex,
// wakes the print thread, joins it, and then marks live print as disabled.
// Because _disable_live_print is set at the end, a second call skips the whole
// body and therefore never joins twice.
//
// NOTE(review): std::thread::join() throws std::system_error when the thread is
// not joinable. This path assumes enableLivePrint() actually started
// _print_thread beforehand -- confirm against callers.
67 void
69 {
70  if (_pid == 0 && !_disable_live_print)
71  {
72  {
73  // Unlike using atomics for execution_thread_end
74  // here we actually lock to ensure that either the print thread
75  // immediately sees that we are destructing or is immediately
76  // notified with the below notification. Without doing this
77  // it would be possible (but unlikely) for the print thread to
78  // hang for 1 second at the end of execution (which would not be
79  // good anytime you are running lots of fast calculations back-to-back
80  // like during testing or stochastic sampling).
81  std::lock_guard<std::mutex> lock(_destructing_mutex);
82  _destructing = true;
83  }
84 
// Wake the print thread (the lock above has already been released)
85  _finished_section.notify_one();
86 
// Block until the print thread has exited
87  _print_thread.join();
88 
// From here on both enableLivePrint() and disableLivePrint() are no-ops
89  _disable_live_print = true;
90  }
91 }
92 
// PerfGraph::sectionData(type, section_name, must_exist) -- the first signature
// line (listing line 94) is not present in this excerpt.
//
// Returns one timing/memory/call-count value for a named section.
//
// type:          which quantity to return (see the switch below)
// section_name:  section to query; the literal "Root" is an alias for _root_name
// must_exist:    when true, an unknown and unregistered section is a fatal error
//
// Returns 0 when the section has no cumulative entry but either is not required
// to exist or is registered with the registry.
//
// NOTE(review): the *_AVG cases divide by _num_calls and the *_PERCENT cases
// divide by the root's total time; neither denominator is checked for zero here.
93 Real
95  const std::string & section_name,
96  const bool must_exist /* = true */)
97 {
// Refresh the cumulative section info before reading from it
98  update();
99 
100  const auto section_it =
101  _cumulative_section_info.find(section_name == "Root" ? _root_name : section_name);
102 
103  if (section_it == _cumulative_section_info.end())
104  {
105  if (!must_exist || // isn't required to exist
106  _perf_graph_registry.sectionExists(section_name) // or, is required to exist and it does
107  )
108  return 0;
109 
110  mooseError("Unknown PerfGraph section name \"",
111  section_name,
112  "\" in PerfGraph::sectionData().\nIf you are attempting to retrieve the root use "
113  "\"Root\".");
114  }
115 
116  const CumulativeSectionInfo & section_info = section_it->second;
117 
// CALLS is answered before the root entry is dereferenced below
118  if (type == CALLS)
119  return section_info._num_calls;
120 
// Total time of the root section; the denominator for the *_PERCENT cases
121  const auto app_time = _cumulative_section_info_ptrs[_root_node_id]->_total;
122 
123  switch (type)
124  {
125  case SELF:
126  return section_info._self;
127  case CHILDREN:
128  return section_info._children;
129  case TOTAL:
130  return section_info._total;
131  case SELF_AVG:
132  return section_info._self / static_cast<Real>(section_info._num_calls);
133  case CHILDREN_AVG:
134  return section_info._children / static_cast<Real>(section_info._num_calls);
135  case TOTAL_AVG:
136  return section_info._total / static_cast<Real>(section_info._num_calls);
137  case SELF_PERCENT:
138  return 100. * (section_info._self / app_time);
139  case CHILDREN_PERCENT:
140  return 100. * (section_info._children / app_time);
141  case TOTAL_PERCENT:
142  return 100. * (section_info._total / app_time);
143  case SELF_MEMORY:
144  return section_info._self_memory;
145  case CHILDREN_MEMORY:
146  return section_info._children_memory;
147  case TOTAL_MEMORY:
148  return section_info._total_memory;
149  default:
150  ::mooseError("Unknown DataType");
151  }
152 }
153 
// PerfGraph::addToExecutionList(id, state, time, memory) -- the first signature
// line (listing line 155) is not present in this excerpt.
//
// Writes one entry into the circular _execution_list at index
// _execution_list_end and then publishes the advanced end index with release
// ordering so the print thread can pick the entry up.
//
// id:      section the entry refers to
// state:   execution state recorded for the entry
// time:    time point recorded for the entry
// memory:  memory value recorded for the entry
154 void
156  const IncrementState state,
157  const std::chrono::time_point<std::chrono::steady_clock> time,
158  const long int memory)
159 {
// Fill the slot at the current end of the buffer
160  auto & section_increment = _execution_list[_execution_list_end];
161 
162  section_increment._id = id;
163  section_increment._state = state;
164  section_increment._time = time;
165  section_increment._memory = memory;
// Snapshot of the console's print counter at the time the entry is recorded
166  section_increment._beginning_num_printed = _console.numPrinted();
167 
168  // A note about this next section of code:
169  // It is only EVER run on the main thread - and therefore there can be
170  // no race conditions. All that is important here is that the print
171  // thread always sees a consistent value for _execution_list_end
172  auto next_execution_list_end = _execution_list_end + 1;
173 
174  // Are we at the end of our circular buffer?
175  if (next_execution_list_end >= MAX_EXECUTION_LIST_SIZE)
176  next_execution_list_end = 0;
177 
178  // This "release" will synchronize the above memory changes with the
179  // "acquire" in the printing thread
180  // All of the above memory operations will be seen by the
181  // printing thread before the printing thread sees this new value
182  _execution_list_end.store(next_execution_list_end, std::memory_order_release);
183 }
184 
// PerfGraph::push(const PerfID id) -- the signature (listing line 186) is not
// present in this excerpt, and neither are listing lines 206, 222 and 231.
//
// Begins timing a section: picks the node for `id`, stamps it with the current
// time and memory, counts the call, stores it in _stack at _current_position
// and, on rank 0 with live print enabled, records a STARTED entry in the
// execution list. Does nothing when the graph is not active.
185 void
187 {
188  if (!_active)
189  return;
190 
191  PerfNode * new_node = nullptr;
192 
// The root is held directly; any other section is looked up as a child of the
// node at the current stack position
193  if (id == _root_node_id)
194  new_node = _root_node.get();
195  else
196  new_node = _stack[_current_position]->getChild(id);
197 
198  MemoryUtils::Stats stats;
199  auto memory_success = MemoryUtils::getMemoryStats(stats);
200 
// Stays 0 when memory stats are unavailable and there is no parent to copy from
201  long int start_memory = 0;
202 
203  if (memory_success)
204  {
205  const auto memory_mb =
207  start_memory = memory_mb;
208  updateMaxMemory(memory_mb);
209  }
210  // If we weren't able to get the memory stats, let's just use the parent's
211  else if (_current_position != -1)
212  start_memory = _stack[_current_position]->startMemory();
213 
214  // Set the start time
215  auto current_time = std::chrono::steady_clock::now();
216 
217  new_node->setStartTimeAndMemory(current_time, start_memory);
218 
219  // Increment the number of calls
220  new_node->incrementNumCalls();
221 
223 
// Fatal error rather than writing past the end of _stack
224  if (_current_position >= MOOSE_MAX_STACK_SIZE)
225  mooseError("PerfGraph is out of stack space!");
226 
227  _stack[_current_position] = new_node;
228 
229  // Add this to the execution list unless the message is empty - but pre-empted by live_print_all
230  if ((_pid == 0 && !_disable_live_print) &&
232  addToExecutionList(id, IncrementState::STARTED, current_time, start_memory);
233 }
234 
// PerfGraph::pop() -- the signature (listing line 236) is not present in this
// excerpt, and neither are listing lines 253, 264 and 269.
//
// Ends timing for the node at the current stack position: adds the current
// time and memory to it and, on rank 0 with live print enabled, records a
// FINISHED entry and wakes the print thread. Does nothing when the graph is not
// active.
//
// NOTE(review): _stack[_current_position] is indexed before any check that
// _current_position is not -1, so this assumes a matching push() came first.
235 void
237 {
238  if (!_active)
239  return;
240 
241  auto current_time = std::chrono::steady_clock::now();
242 
243  auto & current_node = _stack[_current_position];
244 
245  MemoryUtils::Stats stats;
246  auto memory_success = MemoryUtils::getMemoryStats(stats);
247 
248  long int current_memory = 0;
249 
250  if (memory_success)
251  {
252  const auto memory_mb =
254  current_memory = memory_mb;
255  updateMaxMemory(memory_mb);
256  }
257  // If we weren't able to get the memory stats, let's just use the start memory
258  else if (_current_position !=
259  -1)
260  current_memory = _stack[_current_position]->startMemory();
261 
262  current_node->addTimeAndMemory(current_time, current_memory);
263 
265 
266  // Add this to the execution list
267  if ((_pid == 0 && !_disable_live_print) &&
268  (!_perf_graph_registry.readSectionInfo(current_node->id())._live_message.empty() ||
270  {
271  addToExecutionList(current_node->id(), IncrementState::FINISHED, current_time, current_memory);
272 
273  // Tell the printing thread that a section has finished
274  //
275  // Note: no mutex is needed here because we're using an atomic
276  // in the predicate of the condition_variable in the thread
277  // This is technically correct - but there is a chance of missing a signal
278  // For us - that chance is low and doesn't matter (the timeout will just be hit
279  // instead). So - I would rather not have an extra lock here in the main thread.
280  _finished_section.notify_one();
281  }
282 }
283 
// PerfGraph::update() -- the signature (listing line 285) is not present in this
// excerpt, and neither are listing lines 291, 293, 316 and 321.
//
// Brings the cumulative per-section data up to date:
//   1. folds the time/memory so far into every node still on the stack,
//   2. zeroes every entry of _cumulative_section_info,
//   3. refills those entries (that step is on the elided listing line 316),
//   4. rebuilds _cumulative_section_info_ptrs so it is indexed by section ID.
284 void
286 {
287  // First update all of the currently running nodes
288  auto now = std::chrono::steady_clock::now();
289 
290  MemoryUtils::Stats stats;
292  auto now_memory =
294 
// Every stack slot from 0 through _current_position holds a running node.
// Presumably addTimeAndMemory() accumulates up to `now`; the start point is
// then moved to `now` so the same interval is not counted again -- TODO confirm
295  for (int i = 0; i <= _current_position; i++)
296  {
297  auto node = _stack[i];
298  node->addTimeAndMemory(now, now_memory);
299  node->setStartTimeAndMemory(now, now_memory);
300  }
301 
302  // Zero out the entries
303  for (auto & section_time_it : _cumulative_section_info)
304  {
305  auto & section_time = section_time_it.second;
306 
307  section_time._num_calls = 0;
308  section_time._self = 0.;
309  section_time._children = 0.;
310  section_time._total = 0.;
311  section_time._self_memory = 0;
312  section_time._children_memory = 0;
313  section_time._total_memory = 0.;
314  }
315 
317 
318  // Update vector pointing to section times
319  // Note: we are doing this _after_ recursively filling
320  // because new entries may have been created
322 
// Map each section name back to its registry ID and store a pointer at that index
323  for (auto & section_time_it : _cumulative_section_info)
324  {
325  auto id = _perf_graph_registry.sectionID(section_time_it.first);
326 
327  _cumulative_section_info_ptrs[id] = &section_time_it.second;
328  }
329 }
330 
// PerfGraph::recursivelyUpdate(const PerfNode & current_node) -- the signature
// (listing line 332) is not present in this excerpt.
//
// Adds this node's self/children/total time, call count and memory to the
// cumulative entry for its section name, then does the same for every child.
// Entries are keyed by name, so all nodes sharing a section name add into one
// entry.
331 void
333 {
334  const auto & section_name = _perf_graph_registry.readSectionInfo(current_node.id())._name;
335 
336  // RHS insertion on purpose: operator[] creates the entry the first time a name is seen
337  auto & section_time = _cumulative_section_info[section_name];
338 
339  section_time._self += current_node.selfTimeSec();
340  section_time._children += current_node.childrenTimeSec();
341  section_time._total += current_node.totalTimeSec();
342  section_time._num_calls += current_node.numCalls();
343 
344  section_time._self_memory += current_node.selfMemory();
345  section_time._children_memory += current_node.childrenMemory();
346  section_time._total_memory += current_node.totalMemory();
347 
// children() maps PerfID -> unique_ptr<PerfNode>; descend into each child
348  for (auto & child_it : current_node.children())
349  recursivelyUpdate(*child_it.second);
350 }
351 
// Builds the table representing the call tree. The return type (listing line
// 352) and most of the column-format list (listing lines 371-379) are not
// present in this excerpt.
//
// level:     passed through to treeRecurse()
// heaviest:  passed through to treeRecurse() (printHeaviestBranch() passes true)
//
// Returns the filled table; the caller prints it.
353 PerfGraph::treeTable(const unsigned int level, const bool heaviest /* = false */)
354 {
// Make sure the node times are current before reading them
355  update();
356 
357  FullTable vtable({"Section",
358  "Calls",
359  "Self(s)",
360  "Avg(s)",
361  "%",
362  "Mem(MB)",
363  "Total(s)",
364  "Avg(s)",
365  "%",
366  "Mem(MB)"},
367  10);
368 
369  vtable.setColumnFormat({
370  VariadicTableColumnFormat::AUTO, // Section Name
380  });
381 
382  vtable.setColumnPrecision({
383  1, // Section Name
384  0, // Calls
385  3, // Self
386  3, // Avg.
387  2, // %
388  0, // Memory
389  3, // Total
390  3, // Avg.
391  2, // %
392  0, // Memory
393  });
394 
// Row builder handed to treeRecurse(): one row per node, with the section name
// indented two spaces per depth level and both percentages taken relative to
// the root node's total time
395  auto act = [this, &vtable](const PerfNode & node,
396  const moose::internal::PerfGraphSectionInfo & section_info,
397  const unsigned int depth)
398  {
399  vtable.addRow(std::string(depth * 2, ' ') + section_info._name, // Section Name
400  node.numCalls(), // Calls
401  node.selfTimeSec(), // Self
402  node.selfTimeAvg(), // Avg.
403  100. * node.selfTimeSec() / _root_node->totalTimeSec(), // %
404  node.selfMemory(), // Memory
405  node.totalTimeSec(), // Total
406  node.totalTimeAvg(), // Avg.
407  100. * node.totalTimeSec() / _root_node->totalTimeSec(), // %
408  node.totalMemory()); // Memory
409  };
410  treeRecurse(act, level, heaviest);
411 
412  return vtable;
413 }
414 
// Prints the performance graph: a "Performance Graph:" heading followed by the
// table produced by treeTable(level).
//
// console:  stream the heading and table are written to
// level:    forwarded unchanged to treeTable()
415 void
416 PerfGraph::print(const ConsoleStream & console, unsigned int level)
417 {
418  console << "\nPerformance Graph:\n";
419  treeTable(level).print(console);
420 }
421 
// PerfGraph::printHeaviestBranch(const ConsoleStream & console) -- the signature
// (listing line 423) is not present in this excerpt.
//
// Prints a "Heaviest Branch:" heading followed by the tree table built with
// heaviest = true and MOOSE_MAX_STACK_SIZE as the level argument.
422 void
424 {
425  console << "\nHeaviest Branch:\n";
426  treeTable(MOOSE_MAX_STACK_SIZE, /* heaviest = */ true).print(console);
427 }
428 
// Prints a "Heaviest Sections:" heading and a table of up to num_sections
// sections, taken in the order given by an indirect sort on self time (largest
// first; missing entries sort last).
//
// console:       stream the heading and table are written to
// num_sections:  how many entries of the sorted index list to visit
//
// Listing lines 438-439, 441, 457-461 and 484 are not present in this excerpt.
//
// NOTE(review): the loop reads sorted[i] for every i < num_sections with no
// visible check against sorted.size(); confirm callers never ask for more
// sections than there are sorted indices.
429 void
430 PerfGraph::printHeaviestSections(const ConsoleStream & console, const unsigned int num_sections)
431 {
432  update();
433 
434  console << "\nHeaviest Sections:\n";
435 
436  // Indirect Sort The Self Time
437  std::vector<size_t> sorted;
440  sorted,
442  {
443  if (lhs && rhs)
444  return lhs->_self > rhs->_self;
445 
446  // If the LHS exists - it's definitely bigger than a non-existent RHS
447  if (lhs)
448  return true;
449 
450  // Both don't exist - so it doesn't matter how we sort them
451  return false;
452  });
453 
454  HeaviestTable vtable({"Section", "Calls", "Self(s)", "Avg.", "%", "Mem(MB)"}, 10);
455 
456  vtable.setColumnFormat({VariadicTableColumnFormat::AUTO, // Section; doesn't matter
462  );
463 
464  vtable.setColumnPrecision({
465  1, // Section
466  1, // Calls
467  3, // Time
468  3, // Avg.
469  2, // Percent
470  1 // Memory
471  });
472 
473  mooseAssert(!_cumulative_section_info_ptrs.empty(),
474  "update() must be run before printHeaviestSections()!");
475 
476  // The total time of the root node
477  auto total_root_time = _cumulative_section_info_ptrs[_root_node_id]->_total;
478 
479  // Now print out the largest ones
480  for (unsigned int i = 0; i < num_sections; i++)
481  {
482  auto id = sorted[i];
483 
485  continue;
486 
487  const auto & entry = *_cumulative_section_info_ptrs[id];
488  vtable.addRow(_perf_graph_registry.sectionInfo(id)._name, // Section
489  entry._num_calls, // Calls
490  entry._self, // Time
491  entry._self / static_cast<Real>(entry._num_calls), // Avg.
492  100. * entry._self / total_root_time, // Percent
493  entry._self_memory); // Memory
494  }
495 
496  vtable.print(console);
497 }
498 
// Raises _max_memory to current_memory when current_memory is larger than the
// value returned by getMaxMemory(); otherwise leaves it unchanged.
//
// current_memory:  the memory reading to compare against the stored maximum
499 void
500 PerfGraph::updateMaxMemory(const std::size_t current_memory)
501 {
502  // We shouldn't need to lock _max_memory in-between getting
503  // it and setting it as this should only ever be set outside
504  // of threads. _max_memory is an atomic so that getting it
505  // is thread safe.
506  if (current_memory > getMaxMemory())
507  _max_memory = current_memory;
508 }
509 
// Serializes a PerfGraph to `stream` in this order: first the registry's
// id -> section-info container, then (after an update()) the node tree starting
// at the root node. dataLoad() reads the two parts back in the same order.
//
// stream:      output stream to write to
// perf_graph:  graph to serialize; update() is called on it, so it is modified
// (void *):    unused context pointer
510 void
511 dataStore(std::ostream & stream, PerfGraph & perf_graph, void *)
512 {
513  // We need to store the registry id -> section info map so that we can add
514  // registered sections that may not be added yet during recover
515  dataStore(stream, perf_graph._perf_graph_registry._id_to_item, nullptr);
516 
517  // Update before serializing the nodes so that the time/memory/calls are correct
518  perf_graph.update();
519 
520  // Recursively serialize all of the nodes
521  dataStore(stream, perf_graph._root_node, nullptr);
522 }
523 
// Restores a PerfGraph from `stream`, reading the two parts written by
// dataStore(): the recovered section infos first, then the node tree.
//
// stream:      input stream to read from
// perf_graph:  graph to load into; it is also passed as the context pointer
//              when loading the node tree
// (void *):    unused context pointer
//
// Listing line 533 (the call used for sections that have a live message) is not
// present in this excerpt.
524 void
525 dataLoad(std::istream & stream, PerfGraph & perf_graph, void *)
526 {
527  // Load in all of the recovered sections and register those that do not exist yet
528  std::vector<moose::internal::PerfGraphSectionInfo> recovered_section_info;
529  dataLoad(stream, recovered_section_info, nullptr);
530  for (const auto & info : recovered_section_info)
531  {
// Sections with a live message also pass the message and the print-dots flag;
// sections without one are registered with name and level only
532  if (info._live_message.size())
534  info._name, info._level, info._live_message, info._print_dots);
535  else
536  perf_graph._perf_graph_registry.registerSection(info._name, info._level);
537  }
538 
539  // Update the current node time/memory/calls before loading the nodes as the load
540  // will append information to current nodes that exist
541  perf_graph.update();
542 
543  // Recursively load all of the nodes; this will append information to matching nodes
544  // and will create new nodes for section paths that do not exist
545  dataLoad(stream, perf_graph._root_node, &perf_graph);
546 }
std::size_t getMaxMemory() const
Get the maximum memory allocation in MB.
Definition: PerfGraph.h:183
long int totalMemory() const
Get the amount of memory added by this node.
Definition: PerfNode.h:162
void recursivelyUpdate(const PerfNode &current_node)
Updates the cumulative self/children/total time and memory for each section across all nodes that con...
Definition: PerfGraph.C:332
Real selfTimeSec() const
Get the time this node took in seconds.
Definition: PerfNode.h:116
PerfGraphRegistry & _perf_graph_registry
The PerfGraphRegistry.
Definition: PerfGraph.h:344
A helper class for re-directing output streams to Console output objects from MooseObjects.
Definition: ConsoleStream.h:30
long int _total_memory
Total memory gain for this section.
Definition: PerfGraph.h:242
long int _self_memory
Amount of memory gained within this section (without children)
Definition: PerfGraph.h:236
unsigned long int _num_calls
Number of times this section has been called.
Definition: PerfGraph.h:233
~PerfGraph()
Destructor.
Definition: PerfGraph.C:55
PerfGraph(const std::string &root_name, MooseApp &app, const bool live_all, const bool perf_graph_live)
Create a new PerfGraph.
Definition: PerfGraph.C:28
std::string _live_message
Message to print while the section is running.
bool sectionExists(const std::string &section_name) const
Whether or not a section with that name has been registered. section_name: The name of the section.
void setStartTimeAndMemory(const std::chrono::time_point< std::chrono::steady_clock > time, const long int memory)
Set the current start time.
Definition: PerfNode.h:41
Real sectionData(const DataType type, const std::string &section_name, const bool must_exist=true)
Gets a PerfGraph result pertaining to a section.
Definition: PerfGraph.C:94
long int selfMemory() const
Get the amount of memory added by this node.
Definition: PerfNode.C:41
void print(StreamType &stream)
Pretty print the table of data.
Definition: VariadicTable.h:98
MPI_Info info
bool getMemoryStats(Stats &stats)
get all memory stats for the current process stats The Stats object to fill with the data ...
Definition: MemoryUtils.C:79
std::thread _print_thread
The thread for printing sections as they execute.
Definition: PerfGraph.h:418
void mooseError(Args &&... args)
Emit an error message with the given stringified, concatenated args and terminate the application...
Definition: MooseError.h:311
Real selfTimeAvg() const
The average time this node took in seconds.
Definition: PerfNode.h:120
std::size_t _physical_memory
Definition: MemoryUtils.h:23
DataType
For retrieving values.
Definition: PerfGraph.h:51
A class for "pretty printing" a table of data.
Definition: PerfGraph.h:34
void indirectSort(RandomAccessIterator beg, RandomAccessIterator end, std::vector< size_t > &b)
Definition: IndirectSort.h:68
const std::unique_ptr< PerfGraphLivePrint > _live_print
The object that is doing live printing.
Definition: PerfGraph.h:415
Real _total
Total amount of time used.
Definition: PerfGraph.h:230
Base class for MOOSE-based applications.
Definition: MooseApp.h:108
std::atomic< unsigned int > _execution_list_end
Where the print thread should stop reading the execution list.
Definition: PerfGraph.h:371
long int childrenMemory() const
Get the amount of memory added by this node.
Definition: PerfNode.C:47
IncrementState
The execution state of an increment.
Definition: PerfGraph.h:248
bool _live_print_all
Whether or not to put everything in the perf graph.
Definition: PerfGraph.h:338
Real totalTimeSec() const
The time this Node plus all of its children took in seconds.
Definition: PerfNode.h:129
Real _self
Amount of time used within this section (without children)
Definition: PerfGraph.h:224
Used to hold metadata about the registered sections Note: this is a class instead of a struct because...
std::condition_variable _finished_section
The condition_variable to wake the print thread.
Definition: PerfGraph.h:406
void update()
Updates the cumulative section info and the time for all currently running nodes.
Definition: PerfGraph.C:285
unsigned int PerfID
Definition: MooseTypes.h:240
void incrementNumCalls()
Increments the number of calls.
Definition: PerfNode.h:76
void addToExecutionList(const PerfID id, const IncrementState state, const std::chrono::time_point< std::chrono::steady_clock > time, const long int memory)
Add the information to the execution list.
Definition: PerfGraph.C:155
std::vector< CumulativeSectionInfo * > _cumulative_section_info_ptrs
Pointers into _cumulative_section_info indexed on PerfID This is here for convenience and speed so we...
Definition: PerfGraph.h:388
unsigned long int numCalls() const
Get the number of times this node was called.
Definition: PerfNode.h:147
void treeRecurse(const Functor &act, const unsigned int level=MOOSE_MAX_STACK_SIZE, const bool heaviest=false) const
Definition: PerfGraph.h:488
unsigned long long int numPrinted() const
The number of times something has been printed.
Definition: ConsoleStream.C:68
const std::unique_ptr< PerfNode > _root_node
The root node of the graph.
Definition: PerfGraph.h:356
const std::map< PerfID, std::unique_ptr< PerfNode > > & children() const
Get the children.
Definition: PerfNode.h:107
Real childrenTimeSec() const
The time this node's children took in seconds.
Definition: PerfNode.h:142
PerfID id() const
Get the ID of this Node.
Definition: PerfNode.h:36
void dataLoad(std::istream &stream, PerfGraph &perf_graph, void *)
Definition: PerfGraph.C:525
std::mutex _destructing_mutex
The mutex to use with a condition_variable predicate to guard _destructing.
Definition: PerfGraph.h:403
std::unordered_map< std::string, CumulativeSectionInfo > _cumulative_section_info
The cumulative time and memory for each section.
Definition: PerfGraph.h:382
An interface for the _console for outputting to the Console object.
std::deque< Item > _id_to_item
Vector of IDs to Items.
bool _active
Whether or not timing is active.
Definition: PerfGraph.h:394
This is effectively a functor that runs on a separate thread and watches the state of the call stack ...
PerfID registerSection(const std::string &section_name, const unsigned int level)
Call to register a named section for timing.
void disableLivePrint()
Completely disables Live Print (cannot be restarted)
Definition: PerfGraph.C:68
int _current_position
The current node position in the stack.
Definition: PerfGraph.h:359
void updateMaxMemory(const std::size_t current_memory)
Update _max_memory if current_memory > _max_memory.
Definition: PerfGraph.C:500
Real _children
Amount of time used by children.
Definition: PerfGraph.h:227
std::atomic< std::size_t > _max_memory
Maximum memory encountered during push and pop.
Definition: PerfGraph.h:391
void pop()
Remove a Node from the end of the current scope.
Definition: PerfGraph.C:236
const PerfGraphSectionInfo & sectionInfo(const PerfID section_id) const
Given a PerfID return the PerfGraphSectionInfo The ID.
bool _disable_live_print
Whether or not live print is disabled (cannot be turned on again)
Definition: PerfGraph.h:341
void setColumnFormat(const std::vector< VariadicTableColumnFormat > &column_format)
Set how to format numbers for each column.
void print(const ConsoleStream &console, unsigned int level)
Print the tree out.
Definition: PerfGraph.C:416
std::array< PerfNode *, MOOSE_MAX_STACK_SIZE > _stack
The full callstack. Currently capped at a depth of 100.
Definition: PerfGraph.h:362
void printHeaviestBranch(const ConsoleStream &console)
Print out the heaviest branch through the tree.
Definition: PerfGraph.C:423
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
Real totalTimeAvg() const
The average time this Node plus all of its children took in seconds.
Definition: PerfNode.h:133
const std::string _root_name
Name of the root node.
Definition: PerfGraph.h:350
PerfID sectionID(const std::string &section_name) const
Given a name return the PerfID The name of the section.
Use to hold the cumulative time and memory for each section, which comes from all of the PerfNodes th...
Definition: PerfGraph.h:221
const libMesh::processor_id_type _pid
This processor id.
Definition: PerfGraph.h:347
long int _children_memory
Amount of memory gained by children.
Definition: PerfGraph.h:239
bool _destructing
Tell the print thread to teardown.
Definition: PerfGraph.h:400
void printHeaviestSections(const ConsoleStream &console, const unsigned int num_sections)
Print out the heaviest sections that were timed.
Definition: PerfGraph.C:430
const ConsoleStream _console
An instance of helper class to write streams to the Console objects.
void push(const PerfID id)
Add a Node onto the end of the current callstack.
Definition: PerfGraph.C:186
FullTable treeTable(const unsigned int level, const bool heaviest=false)
Helper for building a VariadicTable that represents the tree.
Definition: PerfGraph.C:353
The PerfGraph will hold the master list of all registered performance segments and the head PerfNode...
Definition: PerfGraph.h:43
std::size_t convertBytes(std::size_t bytes, MemUnits unit)
convert bytes to selected unit prefix
Definition: MemoryUtils.C:174
PerfGraphRegistry & getPerfGraphRegistry()
Get the global PerfGraphRegistry singleton.
const PerfGraphSectionInfo & readSectionInfo(PerfID section_id) const
Special accessor just for PerfGraph so that no locking is needed in PerfGraph.
std::array< SectionIncrement, MAX_EXECUTION_LIST_SIZE > _execution_list
A circular buffer for holding the execution list, this is read by the printing loop.
Definition: PerfGraph.h:365
void enableLivePrint()
Enables Live Print.
Definition: PerfGraph.C:58
void dataStore(std::ostream &stream, PerfGraph &perf_graph, void *)
Definition: PerfGraph.C:511
const PerfID _root_node_id
The id for the root node.
Definition: PerfGraph.h:353
A node in the PerfGraph.
Definition: PerfNode.h:25