www.mooseframework.org
PerfGraph.C
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://www.mooseframework.org
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
10 #include "PerfGraph.h"
11 
12 // MOOSE Includes
13 #include "PerfGuard.h"
14 #include "MooseError.h"
15 
16 // Note: do everything we can to make sure this only gets #included
17 // in the .C file... this is a heavily templated header that we
18 // don't want to expose to EVERY file in MOOSE...
19 #include "VariadicTable.h"
20 
21 // System Includes
22 #include <chrono>
23 
24 const std::string PerfGraph::ROOT_NAME = "Root";
25 
26 PerfGraph::PerfGraph(const std::string & root_name)
27  : _root_name(root_name), _current_position(0), _active(true)
28 {
29  // Not done in the initialization list on purpose because this object needs to be complete first
30  _root_node = libmesh_make_unique<PerfNode>(registerSection(ROOT_NAME, 0));
31 
32  // Set the initial time
33  _root_node->setStartTime(std::chrono::steady_clock::now());
34 
35  // Add a call
36  _root_node->incrementNumCalls();
37 
38  _stack[0] = _root_node.get();
39 }
40 
41 unsigned int
42 PerfGraph::registerSection(const std::string & section_name, unsigned int level)
43 {
44  auto it = _section_name_to_id.lower_bound(section_name);
45 
46  // Is it already registered?
47  if (it != _section_name_to_id.end() && it->first == section_name)
48  return it->second;
49 
50  // It's not...
51  auto id = _section_name_to_id.size();
52  _section_name_to_id.emplace_hint(it, section_name, id);
53  _id_to_section_name[id] = section_name;
54  _id_to_level[id] = level;
55 
56  return id;
57 }
58 
59 const std::string &
61 {
62  auto find_it = _id_to_section_name.find(id);
63 
64  if (find_it == _id_to_section_name.end())
65  mooseError("PerfGraph cannot find a section name associated with id: ", id);
66 
67  return find_it->second;
68 }
69 
70 unsigned long int
71 PerfGraph::getNumCalls(const std::string & section_name)
72 {
73  updateTiming();
74 
75  auto section_it = _section_time.find(section_name);
76 
77  if (section_it == _section_time.end())
78  mooseError(
79  "Unknown section_name: ",
80  section_name,
81  " in PerfGraph::getNumCalls()\nIf you are attempting to retrieve the root use \"Root\".");
82 
83  return section_it->second._num_calls;
84 }
85 
86 Real
87 PerfGraph::getTime(const TimeType type, const std::string & section_name)
88 {
89  updateTiming();
90 
91  auto section_it = _section_time.find(section_name);
92 
93  if (section_it == _section_time.end())
94  mooseError(
95  "Unknown section_name: ",
96  section_name,
97  " in PerfGraph::getTime()\nIf you are attempting to retrieve the root use \"Root\".");
98 
99  auto app_time = _section_time_ptrs[0]->_total;
100 
101  switch (type)
102  {
103  case SELF:
104  return section_it->second._self;
105  case CHILDREN:
106  return section_it->second._children;
107  case TOTAL:
108  return section_it->second._total;
109  case SELF_AVG:
110  return section_it->second._self / static_cast<Real>(section_it->second._num_calls);
111  case CHILDREN_AVG:
112  return section_it->second._children / static_cast<Real>(section_it->second._num_calls);
113  case TOTAL_AVG:
114  return section_it->second._total / static_cast<Real>(section_it->second._num_calls);
115  case SELF_PERCENT:
116  return 100. * (section_it->second._self / app_time);
117  case CHILDREN_PERCENT:
118  return 100. * (section_it->second._children / app_time);
119  case TOTAL_PERCENT:
120  return 100. * (section_it->second._total / app_time);
121  default:
122  ::mooseError("Unknown TimeType");
123  }
124 }
125 
126 void
128 {
129  if (!_active)
130  return;
131 
132  auto new_node = _stack[_current_position]->getChild(id);
133 
134  // Set the start time
135  new_node->setStartTime(std::chrono::steady_clock::now());
136 
137  // Increment the number of calls
138  new_node->incrementNumCalls();
139 
141 
142  if (_current_position >= MAX_STACK_SIZE)
143  mooseError("PerfGraph is out of stack space!");
144 
145  _stack[_current_position] = new_node;
146 }
147 
148 void
150 {
151  if (!_active)
152  return;
153 
154  _stack[_current_position]->addTime(std::chrono::steady_clock::now());
155 
157 }
158 
159 void
161 {
162  // First update all of the currently running nodes
163  auto now = std::chrono::steady_clock::now();
164  for (unsigned int i = 0; i <= _current_position; i++)
165  {
166  auto node = _stack[i];
167  node->addTime(now);
168  node->setStartTime(now);
169  }
170 
171  // Zero out the entries
172  for (auto & section_time_it : _section_time)
173  {
174  auto & section_time = section_time_it.second;
175 
176  section_time._num_calls = 0;
177  section_time._self = 0.;
178  section_time._children = 0.;
179  section_time._total = 0.;
180  }
181 
183 
184  // Update vector pointing to section times
185  // Note: we are doing this _after_ recursively filling
186  // because new entries may have been created
188 
189  for (auto & section_time_it : _section_time)
190  {
191  auto id = _section_name_to_id[section_time_it.first];
192 
193  _section_time_ptrs[id] = &section_time_it.second;
194  }
195 }
196 
197 void
199 {
200  auto id = current_node->id();
201 
202  auto self = std::chrono::duration<double>(current_node->selfTime()).count();
203  auto children = std::chrono::duration<double>(current_node->childrenTime()).count();
204  auto total = std::chrono::duration<double>(current_node->totalTime()).count();
205  auto num_calls = current_node->numCalls();
206 
207  // RHS insertion on purpose
208  auto & section_time = _section_time[_id_to_section_name[id]];
209 
210  section_time._self += self;
211  section_time._children += children;
212  section_time._total += total;
213  section_time._num_calls += num_calls;
214 
215  for (auto & child_it : current_node->children())
216  recursivelyFillTime(child_it.second.get());
217 }
218 
219 void
221  FullTable & vtable,
222  unsigned int level,
223  unsigned int current_depth)
224 {
225  mooseAssert(_id_to_section_name.find(current_node->id()) != _id_to_section_name.end(),
226  "Unable to find section name!");
227 
228  auto & name = current_node->id() == 0 ? _root_name : _id_to_section_name[current_node->id()];
229 
230  mooseAssert(_id_to_level.find(current_node->id()) != _id_to_level.end(), "Unable to find level!");
231  auto & node_level = _id_to_level[current_node->id()];
232 
233  if (node_level <= level)
234  {
235  mooseAssert(!_section_time_ptrs.empty(),
236  "updateTiming() must be run before recursivelyPrintGraph!");
237 
238  auto section = std::string(current_depth * 2, ' ') + name;
239 
240  // The total time of the root node
241  auto total_root_time = _section_time_ptrs[0]->_total;
242 
243  auto num_calls = current_node->numCalls();
244  auto self = std::chrono::duration<double>(current_node->selfTime()).count();
245  auto self_avg = self / static_cast<Real>(num_calls);
246  auto self_percent = 100. * self / total_root_time;
247 
248  auto children = std::chrono::duration<double>(current_node->childrenTime()).count();
249  auto children_avg = children / static_cast<Real>(num_calls);
250  auto children_percent = 100. * children / total_root_time;
251 
252  auto total = std::chrono::duration<double>(current_node->totalTime()).count();
253  auto total_avg = total / static_cast<Real>(num_calls);
254  auto total_percent = 100. * total / total_root_time;
255 
256  vtable.addRow(section,
257  num_calls,
258  self,
259  self_avg,
260  self_percent,
261  children,
262  children_avg,
263  children_percent,
264  total,
265  total_avg,
266  total_percent);
267 
268  current_depth++;
269  }
270 
271  for (auto & child_it : current_node->children())
272  recursivelyPrintGraph(child_it.second.get(), vtable, level, current_depth);
273 }
274 
275 void
277  FullTable & vtable,
278  unsigned int current_depth)
279 {
280  mooseAssert(!_section_time_ptrs.empty(),
281  "updateTiming() must be run before recursivelyPrintGraph!");
282 
283  auto & name = current_node->id() == 0 ? _root_name : _id_to_section_name[current_node->id()];
284 
285  auto section = std::string(current_depth * 2, ' ') + name;
286 
287  // The total time of the root node
288  auto total_root_time = _section_time_ptrs[0]->_total;
289 
290  auto num_calls = current_node->numCalls();
291  auto self = std::chrono::duration<double>(current_node->selfTime()).count();
292  auto self_avg = self / static_cast<Real>(num_calls);
293  auto self_percent = 100. * self / total_root_time;
294 
295  auto children = std::chrono::duration<double>(current_node->childrenTime()).count();
296  auto children_avg = children / static_cast<Real>(num_calls);
297  auto children_percent = 100. * children / total_root_time;
298 
299  auto total = std::chrono::duration<double>(current_node->totalTime()).count();
300  auto total_avg = total / static_cast<Real>(num_calls);
301  auto total_percent = 100. * total / total_root_time;
302 
303  vtable.addRow(section,
304  num_calls,
305  self,
306  self_avg,
307  self_percent,
308  children,
309  children_avg,
310  children_percent,
311  total,
312  total_avg,
313  total_percent);
314 
315  current_depth++;
316 
317  if (!current_node->children().empty())
318  {
319  PerfNode * heaviest_child = nullptr;
320 
321  for (auto & child_it : current_node->children())
322  {
323  auto current_child = child_it.second.get();
324 
325  if (!heaviest_child || (current_child->totalTime() > heaviest_child->totalTime()))
326  heaviest_child = current_child;
327  }
328 
329  recursivelyPrintHeaviestGraph(heaviest_child, vtable, current_depth);
330  }
331 }
332 
333 void
334 PerfGraph::print(const ConsoleStream & console, unsigned int level)
335 {
336  updateTiming();
337 
338  console << "\nPerformance Graph:\n";
339  FullTable vtable({"Section",
340  "Calls",
341  "Self(s)",
342  "Avg(s)",
343  "%",
344  "Children(s)",
345  "Avg(s)",
346  "%",
347  "Total(s)",
348  "Avg(s)",
349  "%"},
350  10);
351 
352  vtable.setColumnFormat({VariadicTableColumnFormat::AUTO, // Section Name
363 
364  vtable.setColumnPrecision({1, 0, 3, 3, 2, 3, 3, 2, 3, 3, 2});
365 
366  recursivelyPrintGraph(_root_node.get(), vtable, level);
367  vtable.print(console);
368 }
369 
370 void
372 {
373  updateTiming();
374 
375  console << "\nHeaviest Branch:\n";
376  FullTable vtable({"Section",
377  "Calls",
378  "Self(s)",
379  "Avg(s)",
380  "%",
381  "Children(s)",
382  "Avg(s)",
383  "%",
384  "Total(s)",
385  "Avg(s)",
386  "%"},
387  10);
388 
389  vtable.setColumnFormat({VariadicTableColumnFormat::AUTO, // Section Name
400 
401  vtable.setColumnPrecision({1, 0, 3, 3, 2, 3, 3, 2, 3, 3, 2});
402 
404  vtable.print(console);
405 }
406 
407 void
408 PerfGraph::printHeaviestSections(const ConsoleStream & console, const unsigned int num_sections)
409 {
410  updateTiming();
411 
412  console << "\nHeaviest Sections:\n";
413 
414  // Indirect Sort The Self Time
415  std::vector<size_t> sorted;
417  _section_time_ptrs.end(),
418  sorted,
419  [](SectionTime * lhs, SectionTime * rhs) {
420  if (lhs && rhs)
421  return lhs->_self > rhs->_self;
422 
423  // If the LHS exists - it's definitely bigger than a non-existant RHS
424  if (lhs)
425  return true;
426 
427  // Both don't exist - so it doesn't matter how we sort them
428  return false;
429  });
430 
431  HeaviestTable vtable({"Section", "Calls", "Self(s)", "Avg.", "%"}, 10);
432 
433  vtable.setColumnFormat({VariadicTableColumnFormat::AUTO, // Doesn't matter
438 
439  vtable.setColumnPrecision({1, 1, 3, 3, 2});
440 
441  mooseAssert(!_section_time_ptrs.empty(),
442  "updateTiming() must be run before printHeaviestSections()!");
443 
444  // The total time of the root node
445  auto total_root_time = _section_time_ptrs[0]->_total;
446 
447  // Now print out the largest ones
448  for (unsigned int i = 0; i < num_sections; i++)
449  {
450  auto id = sorted[i];
451 
452  vtable.addRow(id == 0 ? _root_name : _id_to_section_name[id],
453  _section_time_ptrs[id]->_num_calls,
454  _section_time_ptrs[id]->_self,
455  _section_time_ptrs[id]->_self /
456  static_cast<Real>(_section_time_ptrs[id]->_num_calls),
457  100 * _section_time_ptrs[id]->_self / total_root_time);
458  }
459 
460  vtable.print(console);
461 }
A helper class for re-directing output streams to Console output objects form MooseObjects.
Definition: ConsoleStream.h:28
std::map< std::string, SectionTime > _section_time
The time for each section.
Definition: PerfGraph.h:273
static const std::string ROOT_NAME
The name (handle) of the root node.
Definition: PerfGraph.h:241
void recursivelyPrintGraph(PerfNode *current_node, FullTable &vtable, unsigned int level, unsigned int current_depth=0)
Helper for printing out the graph.
Definition: PerfGraph.C:220
unsigned long int getNumCalls(const std::string &section_name)
Get the number of calls for a section.
Definition: PerfGraph.C:71
void mooseError(Args &&... args)
Emit an error message with the given stringified, concatenated args and terminate the application...
Definition: MooseError.h:207
A class for "pretty printing" a table of data.
Definition: PerfGraph.h:26
void indirectSort(RandomAccessIterator beg, RandomAccessIterator end, std::vector< size_t > &b)
Definition: IndirectSort.h:68
void updateTiming()
Udates the time section_time and time for all currently running nodes.
Definition: PerfGraph.C:160
std::chrono::steady_clock::duration childrenTime() const
Get the time this nodes children took.
Definition: PerfNode.C:27
unsigned int PerfID
Definition: MooseTypes.h:163
unsigned int _current_position
The current node position in the stack.
Definition: PerfGraph.h:250
std::map< PerfID, std::string > _id_to_section_name
Map of IDs to section names.
Definition: PerfGraph.h:259
void recursivelyFillTime(PerfNode *current_node)
Updates the cumulative self/children/total time.
Definition: PerfGraph.C:198
std::array< PerfNode *, MAX_STACK_SIZE > _stack
The full callstack. Currently capped at a depth of 100.
Definition: PerfGraph.h:253
const std::map< PerfID, std::unique_ptr< PerfNode > > & children() const
Get the children.
Definition: PerfNode.h:86
PerfID id() const
Get the ID of this Node.
Definition: PerfNode.h:34
bool _active
Whether or not timing is active.
Definition: PerfGraph.h:282
std::unique_ptr< PerfNode > _root_node
The root node of the graph.
Definition: PerfGraph.h:247
std::chrono::steady_clock::duration totalTime() const
The time this Node plus all of it&#39;s children took.
Definition: PerfNode.C:19
void pop()
Remove a Node from the end of the current scope.
Definition: PerfGraph.C:149
void addRow(Ts... entries)
Add a row of data.
Definition: VariadicTable.h:91
void setColumnFormat(const std::vector< VariadicTableColumnFormat > &column_format)
Set how to format numbers for each column.
void print(const ConsoleStream &console, unsigned int level)
Print the tree out.
Definition: PerfGraph.C:334
std::map< PerfID, unsigned int > _id_to_level
Map of IDs to level.
Definition: PerfGraph.h:262
MatType type
void printHeaviestBranch(const ConsoleStream &console)
Print out the heaviest branch through the tree.
Definition: PerfGraph.C:371
const std::string & sectionName(const PerfID id) const
Grab the name of a section.
Definition: PerfGraph.C:60
TimeType
For retrieving values.
Definition: PerfGraph.h:40
const std::string _root_name
The name to display for the root of the graph when printing the entire graph.
Definition: PerfGraph.h:244
void printHeaviestSections(const ConsoleStream &console, const unsigned int num_sections)
Print out the heaviest sections that were timed.
Definition: PerfGraph.C:408
Real getTime(const TimeType type, const std::string &section_name)
Get a reference to the time for a section.
Definition: PerfGraph.C:87
std::map< std::string, PerfID > _section_name_to_id
Map of section names to IDs.
Definition: PerfGraph.h:256
std::chrono::steady_clock::duration selfTime() const
Get the time this node took.
Definition: PerfNode.C:13
void push(const PerfID id)
Add a Node onto the end of the end of the current callstack.
Definition: PerfGraph.C:127
void recursivelyPrintHeaviestGraph(PerfNode *current_node, FullTable &vtable, unsigned int current_depth=0)
Helper for printing out the trace that has taken the most time.
Definition: PerfGraph.C:276
unsigned long int numCalls()
Get the number of times this node was called.
Definition: PerfNode.h:106
PerfID registerSection(const std::string &section_name, unsigned int level)
Registers a named section of code.
Definition: PerfGraph.C:42
PerfGraph(const std::string &root_name)
Create a new PerfGraph.
Definition: PerfGraph.C:26
Use to hold the time for each section.
Definition: PerfGraph.h:178
std::vector< SectionTime * > _section_time_ptrs
Pointers into _section_time indexed on PerfID This is here for convenience and speed so we don&#39;t need...
Definition: PerfGraph.h:279
A node in the PerfGraph.
Definition: PerfNode.h:23