30 const bool perf_graph_live)
33 _live_print_all(live_all),
34 _disable_live_print(!perf_graph_live),
36 _pid(app.comm().rank()),
37 _root_name(root_name),
38 _root_node_id(_perf_graph_registry.registerSection(root_name, 0)),
39 _root_node(
std::make_unique<
PerfNode>(_root_node_id)),
40 _current_position(-1),
42 _execution_list_begin(0),
43 _execution_list_end(0),
45 _live_print_active(true),
47 _live_print_time_limit(5.0),
48 _live_print_mem_limit(100),
90 const std::string & section_name,
91 const bool must_exist )
95 const auto section_it =
105 mooseError(
"Unknown PerfGraph section name \"",
107 "\" in PerfGraph::sectionData().\nIf you are attempting to retrieve the root use " 121 return section_info.
_self;
125 return section_info.
_total;
133 return 100. * (section_info.
_self / app_time);
135 return 100. * (section_info.
_children / app_time);
137 return 100. * (section_info.
_total / app_time);
152 const std::chrono::time_point<std::chrono::steady_clock> time,
153 const long int memory)
157 section_increment._id = id;
158 section_increment._state = state;
159 section_increment._time = time;
160 section_increment._memory = memory;
170 if (next_execution_list_end >= MAX_EXECUTION_LIST_SIZE)
171 next_execution_list_end = 0;
196 long int start_memory = 0;
206 auto current_time = std::chrono::steady_clock::now();
216 mooseError(
"PerfGraph is out of stack space!");
232 auto current_time = std::chrono::steady_clock::now();
239 long int current_memory = 0;
248 current_node->addTimeAndMemory(current_time, current_memory);
257 addToExecutionList(current_node->id(), IncrementState::FINISHED, current_time, current_memory);
274 auto now = std::chrono::steady_clock::now();
284 node->addTimeAndMemory(now, now_memory);
285 node->setStartTimeAndMemory(now, now_memory);
291 auto & section_time = section_time_it.second;
293 section_time._num_calls = 0;
294 section_time._self = 0.;
295 section_time._children = 0.;
296 section_time._total = 0.;
297 section_time._self_memory = 0;
298 section_time._children_memory = 0;
299 section_time._total_memory = 0.;
328 section_time._num_calls += current_node.
numCalls();
330 section_time._self_memory += current_node.
selfMemory();
332 section_time._total_memory += current_node.
totalMemory();
334 for (
auto & child_it : current_node.
children())
368 vtable.setColumnPrecision({
381 auto act = [
this, &vtable](
const PerfNode & node,
383 const unsigned int depth)
385 vtable.addRow(std::string(depth * 2,
' ') + section_info._name,
404 console <<
"\nPerformance Graph:\n";
411 console <<
"\nHeaviest Branch:\n";
420 console <<
"\nHeaviest Sections:\n";
423 std::vector<size_t> sorted;
430 return lhs->_self > rhs->_self;
440 HeaviestTable vtable({
"Section",
"Calls",
"Self(s)",
"Avg.",
"%",
"Mem(MB)"}, 10);
450 vtable.setColumnPrecision({
460 "update() must be run before printHeaviestSections()!");
466 for (
unsigned int i = 0; i < num_sections; i++)
477 entry._self /
static_cast<Real>(entry._num_calls),
478 100. * entry._self / total_root_time,
482 vtable.print(console);
503 std::vector<moose::internal::PerfGraphSectionInfo> recovered_section_info;
504 dataLoad(stream, recovered_section_info,
nullptr);
505 for (
const auto &
info : recovered_section_info)
507 if (
info._live_message.size())
long int totalMemory() const
Get the amount of memory added by this node.
void recursivelyUpdate(const PerfNode &current_node)
Updates the cumulative self/children/total time and memory for each section across all nodes that con...
Real selfTimeSec() const
Get the time this node took in seconds.
PerfGraphRegistry & _perf_graph_registry
The PerfGraphRegistry.
A helper class for re-directing output streams to Console output objects from MooseObjects.
long int _total_memory
Total memory gain for this section.
long int _self_memory
Amount of memory gained within this section (without children)
unsigned long int _num_calls
Number of times this section has been called.
PerfGraph(const std::string &root_name, MooseApp &app, const bool live_all, const bool perf_graph_live)
Create a new PerfGraph.
const processor_id_type _pid
This processor id.
std::string _live_message
Message to print while the section is running.
bool sectionExists(const std::string &section_name) const
Whether or not a section with that name has been registered. section_name: The name of the section.
void setStartTimeAndMemory(const std::chrono::time_point< std::chrono::steady_clock > time, const long int memory)
Set the current start time.
Real sectionData(const DataType type, const std::string &section_name, const bool must_exist=true)
Gets a PerfGraph result pertaining to a section.
long int selfMemory() const
Get the amount of memory added by this node.
void print(StreamType &stream)
Pretty print the table of data.
bool getMemoryStats(Stats &stats)
Get all memory stats for the current process. stats: The Stats object to fill with the data ...
void treeRecurse(const Functor &act, const unsigned int level=MAX_STACK_SIZE, const bool heaviest=false) const
std::thread _print_thread
The thread for printing sections as they execute.
void mooseError(Args &&... args)
Emit an error message with the given stringified, concatenated args and terminate the application...
Real selfTimeAvg() const
The average time this node took in seconds.
std::size_t _physical_memory
DataType
For retrieving values.
A class for "pretty printing" a table of data.
void indirectSort(RandomAccessIterator beg, RandomAccessIterator end, std::vector< size_t > &b)
const std::unique_ptr< PerfGraphLivePrint > _live_print
The object that is doing live printing.
Real _total
Total amount of time used.
Base class for MOOSE-based applications.
std::atomic< unsigned int > _execution_list_end
Where the print thread should stop reading the execution list.
long int childrenMemory() const
Get the amount of memory added by this node.
IncrementState
The execution state of an increment.
bool _live_print_all
Whether or not to put everything in the perf graph.
Real totalTimeSec() const
The time this Node plus all of its children took in seconds.
Real _self
Amount of time used within this section (without children)
Used to hold metadata about the registered sections Note: this is a class instead of a struct because...
std::condition_variable _finished_section
The condition_variable to wake the print thread.
void update()
Updates the section_time and the time for all currently running nodes.
void incrementNumCalls()
Increments the number of calls.
void addToExecutionList(const PerfID id, const IncrementState state, const std::chrono::time_point< std::chrono::steady_clock > time, const long int memory)
Add the information to the execution list.
std::vector< CumulativeSectionInfo * > _cumulative_section_info_ptrs
Pointers into _cumulative_section_info indexed on PerfID This is here for convenience and speed so we...
unsigned long int numCalls() const
Get the number of times this node was called.
unsigned long long int numPrinted() const
The number of times something has been printed.
const std::unique_ptr< PerfNode > _root_node
The root node of the graph.
std::array< PerfNode *, MAX_STACK_SIZE > _stack
The full callstack. Currently capped at a depth of 100.
const std::map< PerfID, std::unique_ptr< PerfNode > > & children() const
Get the children.
Real childrenTimeSec() const
The time this node's children took in seconds.
PerfID id() const
Get the ID of this Node.
void dataLoad(std::istream &stream, PerfGraph &perf_graph, void *)
std::mutex _destructing_mutex
The mutex to use with a condition_variable predicate to guard _destructing.
std::unordered_map< std::string, CumulativeSectionInfo > _cumulative_section_info
The cumulative time and memory for each section.
An interface for the _console for outputting to the Console object.
bool _active
Whether or not timing is active.
This is effectively a functor that runs on a separate thread and watches the state of the call stack ...
PerfID registerSection(const std::string &section_name, const unsigned int level)
Call to register a named section for timing.
void disableLivePrint()
Completely disables Live Print (cannot be restarted)
int _current_position
The current node position in the stack.
Real _children
Amount of time used by children.
void pop()
Remove a Node from the end of the current scope.
const PerfGraphSectionInfo & sectionInfo(const PerfID section_id) const
Given a PerfID, return the PerfGraphSectionInfo. section_id: The ID.
bool _disable_live_print
Whether or not live print is disabled (cannot be turned on again)
void setColumnFormat(const std::vector< VariadicTableColumnFormat > &column_format)
Set how to format numbers for each column.
void print(const ConsoleStream &console, unsigned int level)
Print the tree out.
void printHeaviestBranch(const ConsoleStream &console)
Print out the heaviest branch through the tree.
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
Real totalTimeAvg() const
The average time this Node plus all of its children took in seconds.
const std::string _root_name
Name of the root node.
PerfID sectionID(const std::string &section_name) const
Given a name, return the PerfID. section_name: The name of the section.
Use to hold the cumulative time and memory for each section, which comes from all of the PerfNodes th...
long int _children_memory
Amount of memory gained by children.
bool _destructing
Tell the print thread to teardown.
void printHeaviestSections(const ConsoleStream &console, const unsigned int num_sections)
Print out the heaviest sections that were timed.
std::string _name
The name.
std::vector< Item > _id_to_item
Vector of IDs to Items.
const ConsoleStream _console
An instance of helper class to write streams to the Console objects.
void push(const PerfID id)
Add a Node onto the end of the current callstack.
FullTable treeTable(const unsigned int level, const bool heaviest=false)
Helper for building a VariadicTable that represents the tree.
The PerfGraph will hold the master list of all registered performance segments and the head PerfNode...
std::size_t convertBytes(std::size_t bytes, MemUnits unit)
convert bytes to selected unit prefix
PerfGraphRegistry & getPerfGraphRegistry()
Get the global PerfGraphRegistry singleton.
const PerfGraphSectionInfo & readSectionInfo(PerfID section_id) const
Special accessor just for PerfGraph so that no locking is needed in PerfGraph.
std::array< SectionIncrement, MAX_EXECUTION_LIST_SIZE > _execution_list
A circular buffer for holding the execution list, this is read by the printing loop.
std::atomic< bool > _live_print_active
Whether or not live printing is active.
void dataStore(std::ostream &stream, PerfGraph &perf_graph, void *)
std::size_t numSections() const
const PerfID _root_node_id
The id for the root node.