31 const bool perf_graph_live)
34 _live_print_all(live_all),
35 _disable_live_print(!perf_graph_live),
37 _pid(app.comm().rank()),
38 _root_name(root_name),
39 _root_node_id(_perf_graph_registry.registerSection(root_name, 0)),
40 _root_node(
std::make_unique<
PerfNode>(_root_node_id)),
41 _current_position(-1),
43 _execution_list_begin(0),
44 _execution_list_end(0),
47 _live_print_time_limit(5.0),
48 _live_print_mem_limit(100),
94 const std::string & section_name,
95 const bool must_exist )
99 const auto section_it =
109 mooseError(
"Unknown PerfGraph section name \"",
111 "\" in PerfGraph::sectionData().\nIf you are attempting to retrieve the root use " 125 return section_info.
_self;
129 return section_info.
_total;
137 return 100. * (section_info.
_self / app_time);
139 return 100. * (section_info.
_children / app_time);
141 return 100. * (section_info.
_total / app_time);
156 const std::chrono::time_point<std::chrono::steady_clock> time,
157 const long int memory)
161 section_increment._id = id;
162 section_increment._state = state;
163 section_increment._time = time;
164 section_increment._memory = memory;
174 if (next_execution_list_end >= MAX_EXECUTION_LIST_SIZE)
175 next_execution_list_end = 0;
200 long int start_memory = 0;
210 auto current_time = std::chrono::steady_clock::now();
220 mooseError(
"PerfGraph is out of stack space!");
236 auto current_time = std::chrono::steady_clock::now();
243 long int current_memory = 0;
252 current_node->addTimeAndMemory(current_time, current_memory);
261 addToExecutionList(current_node->id(), IncrementState::FINISHED, current_time, current_memory);
278 auto now = std::chrono::steady_clock::now();
288 node->addTimeAndMemory(now, now_memory);
289 node->setStartTimeAndMemory(now, now_memory);
295 auto & section_time = section_time_it.second;
297 section_time._num_calls = 0;
298 section_time._self = 0.;
299 section_time._children = 0.;
300 section_time._total = 0.;
301 section_time._self_memory = 0;
302 section_time._children_memory = 0;
303 section_time._total_memory = 0.;
332 section_time._num_calls += current_node.
numCalls();
334 section_time._self_memory += current_node.
selfMemory();
336 section_time._total_memory += current_node.
totalMemory();
338 for (
auto & child_it : current_node.
children())
372 vtable.setColumnPrecision({
385 auto act = [
this, &vtable](
const PerfNode & node,
387 const unsigned int depth)
389 vtable.addRow(std::string(depth * 2,
' ') + section_info._name,
408 console <<
"\nPerformance Graph:\n";
415 console <<
"\nHeaviest Branch:\n";
424 console <<
"\nHeaviest Sections:\n";
427 std::vector<size_t> sorted;
434 return lhs->_self > rhs->_self;
444 HeaviestTable vtable({
"Section",
"Calls",
"Self(s)",
"Avg.",
"%",
"Mem(MB)"}, 10);
454 vtable.setColumnPrecision({
464 "update() must be run before printHeaviestSections()!");
470 for (
unsigned int i = 0; i < num_sections; i++)
481 entry._self /
static_cast<Real>(entry._num_calls),
482 100. * entry._self / total_root_time,
486 vtable.print(console);
507 std::vector<moose::internal::PerfGraphSectionInfo> recovered_section_info;
508 dataLoad(stream, recovered_section_info,
nullptr);
509 for (
const auto &
info : recovered_section_info)
511 if (
info._live_message.size())
long int totalMemory() const
Get the amount of memory added by this node.
void recursivelyUpdate(const PerfNode &current_node)
Updates the cumulative self/children/total time and memory for each section across all nodes that con...
Real selfTimeSec() const
Get the time this node took in seconds.
PerfGraphRegistry & _perf_graph_registry
The PerfGraphRegistry.
A helper class for re-directing output streams to Console output objects from MooseObjects.
long int _total_memory
Total memory gain for this section.
long int _self_memory
Amount of memory gained within this section (without children)
unsigned long int _num_calls
Number of times this section has been called.
PerfGraph(const std::string &root_name, MooseApp &app, const bool live_all, const bool perf_graph_live)
Create a new PerfGraph.
std::string _live_message
Message to print while the section is running.
bool sectionExists(const std::string &section_name) const
Whether or not a section with that name has been registered. section_name: The name of the section.
void setStartTimeAndMemory(const std::chrono::time_point< std::chrono::steady_clock > time, const long int memory)
Set the current start time.
Real sectionData(const DataType type, const std::string &section_name, const bool must_exist=true)
Gets a PerfGraph result pertaining to a section.
long int selfMemory() const
Get the amount of memory added by this node.
void print(StreamType &stream)
Pretty print the table of data.
bool getMemoryStats(Stats &stats)
Get all memory stats for the current process. stats: The Stats object to fill with the data ...
std::thread _print_thread
The thread for printing sections as they execute.
void mooseError(Args &&... args)
Emit an error message with the given stringified, concatenated args and terminate the application...
Real selfTimeAvg() const
The average time this node took in seconds.
std::size_t _physical_memory
DataType
For retrieving values.
A class for "pretty printing" a table of data.
void indirectSort(RandomAccessIterator beg, RandomAccessIterator end, std::vector< size_t > &b)
const std::unique_ptr< PerfGraphLivePrint > _live_print
The object that is doing live printing.
Real _total
Total amount of time used.
Base class for MOOSE-based applications.
std::atomic< unsigned int > _execution_list_end
Where the print thread should stop reading the execution list.
long int childrenMemory() const
Get the amount of memory added by this node.
IncrementState
The execution state of an increment.
bool _live_print_all
Whether or not to put everything in the perf graph.
Real totalTimeSec() const
The time this Node plus all of its children took in seconds.
Real _self
Amount of time used within this section (without children)
Used to hold metadata about the registered sections Note: this is a class instead of a struct because...
std::condition_variable _finished_section
The condition_variable to wake the print thread.
void update()
Updates the cumulative section data (section_time) and the time for all currently running nodes.
void incrementNumCalls()
Increments the number of calls.
void addToExecutionList(const PerfID id, const IncrementState state, const std::chrono::time_point< std::chrono::steady_clock > time, const long int memory)
Add the information to the execution list.
std::vector< CumulativeSectionInfo * > _cumulative_section_info_ptrs
Pointers into _cumulative_section_info indexed on PerfID This is here for convenience and speed so we...
unsigned long int numCalls() const
Get the number of times this node was called.
void treeRecurse(const Functor &act, const unsigned int level=MOOSE_MAX_STACK_SIZE, const bool heaviest=false) const
unsigned long long int numPrinted() const
The number of times something has been printed.
const std::unique_ptr< PerfNode > _root_node
The root node of the graph.
const std::map< PerfID, std::unique_ptr< PerfNode > > & children() const
Get the children.
Real childrenTimeSec() const
The time this node's children took in seconds.
PerfID id() const
Get the ID of this Node.
void dataLoad(std::istream &stream, PerfGraph &perf_graph, void *)
std::mutex _destructing_mutex
The mutex to use with a condition_variable predicate to guard _destructing.
std::unordered_map< std::string, CumulativeSectionInfo > _cumulative_section_info
The cumulative time and memory for each section.
An interface for the _console for outputting to the Console object.
std::deque< Item > _id_to_item
Vector of IDs to Items.
bool _active
Whether or not timing is active.
This is effectively a functor that runs on a separate thread and watches the state of the call stack ...
PerfID registerSection(const std::string &section_name, const unsigned int level)
Call to register a named section for timing.
void disableLivePrint()
Completely disables Live Print (cannot be restarted)
int _current_position
The current node position in the stack.
Real _children
Amount of time used by children.
void pop()
Remove a Node from the end of the current scope.
const PerfGraphSectionInfo & sectionInfo(const PerfID section_id) const
Given a PerfID, return the PerfGraphSectionInfo. section_id: The ID.
bool _disable_live_print
Whether or not live print is disabled (cannot be turned on again)
void setColumnFormat(const std::vector< VariadicTableColumnFormat > &column_format)
Set how to format numbers for each column.
void print(const ConsoleStream &console, unsigned int level)
Print the tree out.
std::array< PerfNode *, MOOSE_MAX_STACK_SIZE > _stack
The full callstack. Currently capped at a depth of 100.
void printHeaviestBranch(const ConsoleStream &console)
Print out the heaviest branch through the tree.
DIE A HORRIBLE DEATH HERE typedef LIBMESH_DEFAULT_SCALAR_TYPE Real
Real totalTimeAvg() const
The average time this Node plus all of its children took in seconds.
const std::string _root_name
Name of the root node.
PerfID sectionID(const std::string &section_name) const
Given a name, return the PerfID. section_name: The name of the section.
Use to hold the cumulative time and memory for each section, which comes from all of the PerfNodes th...
const libMesh::processor_id_type _pid
This processor id.
long int _children_memory
Amount of memory gained by children.
bool _destructing
Tell the print thread to teardown.
void printHeaviestSections(const ConsoleStream &console, const unsigned int num_sections)
Print out the heaviest sections that were timed.
std::string _name
The name.
const ConsoleStream _console
An instance of helper class to write streams to the Console objects.
void push(const PerfID id)
Add a Node onto the end of the current callstack.
FullTable treeTable(const unsigned int level, const bool heaviest=false)
Helper for building a VariadicTable that represents the tree.
The PerfGraph will hold the master list of all registered performance segments and the head PerfNode...
std::size_t convertBytes(std::size_t bytes, MemUnits unit)
convert bytes to selected unit prefix
PerfGraphRegistry & getPerfGraphRegistry()
Get the global PerfGraphRegistry singleton.
const PerfGraphSectionInfo & readSectionInfo(PerfID section_id) const
Special accessor just for PerfGraph so that no locking is needed in PerfGraph.
std::array< SectionIncrement, MAX_EXECUTION_LIST_SIZE > _execution_list
A circular buffer for holding the execution list, this is read by the printing loop.
void enableLivePrint()
Enables Live Print.
void dataStore(std::ostream &stream, PerfGraph &perf_graph, void *)
std::size_t numSections() const
const PerfID _root_node_id
The id for the root node.