LCOV - code coverage report
Current view: top level - include/utils - PerfGraph.h (source / functions) Hit Total Coverage
Test: idaholab/moose framework: #32971 (54bef8) with base c6cf66 Lines: 38 38 100.0 %
Date: 2026-05-29 20:35:17 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //* This file is part of the MOOSE framework
       2             : //* https://mooseframework.inl.gov
       3             : //*
       4             : //* All rights reserved, see COPYRIGHT for full restrictions
       5             : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
       6             : //*
       7             : //* Licensed under LGPL 2.1, please see LICENSE for details
       8             : //* https://www.gnu.org/licenses/lgpl-2.1.html
       9             : 
      10             : #pragma once
      11             : 
      12             : // MOOSE Includes
      13             : #include "MooseTypes.h"
      14             : #include "PerfNode.h"
      15             : #include "IndirectSort.h"
      16             : #include "ConsoleStream.h"
      17             : #include "ConsoleStreamInterface.h"
      18             : #include "MooseError.h"
      19             : #include "MemoryUtils.h"
      20             : #include "PerfGraphRegistry.h"
      21             : 
      22             : // System Includes
      23             : #include <array>
      24             : #include <atomic>
      25             : #include <thread>
      26             : #include <future>
      27             : #include <mutex>
      28             : 
      29             : // Forward Declarations
      30             : class PerfGuard;
      31             : class PerfGraphLivePrint;
      32             : 
      33             : template <class... Ts>
      34             : class VariadicTable;
      35             : 
      36             : #define MOOSE_MAX_STACK_SIZE 100
      37             : #define MAX_EXECUTION_LIST_SIZE 10000
      38             : 
      39             : /**
      40             :  * The PerfGraph will hold the master list of all registered performance segments and
      41             :  * the head PerfNode
      42             :  */
      43             : class PerfGraph : protected ConsoleStreamInterface
      44             : {
      45             : public:
      46             :   using PerfGraphRegistry = moose::internal::PerfGraphRegistry;
      47             : 
      48             :   /**
      49             :    * For retrieving values
      50             :    */
      51             :   enum DataType
      52             :   {
      53             :     SELF,
      54             :     CHILDREN,
      55             :     TOTAL,
      56             :     SELF_AVG,
      57             :     CHILDREN_AVG,
      58             :     TOTAL_AVG,
      59             :     SELF_PERCENT,
      60             :     CHILDREN_PERCENT,
      61             :     TOTAL_PERCENT,
      62             :     SELF_MEMORY,
      63             :     CHILDREN_MEMORY,
      64             :     TOTAL_MEMORY,
      65             :     CALLS
      66             :   };
      67             : 
      68             :   /**
      69             :    * DataType in a MooseEnum for use in InputParameters in objects that query
      70             :    * the PerfGraph with sectionData.
      71             :    */
      72        3907 :   static MooseEnum dataTypeEnum()
      73             :   {
      74             :     return MooseEnum(
      75             :         "SELF CHILDREN TOTAL SELF_AVG CHILDREN_AVG TOTAL_AVG SELF_PERCENT CHILDREN_PERCENT "
      76       15628 :         "TOTAL_PERCENT SELF_MEMORY CHILDREN_MEMORY TOTAL_MEMORY CALLS");
      77             :   }
      78             : 
      79             :   /**
      80             :    * Create a new PerfGraph
      81             :    *
      82             :    * @param root_name The name of the root node
      83             :    * @param app The MooseApp this PerfGraph is for
      84             :    * @param live_all Whether every message should be printed
      85             :    * @param perf_graph_live Enable/disable PerfGraphLive (permanently)
      86             :    */
      87             :   PerfGraph(const std::string & root_name,
      88             :             MooseApp & app,
      89             :             const bool live_all,
      90             :             const bool perf_graph_live);
      91             : 
      92             :   /**
      93             :    * Destructor
      94             :    */
      95             :   ~PerfGraph();
      96             : 
      97             :   /**
      98             :    * Print the tree out
      99             :    *
     100             :    * @param console The output stream to output to
     101             :    * @param level The log level, the higher the number the more output you get
     102             :    */
     103             :   void print(const ConsoleStream & console, unsigned int level);
     104             : 
     105             :   /**
     106             :    * Print out the heaviest branch through the tree
     107             :    *
     108             :    * @param console The output stream to output to
     109             :    */
     110             :   void printHeaviestBranch(const ConsoleStream & console);
     111             : 
     112             :   /**
     113             :    * Print out the heaviest sections that were timed
     114             :    *
     115             :    * @param console The output stream to output to
     116             :    */
     117             :   void printHeaviestSections(const ConsoleStream & console, const unsigned int num_sections);
     118             : 
     119             :   /**
     120             :    * Whether or not timing is active
     121             :    *
     122             :    * When not active no timing information will be kept
     123             :    */
     124             :   bool active() const { return _active; }
     125             : 
     126             :   /**
     127             :    * Turn on or off timing
     128             :    */
     129           9 :   void setActive(bool active) { _active = active; }
     130             : 
     131             :   /**
     132             :    * Enables Live Print
     133             :    */
     134             :   void enableLivePrint();
     135             : 
     136             :   /**
     137             :    * Completely disables Live Print (cannot be restarted)
     138             :    */
     139             :   void disableLivePrint();
     140             : 
     141             :   /**
     142             :    * Forces all sections to be output live
     143             :    */
     144             :   void setLivePrintAll(bool active) { _live_print_all = active; }
     145             : 
     146             :   /**
     147             :    * Set the time limit before a message prints
     148             :    */
     149       65924 :   void setLiveTimeLimit(Real time_limit)
     150             :   {
     151       65924 :     _live_print_time_limit.store(time_limit, std::memory_order_relaxed);
     152       65924 :   }
     153             : 
     154             :   /**
     155             :    * Set the memory limit before a message prints
     156             :    */
     157       65924 :   void setLiveMemoryLimit(unsigned int mem_limit)
     158             :   {
     159       65924 :     _live_print_mem_limit.store(mem_limit, std::memory_order_relaxed);
     160       65924 :   }
     161             : 
     162             :   /**
     163             :    * Gets a PerfGraph result pertaining to a section
     164             :    * @param type The result type to retrieve
     165             :    * @param section_name The name of the section
     166             :    * @param must_exist Whether or not the section must exist; if false and the
     167             :    * section does not exist, returns 0, if true and the section does not exist,
     168             :    * exit with an error
     169             :    */
     170             :   Real
     171             :   sectionData(const DataType type, const std::string & section_name, const bool must_exist = true);
     172             : 
     173             :   /**
     174             :    * Updates the time section_time and time for all currently running nodes
     175             :    */
     176             :   void update();
     177             : 
     178             :   /**
     179             :    * Get the maximum memory allocation in MB.
     180             :    *
     181             :    * This is thread safe.
     182             :    */
     183   171731170 :   std::size_t getMaxMemory() const { return _max_memory; }
     184             : 
     185             :   /**
     186             :    * @returns The MooseApp
     187             :    */
     188     3758759 :   MooseApp & mooseApp() { return _moose_app; }
     189             : 
     190             :   /**
     191             :    * @returns A constant reference to the root node
     192             :    */
     193     2068749 :   const PerfNode & rootNode() const { return *_root_node; }
     194             : 
     195             :   template <typename Functor>
     196             :   void treeRecurse(const Functor & act,
     197             :                    const unsigned int level = MOOSE_MAX_STACK_SIZE,
     198             :                    const bool heaviest = false) const;
     199             : 
     200             : protected:
     201             :   typedef VariadicTable<std::string,
     202             :                         unsigned long int,
     203             :                         Real,
     204             :                         Real,
     205             :                         Real,
     206             :                         long int,
     207             :                         Real,
     208             :                         Real,
     209             :                         Real,
     210             :                         long int>
     211             :       FullTable;
     212             : 
     213             :   typedef VariadicTable<std::string, unsigned long int, Real, Real, Real, long int> HeaviestTable;
     214             : 
     215             :   /**
     216             :    * Use to hold the cumulative time and memory for each section, which comes
     217             :    * from all of the PerfNodes that contribute to said section
     218             :    *
     219             :    * These will be filled by update()
     220             :    */
     221             :   struct CumulativeSectionInfo
     222             :   {
     223             :     /// Amount of time used within this section (without children)
     224             :     Real _self = 0.;
     225             : 
     226             :     /// Amount of time used by children
     227             :     Real _children = 0.;
     228             : 
     229             :     /// Total amount of time used
     230             :     Real _total = 0.;
     231             : 
     232             :     /// Number of times this section has been called
     233             :     unsigned long int _num_calls = 0;
     234             : 
     235             :     /// Amount of memory gained within this section (without children)
     236             :     long int _self_memory = 0;
     237             : 
     238             :     /// Amount of memory gained by children
     239             :     long int _children_memory = 0;
     240             : 
     241             :     /// Total memory gain for this section
     242             :     long int _total_memory = 0;
     243             :   };
     244             : 
     245             :   /**
     246             :    * The execution state of an increment.
     247             :    */
     248             :   enum IncrementState
     249             :   {
     250             :     /// Section just started running
     251             :     STARTED,
     252             : 
     253             :     /// This section has already started printing
     254             :     PRINTED,
     255             : 
     256             :     /// The section is complete
     257             :     FINISHED
     258             :   };
     259             : 
     260             :   /**
     261             :    * Use to hold an increment of time and memory for a section
     262             :    * This is used in the LivePrint capability.
     263             :    */
     264             :   class SectionIncrement
     265             :   {
     266             :   public:
     267   676649500 :     SectionIncrement()
     268   676649500 :       : _state(IncrementState::FINISHED),
     269   676649500 :         _print_stack_level(0),
     270   676649500 :         _num_dots(0),
     271   676649500 :         _time(std::chrono::seconds(0)),
     272   676649500 :         _memory(0),
     273   676649500 :         _beginning_num_printed(0)
     274             :     {
     275   676649500 :     }
     276             : 
     277             :     PerfID _id;
     278             : 
     279             :     /// Whether or not this increment is the start of an increment or
     280             :     /// the finishing of an increment.
     281             :     IncrementState _state;
     282             : 
     283             :     /// How much to indent this section
     284             :     unsigned int _print_stack_level;
     285             : 
     286             :     /// How many dots have been printed for this section
     287             :     unsigned int _num_dots;
     288             : 
     289             :     /// Either the starting time or final time depending on _state
     290             :     std::chrono::time_point<std::chrono::steady_clock> _time;
     291             : 
     292             :     /// Either the starting memory or final memory depending on _state
     293             :     long int _memory;
     294             : 
     295             :     /// The _console numPrinted() at the time this section was created
     296             :     unsigned long long int _beginning_num_printed;
     297             :   };
     298             : 
     299             :   /**
     300             :    * Add the information to the execution list
     301             :    *
     302             :    * Should only be called by push() and pop()
     303             :    */
     304             :   inline void addToExecutionList(const PerfID id,
     305             :                                  const IncrementState state,
     306             :                                  const std::chrono::time_point<std::chrono::steady_clock> time,
     307             :                                  const long int memory);
     308             : 
     309             :   /**
     310             :    * Add a Node onto the end of the current callstack
     311             :    *
     312             :    * Note: only accessible by using PerfGuard!
     313             :    */
     314             :   void push(const PerfID id);
     315             : 
     316             :   /**
     317             :    * Remove a Node from the end of the current scope
     318             :    *
     319             :    * Note: only accessible by using PerfGuard!
     320             :    */
     321             :   void pop();
     322             : 
     323             :   /**
     324             :    * Updates the cumulative self/children/total time and memory for each section
     325             :    * across all nodes that contribute to said section in _cumulative_section_info
     326             :    *
     327             :    * Note: requires that the contents in each CumulativeSectionInfo in
     328             :    * _cumulative_section_info be initially resized and zeroed
     329             :    *
     330             :    * @param current_node The current node to work on
     331             :    */
     332             :   void recursivelyUpdate(const PerfNode & current_node);
     333             : 
     334             :   /// The MooseApp
     335             :   MooseApp & _moose_app;
     336             : 
     337             :   /// Whether or not to force all sections to be output live
     338             :   bool _live_print_all;
     339             : 
     340             :   /// Whether or not live print is disabled (cannot be turned on again)
     341             :   bool _disable_live_print;
     342             : 
     343             :   /// The PerfGraphRegistry
     344             :   PerfGraphRegistry & _perf_graph_registry;
     345             : 
     346             :   /// This processor id
     347             :   const libMesh::processor_id_type _pid;
     348             : 
     349             :   /// Name of the root node
     350             :   const std::string _root_name;
     351             : 
     352             :   /// The id for the root node
     353             :   const PerfID _root_node_id;
     354             : 
     355             :   /// The root node of the graph
     356             :   const std::unique_ptr<PerfNode> _root_node;
     357             : 
     358             :   /// The current node position in the stack
     359             :   int _current_position;
     360             : 
     361             :   /// The full callstack.  Currently capped at a depth of 100
     362             :   std::array<PerfNode *, MOOSE_MAX_STACK_SIZE> _stack;
     363             : 
     364             :   /// A circular buffer for holding the execution list, this is read by the printing loop
     365             :   std::array<SectionIncrement, MAX_EXECUTION_LIST_SIZE> _execution_list;
     366             : 
     367             :   /// Where the print thread should start reading the execution list
     368             :   std::atomic<unsigned int> _execution_list_begin;
     369             : 
     370             :   /// Where the print thread should stop reading the execution list
     371             :   std::atomic<unsigned int> _execution_list_end;
     372             : 
     373             :   /// The cumulative time and memory for each section.  This is updated on update()
     374             :   /// Note that this is _total_ cumulative time/memory across every place
     375             :   /// that section is in the graph
     376             :   ///
     377             :   /// I'm making this a map so that we can give out references to the values
     378             :   /// The three values are: self, children, total
     379             :   /// The map is on std::string because we might need to be able to retrieve
     380             :   /// timing values in a "late binding" situation _before_ the section
     381             :   /// has been registered.
     382             :   std::unordered_map<std::string, CumulativeSectionInfo> _cumulative_section_info;
     383             : 
     384             :   /// Pointers into _cumulative_section_info indexed on PerfID
     385             :   /// This is here for convenience and speed so we don't need
     386             :   /// to iterate over the above map much - and it makes it
     387             :   /// easier to sort
     388             :   std::vector<CumulativeSectionInfo *> _cumulative_section_info_ptrs;
     389             : 
     390             :   /// Maximum memory encountered during push and pop
     391             :   std::atomic<std::size_t> _max_memory;
     392             : 
     393             :   /// Whether or not timing is active
     394             :   bool _active;
     395             : 
     396             :   /// The promise to the print thread that will signal when to stop
     397             :   std::promise<bool> _done;
     398             : 
     399             :   /// Tell the print thread to tear down
     400             :   bool _destructing;
     401             : 
     402             :   /// The mutex to use with a condition_variable predicate to guard _destructing
     403             :   std::mutex _destructing_mutex;
     404             : 
     405             :   /// The condition_variable to wake the print thread
     406             :   std::condition_variable _finished_section;
     407             : 
     408             :   /// The time limit before a message is printed (in seconds)
     409             :   std::atomic<Real> _live_print_time_limit;
     410             : 
     411             :   /// The memory limit before a message is printed (in MB)
     412             :   std::atomic<unsigned int> _live_print_mem_limit;
     413             : 
     414             :   /// The object that is doing live printing
     415             :   const std::unique_ptr<PerfGraphLivePrint> _live_print;
     416             : 
     417             :   /// The thread for printing sections as they execute
     418             :   std::thread _print_thread;
     419             : 
     420             :   // Here so PerfGuard is the only thing that can call push/pop
     421             :   friend class PerfGuard;
     422             :   friend class PerfGraphLivePrint;
     423             :   friend void dataStore(std::ostream &, PerfGraph &, void *);
     424             :   friend void dataLoad(std::istream &, PerfGraph &, void *);
     425             : 
     426             : private:
     427             :   /**
     428             :    * Helper for building a VariadicTable that represents the tree.
     429             :    *
     430             :    * @param level The level to print out below (<=)
     431             :    * @param heaviest Show only the heaviest branch
     432             :    */
     433             :   FullTable treeTable(const unsigned int level, const bool heaviest = false);
     434             : 
     435             :   template <typename Functor>
     436             :   void treeRecurseInternal(const PerfNode & node,
     437             :                            const Functor & act,
     438             :                            const unsigned int level,
     439             :                            const bool heaviest,
     440             :                            unsigned int current_depth) const;
     441             : 
     442             :   /**
     443             :    * Update _max_memory if current_memory > _max_memory.
     444             :    */
     445             :   void updateMaxMemory(const std::size_t current_memory);
     446             : };
     447             : 
     448             : template <typename Functor>
     449             : void
     450     8835609 : PerfGraph::treeRecurseInternal(const PerfNode & node,
     451             :                                const Functor & act,
     452             :                                const unsigned int level,
     453             :                                const bool heaviest,
     454             :                                unsigned int current_depth) const
     455             : {
     456             :   mooseAssert(_perf_graph_registry.sectionExists(node.id()), "Unable to find section name!");
     457             : 
     458     8835609 :   const auto & current_section_info = _perf_graph_registry.readSectionInfo(node.id());
     459     8835609 :   if (current_section_info._level <= level)
     460             :   {
     461             :     mooseAssert(!_cumulative_section_info_ptrs.empty(), "update() must be run before treeRecurse!");
     462      666378 :     act(node, current_section_info, current_depth++);
     463             :   }
     464             : 
     465     8835609 :   if (heaviest)
     466             :   {
     467         174 :     const PerfNode * heaviest_child = nullptr;
     468        1063 :     for (const auto & child_it : node.children())
     469             :     {
     470         889 :       const auto & current_child = *child_it.second;
     471             : 
     472         889 :       if (!heaviest_child || (current_child.totalTime() > heaviest_child->totalTime()))
     473         297 :         heaviest_child = &current_child;
     474             :     }
     475             : 
     476         174 :     if (heaviest_child)
     477         154 :       treeRecurseInternal(*heaviest_child, act, level, true, current_depth);
     478             :   }
     479             :   else
     480             :   {
     481    17617544 :     for (const auto & child_it : node.children())
     482     8782109 :       treeRecurseInternal(*child_it.second, act, level, false, current_depth);
     483             :   }
     484     8835609 : }
     485             : 
     486             : template <typename Functor>
     487             : void
     488       53346 : PerfGraph::treeRecurse(const Functor & act,
     489             :                        const unsigned int level /* = MOOSE_MAX_STACK_SIZE */,
     490             :                        const bool heaviest /* = false */) const
     491             : {
     492             :   mooseAssert(_root_node, "Root node does not exist; calling this too early");
     493       53346 :   treeRecurseInternal(*_root_node, act, level, heaviest, 0);
     494       53346 : }
     495             : 
     496             : void dataStore(std::ostream & stream, PerfGraph & perf_graph, void * context);
     497             : void dataLoad(std::istream & stream, PerfGraph & perf_graph, void * context);

Generated by: LCOV version 1.14