LCOV - code coverage report
Current view: top level - include/utils - PerfGraph.h (source / functions) Hit Total Coverage
Test: idaholab/moose framework: 8601ad Lines: 37 37 100.0 %
Date: 2025-07-18 13:27:08 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //* This file is part of the MOOSE framework
       2             : //* https://mooseframework.inl.gov
       3             : //*
       4             : //* All rights reserved, see COPYRIGHT for full restrictions
       5             : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
       6             : //*
       7             : //* Licensed under LGPL 2.1, please see LICENSE for details
       8             : //* https://www.gnu.org/licenses/lgpl-2.1.html
       9             : 
      10             : #pragma once
      11             : 
      12             : // MOOSE Includes
      13             : #include "MooseTypes.h"
      14             : #include "PerfNode.h"
      15             : #include "IndirectSort.h"
      16             : #include "ConsoleStream.h"
      17             : #include "ConsoleStreamInterface.h"
      18             : #include "MooseError.h"
      19             : #include "MemoryUtils.h"
      20             : #include "PerfGraphRegistry.h"
      21             : 
      22             : // System Includes
      23             : #include <array>
      24             : #include <atomic>
      25             : #include <thread>
      26             : #include <future>
      27             : #include <mutex>
      28             : 
      29             : // Forward Declarations
      30             : class PerfGuard;
      31             : class PerfGraphLivePrint;
      32             : 
      33             : template <class... Ts>
      34             : class VariadicTable;
      35             : 
      36             : #define MOOSE_MAX_STACK_SIZE 100 // Size of PerfGraph::_stack, i.e. the maximum callstack depth
      37             : #define MAX_EXECUTION_LIST_SIZE 10000 // Size of the PerfGraph::_execution_list circular buffer
      38             : 
      39             : /**
      40             :  * The PerfGraph will hold the master list of all registered performance segments and
      41             :  * the head PerfNode
      42             :  */
      43             : class PerfGraph : protected ConsoleStreamInterface
      44             : {
      45             : public:
      46             :   using PerfGraphRegistry = moose::internal::PerfGraphRegistry;
      47             : 
      48             :   /**
      49             :    * For retrieving values
      50             :    */
      51             :   enum DataType
      52             :   {
      53             :     SELF,
      54             :     CHILDREN,
      55             :     TOTAL,
      56             :     SELF_AVG,
      57             :     CHILDREN_AVG,
      58             :     TOTAL_AVG,
      59             :     SELF_PERCENT,
      60             :     CHILDREN_PERCENT,
      61             :     TOTAL_PERCENT,
      62             :     SELF_MEMORY,
      63             :     CHILDREN_MEMORY,
      64             :     TOTAL_MEMORY,
      65             :     CALLS
      66             :   };
      67             : 
      68             :   /**
      69             :    * DataType in a MooseEnum for use in InputParameters in objects that query
      70             :    * the PerfGraph with sectionData.
      71             :    */
      72       15161 :   static MooseEnum dataTypeEnum()
      73             :   {
      74             :     return MooseEnum(
      75             :         "SELF CHILDREN TOTAL SELF_AVG CHILDREN_AVG TOTAL_AVG SELF_PERCENT CHILDREN_PERCENT "
      76       15161 :         "TOTAL_PERCENT SELF_MEMORY CHILDREN_MEMORY TOTAL_MEMORY CALLS");
      77             :   }
      78             : 
      79             :   /**
      80             :    * Create a new PerfGraph
      81             :    *
      82             :    * @param root_name The name of the root node
      83             :    * @param app The MooseApp this PerfGraph is for
      84             :    * @param live_all Whether every message should be printed
      85             :    * @param perf_graph_live Enable/disable PerfGraphLive (permanently)
      86             :    */
      87             :   PerfGraph(const std::string & root_name,
      88             :             MooseApp & app,
      89             :             const bool live_all,
      90             :             const bool perf_graph_live);
      91             : 
      92             :   /**
      93             :    * Destructor
      94             :    */
      95             :   ~PerfGraph();
      96             : 
      97             :   /**
      98             :    * Print the tree out
      99             :    *
     100             :    * @param console The output stream to output to
     101             :    * @param level The log level, the higher the number the more output you get
     102             :    */
     103             :   void print(const ConsoleStream & console, unsigned int level);
     104             : 
     105             :   /**
     106             :    * Print out the heaviest branch through the tree
     107             :    *
     108             :    * @param console The output stream to output to
     109             :    */
     110             :   void printHeaviestBranch(const ConsoleStream & console);
     111             : 
     112             :   /**
     113             :    * Print out the heaviest sections that were timed
     114             :    *
     115             :    * @param console The output stream to output to
     116             :    */
     117             :   void printHeaviestSections(const ConsoleStream & console, const unsigned int num_sections);
     118             : 
     119             :   /**
     120             :    * Whether or not timing is active
     121             :    *
     122             :    * When not active no timing information will be kept
     123             :    */
     124             :   bool active() const { return _active; }
     125             : 
     126             :   /**
     127             :    * Turn on or off timing
     128             :    */
     129          10 :   void setActive(bool active) { _active = active; }
     130             : 
     131             :   /**
     132             :    * Enables Live Print
     133             :    */
     134             :   void enableLivePrint();
     135             : 
     136             :   /**
     137             :    * Completely disables Live Print (cannot be restarted)
     138             :    */
     139             :   void disableLivePrint();
     140             : 
     141             :   /**
     142             :    * Forces all sections to be output live
     143             :    */
     144             :   void setLivePrintAll(bool active) { _live_print_all = active; }
     145             : 
     146             :   /**
     147             :    * Set the time limit before a message prints
     148             :    */
     149       62078 :   void setLiveTimeLimit(Real time_limit)
     150             :   {
     151       62078 :     _live_print_time_limit.store(time_limit, std::memory_order_relaxed);
     152       62078 :   }
     153             : 
     154             :   /**
     155             :    * Set the memory limit before a message prints
     156             :    */
     157       62078 :   void setLiveMemoryLimit(unsigned int mem_limit)
     158             :   {
     159       62078 :     _live_print_mem_limit.store(mem_limit, std::memory_order_relaxed);
     160       62078 :   }
     161             : 
     162             :   /**
     163             :    * Gets a PerfGraph result pertaining to a section
     164             :    * @param type The result type to retrieve
     165             :    * @param section_name The name of the section
     166             :    * @param must_exist Whether or not the section must exist; if false and the
     167             :    * section does not exist, returns 0, if true and the section does not exist,
     168             :    * exit with an error
     169             :    */
     170             :   Real
     171             :   sectionData(const DataType type, const std::string & section_name, const bool must_exist = true);
     172             : 
     173             :   /**
     174             :    * Updates the time section_time and time for all currently running nodes
     175             :    */
     176             :   void update();
     177             : 
     178             :   /**
     179             :    * @returns The MooseApp
     180             :    */
     181     3439252 :   MooseApp & mooseApp() { return _moose_app; }
     182             : 
     183             :   /**
     184             :    * @returns A constant reference to the root node
     185             :    */
     186     2014466 :   const PerfNode & rootNode() const { return *_root_node; }
     187             :   /// Traverses the tree starting at the root node, calling act(node, section_info, depth)
     188             :   template <typename Functor>
     189             :   void treeRecurse(const Functor & act,
     190             :                    const unsigned int level = MOOSE_MAX_STACK_SIZE,
     191             :                    const bool heaviest = false) const;
     192             : 
     193             : protected:
     194             :   typedef VariadicTable<std::string,
     195             :                         unsigned long int,
     196             :                         Real,
     197             :                         Real,
     198             :                         Real,
     199             :                         long int,
     200             :                         Real,
     201             :                         Real,
     202             :                         Real,
     203             :                         long int>
     204             :       FullTable;
     205             : 
     206             :   typedef VariadicTable<std::string, unsigned long int, Real, Real, Real, long int> HeaviestTable;
     207             : 
     208             :   /**
     209             :    * Use to hold the cumulative time and memory for each section, which comes
     210             :    * from all of the PerfNodes that contribute to said section
     211             :    *
     212             :    * These will be filled by update()
     213             :    */
     214             :   struct CumulativeSectionInfo
     215             :   {
     216             :     /// Amount of time used within this section (without children)
     217             :     Real _self = 0.;
     218             : 
     219             :     /// Amount of time used by children
     220             :     Real _children = 0.;
     221             : 
     222             :     /// Total amount of time used
     223             :     Real _total = 0.;
     224             : 
     225             :     /// Number of times this section has been called
     226             :     unsigned long int _num_calls = 0;
     227             : 
     228             :     /// Amount of memory gained within this section (without children)
     229             :     long int _self_memory = 0;
     230             : 
     231             :     /// Amount of memory gained by children
     232             :     long int _children_memory = 0;
     233             : 
     234             :     /// Total memory gain for this section
     235             :     long int _total_memory = 0;
     236             :   };
     237             : 
     238             :   /**
     239             :    * The execution state of an increment.
     240             :    */
     241             :   enum IncrementState
     242             :   {
     243             :     /// Section just started running
     244             :     STARTED,
     245             : 
     246             :     /// This section has already started printing
     247             :     PRINTED,
     248             : 
     249             :     /// The section is complete
     250             :     FINISHED
     251             :   };
     252             : 
     253             :   /**
     254             :    * Use to hold an increment of time and memory for a section
     255             :    * This is used in the LivePrint capability.
     256             :    */
     257             :   class SectionIncrement
     258             :   {
     259             :   public:
     260   635522300 :     SectionIncrement()
     261   635522300 :       : _state(IncrementState::FINISHED),
     262   635522300 :         _print_stack_level(0),
     263   635522300 :         _num_dots(0),
     264   635522300 :         _time(std::chrono::seconds(0)),
     265   635522300 :         _memory(0),
     266   635522300 :         _beginning_num_printed(0)
     267             :     {
     268   635522300 :     }
     269             : 
     270             :     PerfID _id;
     271             : 
     272             :     /// The execution state of this increment: whether the section has started,
     273             :     /// has started printing, or has finished (see IncrementState).
     274             :     IncrementState _state;
     275             : 
     276             :     /// How much to indent this section
     277             :     unsigned int _print_stack_level;
     278             : 
     279             :     /// How many dots have been printed for this section
     280             :     unsigned int _num_dots;
     281             : 
     282             :     /// Either the starting time or final time depending on _state
     283             :     std::chrono::time_point<std::chrono::steady_clock> _time;
     284             : 
     285             :     /// Either the starting memory or final memory depending on _state
     286             :     long int _memory;
     287             : 
     288             :     /// The _console numPrinted() at the time this section was created
     289             :     unsigned long long int _beginning_num_printed;
     290             :   };
     291             : 
     292             :   /**
     293             :    * Add the information to the execution list
     294             :    *
     295             :    * Should only be called by push() and pop()
     296             :    */
     297             :   inline void addToExecutionList(const PerfID id,
     298             :                                  const IncrementState state,
     299             :                                  const std::chrono::time_point<std::chrono::steady_clock> time,
     300             :                                  const long int memory);
     301             : 
     302             :   /**
     303             :    * Add a Node onto the end of the current callstack
     304             :    *
     305             :    * Note: only accessible by using PerfGuard!
     306             :    */
     307             :   void push(const PerfID id);
     308             : 
     309             :   /**
     310             :    * Remove a Node from the end of the current scope
     311             :    *
     312             :    * Note: only accessible by using PerfGuard!
     313             :    */
     314             :   void pop();
     315             : 
     316             :   /**
     317             :    * Updates the cumulative self/children/total time and memory for each section
     318             :    * across all nodes that contribute to said section in _cumulative_section_info
     319             :    *
     320             :    * Note: requires that the contents in each CumulativeSectionInfo in
     321             :    * _cumulative_section_info be initially resized and zeroed
     322             :    *
     323             :    * @param current_node The current node to work on
     324             :    */
     325             :   void recursivelyUpdate(const PerfNode & current_node);
     326             : 
     327             :   /// The MooseApp
     328             :   MooseApp & _moose_app;
     329             : 
     330             :   /// Whether or not all sections are forced to be output live (see setLivePrintAll())
     331             :   bool _live_print_all;
     332             : 
     333             :   /// Whether or not live print is disabled (cannot be turned on again)
     334             :   bool _disable_live_print;
     335             : 
     336             :   /// The PerfGraphRegistry
     337             :   PerfGraphRegistry & _perf_graph_registry;
     338             : 
     339             :   /// This processor id
     340             :   const libMesh::processor_id_type _pid;
     341             : 
     342             :   /// Name of the root node
     343             :   const std::string _root_name;
     344             : 
     345             :   /// The id for the root node
     346             :   const PerfID _root_node_id;
     347             : 
     348             :   /// The root node of the graph
     349             :   const std::unique_ptr<PerfNode> _root_node;
     350             : 
     351             :   /// The current node position in the stack
     352             :   int _current_position;
     353             : 
     354             :   /// The full callstack.  Currently capped at a depth of MOOSE_MAX_STACK_SIZE (100)
     355             :   std::array<PerfNode *, MOOSE_MAX_STACK_SIZE> _stack;
     356             : 
     357             :   /// A circular buffer for holding the execution list, this is read by the printing loop
     358             :   std::array<SectionIncrement, MAX_EXECUTION_LIST_SIZE> _execution_list;
     359             : 
     360             :   /// Where the print thread should start reading the execution list
     361             :   std::atomic<unsigned int> _execution_list_begin;
     362             : 
     363             :   /// Where the print thread should stop reading the execution list
     364             :   std::atomic<unsigned int> _execution_list_end;
     365             : 
     366             :   /// The cumulative time and memory for each section.  This is updated on update()
     367             :   /// Note that this is _total_ cumulative time/memory across every place
     368             :   /// that section is in the graph
     369             :   ///
     370             :   /// I'm making this a map so that we can give out references to the values
     371             :   /// The three time values are: self, children, total (see CumulativeSectionInfo)
     372             :   /// The map is on std::string because we might need to be able to retrieve
     373             :   /// timing values in a "late binding" situation _before_ the section
     374             :   /// has been registered.
     375             :   std::unordered_map<std::string, CumulativeSectionInfo> _cumulative_section_info;
     376             : 
     377             :   /// Pointers into _cumulative_section_info indexed on PerfID
     378             :   /// This is here for convenience and speed so we don't need
     379             :   /// to iterate over the above map much - and it makes it
     380             :   /// easier to sort
     381             :   std::vector<CumulativeSectionInfo *> _cumulative_section_info_ptrs;
     382             : 
     383             :   /// Whether or not timing is active
     384             :   bool _active;
     385             : 
     386             :   /// The promise to the print thread that will signal when to stop
     387             :   std::promise<bool> _done;
     388             : 
     389             :   /// Tell the print thread to teardown
     390             :   bool _destructing;
     391             : 
     392             :   /// The mutex to use with a condition_variable predicate to guard _destructing
     393             :   std::mutex _destructing_mutex;
     394             : 
     395             :   /// The condition_variable to wake the print thread
     396             :   std::condition_variable _finished_section;
     397             : 
     398             :   /// The time limit before a message is printed (in seconds)
     399             :   std::atomic<Real> _live_print_time_limit;
     400             : 
     401             :   /// The memory limit before a message is printed (in MB)
     402             :   std::atomic<unsigned int> _live_print_mem_limit;
     403             : 
     404             :   /// The object that is doing live printing
     405             :   const std::unique_ptr<PerfGraphLivePrint> _live_print;
     406             : 
     407             :   /// The thread for printing sections as they execute
     408             :   std::thread _print_thread;
     409             : 
     410             :   // Here so PerfGuard is the only thing that can call push/pop
     411             :   friend class PerfGuard;
     412             :   friend class PerfGraphLivePrint;
     413             :   friend void dataStore(std::ostream &, PerfGraph &, void *);
     414             :   friend void dataLoad(std::istream &, PerfGraph &, void *);
     415             : 
     416             : private:
     417             :   /**
     418             :    * Helper for building a VariadicTable that represents the tree.
     419             :    *
     420             :    * @param level The level to print out below (<=)
     421             :    * @param heaviest Show only the heaviest branch
     422             :    */
     423             :   FullTable treeTable(const unsigned int level, const bool heaviest = false);
     424             : 
     425             :   template <typename Functor>
     426             :   void treeRecurseInternal(const PerfNode & node,
     427             :                            const Functor & act,
     428             :                            const unsigned int level,
     429             :                            const bool heaviest,
     430             :                            unsigned int current_depth) const;
     431             : };
     432             : /// Recursive worker for treeRecurse(): calls act on node if its section level is <= level, then descends
     433             : template <typename Functor>
     434             : void
     435     8027458 : PerfGraph::treeRecurseInternal(const PerfNode & node,
     436             :                                const Functor & act,
     437             :                                const unsigned int level,
     438             :                                const bool heaviest,
     439             :                                unsigned int current_depth) const
     440             : {
     441             :   mooseAssert(_perf_graph_registry.sectionExists(node.id()), "Unable to find section name!");
     442             :   // Look up the registered info for this node's section; its _level decides whether act is called
     443     8027458 :   const auto & current_section_info = _perf_graph_registry.readSectionInfo(node.id());
     444     8027458 :   if (current_section_info._level <= level)
     445             :   {
     446             :     mooseAssert(!_cumulative_section_info_ptrs.empty(), "update() must be run before treeRecurse!");
     447      809389 :     act(node, current_section_info, current_depth++); // depth only grows for nodes that are acted on
     448             :   }
     449             :   // Descend: only into the child with the largest totalTime() if heaviest, otherwise into every child
     450     8027458 :   if (heaviest)
     451             :   {
     452         188 :     const PerfNode * heaviest_child = nullptr;
     453        1118 :     for (const auto & child_it : node.children())
     454             :     {
     455         930 :       const auto & current_child = *child_it.second;
     456             :       // Strict > means the first child encountered wins when totalTime() values tie
     457         930 :       if (!heaviest_child || (current_child.totalTime() > heaviest_child->totalTime()))
     458         343 :         heaviest_child = &current_child;
     459             :     }
     460             :     // heaviest_child stays null when the node has no children; the recursion then stops here
     461         188 :     if (heaviest_child)
     462         167 :       treeRecurseInternal(*heaviest_child, act, level, true, current_depth);
     463             :   }
     464             :   else
     465             :   {
     466    16006735 :     for (const auto & child_it : node.children())
     467     7979465 :       treeRecurseInternal(*child_it.second, act, level, false, current_depth);
     468             :   }
     469     8027458 : }
     470             : /// Entry point for traversing the tree: starts treeRecurseInternal() at the root node with a depth of 0
     471             : template <typename Functor>
     472             : void
     473       47826 : PerfGraph::treeRecurse(const Functor & act,
     474             :                        const unsigned int level /* = MOOSE_MAX_STACK_SIZE */,
     475             :                        const bool heaviest /* = false */) const
     476             : {
     477             :   mooseAssert(_root_node, "Root node does not exist; calling this too early");
     478       47826 :   treeRecurseInternal(*_root_node, act, level, heaviest, 0); // 0 is the starting depth passed to act
     479       47826 : }
     480             : 
     481             : void dataStore(std::ostream & stream, PerfGraph & perf_graph, void * context);
     482             : void dataLoad(std::istream & stream, PerfGraph & perf_graph, void * context);

Generated by: LCOV version 1.14