LCOV - code coverage report
Current view: top level - include/utils - perf_log.h (source / functions) Hit Total Coverage
Test: libMesh/libmesh: #4229 (6a9aeb) with base 727f46 Lines: 70 83 84.3 %
Date: 2025-08-19 19:27:09 Functions: 12 12 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // The libMesh Finite Element Library.
       2             : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
       3             : 
       4             : // This library is free software; you can redistribute it and/or
       5             : // modify it under the terms of the GNU Lesser General Public
       6             : // License as published by the Free Software Foundation; either
       7             : // version 2.1 of the License, or (at your option) any later version.
       8             : 
       9             : // This library is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12             : // Lesser General Public License for more details.
      13             : 
      14             : // You should have received a copy of the GNU Lesser General Public
      15             : // License along with this library; if not, write to the Free Software
      16             : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      17             : 
      18             : 
      19             : 
      20             : #ifndef LIBMESH_PERFLOG_H
      21             : #define LIBMESH_PERFLOG_H
      22             : 
      23             : 
      24             : // Local includes
      25             : #include "libmesh/libmesh_common.h"
      26             : 
      27             : // C++ includes
      28             : #include <cstddef>
      29             : #include <map>
      30             : #include <stack>
      31             : #include <string>
      32             : #include <vector>
      33             : #include <deque>
      34             : #include <memory>
      35             : #ifdef LIBMESH_HAVE_SYS_TIME_H
      36             : #include <sys/time.h> // gettimeofday() on Unix
      37             : #endif
      38             : #ifndef LIBMESH_HAVE_GETTIMEOFDAY
      39             : #include "libmesh/win_gettimeofday.h" // gettimeofday() on Windows
      40             : #endif
      41             : 
      42             : namespace libMesh
      43             : {
      44             : 
      45             : /**
      46             :  * The \p PerfData class simply contains the performance
      47             :  * data that is recorded for individual events.
      48             :  *
      49             :  * \author Benjamin Kirk
      50             :  * \date 2003
      51             :  * \brief Data object managed by PerfLog
      52             :  */
      53             : class PerfData
      54             : {
      55             : public:
      56             : 
      57             :   /**
      58             :    * Constructor.  Initializes data to be empty.
      59             :    */
      60      101284 :   PerfData () :
      61       49041 :     tot_time(0.),
      62       49041 :     tot_time_incl_sub(0.),
      63       49041 :     tstart(),
      64       49041 :     tstart_incl_sub(),
      65       49041 :     count(0),
      66       49041 :     open(false),
      67      101284 :     called_recursively(0)
      68       34252 :   {}
      69             : 
      70             : 
      71             :   /**
      72             :    * Total time spent in this event.
      73             :    */
      74             :   double tot_time;
      75             : 
      76             :   /**
      77             :    * Total time spent in this event, including sub-events.
      78             :    */
      79             :   double tot_time_incl_sub;
      80             : 
      81             :   /**
      82             :    * Structure defining when the event
      83             :    * was last started.
      84             :    */
      85             :   struct timeval tstart;
      86             : 
      87             :   /**
      88             :    * Structure defining when the event
      89             :    * was last started, including sub-events.
      90             :    */
      91             :   struct timeval tstart_incl_sub;
      92             : 
      93             :   /**
      94             :    * The number of times this event has
      95             :    * been executed
      96             :    */
      97             :   unsigned int count;
      98             : 
      99             :   /**
     100             :    * Flag indicating if we are currently
     101             :    * monitoring this event.  Should only
     102             :    * be true while the event is executing.
     103             :    */
     104             :   bool open;
     105             : 
     106             :   void   start ();
     107             :   void   restart ();
     108             :   double pause ();
     109             :   double pause_for(PerfData & other);
     110             :   double stopit ();
     111             : 
     112             :   /**
     113             :    * Sums timing results from \p other
     114             :    */
     115        1382 :   PerfData & operator += (const PerfData & other)
     116             :   {
     117        1382 :     libmesh_assert(!open);
     118        2764 :     tot_time += other.tot_time;
     119        2764 :     tot_time_incl_sub += other.tot_time_incl_sub;
     120        2764 :     count += other.count;
     121             : 
     122        1382 :     return *this;
     123             :   }
     124             : 
     125             :   int called_recursively;
     126             : 
     127             : protected:
     128             :   double stop_or_pause(const bool do_stop);
     129             : };
     130             : 
     131             : 
     132             : 
     133             : 
     134             : /**
     135             :  * The \p PerfLog class allows monitoring of specific events.
     136             :  * An event is defined by a unique string that functions as
     137             :  * a label.  Each time the event is executed data are recorded.
     138             :  * This class is particularly useful for finding performance
     139             :  * bottlenecks.
     140             :  *
     141             :  * \author Benjamin Kirk
     142             :  * \date 2003
     143             :  * \brief Responsible for timing and summarizing events.
     144             :  */
     145             : class PerfLog
     146             : {
     147             : public:
     148             : 
     149             :   /**
     150             :    * Constructor.  \p label_name is the name of the object, which
     151             :    * will bw print in the log to distinguish it from other objects.
     152             :    * \p log_events is a flag to optionally
     153             :    * disable logging.  You can use this flag to turn off
     154             :    * logging without touching any other code.
     155             :    */
     156             :   PerfLog(std::string label_name="",
     157             :           const bool log_events=true);
     158             : 
     159             :   /**
     160             :    * Destructor. Calls \p clear() and \p print_log().
     161             :    */
     162             :   ~PerfLog();
     163             : 
     164             :   /**
     165             :    * Clears all the internal data and restores the
     166             :    * data structures to a pristine state.  This function
     167             :    * checks to see if it is currently monitoring any
     168             :    * events, and if so errors.  Be sure you are not
     169             :    * logging any events when you call this function.
     170             :    */
     171             :   void clear();
     172             : 
     173             :   /**
     174             :    * Disables performance logging for an active object.
     175             :    */
     176      188012 :   void disable_logging() { log_events = false; }
     177             : 
     178             :   /**
     179             :    * Enables performance logging for an active object.
     180             :    */
     181      188012 :   void enable_logging() { log_events = true; }
     182             : 
     183             :   /**
     184             :    * \returns \p true iff performance logging is enabled
     185             :    */
     186       94006 :   bool logging_enabled() const { return log_events; }
     187             : 
     188             :   /**
     189             :    * Tells the PerfLog to only print log results summarized by header
     190             :    */
     191             :   void enable_summarized_logs() { summarize_logs = true; }
     192             : 
     193             :   /**
     194             :    * Tells the PerfLog to print detailed log results (this is the
     195             :    * default behavior)
     196             :    */
     197             :   void disable_summarized_logs() { summarize_logs = false; }
     198             : 
     199             :   /**
     200             :    * \returns \p true iff log results will be summarized by header
     201             :    */
     202             :   bool summarized_logs_enabled() { return summarize_logs; }
     203             : 
     204             :   /**
     205             :    * Push the event \p label onto the stack, pausing any active event.
     206             :    *
     207             :    * Note - for speed the PerfLog directly considers the *pointers*
     208             :    * here.  Supply pointers to string literals or other character
     209             :    * arrays whose lifetime will exceed the lifetime of the PerfLog
     210             :    * object, not to temporarily allocated arrays.
     211             :    */
     212             :   void fast_push (const char * label,
     213             :                   const char * header="");
     214             : 
     215             :   /**
     216             :    * Push the event \p label onto the stack, pausing any active event.
     217             :    *
     218             :    * This method will eventually be deprecated.  For backwards
     219             :    * compatibility, the PerfLog must copy the contents of these
     220             :    * character arrays into strings, which ironically damages the
     221             :    * performance we are trying to profile.  Use fast_push() (with
     222             :    * compatible long-lived character array data) instead.
     223             :    */
     224             :   void push (const char * label,
     225             :              const char * header="");
     226             : 
     227             :   /**
     228             :    * Push the event \p label onto the stack, pausing any active event.
     229             :    *
     230             :    * This method will eventually be deprecated.  String manipulation
     231             :    * is too low performance to use when performance monitoring hot
     232             :    * spots.  Use fast_push() instead.
     233             :    */
     234             :   void push (const std::string & label,
     235             :              const std::string & header="");
     236             : 
     237             :   /**
     238             :    * Pop the event \p label off the stack, resuming any lower event.
     239             :    *
     240             :    * This method must be passed the exact same pointers as were passed
     241             :    * to fast_push, not merely pointers to identical strings.
     242             :    * This method is called from the PerfItem destructor, so it should
     243             :    * not throw. We have therefore marked it noexcept as a reminder.
     244             :    */
     245             :   void fast_pop (const char * label,
     246             :                  const char * header="") noexcept;
     247             : 
     248             :   /**
     249             :    * Pop the event \p label off the stack, resuming any lower event.
     250             :    *
     251             :    * This method will eventually be deprecated.  Use fast_pop() (with
     252             :    * the exact same pointers supplied to fast_push()) instead.
     253             :    */
     254             :   void pop (const char * label,
     255             :             const char * header="");
     256             : 
     257             :   /**
     258             :    * Pop the event \p label off the stack, resuming any lower event.
     259             :    *
     260             :    * This method will eventually be deprecated.  String manipulation
     261             :    * is too low performance to use when performance monitoring hot
     262             :    * spots.  Use fast_pop() instead.
     263             :    */
     264             :   void pop (const std::string & label,
     265             :             const std::string & header="");
     266             : 
     267             :   /**
     268             :    * Start monitoring the event named \p label.
     269             :    */
     270             :   void start_event(const std::string & label,
     271             :                    const std::string & header="");
     272             : 
     273             :   /**
     274             :    * Stop monitoring the event named \p label.
     275             :    */
     276             :   void stop_event(const std::string & label,
     277             :                   const std::string & header="");
     278             : 
     279             :   /**
     280             :    * Suspend monitoring of the event.
     281             :    */
     282             :   void pause_event(const std::string & label,
     283             :                    const std::string & header="");
     284             : 
     285             :   /**
     286             :    * Restart monitoring the event.
     287             :    */
     288             :   void restart_event(const std::string & label,
     289             :                      const std::string & header="");
     290             : 
     291             :   /**
     292             :    * \returns A string containing:
     293             :    * (1) Basic machine information (if first call)
     294             :    * (2) The performance log
     295             :    */
     296             :   std::string get_log() const;
     297             : 
     298             :   /**
     299             :    * \returns A string containing ONLY the information header.
     300             :    */
     301             :   std::string get_info_header() const;
     302             : 
     303             :   /**
     304             :    * \returns A string containing ONLY the log information
     305             :    */
     306             :   std::string get_perf_info() const;
     307             : 
     308             :   /**
     309             :    * Print the log.
     310             :    */
     311             :   void print_log() const;
     312             : 
     313             :   /**
     314             :    * \returns The total time spent on this event.
     315             :    */
     316             :   double get_elapsed_time() const;
     317             : 
     318             :   /**
     319             :    * \returns The active time
     320             :    */
     321             :   double get_active_time() const;
     322             : 
     323             :   /**
     324             :    * Return the PerfData object associated with a label and header.
     325             :    */
     326             :   PerfData get_perf_data(const std::string & label, const std::string & header="");
     327             : 
     328             :   /**
     329             :    * Typdef for the underlying logging data structure.
     330             :    */
     331             :   typedef std::map<std::pair<const char *,
     332             :                              const char *>,
     333             :                    PerfData> log_type;
     334             : 
     335             : private:
     336             : 
     337             : 
     338             :   /**
     339             :    * The label for this object.
     340             :    */
     341             :   const std::string label_name;
     342             : 
     343             :   /**
     344             :    * Flag to optionally disable all logging.
     345             :    */
     346             :   bool log_events;
     347             : 
     348             :   /**
     349             :    * Flag to optionally summarize logs
     350             :    */
     351             :   bool summarize_logs;
     352             : 
     353             :   /**
     354             :    * The total running time for recorded events.
     355             :    */
     356             :   double total_time;
     357             : 
     358             :   /**
     359             :    * The time we were constructed or last cleared.
     360             :    */
     361             :   struct timeval tstart;
     362             : 
     363             :   /**
     364             :    * The actual log.
     365             :    *
     366             :    * An unsorted_map would work fine here and would be asymptotically
     367             :    * faster, but in my tests for our log sizes there was no
     368             :    * improvement.
     369             :    */
     370             :   log_type log;
     371             : 
     372             :   /**
     373             :    * A stack to hold the current performance log trace.
     374             :    */
     375             :   std::stack<PerfData*> log_stack;
     376             : 
     377             :   /**
     378             :    * Flag indicating if print_log() has been called.
     379             :    * This is used to print a header with machine-specific
     380             :    * data the first time that print_log() is called.
     381             :    */
     382             :   static bool called;
     383             : 
     384             :   /**
     385             :    * Splits a string on whitespace into a vector of separate strings.  This is used to make the
     386             :    * LIBMESH_CONFIGURE_INFO a little more manageable.
     387             :    */
     388             :   void split_on_whitespace(const std::string & input,
     389             :                            std::vector<std::string> & output) const;
     390             : 
     391             :   /**
     392             :    * Workaround to give us fixed pointers to character arrays for
     393             :    * every string.  Using std::set instead might work: it won't
     394             :    * invalidate iterators, which I *think* means it doesn't have any
     395             :    * reason to modify or copy their contents or otherwise invalidate
     396             :    * their c_str() pointers... but I can't prove it from the standards
     397             :    * doc, so let's be safe.
     398             :    */
     399             :   std::map<std::string, std::unique_ptr<const char[]>> non_temporary_strings;
     400             : };
     401             : 
     402             : 
     403             : 
     404             : // ------------------------------------------------------------
     405             : // PerfData class member functions
     406             : inline
     407    10474892 : void PerfData::start ()
     408             : {
     409    20511493 :   this->count++;
     410    20511493 :   this->called_recursively++;
     411    20511493 :   gettimeofday (&(this->tstart), nullptr);
     412    20511493 :   this->tstart_incl_sub = this->tstart;
     413    10474892 : }
     414             : 
     415             : 
     416             : 
     417             : inline
     418    39936528 : void PerfData::restart ()
     419             : {
     420    79813444 :   gettimeofday (&(this->tstart), nullptr);
     421    39936930 : }
     422             : 
     423             : 
     424             : 
     425             : inline
     426             : double PerfData::pause ()
     427             : {
     428             :   return this->stop_or_pause(false);
     429             : }
     430             : 
     431             : 
     432             : inline
     433   100327375 : double PerfData::stop_or_pause(const bool do_stop)
     434             : {
     435             :   // save the start times, reuse the structure we have rather than create
     436             :   // a new one.
     437             :   const time_t
     438   100327375 :     tstart_tv_sec  = this->tstart.tv_sec,
     439   100327375 :     tstart_tv_usec = this->tstart.tv_usec;
     440             : 
     441   100327375 :   gettimeofday (&(this->tstart), nullptr);
     442             : 
     443   200235541 :   const double elapsed_time = (static_cast<double>(this->tstart.tv_sec  - tstart_tv_sec) +
     444   100327375 :                                static_cast<double>(this->tstart.tv_usec - tstart_tv_usec)*1.e-6);
     445             : 
     446   100327375 :   this->tot_time += elapsed_time;
     447             : 
     448   100327375 :   if (do_stop)
     449             :     {
     450   200235541 :       const double elapsed_time_incl_sub = (static_cast<double>(this->tstart.tv_sec  - this->tstart_incl_sub.tv_sec) +
     451   100327375 :                                             static_cast<double>(this->tstart.tv_usec - this->tstart_incl_sub.tv_usec)*1.e-6);
     452             : 
     453   100327375 :       this->tot_time_incl_sub += elapsed_time_incl_sub;
     454             :     }
     455             : 
     456   100327375 :   return elapsed_time;
     457             : }
     458             : 
     459             : 
     460             : 
     461             : inline
     462    79583617 : double PerfData::pause_for(PerfData & other)
     463             : {
     464    79583617 :   gettimeofday (&(other.tstart), nullptr);
     465             : 
     466   159166832 :   const double elapsed_time = (static_cast<double>(other.tstart.tv_sec  - this->tstart.tv_sec) +
     467    79583617 :                                static_cast<double>(other.tstart.tv_usec - this->tstart.tv_usec)*1.e-6);
     468    79583617 :   this->tot_time += elapsed_time;
     469             : 
     470    79583617 :   other.count++;
     471    79583617 :   other.called_recursively++;
     472    79583617 :   other.tstart_incl_sub = other.tstart;
     473             : 
     474    79583617 :   return elapsed_time;
     475             : }
     476             : 
     477             : 
     478             : 
     479             : inline
     480    49992613 : double PerfData::stopit ()
     481             : {
     482             :   // stopit is just similar to pause except that it decrements the
     483             :   // recursive call counter
     484             : 
     485   100327375 :   this->called_recursively--;
     486   100327375 :   return this->stop_or_pause(true);
     487             : }
     488             : 
     489             : 
     490             : 
     491             : // ------------------------------------------------------------
     492             : // PerfLog class inline member functions
     493             : inline
     494   145214634 : void PerfLog::fast_push (const char * label,
     495             :                          const char * header)
     496             : {
     497   145214634 :   if (this->log_events)
     498             :     {
     499             :       // Get a reference to the event data to avoid
     500             :       // repeated map lookups
     501   200080336 :       PerfData * perf_data = &(log[std::make_pair(header,label)]);
     502             : 
     503   100095110 :       if (!log_stack.empty())
     504    79583617 :         total_time += log_stack.top()->pause_for(*perf_data);
     505             :       else
     506    10474892 :         perf_data->start();
     507    49992613 :       log_stack.push(perf_data);
     508             :     }
     509   145214634 : }
     510             : 
     511             : 
     512             : 
     513             : inline
     514   145383727 : void PerfLog::fast_pop(const char * libmesh_dbg_var(label),
     515             :                        const char * libmesh_dbg_var(header)) noexcept
     516             : {
     517   145383727 :   if (this->log_events)
     518             :     {
     519             :       // If there's nothing on the stack, then we can't pop anything. Previously we
     520             :       // asserted that the log_stack was not empty, but we should not throw from
     521             :       // this function, so instead just return in that case.
     522   100327375 :       if (log_stack.empty())
     523           0 :         return;
     524             : 
     525             : #ifndef NDEBUG
     526             :       // In debug mode, we carefully check that before popping the stack,
     527             :       // we have the correct event.
     528             : 
     529             :       // Get pointer to corresponding PerfData, or nullptr if it does not exist.
     530    49992613 :       auto it = log.find(std::make_pair(header, label));
     531    49992613 :       PerfData * perf_data = (it == log.end()) ? nullptr : &(it->second);
     532             : 
     533             :       // If this event is not found in the log, then it could indicate
     534             :       // that you have a STOP_LOG(a,b) without a corresponding
     535             :       // START_LOG(a,b) call. We don't pop anything from the log_stack
     536             :       // in this case.
     537    49992613 :       if (!perf_data)
     538             :         {
     539           0 :           libMesh::err << "PerfLog can't pop (" << header << ',' << label << ')' << std::endl;
     540           0 :           libMesh::err << "No such event was found in the log, you may have a mismatched START/STOP_LOG statement." << std::endl;
     541           0 :           return;
     542             :         }
     543             : 
     544    49992613 :       if (perf_data != log_stack.top())
     545             :         {
     546             :           // We've been asked to pop an event which is not at the
     547             :           // top() of the log_stack. The event may be deeper in the
     548             :           // stack instead, so search for it now. If we find it, we'll
     549             :           // pop all intermediate items off the stack (abandoning any
     550             :           // attempt to time them accurately) and let the optimized
     551             :           // code path take over from there.
     552           0 :           std::deque<PerfData*> tmp_stack;
     553             : 
     554           0 :           while (!log_stack.empty() && perf_data != log_stack.top())
     555             :             {
     556           0 :               tmp_stack.push_front(log_stack.top());
     557           0 :               log_stack.pop();
     558             :             }
     559             : 
     560             :           // If we exhaustively searched the log_stack without finding
     561             :           // the event, we can't do anything else so just put
     562             :           // everything back and return, not doing anything for this
     563             :           // event.
     564           0 :           if (log_stack.empty())
     565             :             {
     566           0 :               while (!tmp_stack.empty())
     567             :                 {
     568           0 :                   log_stack.push(tmp_stack.back());
     569           0 :                   tmp_stack.pop_back();
     570             :                 }
     571             : 
     572           0 :               return;
     573             :             }
     574             :         }
     575             : #endif
     576             : 
     577             :       // In optimized mode, we just pop from the top of the stack and
     578             :       // resume timing the next entry.
     579   100327375 :       total_time += log_stack.top()->stopit();
     580             : 
     581    49992613 :       log_stack.pop();
     582             : 
     583   100327375 :       if (!log_stack.empty())
     584    79813444 :         log_stack.top()->restart();
     585             :     }
     586             : }
     587             : 
     588             : 
     589             : 
     590             : inline
     591             : double PerfLog::get_elapsed_time () const
     592             : {
     593             :   struct timeval tnow;
     594             : 
     595             :   gettimeofday (&tnow, nullptr);
     596             : 
     597             :   const double elapsed_time = (static_cast<double>(tnow.tv_sec  - tstart.tv_sec) +
     598             :                                static_cast<double>(tnow.tv_usec - tstart.tv_usec)*1.e-6);
     599             :   return elapsed_time;
     600             : }
     601             : 
     602             : inline
     603             : double PerfLog::get_active_time() const
     604             : {
     605             :   return total_time;
     606             : }
     607             : 
     608             : } // namespace libMesh
     609             : 
     610             : 
     611             : 
     612             : #endif // LIBMESH_PERFLOG_H

Generated by: LCOV version 1.14