LCOV - code coverage report
Current view: top level - include/utils - perf_log.h (source / functions) Hit Total Coverage
Test: libMesh/libmesh: #4475 (55045b) with base a68cc6 Lines: 74 87 85.1 %
Date: 2026-06-03 14:29:06 Functions: 12 12 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // The libMesh Finite Element Library.
       2             : // Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
       3             : 
       4             : // This library is free software; you can redistribute it and/or
       5             : // modify it under the terms of the GNU Lesser General Public
       6             : // License as published by the Free Software Foundation; either
       7             : // version 2.1 of the License, or (at your option) any later version.
       8             : 
       9             : // This library is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12             : // Lesser General Public License for more details.
      13             : 
      14             : // You should have received a copy of the GNU Lesser General Public
      15             : // License along with this library; if not, write to the Free Software
      16             : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      17             : 
      18             : 
      19             : 
      20             : #ifndef LIBMESH_PERFLOG_H
      21             : #define LIBMESH_PERFLOG_H
      22             : 
      23             : 
      24             : // Local includes
      25             : #include "libmesh/libmesh_common.h"
      26             : 
      27             : #include "libmesh/threads.h"
      28             : 
      29             : // C++ includes
      30             : #include <cstddef>
      31             : #include <map>
      32             : #include <stack>
      33             : #include <string>
      34             : #include <vector>
      35             : #include <deque>
      36             : #include <memory>
      37             : #ifdef LIBMESH_HAVE_SYS_TIME_H
      38             : #include <sys/time.h> // gettimeofday() on Unix
      39             : #endif
      40             : #ifndef LIBMESH_HAVE_GETTIMEOFDAY
      41             : #include "libmesh/win_gettimeofday.h" // gettimeofday() on Windows
      42             : #endif
      43             : #ifdef LIBMESH_HAVE_NVTX_API
      44             : #include <nvtx3/nvtx3.hpp>
      45             : #endif
      46             : #ifdef LIBMESH_HAVE_OPENMP
      47             : #include <omp.h>
      48             : #endif
      49             : 
      50             : namespace libMesh
      51             : {
      52             : 
      53             : /**
      54             :  * The \p PerfData class simply contains the performance
      55             :  * data that is recorded for individual events.
      56             :  *
      57             :  * \author Benjamin Kirk
      58             :  * \date 2003
      59             :  * \brief Data object managed by PerfLog
      60             :  */
      61             : class PerfData
      62             : {
      63             : public:
      64             : 
      65             :   /**
      66             :    * Constructor.  Initializes data to be empty.
      67             :    */
      68      107427 :   PerfData () :
      69       55877 :     tot_time(0.),
      70       55877 :     tot_time_incl_sub(0.),
      71       55877 :     tstart(),
      72       55877 :     tstart_incl_sub(),
      73       55877 :     count(0),
      74       55877 :     open(false),
      75      107427 :     called_recursively(0)
      76       33640 :   {}
      77             : 
      78             : 
      79             :   /**
      80             :    * Total time spent in this event.
      81             :    */
      82             :   double tot_time;
      83             : 
      84             :   /**
      85             :    * Total time spent in this event, including sub-events.
      86             :    */
      87             :   double tot_time_incl_sub;
      88             : 
      89             :   /**
      90             :    * Structure defining when the event
      91             :    * was last started.
      92             :    */
      93             :   struct timeval tstart;
      94             : 
      95             :   /**
      96             :    * Structure defining when the event
      97             :    * was last started, including sub-events.
      98             :    */
      99             :   struct timeval tstart_incl_sub;
     100             : 
     101             :   /**
     102             :    * The number of times this event has
     103             :    * been executed
     104             :    */
     105             :   unsigned int count;
     106             : 
     107             :   /**
     108             :    * Flag indicating if we are currently
     109             :    * monitoring this event.  Should only
     110             :    * be true while the event is executing.
     111             :    */
     112             :   bool open;
     113             : 
     114             :   void   start ();
     115             :   void   restart ();
     116             :   double pause ();
     117             :   double pause_for(PerfData & other);
     118             :   double stopit ();
     119             : 
     120             :   /**
     121             :    * Sums timing results from \p other
     122             :    */
     123        1588 :   PerfData & operator += (const PerfData & other)
     124             :   {
     125        1588 :     libmesh_assert(!open);
     126        3176 :     tot_time += other.tot_time;
     127        3176 :     tot_time_incl_sub += other.tot_time_incl_sub;
     128        3176 :     count += other.count;
     129             : 
     130        1588 :     return *this;
     131             :   }
     132             : 
     133             :   int called_recursively;
     134             : 
     135             : protected:
     136             :   double stop_or_pause(const bool do_stop);
     137             : };
     138             : 
     139             : 
     140             : 
     141             : 
     142             : /**
     143             :  * The \p PerfLog class allows monitoring of specific events.
     144             :  * An event is defined by a unique string that functions as
     145             :  * a label.  Each time the event is executed data are recorded.
     146             :  * This class is particularly useful for finding performance
     147             :  * bottlenecks.
     148             :  *
     149             :  * \author Benjamin Kirk
     150             :  * \date 2003
     151             :  * \brief Responsible for timing and summarizing events.
     152             :  */
     153             : class PerfLog
     154             : {
     155             : public:
     156             : 
     157             :   /**
     158             :    * Constructor.  \p label_name is the name of the object, which
     159             :    * will bw print in the log to distinguish it from other objects.
     160             :    * \p log_events is a flag to optionally
     161             :    * disable logging.  You can use this flag to turn off
     162             :    * logging without touching any other code.
     163             :    */
     164             :   PerfLog(std::string label_name="",
     165             :           const bool log_events=true);
     166             : 
     167             :   /**
     168             :    * Destructor. Calls \p clear() and \p print_log().
     169             :    */
     170             :   ~PerfLog();
     171             : 
     172             :   /**
     173             :    * Clears all the internal data and restores the
     174             :    * data structures to a pristine state.  This function
     175             :    * checks to see if it is currently monitoring any
     176             :    * events, and if so errors.  Be sure you are not
     177             :    * logging any events when you call this function.
     178             :    */
     179             :   void clear();
     180             : 
     181             :   /**
     182             :    * Disables performance logging for an active object.
     183             :    */
     184      147633 :   void disable_logging() { log_events = false; }
     185             : 
     186             :   /**
     187             :    * Enables performance logging for an active object.
     188             :    */
     189      147633 :   void enable_logging() { log_events = true; }
     190             : 
     191             :   /**
     192             :    * \returns \p true iff performance logging is enabled
     193             :    */
     194       73821 :   bool logging_enabled() const { return log_events; }
     195             : 
     196             :   /**
     197             :    * Tells the PerfLog to only print log results summarized by header
     198             :    */
     199             :   void enable_summarized_logs() { summarize_logs = true; }
     200             : 
     201             :   /**
     202             :    * Tells the PerfLog to print detailed log results (this is the
     203             :    * default behavior)
     204             :    */
     205             :   void disable_summarized_logs() { summarize_logs = false; }
     206             : 
     207             :   /**
     208             :    * \returns \p true iff log results will be summarized by header
     209             :    */
     210             :   bool summarized_logs_enabled() { return summarize_logs; }
     211             : 
     212             :   /**
     213             :    * Push the event \p label onto the stack, pausing any active event.
     214             :    *
     215             :    * Note - for speed the PerfLog directly considers the *pointers*
     216             :    * here.  Supply pointers to string literals or other character
     217             :    * arrays whose lifetime will exceed the lifetime of the PerfLog
     218             :    * object, not to temporarily allocated arrays.
     219             :    */
     220             :   void fast_push (const char * label,
     221             :                   const char * header="");
     222             : 
     223             :   /**
     224             :    * Push the event \p label onto the stack, pausing any active event.
     225             :    *
     226             :    * This method will eventually be deprecated.  For backwards
     227             :    * compatibility, the PerfLog must copy the contents of these
     228             :    * character arrays into strings, which ironically damages the
     229             :    * performance we are trying to profile.  Use fast_push() (with
     230             :    * compatible long-lived character array data) instead.
     231             :    */
     232             :   void push (const char * label,
     233             :              const char * header="");
     234             : 
     235             :   /**
     236             :    * Push the event \p label onto the stack, pausing any active event.
     237             :    *
     238             :    * This method will eventually be deprecated.  String manipulation
     239             :    * is too low performance to use when performance monitoring hot
     240             :    * spots.  Use fast_push() instead.
     241             :    */
     242             :   void push (const std::string & label,
     243             :              const std::string & header="");
     244             : 
     245             :   /**
     246             :    * Pop the event \p label off the stack, resuming any lower event.
     247             :    *
     248             :    * This method must be passed the exact same pointers as were passed
     249             :    * to fast_push, not merely pointers to identical strings.
     250             :    * This method is called from the PerfItem destructor, so it should
     251             :    * not throw. We have therefore marked it noexcept as a reminder.
     252             :    */
     253             :   void fast_pop (const char * label,
     254             :                  const char * header="") noexcept;
     255             : 
     256             :   /**
     257             :    * Pop the event \p label off the stack, resuming any lower event.
     258             :    *
     259             :    * This method will eventually be deprecated.  Use fast_pop() (with
     260             :    * the exact same pointers supplied to fast_push()) instead.
     261             :    */
     262             :   void pop (const char * label,
     263             :             const char * header="");
     264             : 
     265             :   /**
     266             :    * Pop the event \p label off the stack, resuming any lower event.
     267             :    *
     268             :    * This method will eventually be deprecated.  String manipulation
     269             :    * is too low performance to use when performance monitoring hot
     270             :    * spots.  Use fast_pop() instead.
     271             :    */
     272             :   void pop (const std::string & label,
     273             :             const std::string & header="");
     274             : 
     275             :   /**
     276             :    * Start monitoring the event named \p label.
     277             :    */
     278             :   void start_event(const std::string & label,
     279             :                    const std::string & header="");
     280             : 
     281             :   /**
     282             :    * Stop monitoring the event named \p label.
     283             :    */
     284             :   void stop_event(const std::string & label,
     285             :                   const std::string & header="");
     286             : 
     287             :   /**
     288             :    * Suspend monitoring of the event.
     289             :    */
     290             :   void pause_event(const std::string & label,
     291             :                    const std::string & header="");
     292             : 
     293             :   /**
     294             :    * Restart monitoring the event.
     295             :    */
     296             :   void restart_event(const std::string & label,
     297             :                      const std::string & header="");
     298             : 
     299             :   /**
     300             :    * \returns A string containing:
     301             :    * (1) Basic machine information (if first call)
     302             :    * (2) The performance log
     303             :    */
     304             :   std::string get_log() const;
     305             : 
     306             :   /**
     307             :    * \returns A string containing ONLY the information header.
     308             :    */
     309             :   std::string get_info_header() const;
     310             : 
     311             :   /**
     312             :    * \returns A string containing ONLY the log information
     313             :    */
     314             :   std::string get_perf_info() const;
     315             : 
     316             :   /**
     317             :    * Print the log.
     318             :    */
     319             :   void print_log() const;
     320             : 
     321             :   /**
     322             :    * \returns The total time spent on this event.
     323             :    */
     324             :   double get_elapsed_time() const;
     325             : 
     326             :   /**
     327             :    * \returns The active time
     328             :    */
     329             :   double get_active_time() const;
     330             : 
     331             :   /**
     332             :    * Return the PerfData object associated with a label and header.
     333             :    */
     334             :   PerfData get_perf_data(const std::string & label, const std::string & header="");
     335             : 
     336             :   /**
     337             :    * Typdef for the underlying logging data structure.
     338             :    */
     339             :   typedef std::map<std::pair<const char *,
     340             :                              const char *>,
     341             :                    PerfData> log_type;
     342             : 
     343             : private:
     344             : 
     345             : 
     346             :   /**
     347             :    * The label for this object.
     348             :    */
     349             :   const std::string label_name;
     350             : 
     351             :   /**
     352             :    * Flag to optionally disable all logging.
     353             :    */
     354             :   bool log_events;
     355             : 
     356             :   /**
     357             :    * Flag to optionally summarize logs
     358             :    */
     359             :   bool summarize_logs;
     360             : 
     361             :   /**
     362             :    * The total running time for recorded events.
     363             :    */
     364             :   double total_time;
     365             : 
     366             :   /**
     367             :    * The time we were constructed or last cleared.
     368             :    */
     369             :   struct timeval tstart;
     370             : 
     371             :   /**
     372             :    * The actual log.
     373             :    *
     374             :    * An unsorted_map would work fine here and would be asymptotically
     375             :    * faster, but in my tests for our log sizes there was no
     376             :    * improvement.
     377             :    */
     378             :   log_type log;
     379             : 
     380             :   /**
     381             :    * A stack to hold the current performance log trace.
     382             :    */
     383             :   std::stack<PerfData*> log_stack;
     384             : 
     385             :   /**
     386             :    * Flag indicating if print_log() has been called.
     387             :    * This is used to print a header with machine-specific
     388             :    * data the first time that print_log() is called.
     389             :    */
     390             :   static bool called;
     391             : 
     392             :   /**
     393             :    * Splits a string on whitespace into a vector of separate strings.  This is used to make the
     394             :    * LIBMESH_CONFIGURE_INFO a little more manageable.
     395             :    */
     396             :   void split_on_whitespace(const std::string & input,
     397             :                            std::vector<std::string> & output) const;
     398             : 
     399             :   /**
     400             :    * Workaround to give us fixed pointers to character arrays for
     401             :    * every string.  Using std::set instead might work: it won't
     402             :    * invalidate iterators, which I *think* means it doesn't have any
     403             :    * reason to modify or copy their contents or otherwise invalidate
     404             :    * their c_str() pointers... but I can't prove it from the standards
     405             :    * doc, so let's be safe.
     406             :    */
     407             :   std::map<std::string, std::unique_ptr<const char[]>> non_temporary_strings;
     408             : };
     409             : 
     410             : 
     411             : 
     412             : // ------------------------------------------------------------
     413             : // PerfData class member functions
     414             : inline
     415     9826210 : void PerfData::start ()
     416             : {
     417    19189977 :   this->count++;
     418    19189977 :   this->called_recursively++;
     419    19189977 :   gettimeofday (&(this->tstart), nullptr);
     420    19189977 :   this->tstart_incl_sub = this->tstart;
     421     9826210 : }
     422             : 
     423             : 
     424             : 
     425             : inline
     426    40275482 : void PerfData::restart ()
     427             : {
     428    80526614 :   gettimeofday (&(this->tstart), nullptr);
     429    40275884 : }
     430             : 
     431             : 
     432             : 
     433             : inline
     434             : double PerfData::pause ()
     435             : {
     436             :   return this->stop_or_pause(false);
     437             : }
     438             : 
     439             : 
     440             : inline
     441    99726243 : double PerfData::stop_or_pause(const bool do_stop)
     442             : {
     443             :   // save the start times, reuse the structure we have rather than create
     444             :   // a new one.
     445             :   const time_t
     446    99726243 :     tstart_tv_sec  = this->tstart.tv_sec,
     447    99726243 :     tstart_tv_usec = this->tstart.tv_usec;
     448             : 
     449    99726243 :   gettimeofday (&(this->tstart), nullptr);
     450             : 
     451   199015719 :   const double elapsed_time = (static_cast<double>(this->tstart.tv_sec  - tstart_tv_sec) +
     452    99726243 :                                static_cast<double>(this->tstart.tv_usec - tstart_tv_usec)*1.e-6);
     453             : 
     454    99726243 :   this->tot_time += elapsed_time;
     455             : 
     456    99726243 :   if (do_stop)
     457             :     {
     458   199015719 :       const double elapsed_time_incl_sub = (static_cast<double>(this->tstart.tv_sec  - this->tstart_incl_sub.tv_sec) +
     459    99726243 :                                             static_cast<double>(this->tstart.tv_usec - this->tstart_incl_sub.tv_usec)*1.e-6);
     460             : 
     461    99726243 :       this->tot_time_incl_sub += elapsed_time_incl_sub;
     462             :     }
     463             : 
     464    99726243 :   return elapsed_time;
     465             : }
     466             : 
     467             : 
     468             : 
     469             : inline
     470    80234020 : double PerfData::pause_for(PerfData & other)
     471             : {
     472    80234020 :   gettimeofday (&(other.tstart), nullptr);
     473             : 
     474   160467638 :   const double elapsed_time = (static_cast<double>(other.tstart.tv_sec  - this->tstart.tv_sec) +
     475    80234020 :                                static_cast<double>(other.tstart.tv_usec - this->tstart.tv_usec)*1.e-6);
     476    80234020 :   this->tot_time += elapsed_time;
     477             : 
     478    80234020 :   other.count++;
     479    80234020 :   other.called_recursively++;
     480    80234020 :   other.tstart_incl_sub = other.tstart;
     481             : 
     482    80234020 :   return elapsed_time;
     483             : }
     484             : 
     485             : 
     486             : 
     487             : inline
     488    49665327 : double PerfData::stopit ()
     489             : {
     490             :   // stopit is just similar to pause except that it decrements the
     491             :   // recursive call counter
     492             : 
     493    99726243 :   this->called_recursively--;
     494    99726243 :   return this->stop_or_pause(true);
     495             : }
     496             : 
     497             : 
     498             : 
     499             : // ------------------------------------------------------------
     500             : // PerfLog class inline member functions
     501             : inline
     502   131356777 : void PerfLog::fast_push (const char * label,
     503             :                          const char * header)
     504             : {
     505   131356777 :   if (this->log_events)
     506             :     {
     507             :       // The global perflog stack may not be thread-safe, but if we're
     508             :       // creating threads we should have disabled it already
     509    49665327 :       libmesh_assert_equal_to(Threads::active_threads, 1);
     510             : #ifdef LIBMESH_HAVE_OPENMP
     511             :       // Users might be doing their own non-libMesh threading.  We
     512             :       // can't catch every case of that but we can catch OpenMP
     513    49665327 :       libmesh_assert_equal_to(omp_get_num_threads(), 1);
     514             : #endif
     515             : 
     516             :       // Get a reference to the event data to avoid
     517             :       // repeated map lookups
     518   198754651 :       PerfData * perf_data = &(log[std::make_pair(header,label)]);
     519             : 
     520    99423997 :       if (!log_stack.empty())
     521    80234020 :         total_time += log_stack.top()->pause_for(*perf_data);
     522             :       else
     523     9826210 :         perf_data->start();
     524    49665327 :       log_stack.push(perf_data);
     525             : 
     526             : #ifdef LIBMESH_HAVE_NVTX_API
     527             :       // This takes a single const char *, and we don't want to waste
     528             :       // time allocating a new string on every push, so for now we'll
     529             :       // drop the header.  Maybe we should allocate a new string in
     530             :       // the PerfData?
     531             :       nvtxRangePushA(label);
     532             : #endif
     533             :     }
     534   131356777 : }
     535             : 
     536             : 
     537             : 
     538             : inline
     539   131646787 : void PerfLog::fast_pop(const char * libmesh_dbg_var(label),
     540             :                        const char * libmesh_dbg_var(header)) noexcept
     541             : {
     542   131646787 :   if (this->log_events)
     543             :     {
     544             :       // The global perflog stack may not be thread-safe, but if we're
     545             :       // creating threads we should have disabled it already
     546    49665327 :       libmesh_exceptionless_assert(Threads::active_threads == 1);
     547             : #ifdef LIBMESH_HAVE_OPENMP
     548             :       // Users might be doing their own non-libMesh threading.  We
     549             :       // can't catch every case of that but we can catch OpenMP.
     550    49665327 :       libmesh_exceptionless_assert(omp_get_num_threads() == 1);
     551             : #endif
     552             : 
     553             : #ifdef LIBMESH_HAVE_NVTX_API
     554             :       nvtxRangePop();
     555             : #endif
     556             :       // If there's nothing on the stack, then we can't pop anything. Previously we
     557             :       // asserted that the log_stack was not empty, but we should not throw from
     558             :       // this function, so instead just return in that case.
     559    99726243 :       if (log_stack.empty())
     560           0 :         return;
     561             : 
     562             : #ifndef NDEBUG
     563             :       // In debug mode, we carefully check that before popping the stack,
     564             :       // we have the correct event.
     565             : 
     566             :       // Get pointer to corresponding PerfData, or nullptr if it does not exist.
     567    49665327 :       auto it = log.find(std::make_pair(header, label));
     568    49665327 :       PerfData * perf_data = (it == log.end()) ? nullptr : &(it->second);
     569             : 
     570             :       // If this event is not found in the log, then it could indicate
     571             :       // that you have a STOP_LOG(a,b) without a corresponding
     572             :       // START_LOG(a,b) call. We don't pop anything from the log_stack
     573             :       // in this case.
     574    49665327 :       if (!perf_data)
     575             :         {
     576           0 :           libMesh::err << "PerfLog can't pop (" << header << ',' << label << ')' << std::endl;
     577           0 :           libMesh::err << "No such event was found in the log, you may have a mismatched START/STOP_LOG statement." << std::endl;
     578           0 :           return;
     579             :         }
     580             : 
     581    49665327 :       if (perf_data != log_stack.top())
     582             :         {
     583             :           // We've been asked to pop an event which is not at the
     584             :           // top() of the log_stack. The event may be deeper in the
     585             :           // stack instead, so search for it now. If we find it, we'll
     586             :           // pop all intermediate items off the stack (abandoning any
     587             :           // attempt to time them accurately) and let the optimized
     588             :           // code path take over from there.
     589           0 :           std::deque<PerfData*> tmp_stack;
     590             : 
     591           0 :           while (!log_stack.empty() && perf_data != log_stack.top())
     592             :             {
     593           0 :               tmp_stack.push_front(log_stack.top());
     594           0 :               log_stack.pop();
     595             :             }
     596             : 
     597             :           // If we exhaustively searched the log_stack without finding
     598             :           // the event, we can't do anything else so just put
     599             :           // everything back and return, not doing anything for this
     600             :           // event.
     601           0 :           if (log_stack.empty())
     602             :             {
     603           0 :               while (!tmp_stack.empty())
     604             :                 {
     605           0 :                   log_stack.push(tmp_stack.back());
     606           0 :                   tmp_stack.pop_back();
     607             :                 }
     608             : 
     609           0 :               return;
     610             :             }
     611             :         }
     612             : #endif
     613             : 
     614             :       // In optimized mode, we just pop from the top of the stack and
     615             :       // resume timing the next entry.
     616    99726243 :       total_time += log_stack.top()->stopit();
     617             : 
     618    49665327 :       log_stack.pop();
     619             : 
     620    99726243 :       if (!log_stack.empty())
     621    80526614 :         log_stack.top()->restart();
     622             :     }
     623             : }
     624             : 
     625             : 
     626             : 
     627             : inline
     628             : double PerfLog::get_elapsed_time () const
     629             : {
     630             :   struct timeval tnow;
     631             : 
     632             :   gettimeofday (&tnow, nullptr);
     633             : 
     634             :   const double elapsed_time = (static_cast<double>(tnow.tv_sec  - tstart.tv_sec) +
     635             :                                static_cast<double>(tnow.tv_usec - tstart.tv_usec)*1.e-6);
     636             :   return elapsed_time;
     637             : }
     638             : 
     639             : inline
     640             : double PerfLog::get_active_time() const
     641             : {
     642             :   return total_time;
     643             : }
     644             : 
     645             : } // namespace libMesh
     646             : 
     647             : 
     648             : 
     649             : #endif // LIBMESH_PERFLOG_H

Generated by: LCOV version 1.14