libMesh
perf_log.h
Go to the documentation of this file.
1 // The libMesh Finite Element Library.
2 // Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 
4 // This library is free software; you can redistribute it and/or
5 // modify it under the terms of the GNU Lesser General Public
6 // License as published by the Free Software Foundation; either
7 // version 2.1 of the License, or (at your option) any later version.
8 
9 // This library is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // Lesser General Public License for more details.
13 
14 // You should have received a copy of the GNU Lesser General Public
15 // License along with this library; if not, write to the Free Software
16 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 
18 
19 
20 #ifndef LIBMESH_PERFLOG_H
21 #define LIBMESH_PERFLOG_H
22 
23 
24 // Local includes
25 #include "libmesh/libmesh_common.h"
26 
27 #include "libmesh/threads.h"
28 
29 // C++ includes
30 #include <cstddef>
31 #include <map>
32 #include <stack>
33 #include <string>
34 #include <vector>
35 #include <deque>
36 #include <memory>
37 #ifdef LIBMESH_HAVE_SYS_TIME_H
38 #include <sys/time.h> // gettimeofday() on Unix
39 #endif
40 #ifndef LIBMESH_HAVE_GETTIMEOFDAY
41 #include "libmesh/win_gettimeofday.h" // gettimeofday() on Windows
42 #endif
43 #ifdef LIBMESH_HAVE_NVTX_API
44 #include <nvtx3/nvtx3.hpp>
45 #endif
46 #ifdef LIBMESH_HAVE_OPENMP
47 #include <omp.h>
48 #endif
49 
50 namespace libMesh
51 {
52 
61 class PerfData
62 {
63 public:
64 
68  PerfData () :
69  tot_time(0.),
71  tstart(),
73  count(0),
74  open(false),
76  {}
77 
78 
82  double tot_time;
83 
88 
93  struct timeval tstart;
94 
100 
105  unsigned int count;
106 
112  bool open;
113 
114  void start ();
115  void restart ();
116  double pause ();
117  double pause_for(PerfData & other);
118  double stopit ();
119 
123  PerfData & operator += (const PerfData & other)
124  {
126  tot_time += other.tot_time;
128  count += other.count;
129 
130  return *this;
131  }
132 
134 
135 protected:
136  double stop_or_pause(const bool do_stop);
137 };
138 
139 
140 
141 
153 class PerfLog
154 {
155 public:
156 
164  PerfLog(std::string label_name="",
165  const bool log_events=true);
166 
170  ~PerfLog();
171 
179  void clear();
180 
184  void disable_logging() { log_events = false; }
185 
189  void enable_logging() { log_events = true; }
190 
194  bool logging_enabled() const { return log_events; }
195 
200 
206 
211 
220  void fast_push (const char * label,
221  const char * header="");
222 
232  void push (const char * label,
233  const char * header="");
234 
242  void push (const std::string & label,
243  const std::string & header="");
244 
253  void fast_pop (const char * label,
254  const char * header="") noexcept;
255 
262  void pop (const char * label,
263  const char * header="");
264 
272  void pop (const std::string & label,
273  const std::string & header="");
274 
278  void start_event(const std::string & label,
279  const std::string & header="");
280 
284  void stop_event(const std::string & label,
285  const std::string & header="");
286 
290  void pause_event(const std::string & label,
291  const std::string & header="");
292 
296  void restart_event(const std::string & label,
297  const std::string & header="");
298 
304  std::string get_log() const;
305 
309  std::string get_info_header() const;
310 
314  std::string get_perf_info() const;
315 
319  void print_log() const;
320 
324  double get_elapsed_time() const;
325 
329  double get_active_time() const;
330 
334  PerfData get_perf_data(const std::string & label, const std::string & header="");
335 
339  typedef std::map<std::pair<const char *,
340  const char *>,
342 
343 private:
344 
345 
349  const std::string label_name;
350 
355 
360 
364  double total_time;
365 
369  struct timeval tstart;
370 
379 
384 
390  static bool called;
391 
396  void split_on_whitespace(const std::string & input,
397  std::vector<std::string> & output) const;
398 
407  std::map<std::string, std::unique_ptr<const char[]>> non_temporary_strings;
408 };
409 
410 
411 
412 // ------------------------------------------------------------
413 // PerfData class member functions
414 inline
415 void PerfData::start ()
416 {
417  this->count++;
418  this->called_recursively++;
419  gettimeofday (&(this->tstart), nullptr);
420  this->tstart_incl_sub = this->tstart;
421 }
422 
423 
424 
425 inline
427 {
428  gettimeofday (&(this->tstart), nullptr);
429 }
430 
431 
432 
433 inline
435 {
436  return this->stop_or_pause(false);
437 }
438 
439 
440 inline
441 double PerfData::stop_or_pause(const bool do_stop)
442 {
443  // save the start times, reuse the structure we have rather than create
444  // a new one.
445  const time_t
446  tstart_tv_sec = this->tstart.tv_sec,
447  tstart_tv_usec = this->tstart.tv_usec;
448 
449  gettimeofday (&(this->tstart), nullptr);
450 
451  const double elapsed_time = (static_cast<double>(this->tstart.tv_sec - tstart_tv_sec) +
452  static_cast<double>(this->tstart.tv_usec - tstart_tv_usec)*1.e-6);
453 
454  this->tot_time += elapsed_time;
455 
456  if (do_stop)
457  {
458  const double elapsed_time_incl_sub = (static_cast<double>(this->tstart.tv_sec - this->tstart_incl_sub.tv_sec) +
459  static_cast<double>(this->tstart.tv_usec - this->tstart_incl_sub.tv_usec)*1.e-6);
460 
461  this->tot_time_incl_sub += elapsed_time_incl_sub;
462  }
463 
464  return elapsed_time;
465 }
466 
467 
468 
469 inline
471 {
472  gettimeofday (&(other.tstart), nullptr);
473 
474  const double elapsed_time = (static_cast<double>(other.tstart.tv_sec - this->tstart.tv_sec) +
475  static_cast<double>(other.tstart.tv_usec - this->tstart.tv_usec)*1.e-6);
476  this->tot_time += elapsed_time;
477 
478  other.count++;
479  other.called_recursively++;
480  other.tstart_incl_sub = other.tstart;
481 
482  return elapsed_time;
483 }
484 
485 
486 
487 inline
489 {
490  // stopit is just similar to pause except that it decrements the
491  // recursive call counter
492 
493  this->called_recursively--;
494  return this->stop_or_pause(true);
495 }
496 
497 
498 
499 // ------------------------------------------------------------
500 // PerfLog class inline member functions
501 inline
502 void PerfLog::fast_push (const char * label,
503  const char * header)
504 {
505  if (this->log_events)
506  {
507  // The global perflog stack may not be thread-safe, but if we're
508  // creating threads we should have disabled it already
509  libmesh_assert_equal_to(Threads::active_threads, 1);
510 #ifdef LIBMESH_HAVE_OPENMP
511  // Users might be doing their own non-libMesh threading. We
512  // can't catch every case of that but we can catch OpenMP
513  libmesh_assert_equal_to(omp_get_num_threads(), 1);
514 #endif
515 
516  // Get a reference to the event data to avoid
517  // repeated map lookups
518  PerfData * perf_data = &(log[std::make_pair(header,label)]);
519 
520  if (!log_stack.empty())
521  total_time += log_stack.top()->pause_for(*perf_data);
522  else
523  perf_data->start();
524  log_stack.push(perf_data);
525 
526 #ifdef LIBMESH_HAVE_NVTX_API
527  // This takes a single const char *, and we don't want to waste
528  // time allocating a new string on every push, so for now we'll
529  // drop the header. Maybe we should allocate a new string in
530  // the PerfData?
531  nvtxRangePushA(label);
532 #endif
533  }
534 }
535 
536 
537 
538 inline
539 void PerfLog::fast_pop(const char * libmesh_dbg_var(label),
540  const char * libmesh_dbg_var(header)) noexcept
541 {
542  if (this->log_events)
543  {
544  // The global perflog stack may not be thread-safe, but if we're
545  // creating threads we should have disabled it already
546  libmesh_exceptionless_assert(Threads::active_threads == 1);
547 #ifdef LIBMESH_HAVE_OPENMP
548  // Users might be doing their own non-libMesh threading. We
549  // can't catch every case of that but we can catch OpenMP.
550  libmesh_exceptionless_assert(omp_get_num_threads() == 1);
551 #endif
552 
553 #ifdef LIBMESH_HAVE_NVTX_API
554  nvtxRangePop();
555 #endif
556  // If there's nothing on the stack, then we can't pop anything. Previously we
557  // asserted that the log_stack was not empty, but we should not throw from
558  // this function, so instead just return in that case.
559  if (log_stack.empty())
560  return;
561 
562 #ifndef NDEBUG
563  // In debug mode, we carefully check that before popping the stack,
564  // we have the correct event.
565 
566  // Get pointer to corresponding PerfData, or nullptr if it does not exist.
567  auto it = log.find(std::make_pair(header, label));
568  PerfData * perf_data = (it == log.end()) ? nullptr : &(it->second);
569 
570  // If this event is not found in the log, then it could indicate
571  // that you have a STOP_LOG(a,b) without a corresponding
572  // START_LOG(a,b) call. We don't pop anything from the log_stack
573  // in this case.
574  if (!perf_data)
575  {
576  libMesh::err << "PerfLog can't pop (" << header << ',' << label << ')' << std::endl;
577  libMesh::err << "No such event was found in the log, you may have a mismatched START/STOP_LOG statement." << std::endl;
578  return;
579  }
580 
581  if (perf_data != log_stack.top())
582  {
583  // We've been asked to pop an event which is not at the
584  // top() of the log_stack. The event may be deeper in the
585  // stack instead, so search for it now. If we find it, we'll
586  // pop all intermediate items off the stack (abandoning any
587  // attempt to time them accurately) and let the optimized
588  // code path take over from there.
589  std::deque<PerfData*> tmp_stack;
590 
591  while (!log_stack.empty() && perf_data != log_stack.top())
592  {
593  tmp_stack.push_front(log_stack.top());
594  log_stack.pop();
595  }
596 
597  // If we exhaustively searched the log_stack without finding
598  // the event, we can't do anything else so just put
599  // everything back and return, not doing anything for this
600  // event.
601  if (log_stack.empty())
602  {
603  while (!tmp_stack.empty())
604  {
605  log_stack.push(tmp_stack.back());
606  tmp_stack.pop_back();
607  }
608 
609  return;
610  }
611  }
612 #endif
613 
614  // In optimized mode, we just pop from the top of the stack and
615  // resume timing the next entry.
616  total_time += log_stack.top()->stopit();
617 
618  log_stack.pop();
619 
620  if (!log_stack.empty())
621  log_stack.top()->restart();
622  }
623 }
624 
625 
626 
627 inline
629 {
630  struct timeval tnow;
631 
632  gettimeofday (&tnow, nullptr);
633 
634  const double elapsed_time = (static_cast<double>(tnow.tv_sec - tstart.tv_sec) +
635  static_cast<double>(tnow.tv_usec - tstart.tv_usec)*1.e-6);
636  return elapsed_time;
637 }
638 
639 inline
641 {
642  return total_time;
643 }
644 
645 } // namespace libMesh
646 
647 
648 
649 #endif // LIBMESH_PERFLOG_H
OStreamProxy err
double pause()
Definition: perf_log.h:434
double tot_time_incl_sub
Total time spent in this event, including sub-events.
Definition: perf_log.h:87
void pop(const char *label, const char *header="")
Pop the event label off the stack, resuming any lower event.
Definition: perf_log.C:185
PerfLog(std::string label_name="", const bool log_events=true)
Constructor.
Definition: perf_log.C:55
int called_recursively
Definition: perf_log.h:133
double stopit()
Definition: perf_log.h:488
void disable_summarized_logs()
Tells the PerfLog to print detailed log results (this is the default behavior)
Definition: perf_log.h:205
void fast_push(const char *label, const char *header="")
Push the event label onto the stack, pausing any active event.
Definition: perf_log.h:502
PerfData()
Constructor.
Definition: perf_log.h:68
The PerfData class simply contains the performance data that is recorded for individual events...
Definition: perf_log.h:61
void enable_logging()
Enables performance logging for an active object.
Definition: perf_log.h:189
bool log_events
Flag to optionally disable all logging.
Definition: perf_log.h:354
int gettimeofday(struct timeval *tp, struct timezone *tzp)
double total_time
The total running time for recorded events.
Definition: perf_log.h:364
double get_elapsed_time() const
Definition: perf_log.h:628
PerfData & operator+=(const PerfData &other)
Sums timing results from other.
Definition: perf_log.h:123
double pause_for(PerfData &other)
Definition: perf_log.h:470
void stop_event(const std::string &label, const std::string &header="")
Stop monitoring the event named label.
Definition: perf_log.C:758
log_type log
The actual log.
Definition: perf_log.h:378
The libMesh namespace provides an interface to certain functionality in the library.
void fast_pop(const char *label, const char *header="") noexcept
Pop the event label off the stack, resuming any lower event.
Definition: perf_log.h:539
std::stack< PerfData * > log_stack
A stack to hold the current performance log trace.
Definition: perf_log.h:383
bool summarized_logs_enabled()
Definition: perf_log.h:210
bool summarize_logs
Flag to optionally summarize logs.
Definition: perf_log.h:359
PerfData get_perf_data(const std::string &label, const std::string &header="")
Return the PerfData object associated with a label and header.
Definition: perf_log.C:726
The PerfLog class allows monitoring of specific events.
Definition: perf_log.h:153
void enable_summarized_logs()
Tells the PerfLog to only print log results summarized by header.
Definition: perf_log.h:199
std::string get_perf_info() const
Definition: perf_log.C:360
const std::string label_name
The label for this object.
Definition: perf_log.h:349
double stop_or_pause(const bool do_stop)
Definition: perf_log.h:441
double get_active_time() const
Definition: perf_log.h:640
std::map< std::pair< const char *, const char * >, PerfData > log_type
Typdef for the underlying logging data structure.
Definition: perf_log.h:341
libmesh_assert(ctx)
std::string get_log() const
Definition: perf_log.C:688
std::string get_info_header() const
Definition: perf_log.C:193
void push(const char *label, const char *header="")
Push the event label onto the stack, pausing any active event.
Definition: perf_log.C:147
void restart_event(const std::string &label, const std::string &header="")
Restart monitoring the event.
Definition: perf_log.C:774
bool logging_enabled() const
Definition: perf_log.h:194
~PerfLog()
Destructor.
Definition: perf_log.C:70
void split_on_whitespace(const std::string &input, std::vector< std::string > &output) const
Splits a string on whitespace into a vector of separate strings.
Definition: perf_log.C:782
struct timeval tstart
Structure defining when the event was last started.
Definition: perf_log.h:93
struct timeval tstart_incl_sub
Structure defining when the event was last started, including sub-events.
Definition: perf_log.h:99
void clear()
Clears all the internal data and restores the data structures to a pristine state.
Definition: perf_log.C:78
double tot_time
Total time spent in this event.
Definition: perf_log.h:82
int active_threads
An integer which is set to the number of active threads when we are in a Threads:: parallel operation...
Definition: threads.C:32
void print_log() const
Print the log.
Definition: perf_log.C:714
void pause_event(const std::string &label, const std::string &header="")
Suspend monitoring of the event.
Definition: perf_log.C:766
unsigned int count
The number of times this event has been executed.
Definition: perf_log.h:105
struct timeval tstart
The time we were constructed or last cleared.
Definition: perf_log.h:369
static bool called
Flag indicating if print_log() has been called.
Definition: perf_log.h:390
bool open
Flag indicating if we are currently monitoring this event.
Definition: perf_log.h:112
void disable_logging()
Disables performance logging for an active object.
Definition: perf_log.h:184
std::map< std::string, std::unique_ptr< const char[]> > non_temporary_strings
Workaround to give us fixed pointers to character arrays for every string.
Definition: perf_log.h:407
void start_event(const std::string &label, const std::string &header="")
Start monitoring the event named label.
Definition: perf_log.C:750