Line data Source code
1 : // The libMesh Finite Element Library.
2 : // Copyright (C) 2002-2026 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 :
4 : // This library is free software; you can redistribute it and/or
5 : // modify it under the terms of the GNU Lesser General Public
6 : // License as published by the Free Software Foundation; either
7 : // version 2.1 of the License, or (at your option) any later version.
8 :
9 : // This library is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 : // Lesser General Public License for more details.
13 :
14 : // You should have received a copy of the GNU Lesser General Public
15 : // License along with this library; if not, write to the Free Software
16 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :
18 :
19 :
20 : #ifndef LIBMESH_PERFLOG_H
21 : #define LIBMESH_PERFLOG_H
22 :
23 :
24 : // Local includes
25 : #include "libmesh/libmesh_common.h"
26 :
27 : #include "libmesh/threads.h"
28 :
29 : // C++ includes
30 : #include <cstddef>
31 : #include <map>
32 : #include <stack>
33 : #include <string>
34 : #include <vector>
35 : #include <deque>
36 : #include <memory>
37 : #ifdef LIBMESH_HAVE_SYS_TIME_H
38 : #include <sys/time.h> // gettimeofday() on Unix
39 : #endif
40 : #ifndef LIBMESH_HAVE_GETTIMEOFDAY
41 : #include "libmesh/win_gettimeofday.h" // gettimeofday() on Windows
42 : #endif
43 : #ifdef LIBMESH_HAVE_NVTX_API
44 : #include <nvtx3/nvtx3.hpp>
45 : #endif
46 : #ifdef LIBMESH_HAVE_OPENMP
47 : #include <omp.h>
48 : #endif
49 :
50 : namespace libMesh
51 : {
52 :
53 : /**
54 : * The \p PerfData class simply contains the performance
55 : * data that is recorded for individual events.
56 : *
57 : * \author Benjamin Kirk
58 : * \date 2003
59 : * \brief Data object managed by PerfLog
60 : */
61 : class PerfData
62 : {
63 : public:
64 :
65 : /**
66 : * Constructor. Initializes data to be empty.
67 : */
68 107427 : PerfData () :
69 55877 : tot_time(0.),
70 55877 : tot_time_incl_sub(0.),
71 55877 : tstart(),
72 55877 : tstart_incl_sub(),
73 55877 : count(0),
74 55877 : open(false),
75 107427 : called_recursively(0)
76 33640 : {}
77 :
78 :
79 : /**
80 : * Total time spent in this event.
81 : */
82 : double tot_time;
83 :
84 : /**
85 : * Total time spent in this event, including sub-events.
86 : */
87 : double tot_time_incl_sub;
88 :
89 : /**
90 : * Structure defining when the event
91 : * was last started.
92 : */
93 : struct timeval tstart;
94 :
95 : /**
96 : * Structure defining when the event
97 : * was last started, including sub-events.
98 : */
99 : struct timeval tstart_incl_sub;
100 :
101 : /**
102 : * The number of times this event has
103 : * been executed
104 : */
105 : unsigned int count;
106 :
107 : /**
108 : * Flag indicating if we are currently
109 : * monitoring this event. Should only
110 : * be true while the event is executing.
111 : */
112 : bool open;
113 :
114 : void start ();
115 : void restart ();
116 : double pause ();
117 : double pause_for(PerfData & other);
118 : double stopit ();
119 :
120 : /**
121 : * Sums timing results from \p other
122 : */
123 1588 : PerfData & operator += (const PerfData & other)
124 : {
125 1588 : libmesh_assert(!open);
126 3176 : tot_time += other.tot_time;
127 3176 : tot_time_incl_sub += other.tot_time_incl_sub;
128 3176 : count += other.count;
129 :
130 1588 : return *this;
131 : }
132 :
133 : int called_recursively;
134 :
135 : protected:
136 : double stop_or_pause(const bool do_stop);
137 : };
138 :
139 :
140 :
141 :
142 : /**
143 : * The \p PerfLog class allows monitoring of specific events.
144 : * An event is defined by a unique string that functions as
145 : * a label. Each time the event is executed data are recorded.
146 : * This class is particularly useful for finding performance
147 : * bottlenecks.
148 : *
149 : * \author Benjamin Kirk
150 : * \date 2003
151 : * \brief Responsible for timing and summarizing events.
152 : */
153 : class PerfLog
154 : {
155 : public:
156 :
157 : /**
158 : * Constructor. \p label_name is the name of the object, which
159 : * will bw print in the log to distinguish it from other objects.
160 : * \p log_events is a flag to optionally
161 : * disable logging. You can use this flag to turn off
162 : * logging without touching any other code.
163 : */
164 : PerfLog(std::string label_name="",
165 : const bool log_events=true);
166 :
167 : /**
168 : * Destructor. Calls \p clear() and \p print_log().
169 : */
170 : ~PerfLog();
171 :
172 : /**
173 : * Clears all the internal data and restores the
174 : * data structures to a pristine state. This function
175 : * checks to see if it is currently monitoring any
176 : * events, and if so errors. Be sure you are not
177 : * logging any events when you call this function.
178 : */
179 : void clear();
180 :
181 : /**
182 : * Disables performance logging for an active object.
183 : */
184 147633 : void disable_logging() { log_events = false; }
185 :
186 : /**
187 : * Enables performance logging for an active object.
188 : */
189 147633 : void enable_logging() { log_events = true; }
190 :
191 : /**
192 : * \returns \p true iff performance logging is enabled
193 : */
194 73821 : bool logging_enabled() const { return log_events; }
195 :
196 : /**
197 : * Tells the PerfLog to only print log results summarized by header
198 : */
199 : void enable_summarized_logs() { summarize_logs = true; }
200 :
201 : /**
202 : * Tells the PerfLog to print detailed log results (this is the
203 : * default behavior)
204 : */
205 : void disable_summarized_logs() { summarize_logs = false; }
206 :
207 : /**
208 : * \returns \p true iff log results will be summarized by header
209 : */
210 : bool summarized_logs_enabled() { return summarize_logs; }
211 :
212 : /**
213 : * Push the event \p label onto the stack, pausing any active event.
214 : *
215 : * Note - for speed the PerfLog directly considers the *pointers*
216 : * here. Supply pointers to string literals or other character
217 : * arrays whose lifetime will exceed the lifetime of the PerfLog
218 : * object, not to temporarily allocated arrays.
219 : */
220 : void fast_push (const char * label,
221 : const char * header="");
222 :
223 : /**
224 : * Push the event \p label onto the stack, pausing any active event.
225 : *
226 : * This method will eventually be deprecated. For backwards
227 : * compatibility, the PerfLog must copy the contents of these
228 : * character arrays into strings, which ironically damages the
229 : * performance we are trying to profile. Use fast_push() (with
230 : * compatible long-lived character array data) instead.
231 : */
232 : void push (const char * label,
233 : const char * header="");
234 :
235 : /**
236 : * Push the event \p label onto the stack, pausing any active event.
237 : *
238 : * This method will eventually be deprecated. String manipulation
239 : * is too low performance to use when performance monitoring hot
240 : * spots. Use fast_push() instead.
241 : */
242 : void push (const std::string & label,
243 : const std::string & header="");
244 :
245 : /**
246 : * Pop the event \p label off the stack, resuming any lower event.
247 : *
248 : * This method must be passed the exact same pointers as were passed
249 : * to fast_push, not merely pointers to identical strings.
250 : * This method is called from the PerfItem destructor, so it should
251 : * not throw. We have therefore marked it noexcept as a reminder.
252 : */
253 : void fast_pop (const char * label,
254 : const char * header="") noexcept;
255 :
256 : /**
257 : * Pop the event \p label off the stack, resuming any lower event.
258 : *
259 : * This method will eventually be deprecated. Use fast_pop() (with
260 : * the exact same pointers supplied to fast_push()) instead.
261 : */
262 : void pop (const char * label,
263 : const char * header="");
264 :
265 : /**
266 : * Pop the event \p label off the stack, resuming any lower event.
267 : *
268 : * This method will eventually be deprecated. String manipulation
269 : * is too low performance to use when performance monitoring hot
270 : * spots. Use fast_pop() instead.
271 : */
272 : void pop (const std::string & label,
273 : const std::string & header="");
274 :
275 : /**
276 : * Start monitoring the event named \p label.
277 : */
278 : void start_event(const std::string & label,
279 : const std::string & header="");
280 :
281 : /**
282 : * Stop monitoring the event named \p label.
283 : */
284 : void stop_event(const std::string & label,
285 : const std::string & header="");
286 :
287 : /**
288 : * Suspend monitoring of the event.
289 : */
290 : void pause_event(const std::string & label,
291 : const std::string & header="");
292 :
293 : /**
294 : * Restart monitoring the event.
295 : */
296 : void restart_event(const std::string & label,
297 : const std::string & header="");
298 :
299 : /**
300 : * \returns A string containing:
301 : * (1) Basic machine information (if first call)
302 : * (2) The performance log
303 : */
304 : std::string get_log() const;
305 :
306 : /**
307 : * \returns A string containing ONLY the information header.
308 : */
309 : std::string get_info_header() const;
310 :
311 : /**
312 : * \returns A string containing ONLY the log information
313 : */
314 : std::string get_perf_info() const;
315 :
316 : /**
317 : * Print the log.
318 : */
319 : void print_log() const;
320 :
321 : /**
322 : * \returns The total time spent on this event.
323 : */
324 : double get_elapsed_time() const;
325 :
326 : /**
327 : * \returns The active time
328 : */
329 : double get_active_time() const;
330 :
331 : /**
332 : * Return the PerfData object associated with a label and header.
333 : */
334 : PerfData get_perf_data(const std::string & label, const std::string & header="");
335 :
336 : /**
337 : * Typdef for the underlying logging data structure.
338 : */
339 : typedef std::map<std::pair<const char *,
340 : const char *>,
341 : PerfData> log_type;
342 :
343 : private:
344 :
345 :
346 : /**
347 : * The label for this object.
348 : */
349 : const std::string label_name;
350 :
351 : /**
352 : * Flag to optionally disable all logging.
353 : */
354 : bool log_events;
355 :
356 : /**
357 : * Flag to optionally summarize logs
358 : */
359 : bool summarize_logs;
360 :
361 : /**
362 : * The total running time for recorded events.
363 : */
364 : double total_time;
365 :
366 : /**
367 : * The time we were constructed or last cleared.
368 : */
369 : struct timeval tstart;
370 :
371 : /**
372 : * The actual log.
373 : *
374 : * An unsorted_map would work fine here and would be asymptotically
375 : * faster, but in my tests for our log sizes there was no
376 : * improvement.
377 : */
378 : log_type log;
379 :
380 : /**
381 : * A stack to hold the current performance log trace.
382 : */
383 : std::stack<PerfData*> log_stack;
384 :
385 : /**
386 : * Flag indicating if print_log() has been called.
387 : * This is used to print a header with machine-specific
388 : * data the first time that print_log() is called.
389 : */
390 : static bool called;
391 :
392 : /**
393 : * Splits a string on whitespace into a vector of separate strings. This is used to make the
394 : * LIBMESH_CONFIGURE_INFO a little more manageable.
395 : */
396 : void split_on_whitespace(const std::string & input,
397 : std::vector<std::string> & output) const;
398 :
399 : /**
400 : * Workaround to give us fixed pointers to character arrays for
401 : * every string. Using std::set instead might work: it won't
402 : * invalidate iterators, which I *think* means it doesn't have any
403 : * reason to modify or copy their contents or otherwise invalidate
404 : * their c_str() pointers... but I can't prove it from the standards
405 : * doc, so let's be safe.
406 : */
407 : std::map<std::string, std::unique_ptr<const char[]>> non_temporary_strings;
408 : };
409 :
410 :
411 :
412 : // ------------------------------------------------------------
413 : // PerfData class member functions
414 : inline
415 9826210 : void PerfData::start ()
416 : {
417 19189977 : this->count++;
418 19189977 : this->called_recursively++;
419 19189977 : gettimeofday (&(this->tstart), nullptr);
420 19189977 : this->tstart_incl_sub = this->tstart;
421 9826210 : }
422 :
423 :
424 :
425 : inline
426 40275482 : void PerfData::restart ()
427 : {
428 80526614 : gettimeofday (&(this->tstart), nullptr);
429 40275884 : }
430 :
431 :
432 :
433 : inline
434 : double PerfData::pause ()
435 : {
436 : return this->stop_or_pause(false);
437 : }
438 :
439 :
440 : inline
441 99726243 : double PerfData::stop_or_pause(const bool do_stop)
442 : {
443 : // save the start times, reuse the structure we have rather than create
444 : // a new one.
445 : const time_t
446 99726243 : tstart_tv_sec = this->tstart.tv_sec,
447 99726243 : tstart_tv_usec = this->tstart.tv_usec;
448 :
449 99726243 : gettimeofday (&(this->tstart), nullptr);
450 :
451 199015719 : const double elapsed_time = (static_cast<double>(this->tstart.tv_sec - tstart_tv_sec) +
452 99726243 : static_cast<double>(this->tstart.tv_usec - tstart_tv_usec)*1.e-6);
453 :
454 99726243 : this->tot_time += elapsed_time;
455 :
456 99726243 : if (do_stop)
457 : {
458 199015719 : const double elapsed_time_incl_sub = (static_cast<double>(this->tstart.tv_sec - this->tstart_incl_sub.tv_sec) +
459 99726243 : static_cast<double>(this->tstart.tv_usec - this->tstart_incl_sub.tv_usec)*1.e-6);
460 :
461 99726243 : this->tot_time_incl_sub += elapsed_time_incl_sub;
462 : }
463 :
464 99726243 : return elapsed_time;
465 : }
466 :
467 :
468 :
469 : inline
470 80234020 : double PerfData::pause_for(PerfData & other)
471 : {
472 80234020 : gettimeofday (&(other.tstart), nullptr);
473 :
474 160467638 : const double elapsed_time = (static_cast<double>(other.tstart.tv_sec - this->tstart.tv_sec) +
475 80234020 : static_cast<double>(other.tstart.tv_usec - this->tstart.tv_usec)*1.e-6);
476 80234020 : this->tot_time += elapsed_time;
477 :
478 80234020 : other.count++;
479 80234020 : other.called_recursively++;
480 80234020 : other.tstart_incl_sub = other.tstart;
481 :
482 80234020 : return elapsed_time;
483 : }
484 :
485 :
486 :
487 : inline
488 49665327 : double PerfData::stopit ()
489 : {
490 : // stopit is just similar to pause except that it decrements the
491 : // recursive call counter
492 :
493 99726243 : this->called_recursively--;
494 99726243 : return this->stop_or_pause(true);
495 : }
496 :
497 :
498 :
499 : // ------------------------------------------------------------
500 : // PerfLog class inline member functions
501 : inline
502 131356777 : void PerfLog::fast_push (const char * label,
503 : const char * header)
504 : {
505 131356777 : if (this->log_events)
506 : {
507 : // The global perflog stack may not be thread-safe, but if we're
508 : // creating threads we should have disabled it already
509 49665327 : libmesh_assert_equal_to(Threads::active_threads, 1);
510 : #ifdef LIBMESH_HAVE_OPENMP
511 : // Users might be doing their own non-libMesh threading. We
512 : // can't catch every case of that but we can catch OpenMP
513 49665327 : libmesh_assert_equal_to(omp_get_num_threads(), 1);
514 : #endif
515 :
516 : // Get a reference to the event data to avoid
517 : // repeated map lookups
518 198754651 : PerfData * perf_data = &(log[std::make_pair(header,label)]);
519 :
520 99423997 : if (!log_stack.empty())
521 80234020 : total_time += log_stack.top()->pause_for(*perf_data);
522 : else
523 9826210 : perf_data->start();
524 49665327 : log_stack.push(perf_data);
525 :
526 : #ifdef LIBMESH_HAVE_NVTX_API
527 : // This takes a single const char *, and we don't want to waste
528 : // time allocating a new string on every push, so for now we'll
529 : // drop the header. Maybe we should allocate a new string in
530 : // the PerfData?
531 : nvtxRangePushA(label);
532 : #endif
533 : }
534 131356777 : }
535 :
536 :
537 :
538 : inline
539 131646787 : void PerfLog::fast_pop(const char * libmesh_dbg_var(label),
540 : const char * libmesh_dbg_var(header)) noexcept
541 : {
542 131646787 : if (this->log_events)
543 : {
544 : // The global perflog stack may not be thread-safe, but if we're
545 : // creating threads we should have disabled it already
546 49665327 : libmesh_exceptionless_assert(Threads::active_threads == 1);
547 : #ifdef LIBMESH_HAVE_OPENMP
548 : // Users might be doing their own non-libMesh threading. We
549 : // can't catch every case of that but we can catch OpenMP.
550 49665327 : libmesh_exceptionless_assert(omp_get_num_threads() == 1);
551 : #endif
552 :
553 : #ifdef LIBMESH_HAVE_NVTX_API
554 : nvtxRangePop();
555 : #endif
556 : // If there's nothing on the stack, then we can't pop anything. Previously we
557 : // asserted that the log_stack was not empty, but we should not throw from
558 : // this function, so instead just return in that case.
559 99726243 : if (log_stack.empty())
560 0 : return;
561 :
562 : #ifndef NDEBUG
563 : // In debug mode, we carefully check that before popping the stack,
564 : // we have the correct event.
565 :
566 : // Get pointer to corresponding PerfData, or nullptr if it does not exist.
567 49665327 : auto it = log.find(std::make_pair(header, label));
568 49665327 : PerfData * perf_data = (it == log.end()) ? nullptr : &(it->second);
569 :
570 : // If this event is not found in the log, then it could indicate
571 : // that you have a STOP_LOG(a,b) without a corresponding
572 : // START_LOG(a,b) call. We don't pop anything from the log_stack
573 : // in this case.
574 49665327 : if (!perf_data)
575 : {
576 0 : libMesh::err << "PerfLog can't pop (" << header << ',' << label << ')' << std::endl;
577 0 : libMesh::err << "No such event was found in the log, you may have a mismatched START/STOP_LOG statement." << std::endl;
578 0 : return;
579 : }
580 :
581 49665327 : if (perf_data != log_stack.top())
582 : {
583 : // We've been asked to pop an event which is not at the
584 : // top() of the log_stack. The event may be deeper in the
585 : // stack instead, so search for it now. If we find it, we'll
586 : // pop all intermediate items off the stack (abandoning any
587 : // attempt to time them accurately) and let the optimized
588 : // code path take over from there.
589 0 : std::deque<PerfData*> tmp_stack;
590 :
591 0 : while (!log_stack.empty() && perf_data != log_stack.top())
592 : {
593 0 : tmp_stack.push_front(log_stack.top());
594 0 : log_stack.pop();
595 : }
596 :
597 : // If we exhaustively searched the log_stack without finding
598 : // the event, we can't do anything else so just put
599 : // everything back and return, not doing anything for this
600 : // event.
601 0 : if (log_stack.empty())
602 : {
603 0 : while (!tmp_stack.empty())
604 : {
605 0 : log_stack.push(tmp_stack.back());
606 0 : tmp_stack.pop_back();
607 : }
608 :
609 0 : return;
610 : }
611 : }
612 : #endif
613 :
614 : // In optimized mode, we just pop from the top of the stack and
615 : // resume timing the next entry.
616 99726243 : total_time += log_stack.top()->stopit();
617 :
618 49665327 : log_stack.pop();
619 :
620 99726243 : if (!log_stack.empty())
621 80526614 : log_stack.top()->restart();
622 : }
623 : }
624 :
625 :
626 :
627 : inline
628 : double PerfLog::get_elapsed_time () const
629 : {
630 : struct timeval tnow;
631 :
632 : gettimeofday (&tnow, nullptr);
633 :
634 : const double elapsed_time = (static_cast<double>(tnow.tv_sec - tstart.tv_sec) +
635 : static_cast<double>(tnow.tv_usec - tstart.tv_usec)*1.e-6);
636 : return elapsed_time;
637 : }
638 :
639 : inline
640 : double PerfLog::get_active_time() const
641 : {
642 : return total_time;
643 : }
644 :
645 : } // namespace libMesh
646 :
647 :
648 :
649 : #endif // LIBMESH_PERFLOG_H
|