Line data Source code
1 : // The libMesh Finite Element Library.
2 : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 :
4 : // This library is free software; you can redistribute it and/or
5 : // modify it under the terms of the GNU Lesser General Public
6 : // License as published by the Free Software Foundation; either
7 : // version 2.1 of the License, or (at your option) any later version.
8 :
9 : // This library is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 : // Lesser General Public License for more details.
13 :
14 : // You should have received a copy of the GNU Lesser General Public
15 : // License along with this library; if not, write to the Free Software
16 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :
18 :
19 :
20 : #ifndef LIBMESH_PERFLOG_H
21 : #define LIBMESH_PERFLOG_H
22 :
23 :
24 : // Local includes
25 : #include "libmesh/libmesh_common.h"
26 :
27 : // C++ includes
28 : #include <cstddef>
29 : #include <map>
30 : #include <stack>
31 : #include <string>
32 : #include <vector>
33 : #include <deque>
34 : #include <memory>
35 : #ifdef LIBMESH_HAVE_SYS_TIME_H
36 : #include <sys/time.h> // gettimeofday() on Unix
37 : #endif
38 : #ifndef LIBMESH_HAVE_GETTIMEOFDAY
39 : #include "libmesh/win_gettimeofday.h" // gettimeofday() on Windows
40 : #endif
41 :
42 : namespace libMesh
43 : {
44 :
45 : /**
46 : * The \p PerfData class simply contains the performance
47 : * data that is recorded for individual events.
48 : *
49 : * \author Benjamin Kirk
50 : * \date 2003
51 : * \brief Data object managed by PerfLog
52 : */
53 : class PerfData
54 : {
55 : public:
56 :
57 : /**
58 : * Constructor. Initializes data to be empty.
59 : */
60 101284 : PerfData () :
61 49041 : tot_time(0.),
62 49041 : tot_time_incl_sub(0.),
63 49041 : tstart(),
64 49041 : tstart_incl_sub(),
65 49041 : count(0),
66 49041 : open(false),
67 101284 : called_recursively(0)
68 34252 : {}
69 :
70 :
71 : /**
72 : * Total time spent in this event.
73 : */
74 : double tot_time;
75 :
76 : /**
77 : * Total time spent in this event, including sub-events.
78 : */
79 : double tot_time_incl_sub;
80 :
81 : /**
82 : * Structure defining when the event
83 : * was last started.
84 : */
85 : struct timeval tstart;
86 :
87 : /**
88 : * Structure defining when the event
89 : * was last started, including sub-events.
90 : */
91 : struct timeval tstart_incl_sub;
92 :
93 : /**
94 : * The number of times this event has
95 : * been executed
96 : */
97 : unsigned int count;
98 :
99 : /**
100 : * Flag indicating if we are currently
101 : * monitoring this event. Should only
102 : * be true while the event is executing.
103 : */
104 : bool open;
105 :
106 : void start ();
107 : void restart ();
108 : double pause ();
109 : double pause_for(PerfData & other);
110 : double stopit ();
111 :
112 : /**
113 : * Sums timing results from \p other
114 : */
115 1382 : PerfData & operator += (const PerfData & other)
116 : {
117 1382 : libmesh_assert(!open);
118 2764 : tot_time += other.tot_time;
119 2764 : tot_time_incl_sub += other.tot_time_incl_sub;
120 2764 : count += other.count;
121 :
122 1382 : return *this;
123 : }
124 :
125 : int called_recursively;
126 :
127 : protected:
128 : double stop_or_pause(const bool do_stop);
129 : };
130 :
131 :
132 :
133 :
134 : /**
135 : * The \p PerfLog class allows monitoring of specific events.
136 : * An event is defined by a unique string that functions as
137 : * a label. Each time the event is executed data are recorded.
138 : * This class is particularly useful for finding performance
139 : * bottlenecks.
140 : *
141 : * \author Benjamin Kirk
142 : * \date 2003
143 : * \brief Responsible for timing and summarizing events.
144 : */
145 : class PerfLog
146 : {
147 : public:
148 :
149 : /**
150 : * Constructor. \p label_name is the name of the object, which
151 : * will bw print in the log to distinguish it from other objects.
152 : * \p log_events is a flag to optionally
153 : * disable logging. You can use this flag to turn off
154 : * logging without touching any other code.
155 : */
156 : PerfLog(std::string label_name="",
157 : const bool log_events=true);
158 :
159 : /**
160 : * Destructor. Calls \p clear() and \p print_log().
161 : */
162 : ~PerfLog();
163 :
164 : /**
165 : * Clears all the internal data and restores the
166 : * data structures to a pristine state. This function
167 : * checks to see if it is currently monitoring any
168 : * events, and if so errors. Be sure you are not
169 : * logging any events when you call this function.
170 : */
171 : void clear();
172 :
173 : /**
174 : * Disables performance logging for an active object.
175 : */
176 188012 : void disable_logging() { log_events = false; }
177 :
178 : /**
179 : * Enables performance logging for an active object.
180 : */
181 188012 : void enable_logging() { log_events = true; }
182 :
183 : /**
184 : * \returns \p true iff performance logging is enabled
185 : */
186 94006 : bool logging_enabled() const { return log_events; }
187 :
188 : /**
189 : * Tells the PerfLog to only print log results summarized by header
190 : */
191 : void enable_summarized_logs() { summarize_logs = true; }
192 :
193 : /**
194 : * Tells the PerfLog to print detailed log results (this is the
195 : * default behavior)
196 : */
197 : void disable_summarized_logs() { summarize_logs = false; }
198 :
199 : /**
200 : * \returns \p true iff log results will be summarized by header
201 : */
202 : bool summarized_logs_enabled() { return summarize_logs; }
203 :
204 : /**
205 : * Push the event \p label onto the stack, pausing any active event.
206 : *
207 : * Note - for speed the PerfLog directly considers the *pointers*
208 : * here. Supply pointers to string literals or other character
209 : * arrays whose lifetime will exceed the lifetime of the PerfLog
210 : * object, not to temporarily allocated arrays.
211 : */
212 : void fast_push (const char * label,
213 : const char * header="");
214 :
215 : /**
216 : * Push the event \p label onto the stack, pausing any active event.
217 : *
218 : * This method will eventually be deprecated. For backwards
219 : * compatibility, the PerfLog must copy the contents of these
220 : * character arrays into strings, which ironically damages the
221 : * performance we are trying to profile. Use fast_push() (with
222 : * compatible long-lived character array data) instead.
223 : */
224 : void push (const char * label,
225 : const char * header="");
226 :
227 : /**
228 : * Push the event \p label onto the stack, pausing any active event.
229 : *
230 : * This method will eventually be deprecated. String manipulation
231 : * is too low performance to use when performance monitoring hot
232 : * spots. Use fast_push() instead.
233 : */
234 : void push (const std::string & label,
235 : const std::string & header="");
236 :
237 : /**
238 : * Pop the event \p label off the stack, resuming any lower event.
239 : *
240 : * This method must be passed the exact same pointers as were passed
241 : * to fast_push, not merely pointers to identical strings.
242 : * This method is called from the PerfItem destructor, so it should
243 : * not throw. We have therefore marked it noexcept as a reminder.
244 : */
245 : void fast_pop (const char * label,
246 : const char * header="") noexcept;
247 :
248 : /**
249 : * Pop the event \p label off the stack, resuming any lower event.
250 : *
251 : * This method will eventually be deprecated. Use fast_pop() (with
252 : * the exact same pointers supplied to fast_push()) instead.
253 : */
254 : void pop (const char * label,
255 : const char * header="");
256 :
257 : /**
258 : * Pop the event \p label off the stack, resuming any lower event.
259 : *
260 : * This method will eventually be deprecated. String manipulation
261 : * is too low performance to use when performance monitoring hot
262 : * spots. Use fast_pop() instead.
263 : */
264 : void pop (const std::string & label,
265 : const std::string & header="");
266 :
267 : /**
268 : * Start monitoring the event named \p label.
269 : */
270 : void start_event(const std::string & label,
271 : const std::string & header="");
272 :
273 : /**
274 : * Stop monitoring the event named \p label.
275 : */
276 : void stop_event(const std::string & label,
277 : const std::string & header="");
278 :
279 : /**
280 : * Suspend monitoring of the event.
281 : */
282 : void pause_event(const std::string & label,
283 : const std::string & header="");
284 :
285 : /**
286 : * Restart monitoring the event.
287 : */
288 : void restart_event(const std::string & label,
289 : const std::string & header="");
290 :
291 : /**
292 : * \returns A string containing:
293 : * (1) Basic machine information (if first call)
294 : * (2) The performance log
295 : */
296 : std::string get_log() const;
297 :
298 : /**
299 : * \returns A string containing ONLY the information header.
300 : */
301 : std::string get_info_header() const;
302 :
303 : /**
304 : * \returns A string containing ONLY the log information
305 : */
306 : std::string get_perf_info() const;
307 :
308 : /**
309 : * Print the log.
310 : */
311 : void print_log() const;
312 :
313 : /**
314 : * \returns The total time spent on this event.
315 : */
316 : double get_elapsed_time() const;
317 :
318 : /**
319 : * \returns The active time
320 : */
321 : double get_active_time() const;
322 :
323 : /**
324 : * Return the PerfData object associated with a label and header.
325 : */
326 : PerfData get_perf_data(const std::string & label, const std::string & header="");
327 :
328 : /**
329 : * Typdef for the underlying logging data structure.
330 : */
331 : typedef std::map<std::pair<const char *,
332 : const char *>,
333 : PerfData> log_type;
334 :
335 : private:
336 :
337 :
338 : /**
339 : * The label for this object.
340 : */
341 : const std::string label_name;
342 :
343 : /**
344 : * Flag to optionally disable all logging.
345 : */
346 : bool log_events;
347 :
348 : /**
349 : * Flag to optionally summarize logs
350 : */
351 : bool summarize_logs;
352 :
353 : /**
354 : * The total running time for recorded events.
355 : */
356 : double total_time;
357 :
358 : /**
359 : * The time we were constructed or last cleared.
360 : */
361 : struct timeval tstart;
362 :
363 : /**
364 : * The actual log.
365 : *
366 : * An unsorted_map would work fine here and would be asymptotically
367 : * faster, but in my tests for our log sizes there was no
368 : * improvement.
369 : */
370 : log_type log;
371 :
372 : /**
373 : * A stack to hold the current performance log trace.
374 : */
375 : std::stack<PerfData*> log_stack;
376 :
377 : /**
378 : * Flag indicating if print_log() has been called.
379 : * This is used to print a header with machine-specific
380 : * data the first time that print_log() is called.
381 : */
382 : static bool called;
383 :
384 : /**
385 : * Splits a string on whitespace into a vector of separate strings. This is used to make the
386 : * LIBMESH_CONFIGURE_INFO a little more manageable.
387 : */
388 : void split_on_whitespace(const std::string & input,
389 : std::vector<std::string> & output) const;
390 :
391 : /**
392 : * Workaround to give us fixed pointers to character arrays for
393 : * every string. Using std::set instead might work: it won't
394 : * invalidate iterators, which I *think* means it doesn't have any
395 : * reason to modify or copy their contents or otherwise invalidate
396 : * their c_str() pointers... but I can't prove it from the standards
397 : * doc, so let's be safe.
398 : */
399 : std::map<std::string, std::unique_ptr<const char[]>> non_temporary_strings;
400 : };
401 :
402 :
403 :
404 : // ------------------------------------------------------------
405 : // PerfData class member functions
406 : inline
407 10474892 : void PerfData::start ()
408 : {
409 20511493 : this->count++;
410 20511493 : this->called_recursively++;
411 20511493 : gettimeofday (&(this->tstart), nullptr);
412 20511493 : this->tstart_incl_sub = this->tstart;
413 10474892 : }
414 :
415 :
416 :
417 : inline
418 39936528 : void PerfData::restart ()
419 : {
420 79813444 : gettimeofday (&(this->tstart), nullptr);
421 39936930 : }
422 :
423 :
424 :
425 : inline
426 : double PerfData::pause ()
427 : {
428 : return this->stop_or_pause(false);
429 : }
430 :
431 :
432 : inline
433 100327375 : double PerfData::stop_or_pause(const bool do_stop)
434 : {
435 : // save the start times, reuse the structure we have rather than create
436 : // a new one.
437 : const time_t
438 100327375 : tstart_tv_sec = this->tstart.tv_sec,
439 100327375 : tstart_tv_usec = this->tstart.tv_usec;
440 :
441 100327375 : gettimeofday (&(this->tstart), nullptr);
442 :
443 200235541 : const double elapsed_time = (static_cast<double>(this->tstart.tv_sec - tstart_tv_sec) +
444 100327375 : static_cast<double>(this->tstart.tv_usec - tstart_tv_usec)*1.e-6);
445 :
446 100327375 : this->tot_time += elapsed_time;
447 :
448 100327375 : if (do_stop)
449 : {
450 200235541 : const double elapsed_time_incl_sub = (static_cast<double>(this->tstart.tv_sec - this->tstart_incl_sub.tv_sec) +
451 100327375 : static_cast<double>(this->tstart.tv_usec - this->tstart_incl_sub.tv_usec)*1.e-6);
452 :
453 100327375 : this->tot_time_incl_sub += elapsed_time_incl_sub;
454 : }
455 :
456 100327375 : return elapsed_time;
457 : }
458 :
459 :
460 :
461 : inline
462 79583617 : double PerfData::pause_for(PerfData & other)
463 : {
464 79583617 : gettimeofday (&(other.tstart), nullptr);
465 :
466 159166832 : const double elapsed_time = (static_cast<double>(other.tstart.tv_sec - this->tstart.tv_sec) +
467 79583617 : static_cast<double>(other.tstart.tv_usec - this->tstart.tv_usec)*1.e-6);
468 79583617 : this->tot_time += elapsed_time;
469 :
470 79583617 : other.count++;
471 79583617 : other.called_recursively++;
472 79583617 : other.tstart_incl_sub = other.tstart;
473 :
474 79583617 : return elapsed_time;
475 : }
476 :
477 :
478 :
479 : inline
480 49992613 : double PerfData::stopit ()
481 : {
482 : // stopit is just similar to pause except that it decrements the
483 : // recursive call counter
484 :
485 100327375 : this->called_recursively--;
486 100327375 : return this->stop_or_pause(true);
487 : }
488 :
489 :
490 :
491 : // ------------------------------------------------------------
492 : // PerfLog class inline member functions
493 : inline
494 145214634 : void PerfLog::fast_push (const char * label,
495 : const char * header)
496 : {
497 145214634 : if (this->log_events)
498 : {
499 : // Get a reference to the event data to avoid
500 : // repeated map lookups
501 200080336 : PerfData * perf_data = &(log[std::make_pair(header,label)]);
502 :
503 100095110 : if (!log_stack.empty())
504 79583617 : total_time += log_stack.top()->pause_for(*perf_data);
505 : else
506 10474892 : perf_data->start();
507 49992613 : log_stack.push(perf_data);
508 : }
509 145214634 : }
510 :
511 :
512 :
513 : inline
514 145383727 : void PerfLog::fast_pop(const char * libmesh_dbg_var(label),
515 : const char * libmesh_dbg_var(header)) noexcept
516 : {
517 145383727 : if (this->log_events)
518 : {
519 : // If there's nothing on the stack, then we can't pop anything. Previously we
520 : // asserted that the log_stack was not empty, but we should not throw from
521 : // this function, so instead just return in that case.
522 100327375 : if (log_stack.empty())
523 0 : return;
524 :
525 : #ifndef NDEBUG
526 : // In debug mode, we carefully check that before popping the stack,
527 : // we have the correct event.
528 :
529 : // Get pointer to corresponding PerfData, or nullptr if it does not exist.
530 49992613 : auto it = log.find(std::make_pair(header, label));
531 49992613 : PerfData * perf_data = (it == log.end()) ? nullptr : &(it->second);
532 :
533 : // If this event is not found in the log, then it could indicate
534 : // that you have a STOP_LOG(a,b) without a corresponding
535 : // START_LOG(a,b) call. We don't pop anything from the log_stack
536 : // in this case.
537 49992613 : if (!perf_data)
538 : {
539 0 : libMesh::err << "PerfLog can't pop (" << header << ',' << label << ')' << std::endl;
540 0 : libMesh::err << "No such event was found in the log, you may have a mismatched START/STOP_LOG statement." << std::endl;
541 0 : return;
542 : }
543 :
544 49992613 : if (perf_data != log_stack.top())
545 : {
546 : // We've been asked to pop an event which is not at the
547 : // top() of the log_stack. The event may be deeper in the
548 : // stack instead, so search for it now. If we find it, we'll
549 : // pop all intermediate items off the stack (abandoning any
550 : // attempt to time them accurately) and let the optimized
551 : // code path take over from there.
552 0 : std::deque<PerfData*> tmp_stack;
553 :
554 0 : while (!log_stack.empty() && perf_data != log_stack.top())
555 : {
556 0 : tmp_stack.push_front(log_stack.top());
557 0 : log_stack.pop();
558 : }
559 :
560 : // If we exhaustively searched the log_stack without finding
561 : // the event, we can't do anything else so just put
562 : // everything back and return, not doing anything for this
563 : // event.
564 0 : if (log_stack.empty())
565 : {
566 0 : while (!tmp_stack.empty())
567 : {
568 0 : log_stack.push(tmp_stack.back());
569 0 : tmp_stack.pop_back();
570 : }
571 :
572 0 : return;
573 : }
574 : }
575 : #endif
576 :
577 : // In optimized mode, we just pop from the top of the stack and
578 : // resume timing the next entry.
579 100327375 : total_time += log_stack.top()->stopit();
580 :
581 49992613 : log_stack.pop();
582 :
583 100327375 : if (!log_stack.empty())
584 79813444 : log_stack.top()->restart();
585 : }
586 : }
587 :
588 :
589 :
590 : inline
591 : double PerfLog::get_elapsed_time () const
592 : {
593 : struct timeval tnow;
594 :
595 : gettimeofday (&tnow, nullptr);
596 :
597 : const double elapsed_time = (static_cast<double>(tnow.tv_sec - tstart.tv_sec) +
598 : static_cast<double>(tnow.tv_usec - tstart.tv_usec)*1.e-6);
599 : return elapsed_time;
600 : }
601 :
602 : inline
603 : double PerfLog::get_active_time() const
604 : {
605 : return total_time;
606 : }
607 :
608 : } // namespace libMesh
609 :
610 :
611 :
612 : #endif // LIBMESH_PERFLOG_H
|