LCOV - code coverage report
Current view: top level - src/outputs - Checkpoint.C (source / functions) Hit Total Coverage
Test: idaholab/moose framework: #32971 (54bef8) with base c6cf66 Lines: 104 110 94.5 %
Date: 2026-05-29 20:35:17 Functions: 10 10 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //* This file is part of the MOOSE framework
       2             : //* https://mooseframework.inl.gov
       3             : //*
       4             : //* All rights reserved, see COPYRIGHT for full restrictions
       5             : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
       6             : //*
       7             : //* Licensed under LGPL 2.1, please see LICENSE for details
       8             : //* https://www.gnu.org/licenses/lgpl-2.1.html
       9             : 
      10             : // C POSIX includes
      11             : #include <sstream>
      12             : #include <sys/stat.h>
      13             : 
      14             : #include <system_error>
      15             : 
      16             : // Moose includes
      17             : #include "Checkpoint.h"
      18             : #include "FEProblem.h"
      19             : #include "MooseApp.h"
      20             : #include "MaterialPropertyStorage.h"
      21             : #include "MooseMesh.h"
      22             : #include "MeshMetaDataInterface.h"
      23             : #include "RestartableDataWriter.h"
      24             : 
      25             : #include "libmesh/checkpoint_io.h"
      26             : #include "libmesh/enum_xdr_mode.h"
      27             : #include "libmesh/utility.h"
      28             : 
// Make libMesh names usable without qualification in this file
// (NOTE(review): presumably needed for CheckpointIO, MeshBase and Utility used below — confirm)
using namespace libMesh;

// Register the Checkpoint class under the "MooseApp" label
// (NOTE(review): presumably this is what makes the object constructible by name — confirm
// against the definition of registerMooseObject)
registerMooseObject("MooseApp", Checkpoint);
      32             : 
      33             : InputParameters
      34       99712 : Checkpoint::validParams()
      35             : {
      36             :   // Get the parameters from the base classes
      37       99712 :   InputParameters params = FileOutput::validParams();
      38             : 
      39      199424 :   params.addClassDescription("Output for MOOSE recovery checkpoint files.");
      40             : 
      41             :   // Typical checkpoint options
      42      398848 :   params.addParam<unsigned int>("num_files", 2, "Number of the restart files to save");
      43      398848 :   params.addParam<std::string>(
      44             :       "suffix",
      45             :       "cp",
      46             :       "This will be appended to the file_base to create the directory name for checkpoint files.");
      47             :   // For checkpoints, set the wall time output interval to defualt of 1 hour (3600 s)
      48      299136 :   params.addParam<Real>(
      49      199424 :       "wall_time_interval", 3600, "The target wall time interval (in seconds) at which to output");
      50             : 
      51             :   // Since it makes the most sense to write checkpoints at the end of time steps,
      52             :   // change the default value of execute_on to TIMESTEP_END
      53       99712 :   ExecFlagEnum & exec_enum = params.set<ExecFlagEnum>("execute_on", true);
      54      199424 :   exec_enum = {EXEC_TIMESTEP_END};
      55             : 
      56       99712 :   return params;
      57       99712 : }
      58             : 
/**
 * Construct the Checkpoint output from its input parameters.
 *
 * Stores the "num_files" and "suffix" parameters, validates the "execute_on"
 * flags, and then applies the wall time interval command line override.
 *
 * @param parameters The input parameters for this object
 */
Checkpoint::Checkpoint(const InputParameters & parameters)
  : FileOutput(parameters),
    _num_files(getParam<unsigned int>("num_files")),
    _suffix(getParam<std::string>("suffix"))
{
  // Prevent the checkpoint from executing at any time other than INITIAL,
  // TIMESTEP_END, and FINAL
  validateExecuteOn();

  // The following updates the value of _wall_time_interval if the
  // '--output-wall-time-interval' command line parameter is used.
  // If it is not used, _wall_time_interval keeps its current value.
  // The '--output-wall-time-interval' parameter is necessary for testing
  // and should only be used in the test suite.
  Output::setWallTimeIntervalFromCommandLineParam();
}
      75             : 
      76             : std::string
      77       60446 : Checkpoint::filename()
      78             : {
      79             :   // Get the time step with correct zero padding
      80       60446 :   std::ostringstream output;
      81      120892 :   output << directory() << "/" << std::setw(_padding) << std::setprecision(0) << std::setfill('0')
      82       60446 :          << std::right << timeStep();
      83             : 
      84      120892 :   return output.str();
      85       60446 : }
      86             : 
      87             : std::string
      88       73304 : Checkpoint::directory() const
      89             : {
      90      146608 :   return _file_base + "_" + _suffix;
      91             : }
      92             : 
      93             : bool
      94      303828 : Checkpoint::shouldOutput()
      95             : {
      96             :   // should_output_parent ensures that we output only when _execute_on contains
      97             :   // _current_execute_flag (see Output::shouldOutput), ensuring that we wait
      98             :   // until the end of the timestep to write, preventing the output of an
      99             :   // unconverged solution.
     100      303828 :   const bool should_output_parent = FileOutput::shouldOutput();
     101      303828 :   if (!should_output_parent)
     102      290970 :     return false; // No point in continuing
     103             : 
     104             :   // Check for signal
     105             :   // Reading checkpoint on time step 0 is not supported
     106       12858 :   const bool should_output_signal = (Moose::interrupt_signal_number != 0) && (timeStep() > 0);
     107       12858 :   if (should_output_signal)
     108             :   {
     109          22 :     _console << "Unix signal SIGUSR1 detected. Outputting checkpoint file.\n";
     110             :     // Reset signal number since we output
     111          22 :     Moose::interrupt_signal_number = 0;
     112          22 :     return true;
     113             :   }
     114             : 
     115             :   // Check if enough wall time has elapsed to output
     116       12836 :   const bool should_output_wall_time = _wall_time_since_last_output >= _wall_time_interval;
     117       12836 :   if (should_output_wall_time)
     118         275 :     return true;
     119             : 
     120             :   // Check if the checkpoint should "normally" output, i.e. if it was created
     121             :   // through the input file
     122       12561 :   const bool should_output = (onInterval() || _current_execute_flag == EXEC_FINAL);
     123             : 
     124       12561 :   return should_output;
     125             : }
     126             : 
     127             : void
     128       12858 : Checkpoint::output()
     129             : {
     130             :   // Create the output directory
     131       12858 :   const auto cp_dir = directory();
     132       12858 :   Utility::mkdir(cp_dir.c_str());
     133             : 
     134             :   // Create the output filename
     135       12858 :   const auto current_file = filename();
     136             : 
     137             :   // Create the libMesh Checkpoint_IO object
     138       12858 :   MeshBase & mesh = _es_ptr->get_mesh();
     139       12858 :   CheckpointIO io(mesh, false);
     140             : 
     141             :   // Create checkpoint file structure
     142       12858 :   CheckpointFileNames curr_file_struct;
     143             : 
     144       12858 :   curr_file_struct.checkpoint = current_file + _app.checkpointSuffix();
     145             : 
     146             :   // Write the checkpoint file
     147       12858 :   io.write(curr_file_struct.checkpoint);
     148             : 
     149             :   // Write out meta data if there is any (only on processor zero)
     150       12858 :   if (processor_id() == 0)
     151             :   {
     152       11261 :     const auto paths = _app.writeRestartableMetaData(curr_file_struct.checkpoint);
     153       11261 :     curr_file_struct.restart.insert(curr_file_struct.restart.begin(), paths.begin(), paths.end());
     154       11261 :   }
     155             : 
     156             :   // Write out the backup
     157       12858 :   const auto paths = _app.backup(_app.restartFolderBase(current_file));
     158       12855 :   curr_file_struct.restart.insert(curr_file_struct.restart.begin(), paths.begin(), paths.end());
     159             : 
     160             :   // Remove old checkpoint files
     161       12855 :   updateCheckpointFiles(curr_file_struct);
     162       12855 : }
     163             : 
     164             : void
     165       12855 : Checkpoint::updateCheckpointFiles(CheckpointFileNames file_struct)
     166             : {
     167             :   // It is possible to have already written a checkpoint with the same file
     168             :   // names contained in file_struct. If this is the case, file_struct will
     169             :   // already be stored in _file_names. When this happens, the current state of
     170             :   // the simulation is likely different than the state when the duplicately
     171             :   // named checkpoint was last written. Because of this, we want to go ahead and
     172             :   // rewrite the duplicately named checkpoint, overwritting the files
     173             :   // representing the old state. For accurate bookkeeping, we will delete the
     174             :   // existing instance of file_struct from _file_names and re-append it to the
     175             :   // end of _file_names (to keep the order in which checkpoints are written
     176             :   // accurate).
     177             : 
     178       12855 :   const auto it = std::find(_file_names.begin(), _file_names.end(), file_struct);
     179             :   // file_struct was found in _file_names.
     180             :   // Delete it so it can be re-added as the last element.
     181       12855 :   if (it != _file_names.end())
     182         121 :     _file_names.erase(it);
     183             : 
     184       12855 :   _file_names.push_back(file_struct);
     185             : 
     186             :   // Remove the file and the corresponding directory if it's empty
     187       23138 :   const auto remove_file = [this](const std::filesystem::path & path)
     188             :   {
     189       23138 :     std::error_code err;
     190             : 
     191       23138 :     if (!std::filesystem::remove(path, err))
     192           0 :       mooseWarning("Error during the deletion of checkpoint file\n",
     193           0 :                    std::filesystem::absolute(path),
     194             :                    "\n\n",
     195           0 :                    err.message());
     196             : 
     197       23138 :     const auto dir = path.parent_path();
     198       23138 :     if (std::filesystem::is_empty(dir))
     199       11569 :       if (!std::filesystem::remove(dir, err))
     200           0 :         mooseError("Error during the deletion of checkpoint directory\n",
     201           0 :                    std::filesystem::absolute(dir),
     202             :                    "\n\n",
     203           0 :                    err.message());
     204       23138 :   };
     205             : 
     206             :   // Remove un-wanted files
     207       12855 :   if (_file_names.size() > _num_files)
     208             :   {
     209             :     // Extract the filenames to be removed
     210        6055 :     CheckpointFileNames delete_files = _file_names.front();
     211             : 
     212             :     // Remove these filenames from the list
     213        6055 :     _file_names.pop_front();
     214             : 
     215             :     // Delete restartable data
     216       29193 :     for (const auto & path : delete_files.restart)
     217       23138 :       remove_file(path);
     218             : 
     219             :     // Delete checkpoint files
     220             :     // This file may not exist so don't worry about checking for success
     221        6055 :     if (processor_id() == 0)
     222        5514 :       CheckpointIO::cleanup(delete_files.checkpoint,
     223        5514 :                             _problem_ptr->mesh().isDistributedMesh() ? comm().size() : 1);
     224        6055 :   }
     225       12855 : }
     226             : 
     227             : void
     228       48327 : Checkpoint::validateExecuteOn() const
     229             : {
     230       96654 :   const auto & execute_on = getParam<ExecFlagEnum>("execute_on");
     231      241635 :   const std::set<ExecFlagType> allowed = {EXEC_INITIAL, EXEC_TIMESTEP_END, EXEC_FINAL};
     232       97273 :   for (const auto & value : execute_on)
     233       48949 :     if (!allowed.count(value))
     234           6 :       paramError("execute_on",
     235             :                  "The exec flag ",
     236             :                  value,
     237             :                  " is not allowed. Allowed flags are INITIAL, TIMESTEP_END, and FINAL.");
     238       96651 : }
     239             : 
     240             : std::stringstream
     241       73507 : Checkpoint::checkpointInfo() const
     242             : {
     243             :   static const unsigned int console_field_width = 27;
     244       73507 :   std::stringstream checkpoint_info;
     245             : 
     246       73507 :   std::stringstream interval_info_ss;
     247       73507 :   interval_info_ss << "Every " << std::defaultfloat << _wall_time_interval << " s";
     248       73507 :   const std::string interval_info = interval_info_ss.str();
     249             : 
     250       73507 :   checkpoint_info << std::left << std::setw(console_field_width)
     251       73507 :                   << "  Wall Time Interval:" << interval_info << "\n";
     252             : 
     253       73507 :   const std::string user_info = "Outputs/" + name();
     254             : 
     255       73507 :   checkpoint_info << std::left << std::setw(console_field_width) << "  Checkpoint:" << user_info
     256       73507 :                   << "\n";
     257             : 
     258       73507 :   checkpoint_info << std::left << std::setw(console_field_width)
     259       73507 :                   << "  # Checkpoints Kept:" << std::to_string(_num_files) << "\n";
     260       73507 :   std::string exec_on_values = "";
     261      148043 :   for (const auto & item : _execute_on)
     262       74536 :     exec_on_values += item.name() + " ";
     263       73507 :   checkpoint_info << std::left << std::setw(console_field_width)
     264       73507 :                   << "  Execute On:" << exec_on_values << "\n";
     265             : 
     266      147014 :   return checkpoint_info;
     267       73507 : }

Generated by: LCOV version 1.14