LCOV - code coverage report
Current view: top level - src/utils - DelimitedFileReader.C (source / functions) Hit Total Coverage
Test: idaholab/moose framework: 2bf808 Lines: 168 175 96.0 %
Date: 2025-07-17 01:28:37 Functions: 22 28 78.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //* This file is part of the MOOSE framework
       2             : //* https://mooseframework.inl.gov
       3             : //*
       4             : //* All rights reserved, see COPYRIGHT for full restrictions
       5             : //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
       6             : //*
       7             : //* Licensed under LGPL 2.1, please see LICENSE for details
       8             : //* https://www.gnu.org/licenses/lgpl-2.1.html
       9             : 
      10             : // STL includes
      11             : #include <sstream>
      12             : #include <iomanip>
      13             : #include <iterator>
      14             : #include <type_traits>
      15             : 
      16             : // MOOSE includes
      17             : #include "DelimitedFileReader.h"
      18             : #include "MooseUtils.h"
      19             : #include "MooseError.h"
      20             : #include "pcrecpp.h"
      21             : 
      22             : namespace MooseUtils
      23             : {
      24             : 
      25             : template <typename T>
      26        1148 : DelimitedFileReaderTempl<T>::DelimitedFileReaderTempl(const std::string & filename,
      27             :                                                       const libMesh::Parallel::Communicator * comm)
      28        1148 :   : _filename(filename),
      29        1148 :     _header_flag(HeaderFlag::AUTO),
      30        1148 :     _ignore_empty_lines(true),
      31        1148 :     _communicator(comm),
      32        1148 :     _format_flag(FormatFlag::COLUMNS)
      33             : {
      34        1148 : }
      35             : 
      36             : template <typename T>
      37             : void
      38        1170 : DelimitedFileReaderTempl<T>::read()
      39             : {
      40             :   // Number of columns
      41             :   std::size_t n_cols;
      42             : 
      43             :   // Storage for the raw data
      44        1170 :   std::vector<T> raw;
      45        1170 :   std::size_t size_raw = 0;
      46        1170 :   std::size_t size_offsets = 0;
      47             : 
      48             :   // Read data
      49        1170 :   if (_communicator == nullptr || _communicator->rank() == 0)
      50             :   {
      51             :     // Check the file
      52        1013 :     MooseUtils::checkFileReadable(_filename);
      53             : 
      54             :     // Create the file stream and do nothing if the file is empty
      55        1008 :     std::ifstream stream_data(_filename);
      56        1008 :     if (stream_data.peek() == std::ifstream::traits_type::eof())
      57           5 :       return;
      58             : 
      59             :     // Read/generate the header
      60        1003 :     if (_format_flag == FormatFlag::ROWS)
      61         668 :       readRowData(stream_data, raw);
      62             :     else
      63         335 :       readColumnData(stream_data, raw);
      64             : 
      65             :     // Set the number of columns
      66         999 :     n_cols = _names.size();
      67             : 
      68             :     // Close the stream
      69         999 :     stream_data.close();
      70             : 
      71             :     // Set raw data vector size
      72         999 :     size_raw = raw.size();
      73         999 :     size_offsets = _row_offsets.size();
      74        1008 :   }
      75             : 
      76        1156 :   if (_communicator != nullptr)
      77             :   {
      78             :     // Broadcast column names
      79         719 :     _communicator->broadcast(n_cols);
      80         719 :     _names.resize(n_cols);
      81         719 :     _communicator->broadcast(_names);
      82             : 
      83             :     // Broadcast raw data
      84         719 :     _communicator->broadcast(size_raw);
      85         719 :     raw.resize(size_raw);
      86         719 :     _communicator->broadcast(raw);
      87             : 
      88             :     // Broadcast row offsets
      89         719 :     if (_format_flag == FormatFlag::ROWS)
      90             :     {
      91         376 :       _communicator->broadcast(size_offsets);
      92         376 :       _row_offsets.resize(size_offsets);
      93         376 :       _communicator->broadcast(_row_offsets);
      94             :     }
      95             :   }
      96             : 
      97             :   // Resize the internal storage
      98        1156 :   _data.resize(n_cols);
      99             : 
     100             :   // Process "row" formatted data
     101        1156 :   if (_format_flag == FormatFlag::ROWS)
     102             :   {
     103         746 :     typename std::vector<T>::iterator start = raw.begin();
     104        6791 :     for (std::size_t j = 0; j < n_cols; ++j)
     105             :     {
     106        6045 :       _data[j] = std::vector<T>(start, start + _row_offsets[j]);
     107        6045 :       std::advance(start, _row_offsets[j]);
     108             :     }
     109             :   }
     110             : 
     111             :   // Process "column" formatted data
     112             :   else
     113             :   {
     114             :     mooseAssert(raw.size() % n_cols == 0,
     115             :                 "The raw data is not evenly divisible by the number of columns.");
     116         410 :     const std::size_t n_rows = raw.size() / n_cols;
     117        1820 :     for (std::size_t j = 0; j < n_cols; ++j)
     118             :     {
     119        1410 :       _data[j].resize(n_rows);
     120      267397 :       for (std::size_t i = 0; i < n_rows; ++i)
     121      265987 :         _data[j][i] = raw[i * n_cols + j];
     122             :     }
     123             :   }
     124        1166 : }
     125             : 
     126             : template <typename T>
     127             : std::size_t
     128           0 : DelimitedFileReaderTempl<T>::numEntries() const
     129             : {
     130           0 :   std::size_t n_entries = 0;
     131           0 :   for (std::size_t i = 0; i < _data.size(); ++i)
     132           0 :     n_entries += _data[i].size();
     133             : 
     134           0 :   return n_entries;
     135             : }
     136             : 
     137             : template <typename T>
     138             : const std::vector<std::string> &
     139         439 : DelimitedFileReaderTempl<T>::getNames() const
     140             : {
     141         439 :   return _names;
     142             : }
     143             : 
     144             : template <typename T>
     145             : const std::vector<std::vector<T>> &
     146        1203 : DelimitedFileReaderTempl<T>::getData() const
     147             : {
     148        1203 :   return _data;
     149             : }
     150             : 
     151             : template <>
     152             : const std::vector<Point>
     153         275 : DelimitedFileReaderTempl<double>::getDataAsPoints() const
     154             : {
     155         275 :   std::vector<Point> point_data;
     156             : 
     157        1495 :   for (std::size_t i = 0; i < _data.size(); ++i)
     158             :   {
     159        1229 :     Point point;
     160             : 
     161             :     // Other checks in this class ensure that each data entry has the same number of values;
     162             :     // here we just need to check that each data set has LIBMESH_DIM entries (which we could do by
     163             :     // equivalently checking that the total number of entries is divisibly by LIBMESH_DIM
     164             :     // _and_ one of these data sets has LIBMESH_DIM entries (consider the fringe case where
     165             :     // LIBMESH_DIM is 3, but you accidentally put a point file like
     166             :     //   0 0
     167             :     //   1 0
     168             :     //   2 0
     169             :     // where each point is the same length _and_ the total points is still divisible by 3.
     170             :     // This check here is more exact.
     171        1229 :     if (_data.at(i).size() != LIBMESH_DIM)
     172           9 :       mooseError("Each point in file ", _filename, " must have ", LIBMESH_DIM, " entries");
     173             : 
     174        4880 :     for (std::size_t j = 0; j < LIBMESH_DIM; ++j)
     175        3660 :       point(j) = _data.at(i).at(j);
     176             : 
     177        1220 :     point_data.push_back(point);
     178             :   }
     179             : 
     180         266 :   return point_data;
     181           1 : }
     182             : 
     183             : template <typename T>
     184             : const std::vector<Point>
     185           0 : DelimitedFileReaderTempl<T>::getDataAsPoints() const
     186             : {
     187           0 :   mooseError("Not implemented");
     188             : }
     189             : 
     190             : template <typename T>
     191             : const std::vector<T> &
     192          60 : DelimitedFileReaderTempl<T>::getData(const std::string & name) const
     193             : {
     194          60 :   const auto it = find(_names.begin(), _names.end(), name);
     195          60 :   if (it == _names.end())
     196           1 :     mooseError("Could not find '", name, "' in header of file ", _filename, ".");
     197         118 :   return _data[std::distance(_names.begin(), it)];
     198             : }
     199             : 
     200             : template <typename T>
     201             : const std::vector<T> &
     202        8910 : DelimitedFileReaderTempl<T>::getData(std::size_t index) const
     203             : {
     204        8910 :   if (index >= _data.size())
     205           1 :     mooseError("The supplied index ",
     206             :                index,
     207             :                " is out-of-range for the available data in file '",
     208           1 :                _filename,
     209             :                "' which contains ",
     210           1 :                _data.size(),
     211             :                " items.");
     212        8909 :   return _data[index];
     213             : }
     214             : 
     215             : template <typename T>
     216             : void
     217         335 : DelimitedFileReaderTempl<T>::readColumnData(std::ifstream & stream_data, std::vector<T> & output)
     218             : {
     219             :   // Local storage for the data being read
     220         335 :   std::string line;
     221         335 :   std::vector<T> row;
     222             : 
     223             :   // Keep track of the line number for error reporting
     224         335 :   unsigned int count = 0;
     225             : 
     226             :   // Number of columns expected based on the first row of the data
     227         335 :   std::size_t n_cols = INVALID_SIZE;
     228             : 
     229             :   // Read the lines
     230      101902 :   while (std::getline(stream_data, line))
     231             :   {
     232             :     // Increment line counter and clear any tokenized data
     233      101571 :     count++;
     234      101571 :     row.clear();
     235             : 
     236             :     // Ignore empty and/or comment lines, if applicable
     237      101571 :     if (preprocessLine(line, count))
     238          96 :       continue;
     239             : 
     240             :     // Read header, if the header exists and the column names do not exist.
     241      101474 :     if (_names.empty() && header(line))
     242             :     {
     243         227 :       MooseUtils::tokenize(line, _names, 1, delimiter(line));
     244        1128 :       for (std::string & str : _names)
     245         901 :         str = MooseUtils::trim(str);
     246         227 :       continue;
     247         227 :     }
     248             : 
     249             :     // Separate the row and error if it fails
     250      101247 :     processLine(line, row, count);
     251             : 
     252             :     // Set the number of columns
     253      101245 :     if (n_cols == INVALID_SIZE)
     254         334 :       n_cols = row.size();
     255             : 
     256             :     // Check number of columns
     257      101245 :     if (row.size() != n_cols)
     258           1 :       mooseError("The number of columns read (",
     259           1 :                  row.size(),
     260             :                  ") does not match the number of columns expected (",
     261             :                  n_cols,
     262             :                  ") based on the first row of the file when reading row ",
     263             :                  count,
     264             :                  " in file ",
     265           1 :                  _filename,
     266             :                  ".");
     267             : 
     268             :     // Append data
     269      101244 :     output.insert(output.end(), row.begin(), row.end());
     270             :   }
     271             : 
     272             :   // If the names have not been assigned, create the default names
     273         331 :   if (_names.empty())
     274             :   {
     275         107 :     _names.resize(n_cols);
     276         107 :     int padding = MooseUtils::numDigits(n_cols);
     277         343 :     for (std::size_t i = 0; i < n_cols; ++i)
     278             :     {
     279         236 :       std::stringstream ss;
     280         236 :       ss << "column_" << std::setw(padding) << std::setfill('0') << i;
     281         236 :       _names[i] = ss.str();
     282             :     }
     283             :   }
     284         339 : }
     285             : 
     286             : template <typename T>
     287             : void
     288         668 : DelimitedFileReaderTempl<T>::readRowData(std::ifstream & stream_data, std::vector<T> & output)
     289             : {
     290             :   // Local storage for the data being read
     291         668 :   std::string line;
     292         668 :   std::vector<T> row;
     293         668 :   unsigned int linenum = 0; // line number in file
     294             : 
     295             :   // Clear existing data
     296         668 :   _names.clear();
     297         668 :   _row_offsets.clear();
     298             : 
     299             :   // Read the lines
     300        6448 :   while (std::getline(stream_data, line))
     301             :   {
     302             :     // Increment line counter and clear any tokenized data
     303        5780 :     linenum++;
     304        5780 :     row.clear();
     305             : 
     306             :     // Ignore empty lines
     307        5780 :     if (preprocessLine(line, linenum))
     308          59 :       continue;
     309             : 
     310        5721 :     if (header(line))
     311             :     {
     312          62 :       std::size_t index = line.find_first_of(delimiter(line));
     313          62 :       _names.push_back(line.substr(0, index));
     314          62 :       line = line.substr(index);
     315             :     }
     316             : 
     317             :     // Separate the row and error if it fails
     318        5721 :     processLine(line, row, linenum);
     319             : 
     320             :     // Store row offsets to allow for un-even rows
     321        5721 :     _row_offsets.push_back(row.size());
     322             : 
     323             :     // Append data
     324        5721 :     output.insert(output.end(), row.begin(), row.end());
     325             :   }
     326             : 
     327             :   // Assign row names if not provided via header
     328         668 :   if (_names.empty())
     329             :   {
     330         647 :     int padding = MooseUtils::numDigits(_row_offsets.size());
     331        6306 :     for (std::size_t i = 0; i < _row_offsets.size(); ++i)
     332             :     {
     333        5659 :       std::stringstream ss;
     334        5659 :       ss << "row_" << std::setw(padding) << std::setfill('0') << i;
     335        5659 :       _names.push_back(ss.str());
     336             :     }
     337             :   }
     338         668 : }
     339             : 
     340             : template <typename T>
     341             : bool
     342      107351 : DelimitedFileReaderTempl<T>::preprocessLine(std::string & line, const unsigned int & num)
     343             : {
     344             :   // Handle row comments
     345      107351 :   std::size_t index = _row_comment.empty() ? line.size() : line.find_first_of(_row_comment);
     346      107351 :   line = MooseUtils::trim(line.substr(0, index));
     347             : 
     348             :   // Ignore empty lines
     349      107351 :   if (line.empty())
     350             :   {
     351         156 :     if (_ignore_empty_lines)
     352         155 :       return true;
     353             :     else
     354           1 :       mooseError("Failed to read line ", num, " in file ", _filename, ". The line is empty.");
     355             :   }
     356      107195 :   return false;
     357             : }
     358             : 
     359             : template <typename T>
     360             : void
     361      106968 : DelimitedFileReaderTempl<T>::processLine(const std::string & line,
     362             :                                          std::vector<T> & row,
     363             :                                          const unsigned int & num)
     364             : {
     365      106968 :   std::string line_copy = line;
     366             :   // Convert booleans to numeric
     367             :   if constexpr (!std::is_same_v<T, std::string>)
     368             :   {
     369      106918 :     line_copy = MooseUtils::toLower(line_copy);
     370      106918 :     line_copy = MooseUtils::replaceAll(line_copy, "true", "1");
     371      106918 :     line_copy = MooseUtils::replaceAll(line_copy, "false", "0");
     372             :   }
     373             : 
     374             :   // Separate the row and error if it fails
     375      106968 :   bool status = MooseUtils::tokenizeAndConvert<T>(line_copy, row, delimiter(line));
     376      106968 :   if (!status)
     377           2 :     mooseError("Failed to convert a delimited data into double when reading line ",
     378             :                num,
     379             :                " in file ",
     380           2 :                _filename,
     381             :                ".\n  LINE ",
     382             :                num,
     383             :                ": ",
     384             :                line);
     385      106968 : }
     386             : 
     387             : template <typename T>
     388             : const std::string &
     389      108184 : DelimitedFileReaderTempl<T>::delimiter(const std::string & line)
     390             : {
     391      108184 :   if (_delimiter.empty())
     392             :   {
     393         944 :     if (line.find(",") != std::string::npos)
     394         603 :       _delimiter = ",";
     395         341 :     else if (line.find("\t") != std::string::npos)
     396           1 :       _delimiter = "\t";
     397             :     else
     398         340 :       _delimiter = " ";
     399             :   }
     400      108184 :   return _delimiter;
     401             : }
     402             : 
     403             : template <typename T>
     404             : bool
     405      106267 : DelimitedFileReaderTempl<T>::header(const std::string & line)
     406             : {
     407      106267 :   switch (_header_flag)
     408             :   {
     409      105259 :     case HeaderFlag::OFF:
     410      105259 :       return false;
     411          81 :     case HeaderFlag::ON:
     412          81 :       return true;
     413         927 :     default:
     414             : 
     415             :       // Attempt to convert the line, if it fails assume it is a header
     416         927 :       std::vector<double> row;
     417         927 :       bool contains_alpha = !MooseUtils::tokenizeAndConvert<double>(line, row, delimiter(line));
     418             : 
     419             :       // Based on auto detect set the flag to TRUE|FALSE to short-circuit this check for each line
     420             :       // in the case of row data.
     421         927 :       _header_flag = contains_alpha ? HeaderFlag::ON : HeaderFlag::OFF;
     422         927 :       return contains_alpha;
     423         927 :   }
     424             : }
     425             : 
     426             : template class DelimitedFileReaderTempl<Real>;
     427             : template class DelimitedFileReaderTempl<std::string>;
     428             : } // MooseUtils

Generated by: LCOV version 1.14