https://mooseframework.inl.gov
DelimitedFileReader.C
Go to the documentation of this file.
1 //* This file is part of the MOOSE framework
2 //* https://mooseframework.inl.gov
3 //*
4 //* All rights reserved, see COPYRIGHT for full restrictions
5 //* https://github.com/idaholab/moose/blob/master/COPYRIGHT
6 //*
7 //* Licensed under LGPL 2.1, please see LICENSE for details
8 //* https://www.gnu.org/licenses/lgpl-2.1.html
9 
10 // STL includes
#include <algorithm>
#include <iomanip>
#include <iterator>
#include <sstream>
#include <type_traits>
15 
16 // MOOSE includes
17 #include "DelimitedFileReader.h"
18 #include "MooseUtils.h"
19 #include "MooseError.h"
20 #include "pcrecpp.h"
21 
22 namespace MooseUtils
23 {
24 
25 template <typename T>
28  : _filename(filename),
29  _header_flag(HeaderFlag::AUTO),
30  _ignore_empty_lines(true),
31  _communicator(comm),
32  _format_flag(FormatFlag::COLUMNS)
33 {
34 }
35 
36 template <typename T>
37 void
39 {
40  // Number of columns
41  std::size_t n_cols;
42 
43  // Storage for the raw data
44  std::vector<T> raw;
45  std::size_t size_raw = 0;
46  std::size_t size_offsets = 0;
47 
48  // Read data
49  if (_communicator == nullptr || _communicator->rank() == 0)
50  {
51  // Check the file
53 
54  // Create the file stream and do nothing if the file is empty
55  std::ifstream stream_data(_filename);
56  if (stream_data.peek() == std::ifstream::traits_type::eof())
57  return;
58 
59  // Read/generate the header
60  if (_format_flag == FormatFlag::ROWS)
61  readRowData(stream_data, raw);
62  else
63  readColumnData(stream_data, raw);
64 
65  // Set the number of columns
66  n_cols = _names.size();
67 
68  // Close the stream
69  stream_data.close();
70 
71  // Set raw data vector size
72  size_raw = raw.size();
73  size_offsets = _row_offsets.size();
74  }
75 
76  if (_communicator != nullptr)
77  {
78  // Broadcast column names
79  _communicator->broadcast(n_cols);
80  _names.resize(n_cols);
81  _communicator->broadcast(_names);
82 
83  // Broadcast raw data
84  _communicator->broadcast(size_raw);
85  raw.resize(size_raw);
86  _communicator->broadcast(raw);
87 
88  // Broadcast row offsets
89  if (_format_flag == FormatFlag::ROWS)
90  {
91  _communicator->broadcast(size_offsets);
92  _row_offsets.resize(size_offsets);
93  _communicator->broadcast(_row_offsets);
94  }
95  }
96 
97  // Resize the internal storage
98  _data.resize(n_cols);
99 
100  // Process "row" formatted data
101  if (_format_flag == FormatFlag::ROWS)
102  {
103  typename std::vector<T>::iterator start = raw.begin();
104  for (std::size_t j = 0; j < n_cols; ++j)
105  {
106  _data[j] = std::vector<T>(start, start + _row_offsets[j]);
107  std::advance(start, _row_offsets[j]);
108  }
109  }
110 
111  // Process "column" formatted data
112  else
113  {
114  mooseAssert(raw.size() % n_cols == 0,
115  "The raw data is not evenly divisible by the number of columns.");
116  const std::size_t n_rows = raw.size() / n_cols;
117  for (std::size_t j = 0; j < n_cols; ++j)
118  {
119  _data[j].resize(n_rows);
120  for (std::size_t i = 0; i < n_rows; ++i)
121  _data[j][i] = raw[i * n_cols + j];
122  }
123  }
124 }
125 
126 template <typename T>
127 std::size_t
129 {
130  std::size_t n_entries = 0;
131  for (std::size_t i = 0; i < _data.size(); ++i)
132  n_entries += _data[i].size();
133 
134  return n_entries;
135 }
136 
137 template <typename T>
138 const std::vector<std::string> &
140 {
141  return _names;
142 }
143 
144 template <typename T>
145 const std::vector<std::vector<T>> &
147 {
148  return _data;
149 }
150 
151 template <>
152 const std::vector<Point>
154 {
155  std::vector<Point> point_data;
156 
157  for (std::size_t i = 0; i < _data.size(); ++i)
158  {
159  Point point;
160 
161  // Other checks in this class ensure that each data entry has the same number of values;
162  // here we just need to check that each data set has LIBMESH_DIM entries (which we could do by
163  // equivalently checking that the total number of entries is divisibly by LIBMESH_DIM
164  // _and_ one of these data sets has LIBMESH_DIM entries (consider the fringe case where
165  // LIBMESH_DIM is 3, but you accidentally put a point file like
166  // 0 0
167  // 1 0
168  // 2 0
169  // where each point is the same length _and_ the total points is still divisible by 3.
170  // This check here is more exact.
171  if (_data.at(i).size() != LIBMESH_DIM)
172  mooseError("Each point in file ", _filename, " must have ", LIBMESH_DIM, " entries");
173 
174  for (std::size_t j = 0; j < LIBMESH_DIM; ++j)
175  point(j) = _data.at(i).at(j);
176 
177  point_data.push_back(point);
178  }
179 
180  return point_data;
181 }
182 
183 template <typename T>
184 const std::vector<Point>
186 {
187  mooseError("Not implemented");
188 }
189 
190 template <typename T>
191 const std::vector<T> &
192 DelimitedFileReaderTempl<T>::getData(const std::string & name) const
193 {
194  const auto it = find(_names.begin(), _names.end(), name);
195  if (it == _names.end())
196  mooseError("Could not find '", name, "' in header of file ", _filename, ".");
197  return _data[std::distance(_names.begin(), it)];
198 }
199 
200 template <typename T>
201 const std::vector<T> &
202 DelimitedFileReaderTempl<T>::getData(std::size_t index) const
203 {
204  if (index >= _data.size())
205  mooseError("The supplied index ",
206  index,
207  " is out-of-range for the available data in file '",
208  _filename,
209  "' which contains ",
210  _data.size(),
211  " items.");
212  return _data[index];
213 }
214 
215 template <typename T>
216 void
217 DelimitedFileReaderTempl<T>::readColumnData(std::ifstream & stream_data, std::vector<T> & output)
218 {
219  // Local storage for the data being read
220  std::string line;
221  std::vector<T> row;
222 
223  // Keep track of the line number for error reporting
224  unsigned int count = 0;
225 
226  // Number of columns expected based on the first row of the data
227  std::size_t n_cols = INVALID_SIZE;
228 
229  // Read the lines
230  while (std::getline(stream_data, line))
231  {
232  // Increment line counter and clear any tokenized data
233  count++;
234  row.clear();
235 
236  // Ignore empty and/or comment lines, if applicable
237  if (preprocessLine(line, count))
238  continue;
239 
240  // Read header, if the header exists and the column names do not exist.
241  if (_names.empty() && header(line))
242  {
243  MooseUtils::tokenize(line, _names, 1, delimiter(line));
244  for (std::string & str : _names)
245  str = MooseUtils::trim(str);
246  continue;
247  }
248 
249  // Separate the row and error if it fails
250  processLine(line, row, count);
251 
252  // Set the number of columns
253  if (n_cols == INVALID_SIZE)
254  n_cols = row.size();
255 
256  // Check number of columns
257  if (row.size() != n_cols)
258  mooseError("The number of columns read (",
259  row.size(),
260  ") does not match the number of columns expected (",
261  n_cols,
262  ") based on the first row of the file when reading row ",
263  count,
264  " in file ",
265  _filename,
266  ".");
267 
268  // Append data
269  output.insert(output.end(), row.begin(), row.end());
270  }
271 
272  // If the names have not been assigned, create the default names
273  if (_names.empty())
274  {
275  _names.resize(n_cols);
276  int padding = MooseUtils::numDigits(n_cols);
277  for (std::size_t i = 0; i < n_cols; ++i)
278  {
279  std::stringstream ss;
280  ss << "column_" << std::setw(padding) << std::setfill('0') << i;
281  _names[i] = ss.str();
282  }
283  }
284 }
285 
286 template <typename T>
287 void
288 DelimitedFileReaderTempl<T>::readRowData(std::ifstream & stream_data, std::vector<T> & output)
289 {
290  // Local storage for the data being read
291  std::string line;
292  std::vector<T> row;
293  unsigned int linenum = 0; // line number in file
294 
295  // Clear existing data
296  _names.clear();
297  _row_offsets.clear();
298 
299  // Read the lines
300  while (std::getline(stream_data, line))
301  {
302  // Increment line counter and clear any tokenized data
303  linenum++;
304  row.clear();
305 
306  // Ignore empty lines
307  if (preprocessLine(line, linenum))
308  continue;
309 
310  if (header(line))
311  {
312  std::size_t index = line.find_first_of(delimiter(line));
313  _names.push_back(line.substr(0, index));
314  line = line.substr(index);
315  }
316 
317  // Separate the row and error if it fails
318  processLine(line, row, linenum);
319 
320  // Store row offsets to allow for un-even rows
321  _row_offsets.push_back(row.size());
322 
323  // Append data
324  output.insert(output.end(), row.begin(), row.end());
325  }
326 
327  // Assign row names if not provided via header
328  if (_names.empty())
329  {
330  int padding = MooseUtils::numDigits(_row_offsets.size());
331  for (std::size_t i = 0; i < _row_offsets.size(); ++i)
332  {
333  std::stringstream ss;
334  ss << "row_" << std::setw(padding) << std::setfill('0') << i;
335  _names.push_back(ss.str());
336  }
337  }
338 }
339 
340 template <typename T>
341 bool
342 DelimitedFileReaderTempl<T>::preprocessLine(std::string & line, const unsigned int & num)
343 {
344  // Handle row comments
345  std::size_t index = _row_comment.empty() ? line.size() : line.find_first_of(_row_comment);
346  line = MooseUtils::trim(line.substr(0, index));
347 
348  // Ignore empty lines
349  if (line.empty())
350  {
351  if (_ignore_empty_lines)
352  return true;
353  else
354  mooseError("Failed to read line ", num, " in file ", _filename, ". The line is empty.");
355  }
356  return false;
357 }
358 
359 template <typename T>
360 void
362  std::vector<T> & row,
363  const unsigned int & num)
364 {
365  std::string line_copy = line;
366  // Convert booleans to numeric
367  if constexpr (!std::is_same_v<T, std::string>)
368  {
369  line_copy = MooseUtils::toLower(line_copy);
370  line_copy = MooseUtils::replaceAll(line_copy, "true", "1");
371  line_copy = MooseUtils::replaceAll(line_copy, "false", "0");
372  }
373 
374  // Separate the row and error if it fails
375  bool status = MooseUtils::tokenizeAndConvert<T>(line_copy, row, delimiter(line));
376  if (!status)
377  mooseError("Failed to convert a delimited data into double when reading line ",
378  num,
379  " in file ",
380  _filename,
381  ".\n LINE ",
382  num,
383  ": ",
384  line);
385 }
386 
387 template <typename T>
388 const std::string &
389 DelimitedFileReaderTempl<T>::delimiter(const std::string & line)
390 {
391  if (_delimiter.empty())
392  {
393  if (line.find(",") != std::string::npos)
394  _delimiter = ",";
395  else if (line.find("\t") != std::string::npos)
396  _delimiter = "\t";
397  else
398  _delimiter = " ";
399  }
400  return _delimiter;
401 }
402 
403 template <typename T>
404 bool
405 DelimitedFileReaderTempl<T>::header(const std::string & line)
406 {
407  switch (_header_flag)
408  {
409  case HeaderFlag::OFF:
410  return false;
411  case HeaderFlag::ON:
412  return true;
413  default:
414 
415  // Attempt to convert the line, if it fails assume it is a header
416  std::vector<double> row;
417  bool contains_alpha = !MooseUtils::tokenizeAndConvert<double>(line, row, delimiter(line));
418 
419  // Based on auto detect set the flag to TRUE|FALSE to short-circuit this check for each line
420  // in the case of row data.
421  _header_flag = contains_alpha ? HeaderFlag::ON : HeaderFlag::OFF;
422  return contains_alpha;
423  }
424 }
425 
426 template class DelimitedFileReaderTempl<Real>;
428 } // MooseUtils
std::string name(const ElemQuality q)
std::string toLower(const std::string &name)
Convert supplied string to lower case.
void tokenize(const std::string &str, std::vector< T > &elements, unsigned int min_len=1, const std::string &delims="/")
This function will split the passed in string on a set of delimiters appending the substrings to the ...
void mooseError(Args &&... args)
Emit an error message with the given stringified, concatenated args and terminate the application...
Definition: MooseError.h:302
const std::vector< Point > getDataAsPoints() const
Get the data in Point format.
DelimitedFileReaderTempl(const std::string &filename, const libMesh::Parallel::Communicator *comm=nullptr)
void readRowData(std::ifstream &stream_data, std::vector< T > &output)
void readColumnData(std::ifstream &stream_data, std::vector< T > &output)
Read the numeric data as rows or columns into a single vector.
MPI_Status status
bool checkFileReadable(const std::string &filename, bool check_line_endings=false, bool throw_on_unreadable=true, bool check_for_git_lfs_pointer=true)
Checks to see if a file is readable (exists and permissions)
Definition: MooseUtils.C:250
std::string trim(const std::string &str, const std::string &white_space=" \\\)
Standard scripting language trim function.
std::size_t numEntries() const
Get the total number of entries in the file.
void read()
Perform the actual data reading.
bool header(const std::string &line)
Return the header flag, if it is set to AUTO attempt to determine if a header exists in line...
const std::vector< std::vector< T > > & getData() const
Return the rows/columns of data.
charT const * delimiter
Definition: InfixIterator.h:34
const std::string & delimiter(const std::string &line)
Determine the delimiter.
bool preprocessLine(std::string &line, const unsigned int &num)
Check the content of the line and if it should be skipped.
int numDigits(const T &num)
Return the number of digits for a number.
Definition: MooseUtils.h:948
Utility class for reading delimited data (e.g., CSV data).
void processLine(const std::string &line, std::vector< T > &row, const unsigned int &num)
Populate supplied vector with content from line.
const std::vector< std::string > & getNames() const
Return the column/row names.
std::string replaceAll(std::string str, const std::string &from, const std::string &to)
Replaces all occurrences of from in str with to and returns the result.
Definition: MooseUtils.C:141