Line data Source code
1 : // The libMesh Finite Element Library.
2 : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 :
4 : // This library is free software; you can redistribute it and/or
5 : // modify it under the terms of the GNU Lesser General Public
6 : // License as published by the Free Software Foundation; either
7 : // version 2.1 of the License, or (at your option) any later version.
8 :
9 : // This library is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 : // Lesser General Public License for more details.
13 :
14 : // You should have received a copy of the GNU Lesser General Public
15 : // License along with this library; if not, write to the Free Software
16 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :
18 :
19 : #include "libmesh/libmesh_common.h"
20 : #include "libmesh/parallel.h"
21 :
22 :
23 : // Local Include
24 : #include "libmesh/libmesh_version.h"
25 : #include "libmesh/system.h"
26 : #include "libmesh/mesh_base.h"
27 : #include "libmesh/elem.h"
28 : #include "libmesh/xdr_cxx.h"
29 : #include "libmesh/numeric_vector.h"
30 : #include "libmesh/dof_map.h"
31 :
32 :
33 : // C++ Includes
34 : #include <memory>
35 : #include <numeric> // for std::partial_sum
36 : #include <set>
37 :
38 :
39 : // Anonymous namespace for implementation details.
40 : namespace {
41 :
42 : using libMesh::DofObject;
43 : using libMesh::Number;
44 : using libMesh::cast_int;
45 :
46 : // Comments:
47 : // ---------
48 : // - The max_io_blksize governs how many nodes or elements will be
49 : // treated as a single block when performing parallel IO on large
50 : // systems.
51 : // - This parameter only loosely affects the size of the actual IO
52 : // buffer as this depends on the number of components a given
53 : // variable has for the nodes/elements in the block.
54 : // - When reading/writing each processor uses an ID map which is
55 : // 3*io_blksize*sizeof(dof_id_type) bytes long, so with unsigned int
56 : // and // io_blksize=256000 we would expect that buffer alone to be
57 : // ~3Mb.
58 : // - In general, an increase in max_io_blksize should increase the
59 : // efficiency of large parallel read/writes by reducing the number
60 : // of MPI messages at the expense of memory.
61 : // - If the library exhausts memory during IO you might reduce this
62 : // parameter.
63 :
64 : const std::size_t max_io_blksize = 256000;
65 :
66 : /**
67 : *
68 : */
69 : template <typename InValType>
70 : class ThreadedIO
71 : {
72 : private:
73 : libMesh::Xdr & _io;
74 : std::vector<InValType> & _data;
75 :
76 : public:
77 45944 : ThreadedIO (libMesh::Xdr & io, std::vector<InValType> & data) :
78 37588 : _io(io),
79 43562 : _data(data)
80 4178 : {}
81 :
82 41766 : void operator()()
83 : {
84 41766 : if (_data.empty()) return;
85 20905 : _io.data_stream (_data.data(), cast_int<unsigned int>(_data.size()));
86 : }
87 : };
88 : }
89 :
90 :
91 : namespace libMesh
92 : {
93 :
94 :
95 : // ------------------------------------------------------------
96 : // System class implementation
void System::read_header (Xdr & io,
                          std::string_view version,
                          const bool read_header_in,
                          const bool read_additional_data,
                          const bool read_legacy_format)
{
  // This method implements the input of a
  // System object, embedded in the output of
  // an EquationSystems<T_sys>. This warrants some
  // documentation. The output file essentially
  // consists of 5 sections:
  //
  // for this system
  //
  // 5.) The number of variables in the system (unsigned int)
  //
  // for each variable in the system
  //
  // 6.) The name of the variable (string)
  //
  // 6.1.) Variable subdomains
  //
  // 7.) Combined in an FEType:
  // - The approximation order(s) of the variable
  // (Order Enum, cast to int/s)
  // - The finite element family/ies of the variable
  // (FEFamily Enum, cast to int/s)
  //
  // end variable loop
  //
  // 8.) The number of additional vectors (unsigned int),
  //
  // for each additional vector in the system object
  //
  // 9.) the name of the additional vector (string)
  //
  // end system
  //
  // NOTE: only processor 0 actually touches the stream; every value
  // it reads is immediately broadcast so that all ranks agree on the
  // header contents.  The broadcasts are collective, so the exact
  // read/broadcast order below must stay in sync on every rank.
  libmesh_assert (io.reading());

  // Possibly clear data structures and start from scratch.
  if (read_header_in)
    this->clear ();

  // Figure out if we need to read infinite element information.
  // This will be true if the version string contains " with infinite elements"
  const bool read_ifem_info =
    Utility::contains(version, " with infinite elements") ||
    libMesh::on_command_line ("--read-ifem-systems");


  {
    // 5.)
    // Read the number of variables in the system
    unsigned int nv=0;
    if (this->processor_id() == 0)
      io.data (nv);
    this->comm().broadcast(nv);

    // Record the file's variable ordering; entries are filled in as
    // each variable is read below.
    _written_var_indices.clear();
    _written_var_indices.resize(nv, 0);

    for (unsigned int var=0; var<nv; var++)
      {
        // 6.)
        // Read the name of the var-th variable
        std::string var_name;
        if (this->processor_id() == 0)
          io.data (var_name);
        this->comm().broadcast(var_name);

        // 6.1.) Variable subdomain restriction (only present in file
        // format 0.7.2 and newer; an empty set means "all subdomains").
        std::set<subdomain_id_type> domains;
        if (io.version() >= LIBMESH_VERSION_ID(0,7,2))
          {
            std::vector<subdomain_id_type> domain_array;
            if (this->processor_id() == 0)
              io.data (domain_array);
            for (const auto & id : domain_array)
              domains.insert(id);
          }
        this->comm().broadcast(domains);

        // 7.)
        // Read the approximation order(s) of the var-th variable
        int order=0;
        if (this->processor_id() == 0)
          io.data (order);
        this->comm().broadcast(order);


        // do the same for infinite element radial_order
        int rad_order=0;
        if (read_ifem_info)
          {
            if (this->processor_id() == 0)
              io.data(rad_order);
            this->comm().broadcast(rad_order);
          }

        // Read the finite element type of the var-th variable
        int fam=0;
        if (this->processor_id() == 0)
          io.data (fam);
        this->comm().broadcast(fam);
        FEType type;
        type.order = static_cast<Order>(order);
        type.family = static_cast<FEFamily>(fam);

        // Check for incompatibilities. The shape function indexing was
        // changed for the monomial and xyz finite element families to
        // simplify extension to arbitrary p. The consequence is that
        // old restart files will not be read correctly. This is expected
        // to be an unlikely occurrence, but catch it anyway.
        if (read_legacy_format)
          if ((type.family == MONOMIAL || type.family == XYZ) &&
              ((type.order.get_order() > 2 && this->get_mesh().mesh_dimension() == 2) ||
               (type.order.get_order() > 1 && this->get_mesh().mesh_dimension() == 3)))
            {
              libmesh_here();
              libMesh::out << "*****************************************************************\n"
                           << "* WARNING: reading a potentially incompatible restart file!!! *\n"
                           << "* contact [email protected] for more details *\n"
                           << "*****************************************************************"
                           << std::endl;
            }

        // Read additional information for infinite elements
        int radial_fam=0;
        int i_map=0;
        if (read_ifem_info)
          {
            if (this->processor_id() == 0)
              io.data (radial_fam);
            this->comm().broadcast(radial_fam);
            if (this->processor_id() == 0)
              io.data (i_map);
            this->comm().broadcast(i_map);
          }

#ifdef LIBMESH_ENABLE_INFINITE_ELEMENTS

        // With infinite elements disabled these values were still read
        // (to keep the stream position correct) but are discarded.
        type.radial_order = static_cast<Order>(rad_order);
        type.radial_family = static_cast<FEFamily>(radial_fam);
        type.inf_map = static_cast<InfMapType>(i_map);

#endif

        // Either create the variable (when building the system from the
        // header) or look up the index of the already-existing variable
        // with this name, so file data can be mapped to it later.
        if (read_header_in)
          {
            if (domains.empty())
              _written_var_indices[var] = this->add_variable (var_name, type);
            else
              _written_var_indices[var] = this->add_variable (var_name, type, &domains);
          }
        else
          _written_var_indices[var] = this->variable_number(var_name);
      }
  }

  // 8.)
  // Read the number of additional vectors.
  unsigned int nvecs=0;
  if (this->processor_id() == 0)
    io.data (nvecs);
  this->comm().broadcast(nvecs);

  // If nvecs > 0, this means that write_additional_data
  // was true when this file was written. We will need to
  // make use of this fact later.
  this->_additional_data_written = nvecs;

  for (unsigned int vec=0; vec<nvecs; vec++)
    {
      // 9.)
      // Read the name of the vec-th additional vector
      std::string vec_name;
      if (this->processor_id() == 0)
        io.data (vec_name);
      this->comm().broadcast(vec_name);
      if (io.version() >= LIBMESH_VERSION_ID(1,7,0))
        {
          // Newer files also store each vector's projection flag and
          // parallel type.
          int vec_projection = 0;
          if (this->processor_id() == 0)
            io.data (vec_projection);
          this->comm().broadcast(vec_projection);
          int vec_type;
          if (this->processor_id() == 0)
            io.data (vec_type);
          this->comm().broadcast(vec_type);

          if (read_additional_data)
            this->add_vector(vec_name, bool(vec_projection), ParallelType(vec_type));
        }
      else if (read_additional_data)
        // Systems now can handle adding post-initialization vectors
        // libmesh_assert(this->_can_add_vectors);
        // Some systems may have added their own vectors already
        // libmesh_assert_equal_to (this->_vectors.count(vec_name), 0);
        this->add_vector(vec_name);
    }
}
298 :
299 :
300 :
301 : #ifdef LIBMESH_ENABLE_DEPRECATED
void System::read_legacy_data (Xdr & io,
                               const bool read_additional_data)
{
  libmesh_deprecated();

  // This method implements the input of the vectors
  // contained in this System object, embedded in the
  // output of an EquationSystems<T_sys>.
  //
  // 10.) The global solution vector, re-ordered to be node-major
  // (More on this later.)
  //
  // for each additional vector in the object
  //
  // 11.) The global additional vector, re-ordered to be
  // node-major (More on this later.)
  libmesh_assert (io.reading());

  // directly-read and reordered buffers, declared here for reuse
  // without heap juggling.
  std::vector<Number> global_vector;
  std::vector<Number> reordered_vector;

  // Helper: broadcast the globally-serialized, node-major data that
  // processor 0 just read into global_vector, permute it into this
  // system's dof ordering, and assign it to vec.  The running cursor
  // cnt must consume global_vector in exactly the order the legacy
  // writer produced it (nodes first, then active elements, per
  // written variable).
  auto reorder_vector_into =
    [this, &global_vector, &reordered_vector]
    (NumericVector<Number> & vec)
  {
    this->comm().broadcast(global_vector);

    // If we have been reading multiple vectors, they should all be
    // the same size.
    libmesh_assert (reordered_vector.empty() ||
                    reordered_vector.size() == global_vector.size());

    // Remember that the stored vector is node-major.
    // We need to put it into whatever application-specific
    // ordering we may have using the dof_map.
    reordered_vector.resize(global_vector.size());

    //libMesh::out << "global_vector.size()=" << global_vector.size() << std::endl;
    //libMesh::out << "this->n_dofs()=" << this->n_dofs() << std::endl;

    libmesh_assert_equal_to (global_vector.size(), this->n_dofs());

    dof_id_type cnt=0;

    const unsigned int sys = this->number();
    const unsigned int nv = cast_int<unsigned int>
      (this->_written_var_indices.size());
    libmesh_assert_less_equal (nv, this->n_vars());

    for (unsigned int data_var=0; data_var<nv; data_var++)
      {
        // Map from the file's variable index to ours.
        const unsigned int var = _written_var_indices[data_var];

        // First reorder the nodal DOF values
        for (auto & node : this->get_mesh().node_ptr_range())
          for (auto index : make_range(node->n_comp(sys,var)))
            {
              libmesh_assert_not_equal_to (node->dof_number(sys, var, index),
                                           DofObject::invalid_id);

              libmesh_assert_less (cnt, global_vector.size());

              reordered_vector[node->dof_number(sys, var, index)] =
                global_vector[cnt++];
            }

        // Then reorder the element DOF values
        for (auto & elem : this->get_mesh().active_element_ptr_range())
          for (auto index : make_range(elem->n_comp(sys,var)))
            {
              libmesh_assert_not_equal_to (elem->dof_number(sys, var, index),
                                           DofObject::invalid_id);

              libmesh_assert_less (cnt, global_vector.size());

              reordered_vector[elem->dof_number(sys, var, index)] =
                global_vector[cnt++];
            }
      }

    // use the overloaded operator=(std::vector) to assign the values
    vec = reordered_vector;
  };

  // 10.)
  // Read and set the solution vector
  if (this->processor_id() == 0)
    io.data (global_vector);
  reorder_vector_into(*(this->solution));

  // For each additional vector, simply go through the list.
  // ONLY attempt to do this IF additional data was actually
  // written to the file for this system (controlled by the
  // _additional_data_written flag).
  if (this->_additional_data_written)
    {
      const std::size_t nvecs = this->_vectors.size();

      // If the number of additional vectors written is non-zero, and
      // the number of additional vectors we have is non-zero, and
      // they don't match, then something is wrong and we can't be
      // sure we're reading data into the correct places.
      if (read_additional_data && nvecs &&
          nvecs != this->_additional_data_written)
        libmesh_error_msg
          ("Additional vectors in file do not match system");

      auto pos = this->_vectors.begin();

      for (std::size_t i = 0; i != this->_additional_data_written; ++i)
        {
          // 11.)
          // Read the values of the vec-th additional vector.
          // Prior do _not_ clear, but fill with zero, since the
          // additional vectors _have_ to have the same size
          // as the solution vector
          std::fill (global_vector.begin(), global_vector.end(), libMesh::zero);

          if (this->processor_id() == 0)
            io.data (global_vector);

          // If read_additional_data==true and we have additional vectors,
          // then we will keep this vector data; otherwise we are going to
          // throw it away.
          if (read_additional_data && nvecs)
            {
              std::fill (reordered_vector.begin(),
                         reordered_vector.end(),
                         libMesh::zero);

              reorder_vector_into(*(pos->second));
            }

          // If we've got vectors then we need to be iterating through
          // those too
          if (pos != this->_vectors.end())
            ++pos;
        }
    } // end if (_additional_data_written)
}
444 : #endif
445 :
446 :
447 :
template <typename InValType>
void System::read_parallel_data (Xdr & io,
                                 const bool read_additional_data)
{
  /**
   * This method implements the input of the vectors
   * contained in this System object, embedded in the
   * output of an EquationSystems<T_sys>.
   *
   * 9.) The global solution vector, re-ordered to be node-major
   * (More on this later.)
   *
   * for each additional vector in the object
   *
   * 10.) The global additional vector, re-ordered to be
   * node-major (More on this later.)
   *
   * Note that the actual IO is handled through the Xdr class
   * (to be renamed later?) which provides a uniform interface to
   * both the XDR (eXternal Data Representation) interface and standard
   * ASCII output.  Thus this one section of code will read XDR or ASCII
   * files with no changes.
   */
  // PerfLog pl("IO Performance",false);
  // pl.push("read_parallel_data");

  // Running count of values read; only used by the commented-out
  // performance reporting at the bottom of this function.
  dof_id_type total_read_size = 0;

  libmesh_assert (io.reading());
  libmesh_assert (io.is_open());

  // build the ordered nodes and element maps.
  // when writing/reading parallel files we need to iterate
  // over our nodes/elements in order of increasing global id().
  // however, this is not guaranteed to be ordering we obtain
  // by using the node_iterators/element_iterators directly.
  // so build a set, sorted by id(), that provides the ordering.
  // further, for memory economy build the set but then transfer
  // its contents to vectors, which will be sorted.
  std::vector<const DofObject *> ordered_nodes, ordered_elements;
  {
    std::set<const DofObject *, CompareDofObjectsByID>
      ordered_nodes_set (this->get_mesh().local_nodes_begin(),
                         this->get_mesh().local_nodes_end());

    ordered_nodes.insert(ordered_nodes.end(),
                         ordered_nodes_set.begin(),
                         ordered_nodes_set.end());
  }
  {
    std::set<const DofObject *, CompareDofObjectsByID>
      ordered_elements_set (this->get_mesh().local_elements_begin(),
                            this->get_mesh().local_elements_end());

    ordered_elements.insert(ordered_elements.end(),
                            ordered_elements_set.begin(),
                            ordered_elements_set.end());
  }

  // std::vector<Number> io_buffer;
  std::vector<InValType> io_buffer;

  // 9.)
  //
  // Actually read the solution components
  // for the ith system to disk
  io.data(io_buffer);

  total_read_size += cast_int<dof_id_type>(io_buffer.size());

  const unsigned int sys_num = this->number();
  const unsigned int nv = cast_int<unsigned int>
    (this->_written_var_indices.size());
  libmesh_assert_less_equal (nv, this->n_vars());

  // Cursor into io_buffer; its traversal order below must exactly
  // mirror the order used when the file was written.
  dof_id_type cnt=0;

  // Loop over each non-SCALAR variable and each node, and read out the value.
  for (unsigned int data_var=0; data_var<nv; data_var++)
    {
      // Map from the file's variable index to ours.
      const unsigned int var = _written_var_indices[data_var];
      if (this->variable(var).type().family != SCALAR)
        {
          // First read the node DOF values
          for (const auto & node : ordered_nodes)
            for (auto comp : make_range(node->n_comp(sys_num,var)))
              {
                libmesh_assert_not_equal_to (node->dof_number(sys_num, var, comp),
                                             DofObject::invalid_id);
                libmesh_assert_less (cnt, io_buffer.size());
                this->solution->set(node->dof_number(sys_num, var, comp), io_buffer[cnt++]);
              }

          // Then read the element DOF values
          for (const auto & elem : ordered_elements)
            for (auto comp : make_range(elem->n_comp(sys_num,var)))
              {
                libmesh_assert_not_equal_to (elem->dof_number(sys_num, var, comp),
                                             DofObject::invalid_id);
                libmesh_assert_less (cnt, io_buffer.size());
                this->solution->set(elem->dof_number(sys_num, var, comp), io_buffer[cnt++]);
              }
        }
    }

  // Finally, read the SCALAR variables on the last processor
  // (SCALAR dofs are stored there by convention in libMesh).
  for (unsigned int data_var=0; data_var<nv; data_var++)
    {
      const unsigned int var = _written_var_indices[data_var];
      if (this->variable(var).type().family == SCALAR)
        {
          if (this->processor_id() == (this->n_processors()-1))
            {
              const DofMap & dof_map = this->get_dof_map();
              std::vector<dof_id_type> SCALAR_dofs;
              dof_map.SCALAR_dof_indices(SCALAR_dofs, var);

              for (auto dof : SCALAR_dofs)
                this->solution->set(dof, io_buffer[cnt++]);
            }
        }
    }

  // And we're done setting solution entries
  this->solution->close();

  // For each additional vector, simply go through the list.
  // ONLY attempt to do this IF additional data was actually
  // written to the file for this system (controlled by the
  // _additional_data_written flag).
  if (this->_additional_data_written)
    {
      const std::size_t nvecs = this->_vectors.size();

      // If the number of additional vectors written is non-zero, and
      // the number of additional vectors we have is non-zero, and
      // they don't match, then something is wrong and we can't be
      // sure we're reading data into the correct places.
      if (read_additional_data && nvecs &&
          nvecs != this->_additional_data_written)
        libmesh_error_msg
          ("Additional vectors in file do not match system");

      auto pos = _vectors.begin();

      for (std::size_t i = 0; i != this->_additional_data_written; ++i)
        {
          // Restart the buffer cursor for each stored vector.
          cnt=0;
          io_buffer.clear();

          // 10.)
          //
          // Actually read the additional vector components
          // for the ith system from disk.  Even when we are going to
          // discard the data we must read it, to keep the stream
          // position correct for subsequent vectors.
          io.data(io_buffer);

          total_read_size += cast_int<dof_id_type>(io_buffer.size());

          // If read_additional_data==true and we have additional vectors,
          // then we will keep this vector data; otherwise we are going to
          // throw it away.
          if (read_additional_data && nvecs)
            {
              // Loop over each non-SCALAR variable and each node, and read out the value.
              for (unsigned int data_var=0; data_var<nv; data_var++)
                {
                  const unsigned int var = _written_var_indices[data_var];
                  if (this->variable(var).type().family != SCALAR)
                    {
                      // First read the node DOF values
                      for (const auto & node : ordered_nodes)
                        for (auto comp : make_range(node->n_comp(sys_num,var)))
                          {
                            libmesh_assert_not_equal_to (node->dof_number(sys_num, var, comp),
                                                         DofObject::invalid_id);
                            libmesh_assert_less (cnt, io_buffer.size());
                            pos->second->set(node->dof_number(sys_num, var, comp), io_buffer[cnt++]);
                          }

                      // Then read the element DOF values
                      for (const auto & elem : ordered_elements)
                        for (auto comp : make_range(elem->n_comp(sys_num,var)))
                          {
                            libmesh_assert_not_equal_to (elem->dof_number(sys_num, var, comp),
                                                         DofObject::invalid_id);
                            libmesh_assert_less (cnt, io_buffer.size());
                            pos->second->set(elem->dof_number(sys_num, var, comp), io_buffer[cnt++]);
                          }
                    }
                }

              // Finally, read the SCALAR variables on the last processor
              for (unsigned int data_var=0; data_var<nv; data_var++)
                {
                  const unsigned int var = _written_var_indices[data_var];
                  if (this->variable(var).type().family == SCALAR)
                    {
                      if (this->processor_id() == (this->n_processors()-1))
                        {
                          const DofMap & dof_map = this->get_dof_map();
                          std::vector<dof_id_type> SCALAR_dofs;
                          dof_map.SCALAR_dof_indices(SCALAR_dofs, var);

                          for (auto dof : SCALAR_dofs)
                            pos->second->set(dof, io_buffer[cnt++]);
                        }
                    }
                }

              // And we're done setting entries for this variable
              pos->second->close();
            }

          // If we've got vectors then we need to be iterating through
          // those too
          if (pos != this->_vectors.end())
            ++pos;
        }
    }

  // const Real
  // dt = pl.get_elapsed_time(),
  // rate = total_read_size*sizeof(Number)/dt;

  // libMesh::err << "Read " << total_read_size << " \"Number\" values\n"
  // << " Elapsed time = " << dt << '\n'
  // << " Rate = " << rate/1.e6 << "(MB/sec)\n\n";

  // pl.pop("read_parallel_data");
}
677 :
678 :
679 : template <typename InValType>
680 10929 : void System::read_serialized_data (Xdr & io,
681 : const bool read_additional_data)
682 : {
683 : // This method implements the input of the vectors
684 : // contained in this System object, embedded in the
685 : // output of an EquationSystems<T_sys>.
686 : //
687 : // 10.) The global solution vector, re-ordered to be node-major
688 : // (More on this later.)
689 : //
690 : // for each additional vector in the object
691 : //
692 : // 11.) The global additional vector, re-ordered to be
693 : // node-major (More on this later.)
694 314 : parallel_object_only();
695 628 : std::string comment;
696 :
697 : // PerfLog pl("IO Performance",false);
698 : // pl.push("read_serialized_data");
699 : // std::size_t total_read_size = 0;
700 :
701 : // 10.)
702 : // Read the global solution vector
703 : {
704 : // total_read_size +=
705 10929 : this->read_serialized_vector<InValType>(io, this->solution.get());
706 :
707 : // get the comment
708 11243 : if (this->processor_id() == 0)
709 1723 : io.comment (comment);
710 : }
711 :
712 : // 11.)
713 : // Only read additional vectors if data is available, and only use
714 : // that data to fill our vectors if the user requested it.
715 10929 : if (this->_additional_data_written)
716 : {
717 304 : const std::size_t nvecs = this->_vectors.size();
718 :
719 : // If the number of additional vectors written is non-zero, and
720 : // the number of additional vectors we have is non-zero, and
721 : // they don't match, then we can't read additional vectors
722 : // and be sure we're reading data into the correct places.
723 10640 : if (read_additional_data && nvecs &&
724 10640 : nvecs != this->_additional_data_written)
725 0 : libmesh_error_msg
726 : ("Additional vectors in file do not match system");
727 :
728 304 : auto pos = _vectors.begin();
729 :
730 82740 : for (std::size_t i = 0; i != this->_additional_data_written; ++i)
731 : {
732 : // Read data, but only put it into a vector if we've been
733 : // asked to and if we have a corresponding vector to read.
734 :
735 : // total_read_size +=
736 8240 : this->read_serialized_vector<InValType>
737 138020 : (io, (read_additional_data && nvecs) ? pos->second.get() : nullptr);
738 :
739 : // get the comment
740 74160 : if (this->processor_id() == 0)
741 11330 : io.comment (comment);
742 :
743 :
744 : // If we've got vectors then we need to be iterating through
745 : // those too
746 72100 : if (pos != this->_vectors.end())
747 2060 : ++pos;
748 : }
749 : }
750 :
751 : // const Real
752 : // dt = pl.get_elapsed_time(),
753 : // rate = total_read_size*sizeof(Number)/dt;
754 :
755 : // libMesh::out << "Read " << total_read_size << " \"Number\" values\n"
756 : // << " Elapsed time = " << dt << '\n'
757 : // << " Rate = " << rate/1.e6 << "(MB/sec)\n\n";
758 :
759 : // pl.pop("read_serialized_data");
760 10929 : }
761 :
762 :
763 :
764 : template <typename iterator_type, typename InValType>
765 166626 : std::size_t System::read_serialized_blocked_dof_objects (const dof_id_type n_objs,
766 : const iterator_type begin,
767 : const iterator_type end,
768 : const InValType ,
769 : Xdr & io,
770 : const std::vector<NumericVector<Number> *> & vecs,
771 : const unsigned int var_to_read) const
772 : {
773 : //-------------------------------------------------------
774 : // General order: (IO format 0.7.4 & greater)
775 : //
776 : // for (objects ...)
777 : // for (vecs ....)
778 : // for (vars ....)
779 : // for (comps ...)
780 : //
781 : // where objects are nodes or elements, sorted to be
782 : // partition independent,
783 : // vecs are one or more *identically distributed* solution
784 : // coefficient vectors, vars are one or more variables
785 : // to write, and comps are all the components for said
786 : // vars on the object.
787 :
788 : // variables to read. Unless specified otherwise, defaults to _written_var_indices.
789 171390 : std::vector<unsigned int> vars_to_read (_written_var_indices);
790 :
791 166626 : if (var_to_read != libMesh::invalid_uint)
792 : {
793 0 : vars_to_read.clear();
794 0 : vars_to_read.push_back(var_to_read);
795 : }
796 :
797 : const unsigned int
798 9528 : sys_num = this->number(),
799 9528 : num_vecs = cast_int<unsigned int>(vecs.size());
800 : const dof_id_type
801 171406 : io_blksize = cast_int<dof_id_type>(std::min(max_io_blksize, static_cast<std::size_t>(n_objs))),
802 166626 : num_blks = cast_int<unsigned int>(std::ceil(static_cast<double>(n_objs)/
803 157098 : static_cast<double>(io_blksize)));
804 :
805 4764 : libmesh_assert_less_equal (_written_var_indices.size(), this->n_vars());
806 :
807 4764 : std::size_t n_read_values=0;
808 :
809 176154 : std::vector<std::vector<dof_id_type>> xfer_ids(num_blks); // The global IDs and # of components for the local objects in all blocks
810 176154 : std::vector<std::vector<Number>> recv_vals(num_blks); // The raw values for the local objects in all blocks
811 : std::vector<Parallel::Request>
812 180918 : id_requests(num_blks), val_requests(num_blks);
813 : std::vector<Parallel::MessageTag>
814 180918 : id_tags(num_blks), val_tags(num_blks);
815 :
816 : // ------------------------------------------------------
817 : // First pass - count the number of objects in each block
818 : // traverse all the objects and figure out which block they
819 : // will ultimately live in.
820 : std::vector<std::size_t>
821 171390 : xfer_ids_size (num_blks,0),
822 171390 : recv_vals_size (num_blks,0);
823 :
824 :
825 32608015 : for (iterator_type it=begin; it!=end; ++it)
826 : {
827 : const dof_id_type
828 32441389 : id = (*it)->id(),
829 32441389 : block = id/io_blksize;
830 :
831 2948988 : libmesh_assert_less (block, num_blks);
832 :
833 35390377 : xfer_ids_size[block] += 2; // for each object, we send its id, as well as the total number of components for all variables
834 :
835 2948988 : dof_id_type n_comp_tot=0;
836 65131574 : for (const auto & var : vars_to_read)
837 32690185 : n_comp_tot += (*it)->n_comp(sys_num, var); // for each variable, we will receive the nonzero components
838 :
839 35390377 : recv_vals_size[block] += n_comp_tot*num_vecs;
840 : }
841 :
842 : // knowing the recv_vals_size[block] for each processor allows
843 : // us to sum them and find the global size for each block.
844 171390 : std::vector<std::size_t> tot_vals_size(recv_vals_size);
845 166626 : this->comm().sum (tot_vals_size);
846 :
847 :
848 : //------------------------------------------
849 : // Collect the ids & number of values needed
850 : // for all local objects, binning them into
851 : // 'blocks' that will be sent to processor 0
852 333252 : for (dof_id_type blk=0; blk<num_blks; blk++)
853 : {
854 : // Each processor should build up its transfer buffers for its
855 : // local objects in [first_object,last_object).
856 : const dof_id_type
857 166626 : first_object = blk*io_blksize,
858 166626 : last_object = std::min(cast_int<dof_id_type>((blk+1)*io_blksize), n_objs);
859 :
860 : // convenience
861 9544 : std::vector<dof_id_type> & ids (xfer_ids[blk]);
862 9528 : std::vector<Number> & vals (recv_vals[blk]);
863 :
864 : // we now know the number of values we will store for each block,
865 : // so we can do efficient preallocation
866 171390 : ids.clear(); /**/ ids.reserve (xfer_ids_size[blk]);
867 171390 : vals.resize(recv_vals_size[blk]);
868 :
869 : #ifdef DEBUG
870 9528 : std::unordered_set<dof_id_type> seen_ids;
871 : #endif
872 :
873 171390 : if (recv_vals_size[blk] != 0) // only if there are nonzero values to receive
874 25987249 : for (iterator_type it=begin; it!=end; ++it)
875 : {
876 14263421 : dof_id_type id = (*it)->id();
877 : #ifdef DEBUG
878 : // Any renumbering tricks should not have given us any
879 : // duplicate ids.
880 1296252 : libmesh_assert(!seen_ids.count(id));
881 1296252 : seen_ids.insert(id);
882 : #endif
883 :
884 14263421 : if ((id >= first_object) && // object in [first_object,last_object)
885 1296252 : (id < last_object))
886 : {
887 14263421 : ids.push_back(id);
888 :
889 1296252 : unsigned int n_comp_tot=0;
890 :
891 28822090 : for (const auto & var : vars_to_read)
892 14558669 : n_comp_tot += (*it)->n_comp(sys_num, var);
893 :
894 14263421 : ids.push_back (n_comp_tot*num_vecs);
895 : }
896 : }
897 :
898 : #ifdef LIBMESH_HAVE_MPI
899 166626 : id_tags[blk] = this->comm().get_unique_tag(100*num_blks + blk);
900 166626 : val_tags[blk] = this->comm().get_unique_tag(200*num_blks + blk);
901 :
902 : // nonblocking send the data for this block
903 171390 : this->comm().send (0, ids, id_requests[blk], id_tags[blk]);
904 :
905 : // Go ahead and post the receive too
906 171390 : this->comm().receive (0, vals, val_requests[blk], val_tags[blk]);
907 : #endif
908 : }
909 :
910 : //---------------------------------------------------
911 : // Here processor 0 will read and distribute the data.
912 : // We have to do this block-wise to ensure that we
913 : // do not exhaust memory on processor 0.
914 :
915 : // give these variables scope outside the block to avoid reallocation
916 180918 : std::vector<std::vector<dof_id_type>> recv_ids (this->n_processors());
917 180918 : std::vector<std::vector<Number>> send_vals (this->n_processors());
918 180918 : std::vector<Parallel::Request> reply_requests (this->n_processors());
919 9528 : std::vector<unsigned int> obj_val_offsets; // map to traverse entry-wise rather than processor-wise
920 9528 : std::vector<Number> input_vals; // The input buffer for the current block
921 4764 : std::vector<InValType> input_vals_tmp; // The input buffer for the current block
922 :
923 333252 : for (dof_id_type blk=0; blk<num_blks; blk++)
924 : {
925 : // Each processor should build up its transfer buffers for its
926 : // local objects in [first_object,last_object).
927 : const dof_id_type
928 166626 : first_object = blk*io_blksize,
929 166626 : last_object = std::min(cast_int<dof_id_type>((blk+1)*io_blksize), n_objs),
930 166626 : n_objects_blk = last_object - first_object;
931 :
932 : // Processor 0 has a special job. It needs to gather the requested indices
933 : // in [first_object,last_object) from all processors, read the data from
934 : // disk, and reply
935 171390 : if (this->processor_id() == 0)
936 : {
937 : // we know the input buffer size for this block and can begin reading it now
938 28584 : input_vals.resize(tot_vals_size[blk]);
939 28584 : input_vals_tmp.resize(tot_vals_size[blk]);
940 :
941 : // a ThreadedIO object to perform asynchronous file IO
942 2382 : ThreadedIO<InValType> threaded_io(io, input_vals_tmp);
943 28584 : Threads::Thread async_io(threaded_io);
944 :
945 : // offset array. this will define where each object's values
946 : // map into the actual input_vals buffer. this must get
947 : // 0-initialized because 0-component objects are not actually sent
948 26202 : obj_val_offsets.resize (n_objects_blk); /**/ std::fill (obj_val_offsets.begin(), obj_val_offsets.end(), 0);
949 28584 : recv_vals_size.resize(this->n_processors()); // reuse this to count how many values are going to each processor
950 :
951 : #ifndef NDEBUG
952 2382 : std::size_t n_vals_blk = 0;
953 : #endif
954 :
955 : // loop over all processors and process their index request
956 192828 : for (processor_id_type comm_step=0, tnp=this->n_processors(); comm_step != tnp; ++comm_step)
957 : {
958 : #ifdef LIBMESH_HAVE_MPI
959 : // blocking receive indices for this block, imposing no particular order on processor
960 171390 : Parallel::Status id_status (this->comm().probe (Parallel::any_source, id_tags[blk]));
961 166626 : std::vector<dof_id_type> & ids (recv_ids[id_status.source()]);
962 9528 : std::size_t & n_vals_proc (recv_vals_size[id_status.source()]);
963 171390 : this->comm().receive (id_status.source(), ids, id_tags[blk]);
964 : #else
965 : // straight copy without MPI
966 : std::vector<dof_id_type> & ids (recv_ids[0]);
967 : std::size_t & n_vals_proc (recv_vals_size[0]);
968 : ids = xfer_ids[blk];
969 : #endif
970 :
971 166626 : n_vals_proc = 0;
972 :
973 : // note its possible we didn't receive values for objects in
974 : // this block if they have no components allocated.
975 14430047 : for (std::size_t idx=0, sz=ids.size(); idx<sz; idx+=2)
976 : {
977 : const dof_id_type
978 14263421 : local_idx = ids[idx+0]-first_object,
979 14263421 : n_vals_tot_allvecs = ids[idx+1];
980 :
981 1296252 : libmesh_assert_less (local_idx, n_objects_blk);
982 :
983 14263421 : obj_val_offsets[local_idx] = n_vals_tot_allvecs;
984 14263421 : n_vals_proc += n_vals_tot_allvecs;
985 : }
986 :
987 : #ifndef NDEBUG
988 4764 : n_vals_blk += n_vals_proc;
989 : #endif
990 : }
991 :
992 : // We need the offsets into the input_vals vector for each object.
993 : // fortunately, this is simply the partial sum of the total number
994 : // of components for each object
995 23820 : std::partial_sum(obj_val_offsets.begin(), obj_val_offsets.end(),
996 : obj_val_offsets.begin());
997 :
998 2382 : libmesh_assert_equal_to (n_vals_blk, obj_val_offsets.back());
999 2382 : libmesh_assert_equal_to (n_vals_blk, tot_vals_size[blk]);
1000 :
1001 : // Wait for read completion
1002 26202 : async_io.join();
1003 : // now copy the values back to the main vector for transfer
1004 15619455 : for (auto i_val : index_range(input_vals))
1005 18408431 : input_vals[i_val] = input_vals_tmp[i_val];
1006 :
1007 26202 : n_read_values += input_vals.size();
1008 :
1009 : // pack data replies for each processor
1010 192828 : for (auto proc : make_range(this->n_processors()))
1011 : {
1012 166626 : const std::vector<dof_id_type> & ids (recv_ids[proc]);
1013 9528 : std::vector<Number> & vals (send_vals[proc]);
1014 9528 : const std::size_t & n_vals_proc (recv_vals_size[proc]);
1015 :
1016 166626 : vals.clear(); /**/ vals.reserve(n_vals_proc);
1017 :
1018 14430047 : for (std::size_t idx=0, sz=ids.size(); idx<sz; idx+=2)
1019 : {
1020 : const dof_id_type
1021 14263421 : local_idx = ids[idx+0]-first_object,
1022 15559673 : n_vals_tot_allvecs = ids[idx+1];
1023 :
1024 1296252 : std::vector<Number>::const_iterator in_vals(input_vals.begin());
1025 14263421 : if (local_idx != 0)
1026 15545368 : std::advance (in_vals, obj_val_offsets[local_idx-1]);
1027 :
1028 29856674 : for (unsigned int val=0; val<n_vals_tot_allvecs; val++, ++in_vals)
1029 : {
1030 1407589 : libmesh_assert (in_vals != input_vals.end());
1031 : //libMesh::out << "*in_vals=" << *in_vals << '\n';
1032 15593253 : vals.push_back(*in_vals);
1033 : }
1034 : }
1035 :
1036 : #ifdef LIBMESH_HAVE_MPI
1037 : // send the relevant values to this processor
1038 171390 : this->comm().send (proc, vals, reply_requests[proc], val_tags[blk]);
1039 : #else
1040 : recv_vals[blk] = vals;
1041 : #endif
1042 : }
1043 : } // end processor 0 read/reply
1044 :
1045 : // all processors complete the (already posted) read for this block
1046 : {
1047 9544 : Parallel::wait (val_requests[blk]);
1048 :
1049 9528 : const std::vector<Number> & vals (recv_vals[blk]);
1050 9528 : std::vector<Number>::const_iterator val_it(vals.begin());
1051 :
1052 166626 : if (!recv_vals[blk].empty()) // nonzero values to receive
1053 25987249 : for (iterator_type it=begin; it!=end; ++it)
1054 27230590 : if (((*it)->id() >= first_object) && // object in [first_object,last_object)
1055 14263421 : ((*it)->id() < last_object))
1056 : // unpack & set the values
1057 30321254 : for (auto & vec : vecs)
1058 34524354 : for (const auto & var : vars_to_read)
1059 : {
1060 18466521 : const unsigned int n_comp = (*it)->n_comp(sys_num, var);
1061 :
1062 34059774 : for (unsigned int comp=0; comp<n_comp; comp++, ++val_it)
1063 : {
1064 15593253 : const dof_id_type dof_index = (*it)->dof_number (sys_num, var, comp);
1065 1407589 : libmesh_assert (val_it != vals.end());
1066 15593253 : if (vec)
1067 : {
1068 1407589 : libmesh_assert_greater_equal (dof_index, vec->first_local_index());
1069 1407589 : libmesh_assert_less (dof_index, vec->last_local_index());
1070 : //libMesh::out << "dof_index, *val_it = \t" << dof_index << ", " << *val_it << '\n';
1071 15593253 : vec->set (dof_index, *val_it);
1072 : }
1073 : }
1074 : }
1075 : }
1076 :
1077 : // processor 0 needs to make sure all replies have been handed off
1078 171390 : if (this->processor_id () == 0)
1079 26202 : Parallel::wait(reply_requests);
1080 : }
1081 :
1082 166626 : Parallel::wait(id_requests);
1083 :
1084 171390 : return n_read_values;
1085 314196 : }
1086 :
1087 :
1088 :
// Reads the coefficients of SCALAR variable \p var from the stream on
// processor 0, ships them to the last processor (the processor on which
// the SCALAR dof indices are set below), and assigns them into \p vec
// if it is non-null.  Returns the number of values assigned, which is
// nonzero only on the last processor.
unsigned int System::read_SCALAR_dofs (const unsigned int var,
                                       Xdr & io,
                                       NumericVector<Number> * vec) const
{
  unsigned int n_assigned_vals = 0; // the number of values assigned, this will be returned.

  // Processor 0 will read the block from the buffer stream and send it to the last processor
  const unsigned int n_SCALAR_dofs = this->variable(var).type().order.get_order();
  std::vector<Number> input_buffer(n_SCALAR_dofs);
  if (this->processor_id() == 0)
    io.data_stream(input_buffer.data(), n_SCALAR_dofs);

#ifdef LIBMESH_HAVE_MPI
  if (this->n_processors() > 1)
    {
      // Unique tag so this exchange cannot collide with other
      // simultaneous communication on the same communicator.
      const Parallel::MessageTag val_tag = this->comm().get_unique_tag();

      // Blocking receive on the last processor
      if (this->processor_id() == (this->n_processors()-1))
        this->comm().receive(0, input_buffer, val_tag);

      // Send the data from processor 0 to the last processor
      if (this->processor_id() == 0)
        this->comm().send(this->n_processors()-1, input_buffer, val_tag);
    }
#endif

  // Finally, set the SCALAR values
  if (this->processor_id() == (this->n_processors()-1))
    {
      const DofMap & dof_map = this->get_dof_map();
      std::vector<dof_id_type> SCALAR_dofs;
      dof_map.SCALAR_dof_indices(SCALAR_dofs, var);

      for (auto i : index_range(SCALAR_dofs))
        {
          if (vec)
            vec->set (SCALAR_dofs[i], input_buffer[i]);
          ++n_assigned_vals;
        }
    }

  return n_assigned_vals;
}
1133 :
1134 :
// Reads one serialized solution-coefficient vector from \p io and
// scatters its entries into \p vec (when non-null).  The on-disk vector
// length is read on processor 0 and broadcast to all ranks; that global
// length is returned.  The on-disk ordering depends on the file format
// version: node/element-major for versions >= 0.7.4, variable-major for
// older files.  SCALAR variables are handled separately at the end.
template <typename InValType>
numeric_index_type System::read_serialized_vector (Xdr & io,
                                                   NumericVector<Number> * vec)
{
  parallel_object_only();

#ifndef NDEBUG
  // In parallel we better be reading a parallel vector -- if not
  // we will not set all of its components below!!
  if (this->n_processors() > 1 && vec)
    {
      libmesh_assert (vec->type() == PARALLEL ||
                      vec->type() == GHOSTED);
    }
#endif

  libmesh_assert (io.reading());

  // vector length
  unsigned int vector_length=0; // FIXME? size_t would break binary compatibility...
#ifndef NDEBUG
  // Running count of coefficients actually assigned; summed across
  // processors at the end and checked against the header's length.
  std::size_t n_assigned_vals=0;
#endif

  // Get the buffer size (read on rank 0, then broadcast so every
  // processor agrees on the global vector length)
  if (this->processor_id() == 0)
    io.data(vector_length, "# vector length");
  this->comm().broadcast(vector_length);

  // _written_var_indices records which of our variables were present in
  // the file's header, in file order; it may be a subset of n_vars().
  const unsigned int nv = cast_int<unsigned int>
    (this->_written_var_indices.size());
  const dof_id_type
    n_nodes = this->get_mesh().n_nodes(),
    n_elem = this->get_mesh().n_elem();

  libmesh_assert_less_equal (nv, this->n_vars());

  // for newer versions, read variables node/elem major
  if (io.version() >= LIBMESH_VERSION_ID(0,7,4))
    {
      //---------------------------------
      // Collect the values for all nodes
#ifndef NDEBUG
      n_assigned_vals +=
#endif
        this->read_serialized_blocked_dof_objects (n_nodes,
                                                   this->get_mesh().local_nodes_begin(),
                                                   this->get_mesh().local_nodes_end(),
                                                   InValType(),
                                                   io,
                                                   std::vector<NumericVector<Number> *> (1,vec));


      //------------------------------------
      // Collect the values for all elements
#ifndef NDEBUG
      n_assigned_vals +=
#endif
        this->read_serialized_blocked_dof_objects (n_elem,
                                                   this->get_mesh().local_elements_begin(),
                                                   this->get_mesh().local_elements_end(),
                                                   InValType(),
                                                   io,
                                                   std::vector<NumericVector<Number> *> (1,vec));
    }

  // for older versions, read variables var-major
  else
    {
      // Loop over each variable in the system, and then each node/element in the mesh.
      for (unsigned int data_var=0; data_var<nv; data_var++)
        {
          const unsigned int var = _written_var_indices[data_var];
          if (this->variable(var).type().family != SCALAR)
            {
              //---------------------------------
              // Collect the values for all nodes
#ifndef NDEBUG
              n_assigned_vals +=
#endif
                this->read_serialized_blocked_dof_objects (n_nodes,
                                                           this->get_mesh().local_nodes_begin(),
                                                           this->get_mesh().local_nodes_end(),
                                                           InValType(),
                                                           io,
                                                           std::vector<NumericVector<Number> *> (1,vec),
                                                           var);


              //------------------------------------
              // Collect the values for all elements
#ifndef NDEBUG
              n_assigned_vals +=
#endif
                this->read_serialized_blocked_dof_objects (n_elem,
                                                           this->get_mesh().local_elements_begin(),
                                                           this->get_mesh().local_elements_end(),
                                                           InValType(),
                                                           io,
                                                           std::vector<NumericVector<Number> *> (1,vec),
                                                           var);
            } // end variable loop
        }
    }

  //-------------------------------------------
  // Finally loop over all the SCALAR variables
  for (unsigned int data_var=0; data_var<nv; data_var++)
    {
      const unsigned int var = _written_var_indices[data_var];
      if (this->variable(var).type().family == SCALAR)
        {
#ifndef NDEBUG
          n_assigned_vals +=
#endif
            this->read_SCALAR_dofs (var, io, vec);
        }
    }

  // Assemble/propagate the newly-set values
  if (vec)
    vec->close();

#ifndef NDEBUG
  this->comm().sum (n_assigned_vals);
  libmesh_assert_equal_to (n_assigned_vals, vector_length);
#endif

  return vector_length;
}
1264 :
1265 :
1266 :
1267 1379 : void System::write_header (Xdr & io,
1268 : std::string_view /* version is currently unused */,
1269 : const bool write_additional_data) const
1270 : {
1271 : /**
1272 : * This method implements the output of a
1273 : * System object, embedded in the output of
1274 : * an EquationSystems<T_sys>. This warrants some
1275 : * documentation. The output of this part
1276 : * consists of 5 sections:
1277 : *
1278 : * for this system
1279 : *
1280 : * 5.) The number of variables in the system (unsigned int)
1281 : *
1282 : * for each variable in the system
1283 : *
1284 : * 6.) The name of the variable (string)
1285 : *
1286 : * 6.1.) subdomain where the variable lives
1287 : *
1288 : * 7.) Combined in an FEType:
1289 : * - The approximation order(s) of the variable
1290 : * (Order Enum, cast to int/s)
1291 : * - The finite element family/ies of the variable
1292 : * (FEFamily Enum, cast to int/s)
1293 : *
1294 : * end variable loop
1295 : *
1296 : * 8.) The number of additional vectors (unsigned int),
1297 : *
1298 : * for each additional vector in the system object
1299 : *
1300 : * 9.) the name of the additional vector (string)
1301 : *
1302 : * end system
1303 : */
1304 126 : libmesh_assert (io.writing());
1305 :
1306 :
1307 : // Only write the header information
1308 : // if we are processor 0.
1309 1505 : if (this->get_mesh().processor_id() != 0)
1310 0 : return;
1311 :
1312 252 : std::string comment;
1313 :
1314 : // 5.)
1315 : // Write the number of variables in the system
1316 :
1317 : {
1318 : // set up the comment
1319 126 : comment = "# No. of Variables in System \"";
1320 126 : comment += this->name();
1321 126 : comment += "\"";
1322 :
1323 1379 : unsigned int nv = this->n_vars();
1324 1379 : io.data (nv, comment);
1325 : }
1326 :
1327 :
1328 2830 : for (auto var : make_range(this->n_vars()))
1329 : {
1330 : // 6.)
1331 : // Write the name of the var-th variable
1332 : {
1333 : // set up the comment
1334 132 : comment = "# Name, Variable No. ";
1335 2638 : comment += std::to_string(var);
1336 132 : comment += ", System \"";
1337 132 : comment += this->name();
1338 132 : comment += "\"";
1339 :
1340 1451 : std::string var_name = this->variable_name(var);
1341 1451 : io.data (var_name, comment);
1342 : }
1343 :
1344 : // 6.1.) Variable subdomains
1345 : {
1346 : // set up the comment
1347 132 : comment = "# Subdomains, Variable \"";
1348 132 : comment += this->variable_name(var);
1349 132 : comment += "\", System \"";
1350 132 : comment += this->name();
1351 132 : comment += "\"";
1352 :
1353 132 : const std::set<subdomain_id_type> & domains = this->variable(var).active_subdomains();
1354 264 : std::vector<subdomain_id_type> domain_array;
1355 1319 : domain_array.assign(domains.begin(), domains.end());
1356 1451 : io.data (domain_array, comment);
1357 : }
1358 :
1359 : // 7.)
1360 : // Write the approximation order of the var-th variable
1361 : // in this system
1362 : {
1363 : // set up the comment
1364 132 : comment = "# Approximation Order, Variable \"";
1365 132 : comment += this->variable_name(var);
1366 132 : comment += "\", System \"";
1367 132 : comment += this->name();
1368 132 : comment += "\"";
1369 :
1370 1451 : int order = static_cast<int>(this->variable_type(var).order);
1371 1451 : io.data (order, comment);
1372 : }
1373 :
1374 :
1375 : #ifdef LIBMESH_ENABLE_INFINITE_ELEMENTS
1376 :
1377 : // do the same for radial_order
1378 : {
1379 132 : comment = "# Radial Approximation Order, Variable \"";
1380 132 : comment += this->variable_name(var);
1381 132 : comment += "\", System \"";
1382 132 : comment += this->name();
1383 132 : comment += "\"";
1384 :
1385 280 : int rad_order = static_cast<int>(this->variable_type(var).radial_order);
1386 280 : io.data (rad_order, comment);
1387 : }
1388 :
1389 : #endif
1390 :
1391 : // Write the Finite Element type of the var-th variable
1392 : // in this System
1393 : {
1394 : // set up the comment
1395 132 : comment = "# FE Family, Variable \"";
1396 132 : comment += this->variable_name(var);
1397 132 : comment += "\", System \"";
1398 132 : comment += this->name();
1399 132 : comment += "\"";
1400 :
1401 132 : const FEType & type = this->variable_type(var);
1402 1451 : int fam = static_cast<int>(type.family);
1403 1451 : io.data (fam, comment);
1404 :
1405 : #ifdef LIBMESH_ENABLE_INFINITE_ELEMENTS
1406 :
1407 132 : comment = "# Radial FE Family, Variable \"";
1408 132 : comment += this->variable_name(var);
1409 132 : comment += "\", System \"";
1410 132 : comment += this->name();
1411 132 : comment += "\"";
1412 :
1413 280 : int radial_fam = static_cast<int>(type.radial_family);
1414 280 : io.data (radial_fam, comment);
1415 :
1416 132 : comment = "# Infinite Mapping Type, Variable \"";
1417 132 : comment += this->variable_name(var);
1418 132 : comment += "\", System \"";
1419 132 : comment += this->name();
1420 132 : comment += "\"";
1421 :
1422 280 : int i_map = static_cast<int>(type.inf_map);
1423 280 : io.data (i_map, comment);
1424 : #endif
1425 : }
1426 : } // end of the variable loop
1427 :
1428 : // 8.)
1429 : // Write the number of additional vectors in the System.
1430 : // If write_additional_data==false, then write zero for
1431 : // the number of additional vectors.
1432 : {
1433 : {
1434 : // set up the comment
1435 126 : comment = "# No. of Additional Vectors, System \"";
1436 126 : comment += this->name();
1437 126 : comment += "\"";
1438 :
1439 2539 : unsigned int nvecs = write_additional_data ? this->n_vectors () : 0;
1440 1379 : io.data (nvecs, comment);
1441 : }
1442 :
1443 1379 : if (write_additional_data)
1444 : {
1445 116 : unsigned int cnt=0;
1446 9768 : for (const auto & [vec_name, vec] : _vectors)
1447 : {
1448 : // 9.)
1449 : // write the name of the cnt-th additional vector
1450 9264 : const std::string dth_vector = std::to_string(cnt++)+"th vector";
1451 9264 : comment = "# Name of " + dth_vector;
1452 8492 : std::string nonconst_vec_name = vec_name; // Stupid XDR API
1453 :
1454 8492 : io.data (nonconst_vec_name, comment);
1455 8492 : int vec_projection = _vector_projections.at(vec_name);
1456 16212 : comment = "# Whether to do projections for " + dth_vector;
1457 8492 : io.data (vec_projection, comment);
1458 8492 : int vec_type = vec->type();
1459 16212 : comment = "# Parallel type of " + dth_vector;
1460 8492 : io.data (vec_type, comment);
1461 : }
1462 : }
1463 : }
1464 : }
1465 :
1466 :
1467 :
1468 0 : void System::write_parallel_data (Xdr & io,
1469 : const bool write_additional_data) const
1470 : {
1471 : /**
1472 : * This method implements the output of the vectors
1473 : * contained in this System object, embedded in the
1474 : * output of an EquationSystems<T_sys>.
1475 : *
1476 : * 9.) The global solution vector, re-ordered to be node-major
1477 : * (More on this later.)
1478 : *
1479 : * for each additional vector in the object
1480 : *
1481 : * 10.) The global additional vector, re-ordered to be
1482 : * node-major (More on this later.)
1483 : *
1484 : * Note that the actual IO is handled through the Xdr class
1485 : * (to be renamed later?) which provides a uniform interface to
1486 : * both the XDR (eXternal Data Representation) interface and standard
1487 : * ASCII output. Thus this one section of code will read XDR or ASCII
1488 : * files with no changes.
1489 : */
1490 : // PerfLog pl("IO Performance",false);
1491 : // pl.push("write_parallel_data");
1492 : // std::size_t total_written_size = 0;
1493 :
1494 0 : std::string comment;
1495 :
1496 0 : libmesh_assert (io.writing());
1497 :
1498 0 : std::vector<Number> io_buffer; io_buffer.reserve(this->solution->local_size());
1499 :
1500 : // build the ordered nodes and element maps.
1501 : // when writing/reading parallel files we need to iterate
1502 : // over our nodes/elements in order of increasing global id().
1503 : // however, this is not guaranteed to be ordering we obtain
1504 : // by using the node_iterators/element_iterators directly.
1505 : // so build a set, sorted by id(), that provides the ordering.
1506 : // further, for memory economy build the set but then transfer
1507 : // its contents to vectors, which will be sorted.
1508 0 : std::vector<const DofObject *> ordered_nodes, ordered_elements;
1509 : {
1510 : std::set<const DofObject *, CompareDofObjectsByID>
1511 0 : ordered_nodes_set (this->get_mesh().local_nodes_begin(),
1512 0 : this->get_mesh().local_nodes_end());
1513 :
1514 0 : ordered_nodes.insert(ordered_nodes.end(),
1515 : ordered_nodes_set.begin(),
1516 0 : ordered_nodes_set.end());
1517 : }
1518 : {
1519 : std::set<const DofObject *, CompareDofObjectsByID>
1520 0 : ordered_elements_set (this->get_mesh().local_elements_begin(),
1521 0 : this->get_mesh().local_elements_end());
1522 :
1523 0 : ordered_elements.insert(ordered_elements.end(),
1524 : ordered_elements_set.begin(),
1525 0 : ordered_elements_set.end());
1526 : }
1527 :
1528 0 : const unsigned int sys_num = this->number();
1529 0 : const unsigned int nv = this->n_vars();
1530 :
1531 : // Loop over each non-SCALAR variable and each node, and write out the value.
1532 0 : for (unsigned int var=0; var<nv; var++)
1533 0 : if (this->variable(var).type().family != SCALAR)
1534 : {
1535 : // First write the node DOF values
1536 0 : for (const auto & node : ordered_nodes)
1537 0 : for (auto comp : make_range(node->n_comp(sys_num,var)))
1538 : {
1539 0 : libmesh_assert_not_equal_to (node->dof_number(sys_num, var, comp),
1540 : DofObject::invalid_id);
1541 :
1542 0 : io_buffer.push_back((*this->solution)(node->dof_number(sys_num, var, comp)));
1543 : }
1544 :
1545 : // Then write the element DOF values
1546 0 : for (const auto & elem : ordered_elements)
1547 0 : for (auto comp : make_range(elem->n_comp(sys_num,var)))
1548 : {
1549 0 : libmesh_assert_not_equal_to (elem->dof_number(sys_num, var, comp),
1550 : DofObject::invalid_id);
1551 :
1552 0 : io_buffer.push_back((*this->solution)(elem->dof_number(sys_num, var, comp)));
1553 : }
1554 : }
1555 :
1556 : // Finally, write the SCALAR data on the last processor
1557 0 : for (auto var : make_range(this->n_vars()))
1558 0 : if (this->variable(var).type().family == SCALAR)
1559 : {
1560 0 : if (this->processor_id() == (this->n_processors()-1))
1561 : {
1562 0 : const DofMap & dof_map = this->get_dof_map();
1563 0 : std::vector<dof_id_type> SCALAR_dofs;
1564 0 : dof_map.SCALAR_dof_indices(SCALAR_dofs, var);
1565 :
1566 0 : for (auto dof : SCALAR_dofs)
1567 0 : io_buffer.push_back((*this->solution)(dof));
1568 : }
1569 : }
1570 :
1571 : // 9.)
1572 : //
1573 : // Actually write the reordered solution vector
1574 : // for the ith system to disk
1575 :
1576 : // set up the comment
1577 : {
1578 0 : comment = "# System \"";
1579 0 : comment += this->name();
1580 0 : comment += "\" Solution Vector";
1581 : }
1582 :
1583 0 : io.data (io_buffer, comment);
1584 :
1585 : // total_written_size += io_buffer.size();
1586 :
1587 : // Only write additional vectors if wanted
1588 0 : if (write_additional_data)
1589 : {
1590 0 : for (auto & [vec_name, vec] : _vectors)
1591 : {
1592 0 : io_buffer.clear();
1593 0 : io_buffer.reserve(vec->local_size());
1594 :
1595 : // Loop over each non-SCALAR variable and each node, and write out the value.
1596 0 : for (unsigned int var=0; var<nv; var++)
1597 0 : if (this->variable(var).type().family != SCALAR)
1598 : {
1599 : // First write the node DOF values
1600 0 : for (const auto & node : ordered_nodes)
1601 0 : for (auto comp : make_range(node->n_comp(sys_num,var)))
1602 : {
1603 0 : libmesh_assert_not_equal_to (node->dof_number(sys_num, var, comp),
1604 : DofObject::invalid_id);
1605 :
1606 0 : io_buffer.push_back((*vec)(node->dof_number(sys_num, var, comp)));
1607 : }
1608 :
1609 : // Then write the element DOF values
1610 0 : for (const auto & elem : ordered_elements)
1611 0 : for (auto comp : make_range(elem->n_comp(sys_num,var)))
1612 : {
1613 0 : libmesh_assert_not_equal_to (elem->dof_number(sys_num, var, comp),
1614 : DofObject::invalid_id);
1615 :
1616 0 : io_buffer.push_back((*vec)(elem->dof_number(sys_num, var, comp)));
1617 : }
1618 : }
1619 :
1620 : // Finally, write the SCALAR data on the last processor
1621 0 : for (auto var : make_range(this->n_vars()))
1622 0 : if (this->variable(var).type().family == SCALAR)
1623 : {
1624 0 : if (this->processor_id() == (this->n_processors()-1))
1625 : {
1626 0 : const DofMap & dof_map = this->get_dof_map();
1627 0 : std::vector<dof_id_type> SCALAR_dofs;
1628 0 : dof_map.SCALAR_dof_indices(SCALAR_dofs, var);
1629 :
1630 0 : for (auto dof : SCALAR_dofs)
1631 0 : io_buffer.push_back((*vec)(dof));
1632 : }
1633 : }
1634 :
1635 : // 10.)
1636 : //
1637 : // Actually write the reordered additional vector
1638 : // for this system to disk
1639 :
1640 : // set up the comment
1641 : {
1642 0 : comment = "# System \"";
1643 0 : comment += this->name();
1644 0 : comment += "\" Additional Vector \"";
1645 0 : comment += vec_name;
1646 0 : comment += "\"";
1647 : }
1648 :
1649 0 : io.data (io_buffer, comment);
1650 :
1651 : // total_written_size += io_buffer.size();
1652 : }
1653 : }
1654 :
1655 : // const Real
1656 : // dt = pl.get_elapsed_time(),
1657 : // rate = total_written_size*sizeof(Number)/dt;
1658 :
1659 : // libMesh::err << "Write " << total_written_size << " \"Number\" values\n"
1660 : // << " Elapsed time = " << dt << '\n'
1661 : // << " Rate = " << rate/1.e6 << "(MB/sec)\n\n";
1662 :
1663 : // pl.pop("write_parallel_data");
1664 0 : }
1665 :
1666 :
1667 :
1668 8415 : void System::write_serialized_data (Xdr & io,
1669 : const bool write_additional_data) const
1670 : {
1671 : /**
1672 : * This method implements the output of the vectors
1673 : * contained in this System object, embedded in the
1674 : * output of an EquationSystems<T_sys>.
1675 : *
1676 : * 9.) The global solution vector, re-ordered to be node-major
1677 : * (More on this later.)
1678 : *
1679 : * for each additional vector in the object
1680 : *
1681 : * 10.) The global additional vector, re-ordered to be
1682 : * node-major (More on this later.)
1683 : */
1684 244 : parallel_object_only();
1685 488 : std::string comment;
1686 :
1687 : // PerfLog pl("IO Performance",false);
1688 : // pl.push("write_serialized_data");
1689 : // std::size_t total_written_size = 0;
1690 :
1691 : // total_written_size +=
1692 8415 : this->write_serialized_vector(io, *this->solution);
1693 :
1694 : // set up the comment
1695 8659 : if (this->processor_id() == 0)
1696 : {
1697 122 : comment = "# System \"";
1698 122 : comment += this->name();
1699 122 : comment += "\" Solution Vector";
1700 :
1701 1331 : io.comment (comment);
1702 : }
1703 :
1704 : // Only write additional vectors if wanted
1705 8415 : if (write_additional_data)
1706 : {
1707 62160 : for (auto & pair : this->_vectors)
1708 : {
1709 : // total_written_size +=
1710 54040 : this->write_serialized_vector(io, *pair.second);
1711 :
1712 : // set up the comment
1713 55584 : if (this->processor_id() == 0)
1714 : {
1715 772 : comment = "# System \"";
1716 772 : comment += this->name();
1717 772 : comment += "\" Additional Vector \"";
1718 772 : comment += pair.first;
1719 772 : comment += "\"";
1720 8492 : io.comment (comment);
1721 : }
1722 : }
1723 : }
1724 :
1725 : // const Real
1726 : // dt = pl.get_elapsed_time(),
1727 : // rate = total_written_size*sizeof(Number)/dt;
1728 :
1729 : // libMesh::out << "Write " << total_written_size << " \"Number\" values\n"
1730 : // << " Elapsed time = " << dt << '\n'
1731 : // << " Rate = " << rate/1.e6 << "(MB/sec)\n\n";
1732 :
1733 : // pl.pop("write_serialized_data");
1734 :
1735 :
1736 :
1737 :
1738 : // // test the new method
1739 : // {
1740 : // std::vector<std::string> names;
1741 : // std::vector<NumericVector<Number> *> vectors_to_write;
1742 :
1743 : // names.push_back("Solution Vector");
1744 : // vectors_to_write.push_back(this->solution.get());
1745 :
1746 : // // Only write additional vectors if wanted
1747 : // if (write_additional_data)
1748 : // {
1749 : // std::map<std::string, NumericVector<Number> *>::const_iterator
1750 : // pos = _vectors.begin();
1751 :
1752 : // for (; pos != this->_vectors.end(); ++pos)
1753 : // {
1754 : // names.push_back("Additional Vector " + pos->first);
1755 : // vectors_to_write.push_back(pos->second);
1756 : // }
1757 : // }
1758 :
1759 : // total_written_size =
1760 : // this->write_serialized_vectors (io, names, vectors_to_write);
1761 :
1762 : // const Real
1763 : // dt2 = pl.get_elapsed_time(),
1764 : // rate2 = total_written_size*sizeof(Number)/(dt2-dt);
1765 :
1766 : // libMesh::out << "Write (new) " << total_written_size << " \"Number\" values\n"
1767 : // << " Elapsed time = " << (dt2-dt) << '\n'
1768 : // << " Rate = " << rate2/1.e6 << "(MB/sec)\n\n";
1769 :
1770 : // }
1771 8415 : }
1772 :
1773 :
1774 :
// Gathers the solution coefficients for a partition-independent range
// of DofObjects (nodes or elements, traversed by [begin,end)) onto
// processor 0 in fixed-size blocks, and writes them to the Xdr stream
// through an asynchronous IO thread.  Returns the number of "Number"
// values written (meaningful on all processors after the final
// broadcast below).
template <typename iterator_type>
std::size_t System::write_serialized_blocked_dof_objects (const std::vector<const NumericVector<Number> *> & vecs,
                                                          const dof_id_type n_objs,
                                                          const iterator_type begin,
                                                          const iterator_type end,
                                                          Xdr & io,
                                                          const unsigned int var_to_write) const
{
  parallel_object_only();

  //-------------------------------------------------------
  // General order: (IO format 0.7.4 & greater)
  //
  //   for (objects ...)
  //     for (vecs ....)
  //       for (vars ....)
  //         for (comps ...)
  //
  // where objects are nodes or elements, sorted to be
  // partition independent,
  // vecs are one or more *identically distributed* solution
  // coefficient vectors, vars are one or more variables
  // to write, and comps are all the components for said
  // vars on the object.

  // We will write all variables unless requested otherwise.
  std::vector<unsigned int> vars_to_write(1, var_to_write);

  if (var_to_write == libMesh::invalid_uint)
    {
      vars_to_write.clear(); /**/ vars_to_write.reserve(this->n_vars());
      for (auto var : make_range(this->n_vars()))
        vars_to_write.push_back(var);
    }

  // Objects are binned by global id into blocks of at most
  // max_io_blksize entries; a single block suffices for small runs.
  const dof_id_type io_blksize = cast_int<dof_id_type>
    (std::min(max_io_blksize, static_cast<std::size_t>(n_objs)));

  const unsigned int
    sys_num  = this->number(),
    num_vecs = cast_int<unsigned int>(vecs.size()),
    num_blks = cast_int<unsigned int>(std::ceil(static_cast<double>(n_objs)/
                                                static_cast<double>(io_blksize)));

  // libMesh::out << "io_blksize = "    << io_blksize
  //              << ", num_objects = " << n_objs
  //              << ", num_blks = "    << num_blks
  //              << std::endl;

  std::size_t written_length=0; // The number of values written.  This will be returned
  std::vector<std::vector<dof_id_type>> xfer_ids(num_blks); // The global IDs and # of components for the local objects in all blocks
  std::vector<std::vector<Number>> send_vals(num_blks); // The raw values for the local objects in all blocks
  std::vector<Parallel::Request>
    id_requests(num_blks), val_requests(num_blks); // send request handle for each block
  std::vector<Parallel::MessageTag>
    id_tags(num_blks), val_tags(num_blks); // tag number for each block

  // ------------------------------------------------------
  // First pass - count the number of objects in each block
  // traverse all the objects and figure out which block they
  // will ultimately live in.
  std::vector<unsigned int>
    xfer_ids_size  (num_blks,0),
    send_vals_size (num_blks,0);

  for (iterator_type it=begin; it!=end; ++it)
    {
      const dof_id_type
        id    = (*it)->id(),
        block = id/io_blksize;

      libmesh_assert_less (block, num_blks);

      xfer_ids_size[block] += 2; // for each object, we store its id, as well as the total number of components for all variables

      unsigned int n_comp_tot=0;

      for (const auto & var : vars_to_write)
        n_comp_tot += (*it)->n_comp(sys_num, var); // for each variable, we will store the nonzero components

      send_vals_size[block] += n_comp_tot*num_vecs;
    }

  //-----------------------------------------
  // Collect the values for all local objects,
  // binning them into 'blocks' that will be
  // sent to processor 0
  for (unsigned int blk=0; blk<num_blks; blk++)
    {
      // libMesh::out << "Writing object block " << blk << std::endl;

      // Each processor should build up its transfer buffers for its
      // local objects in [first_object,last_object).
      const dof_id_type
        first_object = blk*io_blksize,
        last_object  = std::min(cast_int<dof_id_type>((blk+1)*io_blksize), n_objs);

      // convenience
      std::vector<dof_id_type> & ids (xfer_ids[blk]);
      std::vector<Number> & vals (send_vals[blk]);

      // we now know the number of values we will store for each block,
      // so we can do efficient preallocation
      ids.clear(); /**/ ids.reserve (xfer_ids_size[blk]);
      vals.clear(); /**/ vals.reserve (send_vals_size[blk]);

      if (send_vals_size[blk] != 0) // only send if we have nonzero components to write
        for (iterator_type it=begin; it!=end; ++it)
          if (((*it)->id() >= first_object) && // object in [first_object,last_object)
              ((*it)->id() < last_object))
            {
              ids.push_back((*it)->id());

              // count the total number of nonzeros transferred for this object
              {
                unsigned int n_comp_tot=0;

                for (const auto & var : vars_to_write)
                  n_comp_tot += (*it)->n_comp(sys_num, var);

                ids.push_back (n_comp_tot*num_vecs); // even if 0 - processor 0 has no way of knowing otherwise...
              }

              // pack the values to send, in the (vec, var, comp)
              // nesting order documented above
              for (const auto & vec : vecs)
                for (const auto & var : vars_to_write)
                  {
                    const unsigned int n_comp = (*it)->n_comp(sys_num, var);

                    for (unsigned int comp=0; comp<n_comp; comp++)
                      {
                        libmesh_assert_greater_equal ((*it)->dof_number(sys_num, var, comp), vec->first_local_index());
                        libmesh_assert_less ((*it)->dof_number(sys_num, var, comp), vec->last_local_index());
                        vals.push_back((*vec)((*it)->dof_number(sys_num, var, comp)));
                      }
                  }
            }

#ifdef LIBMESH_HAVE_MPI
      // distinct tag ranges for the id and value messages of each block,
      // so the probe/receive loops below cannot mismatch them
      id_tags[blk]  = this->comm().get_unique_tag(100*num_blks + blk);
      val_tags[blk] = this->comm().get_unique_tag(200*num_blks + blk);

      // nonblocking send the data for this block
      this->comm().send (0, ids,  id_requests[blk],  id_tags[blk]);
      this->comm().send (0, vals, val_requests[blk], val_tags[blk]);
#endif
    }


  if (this->processor_id() == 0)
    {
      std::vector<std::vector<dof_id_type>> recv_ids  (this->n_processors());
      std::vector<std::vector<Number>>      recv_vals (this->n_processors());
      std::vector<unsigned int> obj_val_offsets; // map to traverse entry-wise rather than processor-wise
      std::vector<Number> output_vals; // The output buffer for the current block

      // a ThreadedIO object to perform asynchronous file IO
      ThreadedIO<Number> threaded_io(io, output_vals);
      std::unique_ptr<Threads::Thread> async_io;

      for (unsigned int blk=0; blk<num_blks; blk++)
        {
          // Each processor should build up its transfer buffers for its
          // local objects in [first_object,last_object).
          const dof_id_type
            first_object  = cast_int<dof_id_type>(blk*io_blksize),
            last_object   = std::min(cast_int<dof_id_type>((blk+1)*io_blksize), n_objs),
            n_objects_blk = last_object - first_object;

          // offset array. this will define where each object's values
          // map into the actual output_vals buffer.  this must get
          // 0-initialized because 0-component objects are not actually sent
          obj_val_offsets.resize (n_objects_blk); /**/ std::fill (obj_val_offsets.begin(), obj_val_offsets.end(), 0);

          std::size_t n_val_recvd_blk=0;

          // receive this block of data from all processors.
          for (processor_id_type comm_step=0, tnp=this->n_processors(); comm_step != tnp; ++comm_step)
            {
#ifdef LIBMESH_HAVE_MPI
              // blocking receive indices for this block, imposing no particular order on processor
              Parallel::Status id_status (this->comm().probe (Parallel::any_source, id_tags[blk]));
              std::vector<dof_id_type> & ids (recv_ids[id_status.source()]);
              this->comm().receive (id_status.source(), ids, id_tags[blk]);
#else
              // serial case: "receive" is just an alias for our own send buffer
              std::vector<dof_id_type> & ids (recv_ids[0]);
              ids = xfer_ids[blk];
#endif

              // note it's possible we didn't receive values for objects in
              // this block if they have no components allocated.
              for (std::size_t idx=0, sz=ids.size(); idx<sz; idx+=2)
                {
                  const dof_id_type
                    local_idx          = ids[idx+0]-first_object,
                    n_vals_tot_allvecs = ids[idx+1];

                  libmesh_assert_less (local_idx, n_objects_blk);
                  libmesh_assert_less (local_idx, obj_val_offsets.size());

                  obj_val_offsets[local_idx] = n_vals_tot_allvecs;
                }

#ifdef LIBMESH_HAVE_MPI
              // blocking receive values for this block, imposing no particular order on processor
              Parallel::Status val_status (this->comm().probe (Parallel::any_source, val_tags[blk]));
              std::vector<Number> & vals (recv_vals[val_status.source()]);
              this->comm().receive (val_status.source(), vals, val_tags[blk]);
#else
              // straight copy without MPI
              std::vector<Number> & vals (recv_vals[0]);
              vals = send_vals[blk];
#endif

              n_val_recvd_blk += vals.size();
            }

          // We need the offsets into the output_vals vector for each object.
          // fortunately, this is simply the partial sum of the total number
          // of components for each object
          std::partial_sum(obj_val_offsets.begin(), obj_val_offsets.end(),
                           obj_val_offsets.begin());

          // wait on any previous asynchronous IO - this *must* complete before
          // we start messing with the output_vals buffer!
          if (async_io.get()) async_io->join();

          // this is the actual output buffer that will be written to disk.
          // at this point we finally know what size it will be.
          output_vals.resize(n_val_recvd_blk);

          // pack data from all processors into output values
          for (auto proc : make_range(this->n_processors()))
            {
              const std::vector<dof_id_type> & ids (recv_ids [proc]);
              const std::vector<Number> & vals(recv_vals[proc]);
              std::vector<Number>::const_iterator proc_vals(vals.begin());

              for (std::size_t idx=0, sz=ids.size(); idx<sz; idx+=2)
                {
                  const dof_id_type
                    local_idx          = ids[idx+0]-first_object,
                    n_vals_tot_allvecs = ids[idx+1];

                  // put this object's data into the proper location
                  // in the output buffer.  for local_idx==0 the offset
                  // is 0, hence the partial sum at [local_idx-1] for others.
                  std::vector<Number>::iterator out_vals(output_vals.begin());
                  if (local_idx != 0)
                    std::advance (out_vals, obj_val_offsets[local_idx-1]);

                  for (unsigned int val=0; val<n_vals_tot_allvecs; val++, ++out_vals, ++proc_vals)
                    {
                      libmesh_assert (out_vals != output_vals.end());
                      libmesh_assert (proc_vals != vals.end());
                      *out_vals = *proc_vals;
                    }
                }
            }

          // output_vals buffer is now filled for this block.
          // write it to disk (asynchronously, on the IO thread)
          async_io = std::make_unique<Threads::Thread>(threaded_io);
          written_length += output_vals.size();
        }

      // wait on any previous asynchronous IO - this *must* complete before
      // our stuff goes out of scope
      async_io->join();
    }

  // all processors must complete their nonblocking sends before the
  // buffers in xfer_ids/send_vals may be destroyed
  Parallel::wait(id_requests);
  Parallel::wait(val_requests);

  // we need some synchronization here.  Because this method
  // can be called for a range of nodes, then a range of elements,
  // we need some mechanism to prevent processors from racing past
  // to the next range and overtaking ongoing communication.  one
  // approach would be to figure out unique tags for each range,
  // but for now we just impose a barrier here.  And might as
  // well have it do some useful work.
  this->comm().broadcast(written_length);

  return written_length;
}
2059 :
2060 :
2061 :
2062 0 : unsigned int System::write_SCALAR_dofs (const NumericVector<Number> & vec,
2063 : const unsigned int var,
2064 : Xdr & io) const
2065 : {
2066 0 : unsigned int written_length=0;
2067 0 : std::vector<Number> vals; // The raw values for the local objects in the current block
2068 : // Collect the SCALARs for the current variable
2069 0 : if (this->processor_id() == (this->n_processors()-1))
2070 : {
2071 0 : const DofMap & dof_map = this->get_dof_map();
2072 0 : std::vector<dof_id_type> SCALAR_dofs;
2073 0 : dof_map.SCALAR_dof_indices(SCALAR_dofs, var);
2074 : const unsigned int n_scalar_dofs = cast_int<unsigned int>
2075 0 : (SCALAR_dofs.size());
2076 :
2077 0 : for (unsigned int i=0; i<n_scalar_dofs; i++)
2078 : {
2079 0 : vals.push_back( vec(SCALAR_dofs[i]) );
2080 : }
2081 : }
2082 :
2083 : #ifdef LIBMESH_HAVE_MPI
2084 0 : if (this->n_processors() > 1)
2085 : {
2086 : const Parallel::MessageTag val_tag =
2087 0 : this->comm().get_unique_tag(1);
2088 :
2089 : // Post the receive on processor 0
2090 0 : if (this->processor_id() == 0)
2091 : {
2092 0 : this->comm().receive(this->n_processors()-1, vals, val_tag);
2093 : }
2094 :
2095 : // Send the data to processor 0
2096 0 : if (this->processor_id() == (this->n_processors()-1))
2097 : {
2098 0 : this->comm().send(0, vals, val_tag);
2099 : }
2100 0 : }
2101 : #endif
2102 :
2103 : // -------------------------------------------------------
2104 : // Write the output on processor 0.
2105 0 : if (this->processor_id() == 0)
2106 : {
2107 : const unsigned int vals_size =
2108 0 : cast_int<unsigned int>(vals.size());
2109 0 : io.data_stream (vals.data(), vals_size);
2110 0 : written_length += vals_size;
2111 : }
2112 :
2113 0 : return written_length;
2114 : }
2115 :
2116 :
2117 :
2118 62455 : dof_id_type System::write_serialized_vector (Xdr & io,
2119 : const NumericVector<Number> & vec) const
2120 : {
2121 1788 : parallel_object_only();
2122 :
2123 1788 : libmesh_assert (io.writing());
2124 :
2125 62455 : dof_id_type vec_length = vec.size();
2126 65137 : if (this->processor_id() == 0) io.data (vec_length, "# vector length");
2127 :
2128 1788 : dof_id_type written_length = 0;
2129 :
2130 : //---------------------------------
2131 : // Collect the values for all nodes
2132 1788 : written_length += cast_int<dof_id_type>
2133 123122 : (this->write_serialized_blocked_dof_objects (std::vector<const NumericVector<Number> *>(1,&vec),
2134 62455 : this->get_mesh().n_nodes(),
2135 124910 : this->get_mesh().local_nodes_begin(),
2136 123122 : this->get_mesh().local_nodes_end(),
2137 : io));
2138 :
2139 : //------------------------------------
2140 : // Collect the values for all elements
2141 62455 : written_length += cast_int<dof_id_type>
2142 123122 : (this->write_serialized_blocked_dof_objects (std::vector<const NumericVector<Number> *>(1,&vec),
2143 62455 : this->get_mesh().n_elem(),
2144 124910 : this->get_mesh().local_elements_begin(),
2145 124910 : this->get_mesh().local_elements_end(),
2146 : io));
2147 :
2148 : //-------------------------------------------
2149 : // Finally loop over all the SCALAR variables
2150 125052 : for (auto var : make_range(this->n_vars()))
2151 62597 : if (this->variable(var).type().family == SCALAR)
2152 : {
2153 0 : written_length +=
2154 0 : this->write_SCALAR_dofs (vec, var, io);
2155 : }
2156 :
2157 1788 : if (this->processor_id() == 0)
2158 894 : libmesh_assert_equal_to (written_length, vec_length);
2159 :
2160 62455 : return written_length;
2161 : }
2162 :
2163 :
2164 : template <typename InValType>
2165 284 : std::size_t System::read_serialized_vectors (Xdr & io,
2166 : const std::vector<NumericVector<Number> *> & vectors) const
2167 : {
2168 8 : parallel_object_only();
2169 :
2170 : // Error checking
2171 : // #ifndef NDEBUG
2172 : // // In parallel we better be reading a parallel vector -- if not
2173 : // // we will not set all of its components below!!
2174 : // if (this->n_processors() > 1)
2175 : // {
2176 : // libmesh_assert (vec.type() == PARALLEL ||
2177 : // vec.type() == GHOSTED);
2178 : // }
2179 : // #endif
2180 :
2181 8 : libmesh_assert (io.reading());
2182 :
2183 292 : if (this->processor_id() == 0)
2184 : {
2185 : // sizes
2186 48 : unsigned int num_vecs=0;
2187 48 : dof_id_type vector_length=0;
2188 :
2189 : // Get the number of vectors
2190 48 : io.data(num_vecs);
2191 : // Get the buffer size
2192 48 : io.data(vector_length);
2193 :
2194 52 : libmesh_error_msg_if
2195 : (num_vecs != vectors.size(),
2196 : "Xdr file header declares " << num_vecs << " vectors, but we were asked to read " << vectors.size());
2197 :
2198 48 : if (num_vecs != 0)
2199 : {
2200 48 : libmesh_error_msg_if (vectors[0] == nullptr, "vectors[0] should not be null");
2201 48 : libmesh_error_msg_if (vectors[0]->size() != vector_length, "Inconsistent vector sizes");
2202 : }
2203 : }
2204 :
2205 : // no need to actually communicate these.
2206 : // this->comm().broadcast(num_vecs);
2207 : // this->comm().broadcast(vector_length);
2208 :
2209 : // Cache these - they are not free!
2210 : const dof_id_type
2211 284 : n_nodes = this->get_mesh().n_nodes(),
2212 284 : n_elem = this->get_mesh().n_elem();
2213 :
2214 8 : std::size_t read_length = 0;
2215 :
2216 : //---------------------------------
2217 : // Collect the values for all nodes
2218 24 : read_length +=
2219 536 : this->read_serialized_blocked_dof_objects (n_nodes,
2220 284 : this->get_mesh().local_nodes_begin(),
2221 284 : this->get_mesh().local_nodes_end(),
2222 : InValType(),
2223 : io,
2224 : vectors);
2225 :
2226 : //------------------------------------
2227 : // Collect the values for all elements
2228 284 : read_length +=
2229 536 : this->read_serialized_blocked_dof_objects (n_elem,
2230 284 : this->get_mesh().local_elements_begin(),
2231 284 : this->get_mesh().local_elements_end(),
2232 : InValType(),
2233 : io,
2234 : vectors);
2235 :
2236 : //-------------------------------------------
2237 : // Finally loop over all the SCALAR variables
2238 5240 : for (NumericVector<Number> * vec : vectors)
2239 14172 : for (auto var : make_range(this->n_vars()))
2240 9216 : if (this->variable(var).type().family == SCALAR)
2241 : {
2242 0 : libmesh_assert_not_equal_to (vec, 0);
2243 :
2244 0 : read_length +=
2245 0 : this->read_SCALAR_dofs (var, io, vec);
2246 : }
2247 :
2248 : //---------------------------------------
2249 : // last step - must close all the vectors
2250 5240 : for (NumericVector<Number> * vec : vectors)
2251 : {
2252 140 : libmesh_assert_not_equal_to (vec, 0);
2253 4956 : vec->close();
2254 : }
2255 :
2256 284 : return read_length;
2257 : }
2258 :
2259 :
2260 :
2261 284 : std::size_t System::write_serialized_vectors (Xdr & io,
2262 : const std::vector<const NumericVector<Number> *> & vectors) const
2263 : {
2264 8 : parallel_object_only();
2265 :
2266 8 : libmesh_assert (io.writing());
2267 :
2268 : // Cache these - they are not free!
2269 : const dof_id_type
2270 284 : n_nodes = this->get_mesh().n_nodes(),
2271 284 : n_elem = this->get_mesh().n_elem();
2272 :
2273 8 : std::size_t written_length = 0;
2274 :
2275 292 : if (this->processor_id() == 0)
2276 : {
2277 : unsigned int
2278 48 : n_vec = cast_int<unsigned int>(vectors.size());
2279 : dof_id_type
2280 48 : vec_size = vectors.empty() ? 0 : vectors[0]->size();
2281 : // Set the number of vectors
2282 48 : io.data(n_vec, "# number of vectors");
2283 : // Set the buffer size
2284 48 : io.data(vec_size, "# vector length");
2285 : }
2286 :
2287 : //---------------------------------
2288 : // Collect the values for all nodes
2289 8 : written_length +=
2290 284 : this->write_serialized_blocked_dof_objects (vectors,
2291 : n_nodes,
2292 568 : this->get_mesh().local_nodes_begin(),
2293 560 : this->get_mesh().local_nodes_end(),
2294 : io);
2295 :
2296 : //------------------------------------
2297 : // Collect the values for all elements
2298 284 : written_length +=
2299 284 : this->write_serialized_blocked_dof_objects (vectors,
2300 : n_elem,
2301 568 : this->get_mesh().local_elements_begin(),
2302 560 : this->get_mesh().local_elements_end(),
2303 : io);
2304 :
2305 : //-------------------------------------------
2306 : // Finally loop over all the SCALAR variables
2307 5240 : for (const NumericVector<Number> * vec : vectors)
2308 14172 : for (auto var : make_range(this->n_vars()))
2309 9216 : if (this->variable(var).type().family == SCALAR)
2310 : {
2311 0 : libmesh_assert_not_equal_to (vec, 0);
2312 :
2313 0 : written_length +=
2314 0 : this->write_SCALAR_dofs (*vec, var, io);
2315 : }
2316 :
2317 284 : return written_length;
2318 : }
2319 :
2320 :
2321 :
2322 :
2323 : template LIBMESH_EXPORT void System::read_parallel_data<Number> (Xdr & io, const bool read_additional_data);
2324 : template LIBMESH_EXPORT void System::read_serialized_data<Number> (Xdr & io, const bool read_additional_data);
2325 : template LIBMESH_EXPORT numeric_index_type System::read_serialized_vector<Number> (Xdr & io, NumericVector<Number> * vec);
2326 : template LIBMESH_EXPORT std::size_t System::read_serialized_vectors<Number> (Xdr & io, const std::vector<NumericVector<Number> *> & vectors) const;
2327 : #ifdef LIBMESH_USE_COMPLEX_NUMBERS
2328 : template LIBMESH_EXPORT void System::read_parallel_data<Real> (Xdr & io, const bool read_additional_data);
2329 : template LIBMESH_EXPORT void System::read_serialized_data<Real> (Xdr & io, const bool read_additional_data);
2330 : template LIBMESH_EXPORT numeric_index_type System::read_serialized_vector<Real> (Xdr & io, NumericVector<Number> * vec);
2331 : template LIBMESH_EXPORT std::size_t System::read_serialized_vectors<Real> (Xdr & io, const std::vector<NumericVector<Number> *> & vectors) const;
2332 : #endif
2333 :
2334 : } // namespace libMesh
|