Line data Source code
1 : // The libMesh Finite Element Library.
2 : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 :
4 : // This library is free software; you can redistribute it and/or
5 : // modify it under the terms of the GNU Lesser General Public
6 : // License as published by the Free Software Foundation; either
7 : // version 2.1 of the License, or (at your option) any later version.
8 :
9 : // This library is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 : // Lesser General Public License for more details.
13 :
14 : // You should have received a copy of the GNU Lesser General Public
15 : // License along with this library; if not, write to the Free Software
16 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :
18 :
19 : #include "libmesh/libmesh_common.h"
20 : #include "libmesh/parallel.h"
21 :
22 :
23 : // Local Include
24 : #include "libmesh/libmesh_version.h"
25 : #include "libmesh/system.h"
26 : #include "libmesh/mesh_base.h"
27 : #include "libmesh/elem.h"
28 : #include "libmesh/xdr_cxx.h"
29 : #include "libmesh/numeric_vector.h"
30 : #include "libmesh/dof_map.h"
31 :
32 :
33 : // C++ Includes
34 : #include <memory>
35 : #include <numeric> // for std::partial_sum
36 : #include <set>
37 :
38 :
39 : // Anonymous namespace for implementation details.
40 : namespace {
41 :
42 : using libMesh::DofObject;
43 : using libMesh::Number;
44 : using libMesh::cast_int;
45 :
46 : // Comments:
47 : // ---------
48 : // - The max_io_blksize governs how many nodes or elements will be
49 : // treated as a single block when performing parallel IO on large
50 : // systems.
51 : // - This parameter only loosely affects the size of the actual IO
52 : // buffer as this depends on the number of components a given
53 : // variable has for the nodes/elements in the block.
54 : // - When reading/writing each processor uses an ID map which is
55 : // 3*io_blksize*sizeof(dof_id_type) bytes long, so with unsigned int
56 : // and io_blksize=256000 we would expect that buffer alone to be
57 : // ~3Mb.
58 : // - In general, an increase in max_io_blksize should increase the
59 : // efficiency of large parallel read/writes by reducing the number
60 : // of MPI messages at the expense of memory.
61 : // - If the library exhausts memory during IO you might reduce this
62 : // parameter.
63 :
64 : const std::size_t max_io_blksize = 256000; // upper bound on nodes/elements per parallel-IO block (see notes above)
65 :
66 : /**
67 : * Functor that streams a data vector through an Xdr object; intended to run on a worker thread so file IO can overlap other work.
68 : */
69 : template <typename InValType>
70 : class ThreadedIO
71 : {
72 : private:
73 : libMesh::Xdr & _io; // stream to read from / write to (not owned)
74 : std::vector<InValType> & _data; // buffer whose contents are streamed (not owned)
75 :
76 : public:
77 45944 : ThreadedIO (libMesh::Xdr & io, std::vector<InValType> & data) :
78 37588 : _io(io),
79 43562 : _data(data)
80 4178 : {}
81 :
82 41766 : void operator()()
83 : {
84 41766 : if (_data.empty()) return; // nothing to stream
85 20905 : _io.data_stream (_data.data(), cast_int<unsigned int>(_data.size()));
86 : }
87 : };
88 : }
89 :
90 :
91 : namespace libMesh
92 : {
93 :
94 :
95 : // ------------------------------------------------------------
96 : // System class implementation
97 11213 : void System::read_header (Xdr & io,
98 : std::string_view version,
99 : const bool read_header_in,
100 : const bool read_additional_data,
101 : const bool read_legacy_format)
102 : {
103 : // This method implements the input of a
104 : // System object, embedded in the output of
105 : // an EquationSystems<T_sys>. This warrants some
106 : // documentation. The output file essentially
107 : // consists of 5 sections:
108 : //
109 : // for this system
110 : //
111 : // 5.) The number of variables in the system (unsigned int)
112 : //
113 : // for each variable in the system
114 : //
115 : // 6.) The name of the variable (string)
116 : //
117 : // 6.1.) Variable subdomains
118 : //
119 : // 7.) Combined in an FEType:
120 : // - The approximation order(s) of the variable
121 : // (Order Enum, cast to int/s)
122 : // - The finite element family/ies of the variable
123 : // (FEFamily Enum, cast to int/s)
124 : //
125 : // end variable loop
126 : //
127 : // 8.) The number of additional vectors (unsigned int),
128 : //
129 : // for each additional vector in the system object
130 : //
131 : // 9.) the name of the additional vector (string)
132 : //
133 : // end system
134 322 : libmesh_assert (io.reading());
135 :
136 : // Possibly clear data structures and start from scratch.
137 11213 : if (read_header_in)
138 849 : this->clear ();
139 :
140 : // Figure out if we need to read infinite element information.
141 : // This will be true if the version string contains " with infinite elements"
142 : const bool read_ifem_info =
143 11245 : Utility::contains(version, " with infinite elements") ||
144 11245 : libMesh::on_command_line ("--read-ifem-systems");
145 :
146 :
147 : {
148 : // 5.)
149 : // Read the number of variables in the system
150 11213 : unsigned int nv=0; // throughout: rank 0 reads each datum from the file, then broadcasts it to all ranks
151 11535 : if (this->processor_id() == 0)
152 1771 : io.data (nv);
153 11213 : this->comm().broadcast(nv);
154 :
155 11213 : _written_var_indices.clear();
156 11213 : _written_var_indices.resize(nv, 0); // maps file variable index -> in-memory variable number
157 :
158 22852 : for (unsigned int var=0; var<nv; var++)
159 : {
160 : // 6.)
161 : // Read the name of the var-th variable
162 668 : std::string var_name;
163 11973 : if (this->processor_id() == 0)
164 1843 : io.data (var_name);
165 11639 : this->comm().broadcast(var_name);
166 :
167 : // 6.1.)
168 668 : std::set<subdomain_id_type> domains;
169 11639 : if (io.version() >= LIBMESH_VERSION_ID(0,7,2))
170 : {
171 668 : std::vector<subdomain_id_type> domain_array;
172 11973 : if (this->processor_id() == 0)
173 1843 : io.data (domain_array);
174 11639 : for (const auto & id : domain_array)
175 0 : domains.insert(id);
176 : }
177 11639 : this->comm().broadcast(domains);
178 :
179 : // 7.)
180 : // Read the approximation order(s) of the var-th variable
181 11639 : int order=0;
182 11973 : if (this->processor_id() == 0)
183 1843 : io.data (order);
184 11639 : this->comm().broadcast(order);
185 :
186 :
187 : // do the same for infinite element radial_order
188 11639 : int rad_order=0; // stays 0 when the file has no infinite-element data
189 11639 : if (read_ifem_info)
190 : {
191 968 : if (this->processor_id() == 0)
192 332 : io.data(rad_order);
193 650 : this->comm().broadcast(rad_order);
194 : }
195 :
196 : // Read the finite element type of the var-th variable
197 11639 : int fam=0;
198 11973 : if (this->processor_id() == 0)
199 1843 : io.data (fam);
200 11639 : this->comm().broadcast(fam);
201 334 : FEType type;
202 11639 : type.order = static_cast<Order>(order);
203 11639 : type.family = static_cast<FEFamily>(fam);
204 :
205 : // Check for incompatibilities. The shape function indexing was
206 : // changed for the monomial and xyz finite element families to
207 : // simplify extension to arbitrary p. The consequence is that
208 : // old restart files will not be read correctly. This is expected
209 : // to be an unlikely occurrence, but catch it anyway.
210 11639 : if (read_legacy_format)
211 0 : if ((type.family == MONOMIAL || type.family == XYZ) &&
212 0 : ((type.order.get_order() > 2 && this->get_mesh().mesh_dimension() == 2) ||
213 0 : (type.order.get_order() > 1 && this->get_mesh().mesh_dimension() == 3)))
214 : {
215 0 : libmesh_here();
216 0 : libMesh::out << "*****************************************************************\n"
217 0 : << "* WARNING: reading a potentially incompatible restart file!!! *\n"
218 0 : << "* contact [email protected] for more details *\n"
219 0 : << "*****************************************************************"
220 0 : << std::endl;
221 : }
222 :
223 : // Read additional information for infinite elements
224 11639 : int radial_fam=0;
225 11639 : int i_map=0;
226 11639 : if (read_ifem_info)
227 : {
228 968 : if (this->processor_id() == 0)
229 332 : io.data (radial_fam);
230 650 : this->comm().broadcast(radial_fam);
231 968 : if (this->processor_id() == 0)
232 332 : io.data (i_map);
233 650 : this->comm().broadcast(i_map);
234 : }
235 :
236 : #ifdef LIBMESH_ENABLE_INFINITE_ELEMENTS
237 :
238 682 : type.radial_order = static_cast<Order>(rad_order);
239 682 : type.radial_family = static_cast<FEFamily>(radial_fam);
240 682 : type.inf_map = static_cast<InfMapType>(i_map);
241 :
242 : #endif
243 :
244 11639 : if (read_header_in)
245 : {
246 991 : if (domains.empty())
247 991 : _written_var_indices[var] = this->add_variable (var_name, type);
248 : else
249 0 : _written_var_indices[var] = this->add_variable (var_name, type, &domains);
250 : }
251 : else
252 10648 : _written_var_indices[var] = this->variable_number(var_name); // system already set up: just record the mapping
253 : }
254 : }
255 :
256 : // 8.)
257 : // Read the number of additional vectors.
258 11213 : unsigned int nvecs=0;
259 11535 : if (this->processor_id() == 0)
260 1771 : io.data (nvecs);
261 11213 : this->comm().broadcast(nvecs);
262 :
263 : // If nvecs > 0, this means that write_additional_data
264 : // was true when this file was written. We will need to
265 : // make use of this fact later.
266 11213 : this->_additional_data_written = nvecs; // consulted later by the read_*_data methods
267 :
268 83313 : for (unsigned int vec=0; vec<nvecs; vec++)
269 : {
270 : // 9.)
271 : // Read the name of the vec-th additional vector
272 4120 : std::string vec_name;
273 74160 : if (this->processor_id() == 0)
274 11330 : io.data (vec_name);
275 72100 : this->comm().broadcast(vec_name);
276 72100 : if (io.version() >= LIBMESH_VERSION_ID(1,7,0))
277 : {
278 72100 : int vec_projection = 0;
279 74160 : if (this->processor_id() == 0)
280 11330 : io.data (vec_projection);
281 72100 : this->comm().broadcast(vec_projection);
282 : int vec_type;
283 74160 : if (this->processor_id() == 0)
284 11330 : io.data (vec_type);
285 72100 : this->comm().broadcast(vec_type);
286 :
287 72100 : if (read_additional_data)
288 74160 : this->add_vector(vec_name, bool(vec_projection), ParallelType(vec_type));
289 : }
290 0 : else if (read_additional_data)
291 : // Systems now can handle adding post-initialization vectors
292 : // libmesh_assert(this->_can_add_vectors);
293 : // Some systems may have added their own vectors already
294 : // libmesh_assert_equal_to (this->_vectors.count(vec_name), 0);
295 0 : this->add_vector(vec_name);
296 : }
297 11213 : }
298 :
299 :
300 :
301 : template <typename InValType>
302 0 : void System::read_parallel_data (Xdr & io,
303 : const bool read_additional_data)
304 : {
305 : /**
306 : * This method implements the output of the vectors
307 : * contained in this System object, embedded in the
308 : * output of an EquationSystems<T_sys>.
309 : *
310 : * 9.) The global solution vector, re-ordered to be node-major
311 : * (More on this later.)
312 : *
313 : * for each additional vector in the object
314 : *
315 : * 10.) The global additional vector, re-ordered to be
316 : * node-major (More on this later.)
317 : *
318 : * Note that the actual IO is handled through the Xdr class
319 : * (to be renamed later?) which provides a uniform interface to
320 : * both the XDR (eXternal Data Representation) interface and standard
321 : * ASCII output. Thus this one section of code will read XDR or ASCII
322 : * files with no changes.
323 : */
324 : // PerfLog pl("IO Performance",false);
325 : // pl.push("read_parallel_data");
326 0 : dof_id_type total_read_size = 0;
327 :
328 0 : libmesh_assert (io.reading());
329 0 : libmesh_assert (io.is_open());
330 :
331 : // build the ordered nodes and element maps.
332 : // when writing/reading parallel files we need to iterate
333 : // over our nodes/elements in order of increasing global id().
334 : // however, this is not guaranteed to be ordering we obtain
335 : // by using the node_iterators/element_iterators directly.
336 : // so build a set, sorted by id(), that provides the ordering.
337 : // further, for memory economy build the set but then transfer
338 : // its contents to vectors, which will be sorted.
339 0 : std::vector<const DofObject *> ordered_nodes, ordered_elements;
340 : {
341 : std::set<const DofObject *, CompareDofObjectsByID>
342 0 : ordered_nodes_set (this->get_mesh().local_nodes_begin(),
343 0 : this->get_mesh().local_nodes_end());
344 :
345 0 : ordered_nodes.insert(ordered_nodes.end(),
346 : ordered_nodes_set.begin(),
347 : ordered_nodes_set.end());
348 : }
349 : {
350 : std::set<const DofObject *, CompareDofObjectsByID>
351 0 : ordered_elements_set (this->get_mesh().local_elements_begin(),
352 0 : this->get_mesh().local_elements_end());
353 :
354 0 : ordered_elements.insert(ordered_elements.end(),
355 : ordered_elements_set.begin(),
356 : ordered_elements_set.end());
357 : }
358 :
359 : // std::vector<Number> io_buffer;
360 0 : std::vector<InValType> io_buffer;
361 :
362 : // 9.)
363 : //
364 : // Actually read the solution components
365 : // for the ith system to disk
366 0 : io.data(io_buffer);
367 :
368 0 : total_read_size += cast_int<dof_id_type>(io_buffer.size());
369 :
370 0 : const unsigned int sys_num = this->number();
371 0 : const unsigned int nv = cast_int<unsigned int>
372 0 : (this->_written_var_indices.size());
373 0 : libmesh_assert_less_equal (nv, this->n_vars());
374 :
375 0 : dof_id_type cnt=0; // running cursor into io_buffer; traversal order must match the write order
376 :
377 : // Loop over each non-SCALAR variable and each node, and read out the value.
378 0 : for (unsigned int data_var=0; data_var<nv; data_var++)
379 : {
380 0 : const unsigned int var = _written_var_indices[data_var];
381 0 : if (this->variable(var).type().family != SCALAR)
382 : {
383 : // First read the node DOF values
384 0 : for (const auto & node : ordered_nodes)
385 0 : for (auto comp : make_range(node->n_comp(sys_num,var)))
386 : {
387 0 : libmesh_assert_not_equal_to (node->dof_number(sys_num, var, comp),
388 : DofObject::invalid_id);
389 0 : libmesh_assert_less (cnt, io_buffer.size());
390 0 : this->solution->set(node->dof_number(sys_num, var, comp), io_buffer[cnt++]);
391 : }
392 :
393 : // Then read the element DOF values
394 0 : for (const auto & elem : ordered_elements)
395 0 : for (auto comp : make_range(elem->n_comp(sys_num,var)))
396 : {
397 0 : libmesh_assert_not_equal_to (elem->dof_number(sys_num, var, comp),
398 : DofObject::invalid_id);
399 0 : libmesh_assert_less (cnt, io_buffer.size());
400 0 : this->solution->set(elem->dof_number(sys_num, var, comp), io_buffer[cnt++]);
401 : }
402 : }
403 : }
404 :
405 : // Finally, read the SCALAR variables on the last processor
406 0 : for (unsigned int data_var=0; data_var<nv; data_var++)
407 : {
408 0 : const unsigned int var = _written_var_indices[data_var];
409 0 : if (this->variable(var).type().family == SCALAR)
410 : {
411 0 : if (this->processor_id() == (this->n_processors()-1)) // SCALAR dofs are stored on the last processor
412 : {
413 0 : const DofMap & dof_map = this->get_dof_map();
414 0 : std::vector<dof_id_type> SCALAR_dofs;
415 0 : dof_map.SCALAR_dof_indices(SCALAR_dofs, var);
416 :
417 0 : for (auto dof : SCALAR_dofs)
418 0 : this->solution->set(dof, io_buffer[cnt++]);
419 : }
420 : }
421 : }
422 :
423 : // And we're done setting solution entries
424 0 : this->solution->close();
425 :
426 : // For each additional vector, simply go through the list.
427 : // ONLY attempt to do this IF additional data was actually
428 : // written to the file for this system (controlled by the
429 : // _additional_data_written flag).
430 0 : if (this->_additional_data_written)
431 : {
432 0 : const std::size_t nvecs = this->_vectors.size();
433 :
434 : // If the number of additional vectors written is non-zero, and
435 : // the number of additional vectors we have is non-zero, and
436 : // they don't match, then something is wrong and we can't be
437 : // sure we're reading data into the correct places.
438 0 : if (read_additional_data && nvecs &&
439 0 : nvecs != this->_additional_data_written)
440 0 : libmesh_error_msg
441 : ("Additional vectors in file do not match system");
442 :
443 0 : auto pos = _vectors.begin();
444 :
445 0 : for (std::size_t i = 0; i != this->_additional_data_written; ++i)
446 : {
447 0 : cnt=0;
448 0 : io_buffer.clear();
449 :
450 : // 10.)
451 : //
452 : // Actually read the additional vector components
453 : // for the ith system from disk
454 0 : io.data(io_buffer);
455 :
456 0 : total_read_size += cast_int<dof_id_type>(io_buffer.size());
457 :
458 : // If read_additional_data==true and we have additional vectors,
459 : // then we will keep this vector data; otherwise we are going to
460 : // throw it away.
461 0 : if (read_additional_data && nvecs)
462 : {
463 : // Loop over each non-SCALAR variable and each node, and read out the value.
464 0 : for (unsigned int data_var=0; data_var<nv; data_var++)
465 : {
466 0 : const unsigned int var = _written_var_indices[data_var];
467 0 : if (this->variable(var).type().family != SCALAR)
468 : {
469 : // First read the node DOF values
470 0 : for (const auto & node : ordered_nodes)
471 0 : for (auto comp : make_range(node->n_comp(sys_num,var)))
472 : {
473 0 : libmesh_assert_not_equal_to (node->dof_number(sys_num, var, comp),
474 : DofObject::invalid_id);
475 0 : libmesh_assert_less (cnt, io_buffer.size());
476 0 : pos->second->set(node->dof_number(sys_num, var, comp), io_buffer[cnt++]);
477 : }
478 :
479 : // Then read the element DOF values
480 0 : for (const auto & elem : ordered_elements)
481 0 : for (auto comp : make_range(elem->n_comp(sys_num,var)))
482 : {
483 0 : libmesh_assert_not_equal_to (elem->dof_number(sys_num, var, comp),
484 : DofObject::invalid_id);
485 0 : libmesh_assert_less (cnt, io_buffer.size());
486 0 : pos->second->set(elem->dof_number(sys_num, var, comp), io_buffer[cnt++]);
487 : }
488 : }
489 : }
490 :
491 : // Finally, read the SCALAR variables on the last processor
492 0 : for (unsigned int data_var=0; data_var<nv; data_var++)
493 : {
494 0 : const unsigned int var = _written_var_indices[data_var];
495 0 : if (this->variable(var).type().family == SCALAR)
496 : {
497 0 : if (this->processor_id() == (this->n_processors()-1))
498 : {
499 0 : const DofMap & dof_map = this->get_dof_map();
500 0 : std::vector<dof_id_type> SCALAR_dofs;
501 0 : dof_map.SCALAR_dof_indices(SCALAR_dofs, var);
502 :
503 0 : for (auto dof : SCALAR_dofs)
504 0 : pos->second->set(dof, io_buffer[cnt++]);
505 : }
506 : }
507 : }
508 :
509 : // And we're done setting entries for this variable
510 0 : pos->second->close();
511 : }
512 :
513 : // If we've got vectors then we need to be iterating through
514 : // those too
515 0 : if (pos != this->_vectors.end())
516 0 : ++pos;
517 : }
518 : }
519 :
520 : // const Real
521 : // dt = pl.get_elapsed_time(),
522 : // rate = total_read_size*sizeof(Number)/dt;
523 :
524 : // libMesh::err << "Read " << total_read_size << " \"Number\" values\n"
525 : // << " Elapsed time = " << dt << '\n'
526 : // << " Rate = " << rate/1.e6 << "(MB/sec)\n\n";
527 :
528 : // pl.pop("read_parallel_data");
529 0 : }
530 :
531 :
532 : template <typename InValType>
533 10929 : void System::read_serialized_data (Xdr & io,
534 : const bool read_additional_data)
535 : {
536 : // This method implements the input of the vectors
537 : // contained in this System object, embedded in the
538 : // output of an EquationSystems<T_sys>.
539 : //
540 : // 10.) The global solution vector, re-ordered to be node-major
541 : // (More on this later.)
542 : //
543 : // for each additional vector in the object
544 : //
545 : // 11.) The global additional vector, re-ordered to be
546 : // node-major (More on this later.)
547 314 : parallel_object_only();
548 628 : std::string comment; // scratch for the comment line following each vector (read on rank 0 only)
549 :
550 : // PerfLog pl("IO Performance",false);
551 : // pl.push("read_serialized_data");
552 : // std::size_t total_read_size = 0;
553 :
554 : // 10.)
555 : // Read the global solution vector
556 : {
557 : // total_read_size +=
558 10929 : this->read_serialized_vector<InValType>(io, this->solution.get());
559 :
560 : // get the comment
561 11243 : if (this->processor_id() == 0)
562 1723 : io.comment (comment);
563 : }
564 :
565 : // 11.)
566 : // Only read additional vectors if data is available, and only use
567 : // that data to fill our vectors if the user requested it.
568 10929 : if (this->_additional_data_written)
569 : {
570 304 : const std::size_t nvecs = this->_vectors.size();
571 :
572 : // If the number of additional vectors written is non-zero, and
573 : // the number of additional vectors we have is non-zero, and
574 : // they don't match, then we can't read additional vectors
575 : // and be sure we're reading data into the correct places.
576 10640 : if (read_additional_data && nvecs &&
577 10640 : nvecs != this->_additional_data_written)
578 0 : libmesh_error_msg
579 : ("Additional vectors in file do not match system");
580 :
581 304 : auto pos = _vectors.begin();
582 :
583 82740 : for (std::size_t i = 0; i != this->_additional_data_written; ++i)
584 : {
585 : // Read data, but only put it into a vector if we've been
586 : // asked to and if we have a corresponding vector to read.
587 :
588 : // total_read_size +=
589 8240 : this->read_serialized_vector<InValType>
590 138020 : (io, (read_additional_data && nvecs) ? pos->second.get() : nullptr);
591 :
592 : // get the comment
593 74160 : if (this->processor_id() == 0)
594 11330 : io.comment (comment);
595 :
596 :
597 : // If we've got vectors then we need to be iterating through
598 : // those too
599 72100 : if (pos != this->_vectors.end())
600 2060 : ++pos;
601 : }
602 : }
603 :
604 : // const Real
605 : // dt = pl.get_elapsed_time(),
606 : // rate = total_read_size*sizeof(Number)/dt;
607 :
608 : // libMesh::out << "Read " << total_read_size << " \"Number\" values\n"
609 : // << " Elapsed time = " << dt << '\n'
610 : // << " Rate = " << rate/1.e6 << "(MB/sec)\n\n";
611 :
612 : // pl.pop("read_serialized_data");
613 10929 : }
614 :
615 :
616 :
617 : template <typename iterator_type, typename InValType>
618 166626 : std::size_t System::read_serialized_blocked_dof_objects (const dof_id_type n_objs,
619 : const iterator_type begin,
620 : const iterator_type end,
621 : const InValType ,
622 : Xdr & io,
623 : const std::vector<NumericVector<Number> *> & vecs,
624 : const unsigned int var_to_read) const
625 : {
626 : //-------------------------------------------------------
627 : // General order: (IO format 0.7.4 & greater)
628 : //
629 : // for (objects ...)
630 : // for (vecs ....)
631 : // for (vars ....)
632 : // for (comps ...)
633 : //
634 : // where objects are nodes or elements, sorted to be
635 : // partition independent,
636 : // vecs are one or more *identically distributed* solution
637 : // coefficient vectors, vars are one or more variables
638 : // to write, and comps are all the components for said
639 : // vars on the object.
640 :
641 : // variables to read. Unless specified otherwise, defaults to _written_var_indices.
642 171390 : std::vector<unsigned int> vars_to_read (_written_var_indices);
643 :
644 166626 : if (var_to_read != libMesh::invalid_uint)
645 : {
646 0 : vars_to_read.clear();
647 0 : vars_to_read.push_back(var_to_read);
648 : }
649 :
650 : const unsigned int
651 9528 : sys_num = this->number(),
652 9528 : num_vecs = cast_int<unsigned int>(vecs.size());
653 : const dof_id_type
654 171406 : io_blksize = cast_int<dof_id_type>(std::min(max_io_blksize, static_cast<std::size_t>(n_objs))),
655 166626 : num_blks = cast_int<unsigned int>(std::ceil(static_cast<double>(n_objs)/
656 157098 : static_cast<double>(io_blksize)));
657 :
658 4764 : libmesh_assert_less_equal (_written_var_indices.size(), this->n_vars());
659 :
660 4764 : std::size_t n_read_values=0;
661 :
662 176154 : std::vector<std::vector<dof_id_type>> xfer_ids(num_blks); // The global IDs and # of components for the local objects in all blocks
663 176154 : std::vector<std::vector<Number>> recv_vals(num_blks); // The raw values for the local objects in all blocks
664 : std::vector<Parallel::Request>
665 180918 : id_requests(num_blks), val_requests(num_blks);
666 : std::vector<Parallel::MessageTag>
667 180918 : id_tags(num_blks), val_tags(num_blks);
668 :
669 : // ------------------------------------------------------
670 : // First pass - count the number of objects in each block
671 : // traverse all the objects and figure out which block they
672 : // will ultimately live in.
673 : std::vector<std::size_t>
674 171390 : xfer_ids_size (num_blks,0),
675 171390 : recv_vals_size (num_blks,0);
676 :
677 :
678 32608015 : for (iterator_type it=begin; it!=end; ++it)
679 : {
680 : const dof_id_type
681 32441389 : id = (*it)->id(),
682 32441389 : block = id/io_blksize;
683 :
684 2948988 : libmesh_assert_less (block, num_blks);
685 :
686 35390377 : xfer_ids_size[block] += 2; // for each object, we send its id, as well as the total number of components for all variables
687 :
688 2948988 : dof_id_type n_comp_tot=0;
689 65131574 : for (const auto & var : vars_to_read)
690 32690185 : n_comp_tot += (*it)->n_comp(sys_num, var); // for each variable, we will receive the nonzero components
691 :
692 35390377 : recv_vals_size[block] += n_comp_tot*num_vecs;
693 : }
694 :
695 : // knowing the recv_vals_size[block] for each processor allows
696 : // us to sum them and find the global size for each block.
697 171390 : std::vector<std::size_t> tot_vals_size(recv_vals_size);
698 166626 : this->comm().sum (tot_vals_size);
699 :
700 :
701 : //------------------------------------------
702 : // Collect the ids & number of values needed
703 : // for all local objects, binning them into
704 : // 'blocks' that will be sent to processor 0
705 333252 : for (dof_id_type blk=0; blk<num_blks; blk++)
706 : {
707 : // Each processor should build up its transfer buffers for its
708 : // local objects in [first_object,last_object).
709 : const dof_id_type
710 166626 : first_object = blk*io_blksize,
711 166626 : last_object = std::min(cast_int<dof_id_type>((blk+1)*io_blksize), n_objs);
712 :
713 : // convenience
714 9544 : std::vector<dof_id_type> & ids (xfer_ids[blk]);
715 9528 : std::vector<Number> & vals (recv_vals[blk]);
716 :
717 : // we now know the number of values we will store for each block,
718 : // so we can do efficient preallocation
719 171390 : ids.clear(); /**/ ids.reserve (xfer_ids_size[blk]);
720 171390 : vals.resize(recv_vals_size[blk]);
721 :
722 : #ifdef DEBUG
723 9528 : std::unordered_set<dof_id_type> seen_ids;
724 : #endif
725 :
726 171390 : if (recv_vals_size[blk] != 0) // only if there are nonzero values to receive
727 25987249 : for (iterator_type it=begin; it!=end; ++it)
728 : {
729 14263421 : dof_id_type id = (*it)->id();
730 : #ifdef DEBUG
731 : // Any renumbering tricks should not have given us any
732 : // duplicate ids.
733 1296252 : libmesh_assert(!seen_ids.count(id));
734 1296252 : seen_ids.insert(id);
735 : #endif
736 :
737 14263421 : if ((id >= first_object) && // object in [first_object,last_object)
738 1296252 : (id < last_object))
739 : {
740 14263421 : ids.push_back(id);
741 :
742 1296252 : unsigned int n_comp_tot=0;
743 :
744 28822090 : for (const auto & var : vars_to_read)
745 14558669 : n_comp_tot += (*it)->n_comp(sys_num, var);
746 :
747 14263421 : ids.push_back (n_comp_tot*num_vecs);
748 : }
749 : }
750 :
751 : #ifdef LIBMESH_HAVE_MPI
752 166626 : id_tags[blk] = this->comm().get_unique_tag(100*num_blks + blk);
753 166626 : val_tags[blk] = this->comm().get_unique_tag(200*num_blks + blk);
754 :
755 : // nonblocking send the data for this block
756 171390 : this->comm().send (0, ids, id_requests[blk], id_tags[blk]);
757 :
758 : // Go ahead and post the receive too
759 171390 : this->comm().receive (0, vals, val_requests[blk], val_tags[blk]);
760 : #endif
761 : }
762 :
763 : //---------------------------------------------------
764 : // Here processor 0 will read and distribute the data.
765 : // We have to do this block-wise to ensure that we
766 : // do not exhaust memory on processor 0.
767 :
768 : // give these variables scope outside the block to avoid reallocation
769 180918 : std::vector<std::vector<dof_id_type>> recv_ids (this->n_processors());
770 180918 : std::vector<std::vector<Number>> send_vals (this->n_processors());
771 180918 : std::vector<Parallel::Request> reply_requests (this->n_processors());
772 9528 : std::vector<unsigned int> obj_val_offsets; // map to traverse entry-wise rather than processor-wise
773 9528 : std::vector<Number> input_vals; // The input buffer for the current block
774 4764 : std::vector<InValType> input_vals_tmp; // The input buffer for the current block
775 :
776 333252 : for (dof_id_type blk=0; blk<num_blks; blk++)
777 : {
778 : // Each processor should build up its transfer buffers for its
779 : // local objects in [first_object,last_object).
780 : const dof_id_type
781 166626 : first_object = blk*io_blksize,
782 166626 : last_object = std::min(cast_int<dof_id_type>((blk+1)*io_blksize), n_objs),
783 166626 : n_objects_blk = last_object - first_object;
784 :
785 : // Processor 0 has a special job. It needs to gather the requested indices
786 : // in [first_object,last_object) from all processors, read the data from
787 : // disk, and reply
788 171390 : if (this->processor_id() == 0)
789 : {
790 : // we know the input buffer size for this block and can begin reading it now
791 28584 : input_vals.resize(tot_vals_size[blk]);
792 28584 : input_vals_tmp.resize(tot_vals_size[blk]);
793 :
794 : // a ThreadedIO object to perform asynchronous file IO
795 2382 : ThreadedIO<InValType> threaded_io(io, input_vals_tmp);
796 28584 : Threads::Thread async_io(threaded_io);
797 :
798 : // offset array. this will define where each object's values
799 : // map into the actual input_vals buffer. this must get
800 : // 0-initialized because 0-component objects are not actually sent
801 26202 : obj_val_offsets.resize (n_objects_blk); /**/ std::fill (obj_val_offsets.begin(), obj_val_offsets.end(), 0);
802 28584 : recv_vals_size.resize(this->n_processors()); // reuse this to count how many values are going to each processor
803 :
804 : #ifndef NDEBUG
805 2382 : std::size_t n_vals_blk = 0;
806 : #endif
807 :
808 : // loop over all processors and process their index request
809 192828 : for (processor_id_type comm_step=0, tnp=this->n_processors(); comm_step != tnp; ++comm_step)
810 : {
811 : #ifdef LIBMESH_HAVE_MPI
812 : // blocking receive indices for this block, imposing no particular order on processor
813 171390 : Parallel::Status id_status (this->comm().probe (Parallel::any_source, id_tags[blk]));
814 166626 : std::vector<dof_id_type> & ids (recv_ids[id_status.source()]);
815 9528 : std::size_t & n_vals_proc (recv_vals_size[id_status.source()]);
816 171390 : this->comm().receive (id_status.source(), ids, id_tags[blk]);
817 : #else
818 : // straight copy without MPI
819 : std::vector<dof_id_type> & ids (recv_ids[0]);
820 : std::size_t & n_vals_proc (recv_vals_size[0]);
821 : ids = xfer_ids[blk];
822 : #endif
823 :
824 166626 : n_vals_proc = 0;
825 :
826 : // note its possible we didn't receive values for objects in
827 : // this block if they have no components allocated.
828 14430047 : for (std::size_t idx=0, sz=ids.size(); idx<sz; idx+=2)
829 : {
830 : const dof_id_type
831 14263421 : local_idx = ids[idx+0]-first_object,
832 14263421 : n_vals_tot_allvecs = ids[idx+1];
833 :
834 1296252 : libmesh_assert_less (local_idx, n_objects_blk);
835 :
836 14263421 : obj_val_offsets[local_idx] = n_vals_tot_allvecs;
837 14263421 : n_vals_proc += n_vals_tot_allvecs;
838 : }
839 :
840 : #ifndef NDEBUG
841 4764 : n_vals_blk += n_vals_proc;
842 : #endif
843 : }
844 :
845 : // We need the offsets into the input_vals vector for each object.
846 : // fortunately, this is simply the partial sum of the total number
847 : // of components for each object
848 23820 : std::partial_sum(obj_val_offsets.begin(), obj_val_offsets.end(),
849 : obj_val_offsets.begin());
850 :
851 2382 : libmesh_assert_equal_to (n_vals_blk, obj_val_offsets.back());
852 2382 : libmesh_assert_equal_to (n_vals_blk, tot_vals_size[blk]);
853 :
854 : // Wait for read completion
855 26202 : async_io.join();
856 : // now copy the values back to the main vector for transfer
857 15619455 : for (auto i_val : index_range(input_vals))
858 18408431 : input_vals[i_val] = input_vals_tmp[i_val];
859 :
860 26202 : n_read_values += input_vals.size();
861 :
862 : // pack data replies for each processor
863 192828 : for (auto proc : make_range(this->n_processors()))
864 : {
865 166626 : const std::vector<dof_id_type> & ids (recv_ids[proc]);
866 9528 : std::vector<Number> & vals (send_vals[proc]);
867 9528 : const std::size_t & n_vals_proc (recv_vals_size[proc]);
868 :
869 166626 : vals.clear(); /**/ vals.reserve(n_vals_proc);
870 :
871 14430047 : for (std::size_t idx=0, sz=ids.size(); idx<sz; idx+=2)
872 : {
873 : const dof_id_type
874 14263421 : local_idx = ids[idx+0]-first_object,
875 15559673 : n_vals_tot_allvecs = ids[idx+1];
876 :
877 1296252 : std::vector<Number>::const_iterator in_vals(input_vals.begin());
878 14263421 : if (local_idx != 0)
879 15545368 : std::advance (in_vals, obj_val_offsets[local_idx-1]);
880 :
881 29856674 : for (unsigned int val=0; val<n_vals_tot_allvecs; val++, ++in_vals)
882 : {
883 1407589 : libmesh_assert (in_vals != input_vals.end());
884 : //libMesh::out << "*in_vals=" << *in_vals << '\n';
885 15593253 : vals.push_back(*in_vals);
886 : }
887 : }
888 :
889 : #ifdef LIBMESH_HAVE_MPI
890 : // send the relevant values to this processor
891 171390 : this->comm().send (proc, vals, reply_requests[proc], val_tags[blk]);
892 : #else
893 : recv_vals[blk] = vals;
894 : #endif
895 : }
896 : } // end processor 0 read/reply
897 :
898 : // all processors complete the (already posted) read for this block
899 : {
900 9544 : Parallel::wait (val_requests[blk]);
901 :
902 9528 : const std::vector<Number> & vals (recv_vals[blk]);
903 9528 : std::vector<Number>::const_iterator val_it(vals.begin());
904 :
905 166626 : if (!recv_vals[blk].empty()) // nonzero values to receive
906 25987249 : for (iterator_type it=begin; it!=end; ++it)
907 27230590 : if (((*it)->id() >= first_object) && // object in [first_object,last_object)
908 14263421 : ((*it)->id() < last_object))
909 : // unpack & set the values
910 30321254 : for (auto & vec : vecs)
911 34524354 : for (const auto & var : vars_to_read)
912 : {
913 18466521 : const unsigned int n_comp = (*it)->n_comp(sys_num, var);
914 :
915 34059774 : for (unsigned int comp=0; comp<n_comp; comp++, ++val_it)
916 : {
917 15593253 : const dof_id_type dof_index = (*it)->dof_number (sys_num, var, comp);
918 1407589 : libmesh_assert (val_it != vals.end());
919 15593253 : if (vec)
920 : {
921 1407589 : libmesh_assert_greater_equal (dof_index, vec->first_local_index());
922 1407589 : libmesh_assert_less (dof_index, vec->last_local_index());
923 : //libMesh::out << "dof_index, *val_it = \t" << dof_index << ", " << *val_it << '\n';
924 15593253 : vec->set (dof_index, *val_it);
925 : }
926 : }
927 : }
928 : }
929 :
930 : // processor 0 needs to make sure all replies have been handed off
931 171390 : if (this->processor_id () == 0)
932 26202 : Parallel::wait(reply_requests);
933 : }
934 :
935 166626 : Parallel::wait(id_requests);
936 :
937 171390 : return n_read_values;
938 314196 : }
939 :
940 :
941 :
// Reads the coefficients of SCALAR variable 'var' from 'io' and, if
// 'vec' is non-null, assigns them into 'vec'.  SCALAR dofs are stored
// on the last processor, so processor 0 (which owns the stream)
// reads the block and forwards it there.  Returns the number of
// values assigned on this processor (nonzero only on the last rank).
unsigned int System::read_SCALAR_dofs (const unsigned int var,
                                       Xdr & io,
                                       NumericVector<Number> * vec) const
{
  unsigned int n_assigned_vals = 0; // the number of values assigned, this will be returned.

  // Processor 0 will read the block from the buffer stream and send it to the last processor.
  // The number of SCALAR dofs for this variable equals its order.
  const unsigned int n_SCALAR_dofs = this->variable(var).type().order.get_order();
  std::vector<Number> input_buffer(n_SCALAR_dofs);
  if (this->processor_id() == 0)
    io.data_stream(input_buffer.data(), n_SCALAR_dofs);

#ifdef LIBMESH_HAVE_MPI
  if (this->n_processors() > 1)
    {
      const Parallel::MessageTag val_tag = this->comm().get_unique_tag();

      // Blocking receive on the last processor
      if (this->processor_id() == (this->n_processors()-1))
        this->comm().receive(0, input_buffer, val_tag);

      // Processor 0 sends the data to the last processor
      if (this->processor_id() == 0)
        this->comm().send(this->n_processors()-1, input_buffer, val_tag);
    }
#endif

  // Finally, set the SCALAR values (only the last processor owns them)
  if (this->processor_id() == (this->n_processors()-1))
    {
      const DofMap & dof_map = this->get_dof_map();
      std::vector<dof_id_type> SCALAR_dofs;
      dof_map.SCALAR_dof_indices(SCALAR_dofs, var);

      for (auto i : index_range(SCALAR_dofs))
        {
          if (vec)
            vec->set (SCALAR_dofs[i], input_buffer[i]);
          ++n_assigned_vals;
        }
    }

  return n_assigned_vals;
}
986 :
987 :
// Reads a serialized (processor-0-owned) vector from 'io' into the
// distributed vector 'vec' (which may be null to skip assignment).
// The on-disk layout depends on the file version: 0.7.4 and newer
// store values node/element-major; older files store them
// variable-major.  SCALAR variables are handled separately at the
// end.  Returns the total vector length read from the header.
//
// NOTE(review): the n_assigned_vals bookkeeping is interleaved with
// the calls via #ifndef NDEBUG so the "n_assigned_vals +=" prefix
// attaches to the call expression only in debug builds — do not
// reorder these fragments.
template <typename InValType>
numeric_index_type System::read_serialized_vector (Xdr & io,
                                                   NumericVector<Number> * vec)
{
  parallel_object_only();

#ifndef NDEBUG
  // In parallel we better be reading a parallel vector -- if not
  // we will not set all of its components below!!
  if (this->n_processors() > 1 && vec)
    {
      libmesh_assert (vec->type() == PARALLEL ||
                      vec->type() == GHOSTED);
    }
#endif

  libmesh_assert (io.reading());

  // vector length
  unsigned int vector_length=0; // FIXME? size_t would break binary compatibility...
#ifndef NDEBUG
  std::size_t n_assigned_vals=0;
#endif

  // Get the buffer size (only proc 0 reads the stream; broadcast to all)
  if (this->processor_id() == 0)
    io.data(vector_length, "# vector length");
  this->comm().broadcast(vector_length);

  // Number of variables recorded when the file was written; may be
  // fewer than the system currently has.
  const unsigned int nv = cast_int<unsigned int>
    (this->_written_var_indices.size());
  const dof_id_type
    n_nodes = this->get_mesh().n_nodes(),
    n_elem = this->get_mesh().n_elem();

  libmesh_assert_less_equal (nv, this->n_vars());

  // for newer versions, read variables node/elem major
  if (io.version() >= LIBMESH_VERSION_ID(0,7,4))
    {
      //---------------------------------
      // Collect the values for all nodes
#ifndef NDEBUG
      n_assigned_vals +=
#endif
        this->read_serialized_blocked_dof_objects (n_nodes,
                                                   this->get_mesh().local_nodes_begin(),
                                                   this->get_mesh().local_nodes_end(),
                                                   InValType(),
                                                   io,
                                                   std::vector<NumericVector<Number> *> (1,vec));


      //------------------------------------
      // Collect the values for all elements
#ifndef NDEBUG
      n_assigned_vals +=
#endif
        this->read_serialized_blocked_dof_objects (n_elem,
                                                   this->get_mesh().local_elements_begin(),
                                                   this->get_mesh().local_elements_end(),
                                                   InValType(),
                                                   io,
                                                   std::vector<NumericVector<Number> *> (1,vec));
    }

  // for older versions, read variables var-major
  else
    {
      // Loop over each variable in the system, and then each node/element in the mesh.
      for (unsigned int data_var=0; data_var<nv; data_var++)
        {
          const unsigned int var = _written_var_indices[data_var];
          if (this->variable(var).type().family != SCALAR)
            {
              //---------------------------------
              // Collect the values for all nodes
#ifndef NDEBUG
              n_assigned_vals +=
#endif
                this->read_serialized_blocked_dof_objects (n_nodes,
                                                           this->get_mesh().local_nodes_begin(),
                                                           this->get_mesh().local_nodes_end(),
                                                           InValType(),
                                                           io,
                                                           std::vector<NumericVector<Number> *> (1,vec),
                                                           var);


              //------------------------------------
              // Collect the values for all elements
#ifndef NDEBUG
              n_assigned_vals +=
#endif
                this->read_serialized_blocked_dof_objects (n_elem,
                                                           this->get_mesh().local_elements_begin(),
                                                           this->get_mesh().local_elements_end(),
                                                           InValType(),
                                                           io,
                                                           std::vector<NumericVector<Number> *> (1,vec),
                                                           var);
            } // end variable loop
        }
    }

  //-------------------------------------------
  // Finally loop over all the SCALAR variables
  for (unsigned int data_var=0; data_var<nv; data_var++)
    {
      const unsigned int var = _written_var_indices[data_var];
      if (this->variable(var).type().family == SCALAR)
        {
#ifndef NDEBUG
          n_assigned_vals +=
#endif
            this->read_SCALAR_dofs (var, io, vec);
        }
    }

  if (vec)
    vec->close();

#ifndef NDEBUG
  // Sanity check: every value in the file should have been assigned
  // to exactly one processor.
  this->comm().sum (n_assigned_vals);
  libmesh_assert_equal_to (n_assigned_vals, vector_length);
#endif

  return vector_length;
}
1117 :
1118 :
1119 :
1120 1379 : void System::write_header (Xdr & io,
1121 : std::string_view /* version is currently unused */,
1122 : const bool write_additional_data) const
1123 : {
1124 : /**
1125 : * This method implements the output of a
1126 : * System object, embedded in the output of
1127 : * an EquationSystems<T_sys>. This warrants some
1128 : * documentation. The output of this part
1129 : * consists of 5 sections:
1130 : *
1131 : * for this system
1132 : *
1133 : * 5.) The number of variables in the system (unsigned int)
1134 : *
1135 : * for each variable in the system
1136 : *
1137 : * 6.) The name of the variable (string)
1138 : *
1139 : * 6.1.) subdomain where the variable lives
1140 : *
1141 : * 7.) Combined in an FEType:
1142 : * - The approximation order(s) of the variable
1143 : * (Order Enum, cast to int/s)
1144 : * - The finite element family/ies of the variable
1145 : * (FEFamily Enum, cast to int/s)
1146 : *
1147 : * end variable loop
1148 : *
1149 : * 8.) The number of additional vectors (unsigned int),
1150 : *
1151 : * for each additional vector in the system object
1152 : *
1153 : * 9.) the name of the additional vector (string)
1154 : *
1155 : * end system
1156 : */
1157 126 : libmesh_assert (io.writing());
1158 :
1159 :
1160 : // Only write the header information
1161 : // if we are processor 0.
1162 1505 : if (this->get_mesh().processor_id() != 0)
1163 0 : return;
1164 :
1165 252 : std::string comment;
1166 :
1167 : // 5.)
1168 : // Write the number of variables in the system
1169 :
1170 : {
1171 : // set up the comment
1172 126 : comment = "# No. of Variables in System \"";
1173 126 : comment += this->name();
1174 126 : comment += "\"";
1175 :
1176 1379 : unsigned int nv = this->n_vars();
1177 1379 : io.data (nv, comment);
1178 : }
1179 :
1180 :
1181 2830 : for (auto var : make_range(this->n_vars()))
1182 : {
1183 : // 6.)
1184 : // Write the name of the var-th variable
1185 : {
1186 : // set up the comment
1187 132 : comment = "# Name, Variable No. ";
1188 2638 : comment += std::to_string(var);
1189 132 : comment += ", System \"";
1190 132 : comment += this->name();
1191 132 : comment += "\"";
1192 :
1193 1583 : std::string var_name = this->variable_name(var);
1194 1451 : io.data (var_name, comment);
1195 : }
1196 :
1197 : // 6.1.) Variable subdomains
1198 : {
1199 : // set up the comment
1200 132 : comment = "# Subdomains, Variable \"";
1201 1451 : comment += this->variable_name(var);
1202 132 : comment += "\", System \"";
1203 132 : comment += this->name();
1204 132 : comment += "\"";
1205 :
1206 1451 : const std::set<subdomain_id_type> & domains = this->variable(var).active_subdomains();
1207 264 : std::vector<subdomain_id_type> domain_array;
1208 1319 : domain_array.assign(domains.begin(), domains.end());
1209 1451 : io.data (domain_array, comment);
1210 : }
1211 :
1212 : // 7.)
1213 : // Write the approximation order of the var-th variable
1214 : // in this system
1215 : {
1216 : // set up the comment
1217 132 : comment = "# Approximation Order, Variable \"";
1218 1451 : comment += this->variable_name(var);
1219 132 : comment += "\", System \"";
1220 132 : comment += this->name();
1221 132 : comment += "\"";
1222 :
1223 1451 : int order = static_cast<int>(this->variable_type(var).order);
1224 1451 : io.data (order, comment);
1225 : }
1226 :
1227 :
1228 : #ifdef LIBMESH_ENABLE_INFINITE_ELEMENTS
1229 :
1230 : // do the same for radial_order
1231 : {
1232 132 : comment = "# Radial Approximation Order, Variable \"";
1233 280 : comment += this->variable_name(var);
1234 132 : comment += "\", System \"";
1235 132 : comment += this->name();
1236 132 : comment += "\"";
1237 :
1238 280 : int rad_order = static_cast<int>(this->variable_type(var).radial_order);
1239 280 : io.data (rad_order, comment);
1240 : }
1241 :
1242 : #endif
1243 :
1244 : // Write the Finite Element type of the var-th variable
1245 : // in this System
1246 : {
1247 : // set up the comment
1248 132 : comment = "# FE Family, Variable \"";
1249 1451 : comment += this->variable_name(var);
1250 132 : comment += "\", System \"";
1251 132 : comment += this->name();
1252 132 : comment += "\"";
1253 :
1254 1451 : const FEType & type = this->variable_type(var);
1255 1451 : int fam = static_cast<int>(type.family);
1256 1451 : io.data (fam, comment);
1257 :
1258 : #ifdef LIBMESH_ENABLE_INFINITE_ELEMENTS
1259 :
1260 132 : comment = "# Radial FE Family, Variable \"";
1261 280 : comment += this->variable_name(var);
1262 132 : comment += "\", System \"";
1263 132 : comment += this->name();
1264 132 : comment += "\"";
1265 :
1266 280 : int radial_fam = static_cast<int>(type.radial_family);
1267 280 : io.data (radial_fam, comment);
1268 :
1269 132 : comment = "# Infinite Mapping Type, Variable \"";
1270 280 : comment += this->variable_name(var);
1271 132 : comment += "\", System \"";
1272 132 : comment += this->name();
1273 132 : comment += "\"";
1274 :
1275 280 : int i_map = static_cast<int>(type.inf_map);
1276 280 : io.data (i_map, comment);
1277 : #endif
1278 : }
1279 : } // end of the variable loop
1280 :
1281 : // 8.)
1282 : // Write the number of additional vectors in the System.
1283 : // If write_additional_data==false, then write zero for
1284 : // the number of additional vectors.
1285 : {
1286 : {
1287 : // set up the comment
1288 126 : comment = "# No. of Additional Vectors, System \"";
1289 126 : comment += this->name();
1290 126 : comment += "\"";
1291 :
1292 2539 : unsigned int nvecs = write_additional_data ? this->n_vectors () : 0;
1293 1379 : io.data (nvecs, comment);
1294 : }
1295 :
1296 1379 : if (write_additional_data)
1297 : {
1298 116 : unsigned int cnt=0;
1299 9768 : for (const auto & [vec_name, vec] : _vectors)
1300 : {
1301 : // 9.)
1302 : // write the name of the cnt-th additional vector
1303 9264 : const std::string dth_vector = std::to_string(cnt++)+"th vector";
1304 9264 : comment = "# Name of " + dth_vector;
1305 8492 : std::string nonconst_vec_name = vec_name; // Stupid XDR API
1306 :
1307 8492 : io.data (nonconst_vec_name, comment);
1308 8492 : int vec_projection = _vector_projections.at(vec_name);
1309 16212 : comment = "# Whether to do projections for " + dth_vector;
1310 8492 : io.data (vec_projection, comment);
1311 8492 : int vec_type = vec->type();
1312 16212 : comment = "# Parallel type of " + dth_vector;
1313 8492 : io.data (vec_type, comment);
1314 : }
1315 : }
1316 : }
1317 : }
1318 :
1319 :
1320 :
1321 0 : void System::write_parallel_data (Xdr & io,
1322 : const bool write_additional_data) const
1323 : {
1324 : /**
1325 : * This method implements the output of the vectors
1326 : * contained in this System object, embedded in the
1327 : * output of an EquationSystems<T_sys>.
1328 : *
1329 : * 9.) The global solution vector, re-ordered to be node-major
1330 : * (More on this later.)
1331 : *
1332 : * for each additional vector in the object
1333 : *
1334 : * 10.) The global additional vector, re-ordered to be
1335 : * node-major (More on this later.)
1336 : *
1337 : * Note that the actual IO is handled through the Xdr class
1338 : * (to be renamed later?) which provides a uniform interface to
1339 : * both the XDR (eXternal Data Representation) interface and standard
1340 : * ASCII output. Thus this one section of code will read XDR or ASCII
1341 : * files with no changes.
1342 : */
1343 : // PerfLog pl("IO Performance",false);
1344 : // pl.push("write_parallel_data");
1345 : // std::size_t total_written_size = 0;
1346 :
1347 0 : std::string comment;
1348 :
1349 0 : libmesh_assert (io.writing());
1350 :
1351 0 : std::vector<Number> io_buffer; io_buffer.reserve(this->solution->local_size());
1352 :
1353 : // build the ordered nodes and element maps.
1354 : // when writing/reading parallel files we need to iterate
1355 : // over our nodes/elements in order of increasing global id().
1356 : // however, this is not guaranteed to be ordering we obtain
1357 : // by using the node_iterators/element_iterators directly.
1358 : // so build a set, sorted by id(), that provides the ordering.
1359 : // further, for memory economy build the set but then transfer
1360 : // its contents to vectors, which will be sorted.
1361 0 : std::vector<const DofObject *> ordered_nodes, ordered_elements;
1362 : {
1363 : std::set<const DofObject *, CompareDofObjectsByID>
1364 0 : ordered_nodes_set (this->get_mesh().local_nodes_begin(),
1365 0 : this->get_mesh().local_nodes_end());
1366 :
1367 0 : ordered_nodes.insert(ordered_nodes.end(),
1368 : ordered_nodes_set.begin(),
1369 0 : ordered_nodes_set.end());
1370 : }
1371 : {
1372 : std::set<const DofObject *, CompareDofObjectsByID>
1373 0 : ordered_elements_set (this->get_mesh().local_elements_begin(),
1374 0 : this->get_mesh().local_elements_end());
1375 :
1376 0 : ordered_elements.insert(ordered_elements.end(),
1377 : ordered_elements_set.begin(),
1378 0 : ordered_elements_set.end());
1379 : }
1380 :
1381 0 : const unsigned int sys_num = this->number();
1382 0 : const unsigned int nv = this->n_vars();
1383 :
1384 : // Loop over each non-SCALAR variable and each node, and write out the value.
1385 0 : for (unsigned int var=0; var<nv; var++)
1386 0 : if (this->variable(var).type().family != SCALAR)
1387 : {
1388 : // First write the node DOF values
1389 0 : for (const auto & node : ordered_nodes)
1390 0 : for (auto comp : make_range(node->n_comp(sys_num,var)))
1391 : {
1392 0 : libmesh_assert_not_equal_to (node->dof_number(sys_num, var, comp),
1393 : DofObject::invalid_id);
1394 :
1395 0 : io_buffer.push_back((*this->solution)(node->dof_number(sys_num, var, comp)));
1396 : }
1397 :
1398 : // Then write the element DOF values
1399 0 : for (const auto & elem : ordered_elements)
1400 0 : for (auto comp : make_range(elem->n_comp(sys_num,var)))
1401 : {
1402 0 : libmesh_assert_not_equal_to (elem->dof_number(sys_num, var, comp),
1403 : DofObject::invalid_id);
1404 :
1405 0 : io_buffer.push_back((*this->solution)(elem->dof_number(sys_num, var, comp)));
1406 : }
1407 : }
1408 :
1409 : // Finally, write the SCALAR data on the last processor
1410 0 : for (auto var : make_range(this->n_vars()))
1411 0 : if (this->variable(var).type().family == SCALAR)
1412 : {
1413 0 : if (this->processor_id() == (this->n_processors()-1))
1414 : {
1415 0 : const DofMap & dof_map = this->get_dof_map();
1416 0 : std::vector<dof_id_type> SCALAR_dofs;
1417 0 : dof_map.SCALAR_dof_indices(SCALAR_dofs, var);
1418 :
1419 0 : for (auto dof : SCALAR_dofs)
1420 0 : io_buffer.push_back((*this->solution)(dof));
1421 : }
1422 : }
1423 :
1424 : // 9.)
1425 : //
1426 : // Actually write the reordered solution vector
1427 : // for the ith system to disk
1428 :
1429 : // set up the comment
1430 : {
1431 0 : comment = "# System \"";
1432 0 : comment += this->name();
1433 0 : comment += "\" Solution Vector";
1434 : }
1435 :
1436 0 : io.data (io_buffer, comment);
1437 :
1438 : // total_written_size += io_buffer.size();
1439 :
1440 : // Only write additional vectors if wanted
1441 0 : if (write_additional_data)
1442 : {
1443 0 : for (auto & [vec_name, vec] : _vectors)
1444 : {
1445 0 : io_buffer.clear();
1446 0 : io_buffer.reserve(vec->local_size());
1447 :
1448 : // Loop over each non-SCALAR variable and each node, and write out the value.
1449 0 : for (unsigned int var=0; var<nv; var++)
1450 0 : if (this->variable(var).type().family != SCALAR)
1451 : {
1452 : // First write the node DOF values
1453 0 : for (const auto & node : ordered_nodes)
1454 0 : for (auto comp : make_range(node->n_comp(sys_num,var)))
1455 : {
1456 0 : libmesh_assert_not_equal_to (node->dof_number(sys_num, var, comp),
1457 : DofObject::invalid_id);
1458 :
1459 0 : io_buffer.push_back((*vec)(node->dof_number(sys_num, var, comp)));
1460 : }
1461 :
1462 : // Then write the element DOF values
1463 0 : for (const auto & elem : ordered_elements)
1464 0 : for (auto comp : make_range(elem->n_comp(sys_num,var)))
1465 : {
1466 0 : libmesh_assert_not_equal_to (elem->dof_number(sys_num, var, comp),
1467 : DofObject::invalid_id);
1468 :
1469 0 : io_buffer.push_back((*vec)(elem->dof_number(sys_num, var, comp)));
1470 : }
1471 : }
1472 :
1473 : // Finally, write the SCALAR data on the last processor
1474 0 : for (auto var : make_range(this->n_vars()))
1475 0 : if (this->variable(var).type().family == SCALAR)
1476 : {
1477 0 : if (this->processor_id() == (this->n_processors()-1))
1478 : {
1479 0 : const DofMap & dof_map = this->get_dof_map();
1480 0 : std::vector<dof_id_type> SCALAR_dofs;
1481 0 : dof_map.SCALAR_dof_indices(SCALAR_dofs, var);
1482 :
1483 0 : for (auto dof : SCALAR_dofs)
1484 0 : io_buffer.push_back((*vec)(dof));
1485 : }
1486 : }
1487 :
1488 : // 10.)
1489 : //
1490 : // Actually write the reordered additional vector
1491 : // for this system to disk
1492 :
1493 : // set up the comment
1494 : {
1495 0 : comment = "# System \"";
1496 0 : comment += this->name();
1497 0 : comment += "\" Additional Vector \"";
1498 0 : comment += vec_name;
1499 0 : comment += "\"";
1500 : }
1501 :
1502 0 : io.data (io_buffer, comment);
1503 :
1504 : // total_written_size += io_buffer.size();
1505 : }
1506 : }
1507 :
1508 : // const Real
1509 : // dt = pl.get_elapsed_time(),
1510 : // rate = total_written_size*sizeof(Number)/dt;
1511 :
1512 : // libMesh::err << "Write " << total_written_size << " \"Number\" values\n"
1513 : // << " Elapsed time = " << dt << '\n'
1514 : // << " Rate = " << rate/1.e6 << "(MB/sec)\n\n";
1515 :
1516 : // pl.pop("write_parallel_data");
1517 0 : }
1518 :
1519 :
1520 :
1521 8415 : void System::write_serialized_data (Xdr & io,
1522 : const bool write_additional_data) const
1523 : {
1524 : /**
1525 : * This method implements the output of the vectors
1526 : * contained in this System object, embedded in the
1527 : * output of an EquationSystems<T_sys>.
1528 : *
1529 : * 9.) The global solution vector, re-ordered to be node-major
1530 : * (More on this later.)
1531 : *
1532 : * for each additional vector in the object
1533 : *
1534 : * 10.) The global additional vector, re-ordered to be
1535 : * node-major (More on this later.)
1536 : */
1537 244 : parallel_object_only();
1538 488 : std::string comment;
1539 :
1540 : // PerfLog pl("IO Performance",false);
1541 : // pl.push("write_serialized_data");
1542 : // std::size_t total_written_size = 0;
1543 :
1544 : // total_written_size +=
1545 8415 : this->write_serialized_vector(io, *this->solution);
1546 :
1547 : // set up the comment
1548 8659 : if (this->processor_id() == 0)
1549 : {
1550 122 : comment = "# System \"";
1551 122 : comment += this->name();
1552 122 : comment += "\" Solution Vector";
1553 :
1554 1331 : io.comment (comment);
1555 : }
1556 :
1557 : // Only write additional vectors if wanted
1558 8415 : if (write_additional_data)
1559 : {
1560 62160 : for (auto & pair : this->_vectors)
1561 : {
1562 : // total_written_size +=
1563 54040 : this->write_serialized_vector(io, *pair.second);
1564 :
1565 : // set up the comment
1566 55584 : if (this->processor_id() == 0)
1567 : {
1568 772 : comment = "# System \"";
1569 772 : comment += this->name();
1570 772 : comment += "\" Additional Vector \"";
1571 772 : comment += pair.first;
1572 772 : comment += "\"";
1573 8492 : io.comment (comment);
1574 : }
1575 : }
1576 : }
1577 :
1578 : // const Real
1579 : // dt = pl.get_elapsed_time(),
1580 : // rate = total_written_size*sizeof(Number)/dt;
1581 :
1582 : // libMesh::out << "Write " << total_written_size << " \"Number\" values\n"
1583 : // << " Elapsed time = " << dt << '\n'
1584 : // << " Rate = " << rate/1.e6 << "(MB/sec)\n\n";
1585 :
1586 : // pl.pop("write_serialized_data");
1587 :
1588 :
1589 :
1590 :
1591 : // // test the new method
1592 : // {
1593 : // std::vector<std::string> names;
1594 : // std::vector<NumericVector<Number> *> vectors_to_write;
1595 :
1596 : // names.push_back("Solution Vector");
1597 : // vectors_to_write.push_back(this->solution.get());
1598 :
1599 : // // Only write additional vectors if wanted
1600 : // if (write_additional_data)
1601 : // {
1602 : // std::map<std::string, NumericVector<Number> *>::const_iterator
1603 : // pos = _vectors.begin();
1604 :
1605 : // for (; pos != this->_vectors.end(); ++pos)
1606 : // {
1607 : // names.push_back("Additional Vector " + pos->first);
1608 : // vectors_to_write.push_back(pos->second);
1609 : // }
1610 : // }
1611 :
1612 : // total_written_size =
1613 : // this->write_serialized_vectors (io, names, vectors_to_write);
1614 :
1615 : // const Real
1616 : // dt2 = pl.get_elapsed_time(),
1617 : // rate2 = total_written_size*sizeof(Number)/(dt2-dt);
1618 :
1619 : // libMesh::out << "Write (new) " << total_written_size << " \"Number\" values\n"
1620 : // << " Elapsed time = " << (dt2-dt) << '\n'
1621 : // << " Rate = " << rate2/1.e6 << "(MB/sec)\n\n";
1622 :
1623 : // }
1624 8415 : }
1625 :
1626 :
1627 :
1628 : template <typename iterator_type>
1629 125478 : std::size_t System::write_serialized_blocked_dof_objects (const std::vector<const NumericVector<Number> *> & vecs,
1630 : const dof_id_type n_objs,
1631 : const iterator_type begin,
1632 : const iterator_type end,
1633 : Xdr & io,
1634 : const unsigned int var_to_write) const
1635 : {
1636 3592 : parallel_object_only();
1637 :
1638 : //-------------------------------------------------------
1639 : // General order: (IO format 0.7.4 & greater)
1640 : //
1641 : // for (objects ...)
1642 : // for (vecs ....)
1643 : // for (vars ....)
1644 : // for (comps ...)
1645 : //
1646 : // where objects are nodes or elements, sorted to be
1647 : // partition independent,
1648 : // vecs are one or more *identically distributed* solution
1649 : // coefficient vectors, vars are one or more variables
1650 : // to write, and comps are all the components for said
1651 : // vars on the object.
1652 :
1653 : // We will write all variables unless requested otherwise.
1654 129070 : std::vector<unsigned int> vars_to_write(1, var_to_write);
1655 :
1656 125478 : if (var_to_write == libMesh::invalid_uint)
1657 : {
1658 125478 : vars_to_write.clear(); /**/ vars_to_write.reserve(this->n_vars());
1659 251808 : for (auto var : make_range(this->n_vars()))
1660 126330 : vars_to_write.push_back(var);
1661 : }
1662 :
1663 : const dof_id_type io_blksize = cast_int<dof_id_type>
1664 247364 : (std::min(max_io_blksize, static_cast<std::size_t>(n_objs)));
1665 :
1666 : const unsigned int
1667 7184 : sys_num = this->number(),
1668 7184 : num_vecs = cast_int<unsigned int>(vecs.size()),
1669 125478 : num_blks = cast_int<unsigned int>(std::ceil(static_cast<double>(n_objs)/
1670 118294 : static_cast<double>(io_blksize)));
1671 :
1672 : // libMesh::out << "io_blksize = " << io_blksize
1673 : // << ", num_objects = " << n_objs
1674 : // << ", num_blks = " << num_blks
1675 : // << std::endl;
1676 :
1677 125478 : std::size_t written_length=0; // The numer of values written. This will be returned
1678 132662 : std::vector<std::vector<dof_id_type>> xfer_ids(num_blks); // The global IDs and # of components for the local objects in all blocks
1679 132662 : std::vector<std::vector<Number>> send_vals(num_blks); // The raw values for the local objects in all blocks
1680 : std::vector<Parallel::Request>
1681 136254 : id_requests(num_blks), val_requests(num_blks); // send request handle for each block
1682 : std::vector<Parallel::MessageTag>
1683 136254 : id_tags(num_blks), val_tags(num_blks); // tag number for each block
1684 :
1685 : // ------------------------------------------------------
1686 : // First pass - count the number of objects in each block
1687 : // traverse all the objects and figure out which block they
1688 : // will ultimately live in.
1689 : std::vector<unsigned int>
1690 129070 : xfer_ids_size (num_blks,0),
1691 125478 : send_vals_size (num_blks,0);
1692 :
1693 33631687 : for (iterator_type it=begin; it!=end; ++it)
1694 : {
1695 : const dof_id_type
1696 33506209 : id = (*it)->id(),
1697 33506209 : block = id/io_blksize;
1698 :
1699 3047225 : libmesh_assert_less (block, num_blks);
1700 :
1701 36553434 : xfer_ids_size[block] += 2; // for each object, we store its id, as well as the total number of components for all variables
1702 :
1703 3047225 : unsigned int n_comp_tot=0;
1704 :
1705 67179242 : for (const auto & var : vars_to_write)
1706 33673033 : n_comp_tot += (*it)->n_comp(sys_num, var); // for each variable, we will store the nonzero components
1707 :
1708 36553434 : send_vals_size[block] += n_comp_tot*num_vecs;
1709 : }
1710 :
1711 : //-----------------------------------------
1712 : // Collect the values for all local objects,
1713 : // binning them into 'blocks' that will be
1714 : // sent to processor 0
1715 250956 : for (unsigned int blk=0; blk<num_blks; blk++)
1716 : {
1717 : // libMesh::out << "Writing object block " << blk << std::endl;
1718 :
1719 : // Each processor should build up its transfer buffers for its
1720 : // local objects in [first_object,last_object).
1721 : const dof_id_type
1722 125478 : first_object = blk*io_blksize,
1723 125478 : last_object = std::min(cast_int<dof_id_type>((blk+1)*io_blksize), n_objs);
1724 :
1725 : // convenience
1726 7204 : std::vector<dof_id_type> & ids (xfer_ids[blk]);
1727 7184 : std::vector<Number> & vals (send_vals[blk]);
1728 :
1729 : // we now know the number of values we will store for each block,
1730 : // so we can do efficient preallocation
1731 129070 : ids.clear(); /**/ ids.reserve (xfer_ids_size[blk]);
1732 129070 : vals.clear(); /**/ vals.reserve (send_vals_size[blk]);
1733 :
1734 129070 : if (send_vals_size[blk] != 0) // only send if we have nonzero components to write
1735 26751537 : for (iterator_type it=begin; it!=end; ++it)
1736 28045253 : if (((*it)->id() >= first_object) && // object in [first_object,last_object)
1737 14690513 : ((*it)->id() < last_object))
1738 : {
1739 14690513 : ids.push_back((*it)->id());
1740 :
1741 : // count the total number of nonzeros transferred for this object
1742 : {
1743 1335773 : unsigned int n_comp_tot=0;
1744 :
1745 29533450 : for (const auto & var : vars_to_write)
1746 14842937 : n_comp_tot += (*it)->n_comp(sys_num, var);
1747 :
1748 14690513 : ids.push_back (n_comp_tot*num_vecs); // even if 0 - processor 0 has no way of knowing otherwise...
1749 : }
1750 :
1751 : // pack the values to send
1752 31175438 : for (const auto & vec : vecs)
1753 35235714 : for (const auto & var : vars_to_write)
1754 : {
1755 18750789 : const unsigned int n_comp = (*it)->n_comp(sys_num, var);
1756 :
1757 34809222 : for (unsigned int comp=0; comp<n_comp; comp++)
1758 : {
1759 1451134 : libmesh_assert_greater_equal ((*it)->dof_number(sys_num, var, comp), vec->first_local_index());
1760 1451134 : libmesh_assert_less ((*it)->dof_number(sys_num, var, comp), vec->last_local_index());
1761 16173952 : vals.push_back((*vec)((*it)->dof_number(sys_num, var, comp)));
1762 : }
1763 : }
1764 : }
1765 :
1766 : #ifdef LIBMESH_HAVE_MPI
1767 125478 : id_tags[blk] = this->comm().get_unique_tag(100*num_blks + blk);
1768 125478 : val_tags[blk] = this->comm().get_unique_tag(200*num_blks + blk);
1769 :
1770 : // nonblocking send the data for this block
1771 129070 : this->comm().send (0, ids, id_requests[blk], id_tags[blk]);
1772 129070 : this->comm().send (0, vals, val_requests[blk], val_tags[blk]);
1773 : #endif
1774 : }
1775 :
1776 :
1777 129070 : if (this->processor_id() == 0)
1778 : {
1779 23334 : std::vector<std::vector<dof_id_type>> recv_ids (this->n_processors());
1780 25130 : std::vector<std::vector<Number>> recv_vals (this->n_processors());
1781 3592 : std::vector<unsigned int> obj_val_offsets; // map to traverse entry-wise rather than processor-wise
1782 3592 : std::vector<Number> output_vals; // The output buffer for the current block
1783 :
1784 : // a ThreadedIO object to perform asynchronous file IO
1785 1796 : ThreadedIO<Number> threaded_io(io, output_vals);
1786 17946 : std::unique_ptr<Threads::Thread> async_io;
1787 :
1788 39484 : for (unsigned int blk=0; blk<num_blks; blk++)
1789 : {
1790 : // Each processor should build up its transfer buffers for its
1791 : // local objects in [first_object,last_object).
1792 : const dof_id_type
1793 19742 : first_object = cast_int<dof_id_type>(blk*io_blksize),
1794 19742 : last_object = std::min(cast_int<dof_id_type>((blk+1)*io_blksize), n_objs),
1795 19742 : n_objects_blk = last_object - first_object;
1796 :
1797 : // offset array. this will define where each object's values
1798 : // map into the actual output_vals buffer. this must get
1799 : // 0-initialized because 0-component objects are not actually sent
1800 19742 : obj_val_offsets.resize (n_objects_blk); /**/ std::fill (obj_val_offsets.begin(), obj_val_offsets.end(), 0);
1801 :
1802 1796 : std::size_t n_val_recvd_blk=0;
1803 :
1804 : // receive this block of data from all processors.
1805 145220 : for (processor_id_type comm_step=0, tnp=this->n_processors(); comm_step != tnp; ++comm_step)
1806 : {
1807 : #ifdef LIBMESH_HAVE_MPI
1808 : // blocking receive indices for this block, imposing no particular order on processor
1809 129070 : Parallel::Status id_status (this->comm().probe (Parallel::any_source, id_tags[blk]));
1810 125478 : std::vector<dof_id_type> & ids (recv_ids[id_status.source()]);
1811 129070 : this->comm().receive (id_status.source(), ids, id_tags[blk]);
1812 : #else
1813 : std::vector<dof_id_type> & ids (recv_ids[0]);
1814 : ids = xfer_ids[blk];
1815 : #endif
1816 :
1817 :                 // note it's possible we didn't receive values for objects in
1818 : // this block if they have no components allocated.
1819 14815991 : for (std::size_t idx=0, sz=ids.size(); idx<sz; idx+=2)
1820 : {
1821 : const dof_id_type
1822 14690513 : local_idx = ids[idx+0]-first_object,
1823 14690513 : n_vals_tot_allvecs = ids[idx+1];
1824 :
1825 1335773 : libmesh_assert_less (local_idx, n_objects_blk);
1826 1335773 : libmesh_assert_less (local_idx, obj_val_offsets.size());
1827 :
1828 16026286 : obj_val_offsets[local_idx] = n_vals_tot_allvecs;
1829 : }
1830 :
1831 : #ifdef LIBMESH_HAVE_MPI
1832 : // blocking receive values for this block, imposing no particular order on processor
1833 129070 : Parallel::Status val_status (this->comm().probe (Parallel::any_source, val_tags[blk]));
1834 125478 : std::vector<Number> & vals (recv_vals[val_status.source()]);
1835 129070 : this->comm().receive (val_status.source(), vals, val_tags[blk]);
1836 : #else
1837 : // straight copy without MPI
1838 : std::vector<Number> & vals (recv_vals[0]);
1839 : vals = send_vals[blk];
1840 : #endif
1841 :
1842 129070 : n_val_recvd_blk += vals.size();
1843 : }
1844 :
1845 : // We need the offsets into the output_vals vector for each object.
1846 : // fortunately, this is simply the partial sum of the total number
1847 : // of components for each object
1848 17946 : std::partial_sum(obj_val_offsets.begin(), obj_val_offsets.end(),
1849 : obj_val_offsets.begin());
1850 :
1851 : // wait on any previous asynchronous IO - this *must* complete before
1852 : // we start messing with the output_vals buffer!
1853 19742 : if (async_io.get()) async_io->join();
1854 :
1855 : // this is the actual output buffer that will be written to disk.
1856 :             // at this point we finally know what size it will be.
1857 19742 : output_vals.resize(n_val_recvd_blk);
1858 :
1859 : // pack data from all processors into output values
1860 145220 : for (auto proc : make_range(this->n_processors()))
1861 : {
1862 125478 : const std::vector<dof_id_type> & ids (recv_ids [proc]);
1863 7184 : const std::vector<Number> & vals(recv_vals[proc]);
1864 7184 : std::vector<Number>::const_iterator proc_vals(vals.begin());
1865 :
1866 14815991 : for (std::size_t idx=0, sz=ids.size(); idx<sz; idx+=2)
1867 : {
1868 : const dof_id_type
1869 14690513 : local_idx = ids[idx+0]-first_object,
1870 16026286 : n_vals_tot_allvecs = ids[idx+1];
1871 :
1872 : // put this object's data into the proper location
1873 : // in the output buffer
1874 13354740 : std::vector<Number>::iterator out_vals(output_vals.begin());
1875 14690513 : if (local_idx != 0)
1876 16015504 : std::advance (out_vals, obj_val_offsets[local_idx-1]);
1877 :
1878 30748946 : for (unsigned int val=0; val<n_vals_tot_allvecs; val++, ++out_vals, ++proc_vals)
1879 : {
1880 1451134 : libmesh_assert (out_vals != output_vals.end());
1881 1451134 : libmesh_assert (proc_vals != vals.end());
1882 16058433 : *out_vals = *proc_vals;
1883 : }
1884 : }
1885 : }
1886 :
1887 : // output_vals buffer is now filled for this block.
1888 : // write it to disk
1889 35892 : async_io = std::make_unique<Threads::Thread>(threaded_io);
1890 21538 : written_length += output_vals.size();
1891 : }
1892 :
1893 : // wait on any previous asynchronous IO - this *must* complete before
1894 : // our stuff goes out of scope
1895 19742 : async_io->join();
1896 32300 : }
1897 :
1898 125478 : Parallel::wait(id_requests);
1899 125478 : Parallel::wait(val_requests);
1900 :
1901 : // we need some synchronization here. Because this method
1902 : // can be called for a range of nodes, then a range of elements,
1903 : // we need some mechanism to prevent processors from racing past
1904 : // to the next range and overtaking ongoing communication. one
1905 : // approach would be to figure out unique tags for each range,
1906 : // but for now we just impose a barrier here. And might as
1907 : // well have it do some useful work.
1908 125478 : this->comm().broadcast(written_length);
1909 :
1910 250956 : return written_length;
1911 118294 : }
1912 :
1913 :
1914 :
1915 0 : unsigned int System::write_SCALAR_dofs (const NumericVector<Number> & vec,
1916 : const unsigned int var,
1917 : Xdr & io) const
1918 : {
1919 0 : unsigned int written_length=0;
1920 0 : std::vector<Number> vals; // The raw values for the local objects in the current block
1921 : // Collect the SCALARs for the current variable
1922 0 : if (this->processor_id() == (this->n_processors()-1))
1923 : {
1924 0 : const DofMap & dof_map = this->get_dof_map();
1925 0 : std::vector<dof_id_type> SCALAR_dofs;
1926 0 : dof_map.SCALAR_dof_indices(SCALAR_dofs, var);
1927 : const unsigned int n_scalar_dofs = cast_int<unsigned int>
1928 0 : (SCALAR_dofs.size());
1929 :
1930 0 : for (unsigned int i=0; i<n_scalar_dofs; i++)
1931 : {
1932 0 : vals.push_back( vec(SCALAR_dofs[i]) );
1933 : }
1934 : }
1935 :
1936 : #ifdef LIBMESH_HAVE_MPI
1937 0 : if (this->n_processors() > 1)
1938 : {
1939 : const Parallel::MessageTag val_tag =
1940 0 : this->comm().get_unique_tag(1);
1941 :
1942 : // Post the receive on processor 0
1943 0 : if (this->processor_id() == 0)
1944 : {
1945 0 : this->comm().receive(this->n_processors()-1, vals, val_tag);
1946 : }
1947 :
1948 : // Send the data to processor 0
1949 0 : if (this->processor_id() == (this->n_processors()-1))
1950 : {
1951 0 : this->comm().send(0, vals, val_tag);
1952 : }
1953 0 : }
1954 : #endif
1955 :
1956 : // -------------------------------------------------------
1957 : // Write the output on processor 0.
1958 0 : if (this->processor_id() == 0)
1959 : {
1960 : const unsigned int vals_size =
1961 0 : cast_int<unsigned int>(vals.size());
1962 0 : io.data_stream (vals.data(), vals_size);
1963 0 : written_length += vals_size;
1964 : }
1965 :
1966 0 : return written_length;
1967 : }
1968 :
1969 :
1970 :
1971 62455 : dof_id_type System::write_serialized_vector (Xdr & io,
1972 : const NumericVector<Number> & vec) const
1973 : {
1974 1788 : parallel_object_only();
1975 :
1976 1788 : libmesh_assert (io.writing());
1977 :
1978 62455 : dof_id_type vec_length = vec.size();
1979 65137 : if (this->processor_id() == 0) io.data (vec_length, "# vector length");
1980 :
1981 1788 : dof_id_type written_length = 0;
1982 :
1983 : //---------------------------------
1984 : // Collect the values for all nodes
1985 1788 : written_length += cast_int<dof_id_type>
1986 123122 : (this->write_serialized_blocked_dof_objects (std::vector<const NumericVector<Number> *>(1,&vec),
1987 62455 : this->get_mesh().n_nodes(),
1988 124910 : this->get_mesh().local_nodes_begin(),
1989 123122 : this->get_mesh().local_nodes_end(),
1990 : io));
1991 :
1992 : //------------------------------------
1993 : // Collect the values for all elements
1994 62455 : written_length += cast_int<dof_id_type>
1995 123122 : (this->write_serialized_blocked_dof_objects (std::vector<const NumericVector<Number> *>(1,&vec),
1996 62455 : this->get_mesh().n_elem(),
1997 124910 : this->get_mesh().local_elements_begin(),
1998 64243 : this->get_mesh().local_elements_end(),
1999 : io));
2000 :
2001 : //-------------------------------------------
2002 : // Finally loop over all the SCALAR variables
2003 125052 : for (auto var : make_range(this->n_vars()))
2004 62597 : if (this->variable(var).type().family == SCALAR)
2005 : {
2006 0 : written_length +=
2007 0 : this->write_SCALAR_dofs (vec, var, io);
2008 : }
2009 :
2010 1788 : if (this->processor_id() == 0)
2011 894 : libmesh_assert_equal_to (written_length, vec_length);
2012 :
2013 62455 : return written_length;
2014 : }
2015 :
2016 :
2017 : template <typename InValType>
2018 284 : std::size_t System::read_serialized_vectors (Xdr & io,
2019 : const std::vector<NumericVector<Number> *> & vectors) const
2020 : {
2021 8 : parallel_object_only();
2022 :
2023 : // Error checking
2024 : // #ifndef NDEBUG
2025 : // // In parallel we better be reading a parallel vector -- if not
2026 : // // we will not set all of its components below!!
2027 : // if (this->n_processors() > 1)
2028 : // {
2029 : // libmesh_assert (vec.type() == PARALLEL ||
2030 : // vec.type() == GHOSTED);
2031 : // }
2032 : // #endif
2033 :
2034 8 : libmesh_assert (io.reading());
2035 :
2036 292 : if (this->processor_id() == 0)
2037 : {
2038 : // sizes
2039 48 : unsigned int num_vecs=0;
2040 48 : dof_id_type vector_length=0;
2041 :
2042 : // Get the number of vectors
2043 48 : io.data(num_vecs);
2044 : // Get the buffer size
2045 48 : io.data(vector_length);
2046 :
2047 52 : libmesh_error_msg_if
2048 : (num_vecs != vectors.size(),
2049 : "Xdr file header declares " << num_vecs << " vectors, but we were asked to read " << vectors.size());
2050 :
2051 48 : if (num_vecs != 0)
2052 : {
2053 48 : libmesh_error_msg_if (vectors[0] == nullptr, "vectors[0] should not be null");
2054 48 : libmesh_error_msg_if (vectors[0]->size() != vector_length, "Inconsistent vector sizes");
2055 : }
2056 : }
2057 :
2058 : // no need to actually communicate these.
2059 : // this->comm().broadcast(num_vecs);
2060 : // this->comm().broadcast(vector_length);
2061 :
2062 : // Cache these - they are not free!
2063 : const dof_id_type
2064 284 : n_nodes = this->get_mesh().n_nodes(),
2065 284 : n_elem = this->get_mesh().n_elem();
2066 :
2067 8 : std::size_t read_length = 0;
2068 :
2069 : //---------------------------------
2070 : // Collect the values for all nodes
2071 24 : read_length +=
2072 536 : this->read_serialized_blocked_dof_objects (n_nodes,
2073 284 : this->get_mesh().local_nodes_begin(),
2074 284 : this->get_mesh().local_nodes_end(),
2075 : InValType(),
2076 : io,
2077 : vectors);
2078 :
2079 : //------------------------------------
2080 : // Collect the values for all elements
2081 284 : read_length +=
2082 536 : this->read_serialized_blocked_dof_objects (n_elem,
2083 284 : this->get_mesh().local_elements_begin(),
2084 284 : this->get_mesh().local_elements_end(),
2085 : InValType(),
2086 : io,
2087 : vectors);
2088 :
2089 : //-------------------------------------------
2090 : // Finally loop over all the SCALAR variables
2091 5240 : for (NumericVector<Number> * vec : vectors)
2092 14172 : for (auto var : make_range(this->n_vars()))
2093 9216 : if (this->variable(var).type().family == SCALAR)
2094 : {
2095 0 : libmesh_assert_not_equal_to (vec, 0);
2096 :
2097 0 : read_length +=
2098 0 : this->read_SCALAR_dofs (var, io, vec);
2099 : }
2100 :
2101 : //---------------------------------------
2102 : // last step - must close all the vectors
2103 5240 : for (NumericVector<Number> * vec : vectors)
2104 : {
2105 140 : libmesh_assert_not_equal_to (vec, 0);
2106 4956 : vec->close();
2107 : }
2108 :
2109 284 : return read_length;
2110 : }
2111 :
2112 :
2113 :
2114 284 : std::size_t System::write_serialized_vectors (Xdr & io,
2115 : const std::vector<const NumericVector<Number> *> & vectors) const
2116 : {
2117 8 : parallel_object_only();
2118 :
2119 8 : libmesh_assert (io.writing());
2120 :
2121 : // Cache these - they are not free!
2122 : const dof_id_type
2123 284 : n_nodes = this->get_mesh().n_nodes(),
2124 284 : n_elem = this->get_mesh().n_elem();
2125 :
2126 8 : std::size_t written_length = 0;
2127 :
2128 292 : if (this->processor_id() == 0)
2129 : {
2130 : unsigned int
2131 48 : n_vec = cast_int<unsigned int>(vectors.size());
2132 : dof_id_type
2133 48 : vec_size = vectors.empty() ? 0 : vectors[0]->size();
2134 : // Set the number of vectors
2135 48 : io.data(n_vec, "# number of vectors");
2136 : // Set the buffer size
2137 48 : io.data(vec_size, "# vector length");
2138 : }
2139 :
2140 : //---------------------------------
2141 : // Collect the values for all nodes
2142 8 : written_length +=
2143 284 : this->write_serialized_blocked_dof_objects (vectors,
2144 : n_nodes,
2145 568 : this->get_mesh().local_nodes_begin(),
2146 560 : this->get_mesh().local_nodes_end(),
2147 : io);
2148 :
2149 : //------------------------------------
2150 : // Collect the values for all elements
2151 284 : written_length +=
2152 284 : this->write_serialized_blocked_dof_objects (vectors,
2153 : n_elem,
2154 568 : this->get_mesh().local_elements_begin(),
2155 560 : this->get_mesh().local_elements_end(),
2156 : io);
2157 :
2158 : //-------------------------------------------
2159 : // Finally loop over all the SCALAR variables
2160 5240 : for (const NumericVector<Number> * vec : vectors)
2161 14172 : for (auto var : make_range(this->n_vars()))
2162 9216 : if (this->variable(var).type().family == SCALAR)
2163 : {
2164 0 : libmesh_assert_not_equal_to (vec, 0);
2165 :
2166 0 : written_length +=
2167 0 : this->write_SCALAR_dofs (*vec, var, io);
2168 : }
2169 :
2170 284 : return written_length;
2171 : }
2172 :
2173 :
2174 :
2175 :
// Explicit instantiations of the templated read methods for the
// library's configured scalar type (Number).  When complex numbers
// are enabled we additionally instantiate with Real as the input
// value type — presumably so real-valued data files can be read into
// a complex-valued build; confirm against the Xdr readers' callers.
template LIBMESH_EXPORT void System::read_parallel_data<Number> (Xdr & io, const bool read_additional_data);
template LIBMESH_EXPORT void System::read_serialized_data<Number> (Xdr & io, const bool read_additional_data);
template LIBMESH_EXPORT numeric_index_type System::read_serialized_vector<Number> (Xdr & io, NumericVector<Number> * vec);
template LIBMESH_EXPORT std::size_t System::read_serialized_vectors<Number> (Xdr & io, const std::vector<NumericVector<Number> *> & vectors) const;
#ifdef LIBMESH_USE_COMPLEX_NUMBERS
template LIBMESH_EXPORT void System::read_parallel_data<Real> (Xdr & io, const bool read_additional_data);
template LIBMESH_EXPORT void System::read_serialized_data<Real> (Xdr & io, const bool read_additional_data);
template LIBMESH_EXPORT numeric_index_type System::read_serialized_vector<Real> (Xdr & io, NumericVector<Number> * vec);
template LIBMESH_EXPORT std::size_t System::read_serialized_vectors<Real> (Xdr & io, const std::vector<NumericVector<Number> *> & vectors) const;
#endif
2186 :
2187 : } // namespace libMesh
|