Line data Source code
1 : /**********************************************************************/ 2 : /* DO NOT MODIFY THIS HEADER */ 3 : /* MAGPIE - Mesoscale Atomistic Glue Program for Integrated Execution */ 4 : /* */ 5 : /* Copyright 2017 Battelle Energy Alliance, LLC */ 6 : /* ALL RIGHTS RESERVED */ 7 : /**********************************************************************/ 8 : 9 : #include "MyTRIMElementRun.h" 10 : #include "MooseMesh.h" 11 : 12 : // libmesh includes 13 : #include "libmesh/quadrature.h" 14 : #include "libmesh/parallel_algebra.h" 15 : 16 : #include <queue> 17 : 18 : registerMooseObject("MagpieApp", MyTRIMElementRun); 19 : 20 : InputParameters 21 212 : MyTRIMElementRun::validParams() 22 : { 23 212 : InputParameters params = MyTRIMRunBase::validParams(); 24 212 : params.addClassDescription("Run a TRIM binary collision Monte Carlo simulation across the entire " 25 : "sample and gather the results for use with an element averaged " 26 : "source Kernel."); 27 212 : return params; 28 0 : } 29 : 30 106 : MyTRIMElementRun::MyTRIMElementRun(const InputParameters & parameters) 31 : : MyTRIMRunBase(parameters), 32 106 : _perf_trim(registerTimedSection("trim", 2)), 33 106 : _perf_finalize(registerTimedSection("finalize", 2)), 34 212 : _zero(_nvars) 35 : { 36 106 : } 37 : 38 : void 39 142 : MyTRIMElementRun::execute() 40 : { 41 : _result_map.clear(); 42 : 43 : // bail out early if no run is requested for this timestep 44 142 : if (!_rasterizer.executeThisTimestep()) 45 0 : return; 46 : 47 : // trigger the creation of a master PointLocator outside the threaded region 48 142 : _mesh.getPointLocator(); 49 : 50 : // build thread loop functor 51 142 : ThreadedRecoilElementAveragedLoop rl(_rasterizer, _mesh); 52 : 53 : // output the number of recoils being launched 54 142 : _console << "\nMyTRIM: Running " << _trim_parameters.scaled_npka << " recoils.\n" 55 142 : << "Recoil rate scaling factor is " << _trim_parameters.recoil_rate_scaling << "\n" 56 142 : << "Sampled number of recoils: " << _trim_parameters.original_npka << "\n" 57 142 : << "Result scaling factor: " << _trim_parameters.result_scaling_factor << std::endl; 58 : 59 : // run threads 60 : { 61 142 : TIME_SECTION(_perf_trim); 62 284 : Threads::parallel_reduce(PKARange(_pka_list.begin(), _pka_list.end()), rl); 63 : } 64 : 65 : // fetch the joined data from thread 0 66 : _result_map = rl.getResultMap(); 67 142 : } 68 : 69 : void 70 142 : MyTRIMElementRun::finalize() 71 : { 72 142 : TIME_SECTION(_perf_finalize); 73 : 74 142 : if (!_rasterizer.executeThisTimestep()) 75 : return; 76 : 77 : // for single processor runs we do not need to do anything here 78 142 : if (_app.n_processors() > 1) 79 : { 80 : // create send buffer 81 : std::string send_buffer; 82 : 83 : // create byte buffers for the streams received from all processors 84 : std::vector<std::string> recv_buffers; 85 : 86 : // pack the complex datastructures into the string stream 87 68 : serialize(send_buffer); 88 : 89 : // broadcast serialized data to and receive from all processors 90 68 : _communicator.allgather(send_buffer, recv_buffers); 91 : 92 : // unpack the received data and merge it into the local data structures 93 68 : deserialize(recv_buffers); 94 68 : } 95 : 96 : // scale result 97 156272 : for (auto & result : _result_map) 98 : { 99 156130 : result.second._energy *= _trim_parameters.result_scaling_factor; 100 : 101 359920 : for (auto k = beginIndex(result.second._defects); k < _nvars; ++k) 102 1018950 : for (std::size_t l = 0; l < ThreadedRecoilDiracSourceLoop::N_DEFECTS; ++l) 103 815160 : result.second._defects[k][l] *= _trim_parameters.result_scaling_factor; 104 : } 105 : } 106 : 107 : const MyTRIMElementRun::MyTRIMResult & 108 2379594 : MyTRIMElementRun::result(const Elem * elem) const 109 : { 110 2379594 : auto i = _result_map.find(elem->id()); 111 : 112 : // if no entry in the map was found no collision event happened in the element 113 2379594 : if (i == _result_map.end()) 114 268176 : return _zero; 115 : 116 2111418 : return i->second; 117 : } 118 : 119 : void 120 68 : MyTRIMElementRun::serialize(std::string & serialized_buffer) 121 : { 122 : // stream for serializing the _result_map structure to a byte stream 123 68 : std::ostringstream oss; 124 68 : dataStore(oss, _result_map, this); 125 : 126 : // Populate the passed in string pointer with the string stream's buffer contents 127 68 : serialized_buffer.assign(oss.str()); 128 68 : } 129 : 130 : void 131 68 : MyTRIMElementRun::deserialize(std::vector<std::string> & serialized_buffers) 132 : { 133 : mooseAssert(serialized_buffers.size() == _app.n_processors(), 134 : "Unexpected size of serialized_buffers: " << serialized_buffers.size()); 135 : 136 : // The input string stream used for deserialization 137 68 : std::istringstream iss; 138 : 139 : // Loop over all datastructures for all procerssors to perfrom the gather operation 140 204 : for (unsigned int rank = 0; rank < serialized_buffers.size(); ++rank) 141 : { 142 : // skip the current processor (its data is already in the structures) 143 136 : if (rank == processor_id()) 144 68 : continue; 145 : 146 : // populate the stream with a new buffer and reset stream state 147 : iss.str(serialized_buffers[rank]); 148 68 : iss.clear(); 149 : 150 : // Load the communicated data into temporary structures 151 : MyTRIMResultMap other_result_map; 152 68 : dataLoad(iss, other_result_map, this); 153 : 154 : // merge the data in with the current processor's data 155 53034 : for (auto & other_result : other_result_map) 156 : { 157 : auto j = _result_map.lower_bound(other_result.first); 158 : 159 : // if no entry in the map was found then set it, otherwise add value 160 52966 : if (j == _result_map.end() || j->first != other_result.first) 161 19426 : _result_map.emplace_hint(j, other_result); 162 : else 163 : { 164 : mooseAssert(other_result.second._defects.size() == j->second._defects.size(), 165 : "Inconsistent TRIM defect vector sizes across processors."); 166 : mooseAssert(j->second._defects.size() == _nvars, "Defect vector size must be _nvars."); 167 : 168 71036 : for (unsigned int k = 0; k < _nvars; ++k) 169 187480 : for (std::size_t l = 0; l < ThreadedRecoilDiracSourceLoop::N_DEFECTS; ++l) 170 149984 : j->second._defects[k][l] += other_result.second._defects[k][l]; 171 : 172 : // sum energy contributions 173 33540 : j->second._energy += other_result.second._energy; 174 : } 175 : } 176 : } 177 68 : }