Line data Source code
1 : // The libMesh Finite Element Library.
2 : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
3 :
4 : // This library is free software; you can redistribute it and/or
5 : // modify it under the terms of the GNU Lesser General Public
6 : // License as published by the Free Software Foundation; either
7 : // version 2.1 of the License, or (at your option) any later version.
8 :
9 : // This library is distributed in the hope that it will be useful,
10 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 : // Lesser General Public License for more details.
13 :
14 : // You should have received a copy of the GNU Lesser General Public
15 : // License along with this library; if not, write to the Free Software
16 : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 :
18 :
19 : // Local Includes
20 : #include "libmesh/dense_matrix.h"
21 : #include "libmesh/dense_vector.h"
22 : #include "libmesh/int_range.h"
23 :
24 : #if (LIBMESH_HAVE_PETSC)
25 : # include "libmesh/petsc_macro.h"
26 : # include "libmesh/ignore_warnings.h"
27 : # include <petscblaslapack.h>
28 : # include "libmesh/restore_warnings.h"
29 : #endif
30 :
31 : namespace libMesh
32 : {
33 :
34 : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
35 :
36 : template<typename T>
37 5545974 : void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T> & other,
38 : _BLAS_Multiply_Flag flag)
39 : {
40 272480 : int result_size = 0;
41 :
42 : // For each case, determine the size of the final result make sure
43 : // that the inner dimensions match
44 5545974 : switch (flag)
45 : {
46 0 : case LEFT_MULTIPLY:
47 : {
48 0 : result_size = other.m() * this->n();
49 0 : if (other.n() == this->m())
50 0 : break;
51 : }
52 : libmesh_fallthrough();
53 : case RIGHT_MULTIPLY:
54 : {
55 2773009 : result_size = other.n() * this->m();
56 2773009 : if (other.m() == this->n())
57 136242 : break;
58 : }
59 : libmesh_fallthrough();
60 : case LEFT_MULTIPLY_TRANSPOSE:
61 : {
62 1298405 : result_size = other.n() * this->n();
63 1298405 : if (other.m() == this->m())
64 136238 : break;
65 : }
66 : libmesh_fallthrough();
67 : case RIGHT_MULTIPLY_TRANSPOSE:
68 : {
69 1474560 : result_size = other.m() * this->m();
70 1474560 : if (other.n() == this->n())
71 0 : break;
72 : }
73 : libmesh_fallthrough();
74 : default:
75 0 : libmesh_error_msg("Unknown flag selected or matrices are incompatible for multiplication.");
76 : }
77 :
78 : // For this to work, the passed arg. must actually be a DenseMatrix<T>
79 272480 : const DenseMatrix<T> * const_that = cast_ptr<const DenseMatrix<T> *>(&other);
80 :
81 : // Also, although 'that' is logically const in this BLAS routine,
82 : // the PETSc BLAS interface does not specify that any of the inputs are
83 : // const. To use it, I must cast away const-ness.
84 272480 : DenseMatrix<T> * that = const_cast<DenseMatrix<T> *> (const_that);
85 :
86 : // Initialize A, B pointers for LEFT_MULTIPLY* cases
87 272480 : DenseMatrix<T> * A = this;
88 272480 : DenseMatrix<T> * B = that;
89 :
90 : // For RIGHT_MULTIPLY* cases, swap the meaning of A and B.
91 : // Here is a full table of combinations we can pass to BLASgemm, and what the answer is when finished:
92 : // pass A B -> (Fortran) -> A^T B^T -> (C++) -> (A^T B^T)^T -> (identity) -> B A "lt multiply"
93 : // pass B A -> (Fortran) -> B^T A^T -> (C++) -> (B^T A^T)^T -> (identity) -> A B "rt multiply"
94 : // pass A B^T -> (Fortran) -> A^T B -> (C++) -> (A^T B)^T -> (identity) -> B^T A "lt multiply t"
95 : // pass B^T A -> (Fortran) -> B A^T -> (C++) -> (B A^T)^T -> (identity) -> A B^T "rt multiply t"
96 5545974 : if (flag==RIGHT_MULTIPLY || flag==RIGHT_MULTIPLY_TRANSPOSE)
97 136242 : std::swap(A,B);
98 :
99 : // transa, transb values to pass to blas
100 : char
101 5545974 : transa[] = "n",
102 5545974 : transb[] = "n";
103 :
104 : // Integer values to pass to BLAS:
105 : //
106 : // M
107 : // In Fortran, the number of rows of op(A),
108 : // In the BLAS documentation, typically known as 'M'.
109 : //
110 : // In C/C++, we set:
111 : // M = n_cols(A) if (transa='n')
112 : // n_rows(A) if (transa='t')
113 5545974 : PetscBLASInt M = static_cast<PetscBLASInt>( A->n() );
114 :
115 : // N
116 : // In Fortran, the number of cols of op(B), and also the number of cols of C.
117 : // In the BLAS documentation, typically known as 'N'.
118 : //
119 : // In C/C++, we set:
120 : // N = n_rows(B) if (transb='n')
121 : // n_cols(B) if (transb='t')
122 5545974 : PetscBLASInt N = static_cast<PetscBLASInt>( B->m() );
123 :
124 : // K
125 : // In Fortran, the number of cols of op(A), and also
126 : // the number of rows of op(B). In the BLAS documentation,
127 : // typically known as 'K'.
128 : //
129 : // In C/C++, we set:
130 : // K = n_rows(A) if (transa='n')
131 : // n_cols(A) if (transa='t')
132 5545974 : PetscBLASInt K = static_cast<PetscBLASInt>( A->m() );
133 :
134 : // LDA (leading dimension of A). In our cases,
135 : // LDA is always the number of columns of A.
136 5545974 : PetscBLASInt LDA = static_cast<PetscBLASInt>( A->n() );
137 :
138 : // LDB (leading dimension of B). In our cases,
139 : // LDB is always the number of columns of B.
140 5545974 : PetscBLASInt LDB = static_cast<PetscBLASInt>( B->n() );
141 :
142 5545974 : if (flag == LEFT_MULTIPLY_TRANSPOSE)
143 : {
144 1298405 : transb[0] = 't';
145 1298405 : N = static_cast<PetscBLASInt>( B->n() );
146 : }
147 :
148 4247569 : else if (flag == RIGHT_MULTIPLY_TRANSPOSE)
149 : {
150 1474560 : transa[0] = 't';
151 0 : std::swap(M,K);
152 : }
153 :
154 : // LDC (leading dimension of C). LDC is the
155 : // number of columns in the solution matrix.
156 5545974 : PetscBLASInt LDC = M;
157 :
158 : // Scalar values to pass to BLAS
159 : //
160 : // scalar multiplying the whole product AB
161 5545974 : T alpha = 1.;
162 :
163 : // scalar multiplying C, which is the original matrix.
164 5545974 : T beta = 0.;
165 :
166 : // Storage for the result
167 5818454 : std::vector<T> result (result_size);
168 :
169 : // Finally ready to call the BLAS
170 6090930 : BLASgemm_(transa, transb, &M, &N, &K, pPS(&alpha),
171 544956 : pPS(A->get_values().data()), &LDA,
172 544956 : pPS(B->get_values().data()), &LDB, pPS(&beta),
173 : pPS(result.data()), &LDC);
174 :
175 : // Update the relevant dimension for this matrix.
176 5545974 : switch (flag)
177 : {
178 0 : case LEFT_MULTIPLY: { this->_m = other.m(); break; }
179 2773009 : case RIGHT_MULTIPLY: { this->_n = other.n(); break; }
180 1298405 : case LEFT_MULTIPLY_TRANSPOSE: { this->_m = other.n(); break; }
181 1474560 : case RIGHT_MULTIPLY_TRANSPOSE: { this->_n = other.m(); break; }
182 0 : default:
183 0 : libmesh_error_msg("Unknown flag selected.");
184 : }
185 :
186 : // Swap my data vector with the result
187 272480 : this->_val.swap(result);
188 5545974 : }
189 :
190 : #else
191 :
192 : template<typename T>
193 0 : void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T> &,
194 : _BLAS_Multiply_Flag)
195 : {
196 0 : libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
197 : }
198 :
199 : #endif
200 :
201 :
202 :
203 :
204 :
205 :
206 :
207 : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
208 :
209 : template<typename T>
210 758098 : void DenseMatrix<T>::_lu_decompose_lapack ()
211 : {
212 : // If this function was called, there better not be any
213 : // previous decomposition of the matrix.
214 60509 : libmesh_assert_equal_to (this->_decomposition_type, NONE);
215 :
216 : // The calling sequence for dgetrf is:
217 : // dgetrf(M, N, A, lda, ipiv, info)
218 :
219 : // M (input)
220 : // The number of rows of the matrix A. M >= 0.
221 : // In C/C++, pass the number of *cols* of A
222 758098 : PetscBLASInt M = this->n();
223 :
224 : // N (input)
225 : // The number of columns of the matrix A. N >= 0.
226 : // In C/C++, pass the number of *rows* of A
227 758098 : PetscBLASInt N = this->m();
228 :
229 : // A (input/output) double precision array, dimension (LDA,N)
230 : // On entry, the M-by-N matrix to be factored.
231 : // On exit, the factors L and U from the factorization
232 : // A = P*L*U; the unit diagonal elements of L are not stored.
233 :
234 : // LDA (input)
235 : // The leading dimension of the array A. LDA >= max(1,M).
236 758098 : PetscBLASInt LDA = M;
237 :
238 : // ipiv (output) integer array, dimension (min(m,n))
239 : // The pivot indices; for 1 <= i <= min(m,n), row i of the
240 : // matrix was interchanged with row IPIV(i).
241 : // Here, we pass _pivots.data(), a private class member used to store pivots
242 758098 : this->_pivots.resize( std::min(M,N) );
243 :
244 : // info (output)
245 : // = 0: successful exit
246 : // < 0: if INFO = -i, the i-th argument had an illegal value
247 : // > 0: if INFO = i, U(i,i) is exactly zero. The factorization
248 : // has been completed, but the factor U is exactly
249 : // singular, and division by zero will occur if it is used
250 : // to solve a system of equations.
251 758098 : PetscBLASInt INFO = 0;
252 :
253 : // Ready to call the actual factorization routine through PETSc's interface
254 758098 : LAPACKgetrf_(&M, &N, pPS(this->_val.data()), &LDA, _pivots.data(),
255 : &INFO);
256 :
257 : // Check return value for errors
258 758098 : libmesh_error_msg_if(INFO != 0, "INFO=" << INFO << ", Error during Lapack LU factorization!");
259 :
260 : // Set the flag for LU decomposition
261 758098 : this->_decomposition_type = LU_BLAS_LAPACK;
262 758098 : }
263 :
264 : #else
265 :
266 : template<typename T>
267 0 : void DenseMatrix<T>::_lu_decompose_lapack ()
268 : {
269 0 : libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
270 : }
271 :
272 : #endif
273 :
274 :
275 :
276 : template<typename T>
277 0 : void DenseMatrix<T>::_svd_lapack (DenseVector<Real> & sigma)
278 : {
279 : // The calling sequence for dgetrf is:
280 : // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
281 :
282 : // JOBU (input)
283 : // Specifies options for computing all or part of the matrix U:
284 : // = 'A': all M columns of U are returned in array U:
285 : // = 'S': the first min(m,n) columns of U (the left singular
286 : // vectors) are returned in the array U;
287 : // = 'O': the first min(m,n) columns of U (the left singular
288 : // vectors) are overwritten on the array A;
289 : // = 'N': no columns of U (no left singular vectors) are
290 : // computed.
291 0 : char JOBU = 'N';
292 :
293 : // JOBVT (input)
294 : // Specifies options for computing all or part of the matrix
295 : // V**T:
296 : // = 'A': all N rows of V**T are returned in the array VT;
297 : // = 'S': the first min(m,n) rows of V**T (the right singular
298 : // vectors) are returned in the array VT;
299 : // = 'O': the first min(m,n) rows of V**T (the right singular
300 : // vectors) are overwritten on the array A;
301 : // = 'N': no rows of V**T (no right singular vectors) are
302 : // computed.
303 0 : char JOBVT = 'N';
304 :
305 0 : std::vector<Real> sigma_val;
306 0 : std::vector<Number> U_val;
307 0 : std::vector<Number> VT_val;
308 :
309 0 : _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val);
310 :
311 : // Copy the singular values into sigma, ignore U_val and VT_val
312 0 : sigma.resize(cast_int<unsigned int>(sigma_val.size()));
313 0 : for (auto i : make_range(sigma.size()))
314 0 : sigma(i) = sigma_val[i];
315 0 : }
316 :
317 : template<typename T>
318 1470 : void DenseMatrix<T>::_svd_lapack (DenseVector<Real> & sigma,
319 : DenseMatrix<Number> & U,
320 : DenseMatrix<Number> & VT)
321 : {
322 : // The calling sequence for dgetrf is:
323 : // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
324 :
325 : // JOBU (input)
326 : // Specifies options for computing all or part of the matrix U:
327 : // = 'A': all M columns of U are returned in array U:
328 : // = 'S': the first min(m,n) columns of U (the left singular
329 : // vectors) are returned in the array U;
330 : // = 'O': the first min(m,n) columns of U (the left singular
331 : // vectors) are overwritten on the array A;
332 : // = 'N': no columns of U (no left singular vectors) are
333 : // computed.
334 42 : char JOBU = 'S';
335 :
336 : // JOBVT (input)
337 : // Specifies options for computing all or part of the matrix
338 : // V**T:
339 : // = 'A': all N rows of V**T are returned in the array VT;
340 : // = 'S': the first min(m,n) rows of V**T (the right singular
341 : // vectors) are returned in the array VT;
342 : // = 'O': the first min(m,n) rows of V**T (the right singular
343 : // vectors) are overwritten on the array A;
344 : // = 'N': no rows of V**T (no right singular vectors) are
345 : // computed.
346 42 : char JOBVT = 'S';
347 :
348 : // Note: Lapack is going to compute the singular values of A^T. If
349 : // A=U * S * V^T, then A^T = V * S * U^T, which means that the
350 : // values returned in the "U_val" array actually correspond to the
351 : // entries of the V matrix, and the values returned in the VT_val
352 : // array actually correspond to the entries of U^T. Therefore, we
353 : // pass VT in the place of U and U in the place of VT below!
354 84 : std::vector<Real> sigma_val;
355 1470 : int M = this->n();
356 1470 : int N = this->m();
357 84 : int min_MN = (M < N) ? M : N;
358 :
359 : // Size user-provided storage appropriately. Inside svd_helper:
360 : // U_val is sized to (M x min_MN)
361 : // VT_val is sized to (min_MN x N)
362 : // So, we set up U to have the shape of "VT_val^T", and VT to
363 : // have the shape of "U_val^T".
364 : //
365 : // Finally, since the results are stored in column-major order by
366 : // Lapack, but we actually want the transpose of what Lapack
367 : // returns, this means (conveniently) that we don't even have to
368 : // copy anything after the call to _svd_helper, it should already be
369 : // in the correct order!
370 1470 : U.resize(N, min_MN);
371 1428 : VT.resize(min_MN, M);
372 :
373 1470 : _svd_helper(JOBU, JOBVT, sigma_val, VT.get_values(), U.get_values());
374 :
375 : // Copy the singular values into sigma.
376 1470 : sigma.resize(cast_int<unsigned int>(sigma_val.size()));
377 143010 : for (auto i : make_range(sigma.size()))
378 149628 : sigma(i) = sigma_val[i];
379 1470 : }
380 :
381 : #if (LIBMESH_HAVE_PETSC)
382 :
383 : template<typename T>
384 1470 : void DenseMatrix<T>::_svd_helper (char JOBU,
385 : char JOBVT,
386 : std::vector<Real> & sigma_val,
387 : std::vector<Number> & U_val,
388 : std::vector<Number> & VT_val)
389 : {
390 :
391 : // M (input)
392 : // The number of rows of the matrix A. M >= 0.
393 : // In C/C++, pass the number of *cols* of A
394 1470 : PetscBLASInt M = this->n();
395 :
396 : // N (input)
397 : // The number of columns of the matrix A. N >= 0.
398 : // In C/C++, pass the number of *rows* of A
399 1470 : PetscBLASInt N = this->m();
400 :
401 84 : PetscBLASInt min_MN = (M < N) ? M : N;
402 84 : PetscBLASInt max_MN = (M > N) ? M : N;
403 :
404 : // A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
405 : // On entry, the M-by-N matrix A.
406 : // On exit,
407 : // if JOBU = 'O', A is overwritten with the first min(m,n)
408 : // columns of U (the left singular vectors,
409 : // stored columnwise);
410 : // if JOBVT = 'O', A is overwritten with the first min(m,n)
411 : // rows of V**T (the right singular vectors,
412 : // stored rowwise);
413 : // if JOBU != 'O' and JOBVT != 'O', the contents of A are destroyed.
414 :
415 : // LDA (input)
416 : // The leading dimension of the array A. LDA >= max(1,M).
417 1470 : PetscBLASInt LDA = M;
418 :
419 : // S (output) DOUBLE PRECISION array, dimension (min(M,N))
420 : // The singular values of A, sorted so that S(i) >= S(i+1).
421 1470 : sigma_val.resize( min_MN );
422 :
423 : // LDU (input)
424 : // The leading dimension of the array U. LDU >= 1; if
425 : // JOBU = 'S' or 'A', LDU >= M.
426 1470 : PetscBLASInt LDU = M;
427 :
428 : // U (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
429 : // (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
430 : // If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
431 : // if JOBU = 'S', U contains the first min(m,n) columns of U
432 : // (the left singular vectors, stored columnwise);
433 : // if JOBU = 'N' or 'O', U is not referenced.
434 1470 : if (JOBU == 'S')
435 1470 : U_val.resize( LDU*min_MN );
436 : else
437 0 : U_val.resize( LDU*M );
438 :
439 : // LDVT (input)
440 : // The leading dimension of the array VT. LDVT >= 1; if
441 : // JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
442 1470 : PetscBLASInt LDVT = N;
443 1470 : if (JOBVT == 'S')
444 1470 : LDVT = min_MN;
445 :
446 : // VT (output) DOUBLE PRECISION array, dimension (LDVT,N)
447 : // If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
448 : // V**T;
449 : // if JOBVT = 'S', VT contains the first min(m,n) rows of
450 : // V**T (the right singular vectors, stored rowwise);
451 : // if JOBVT = 'N' or 'O', VT is not referenced.
452 1470 : VT_val.resize( LDVT*N );
453 :
454 : // LWORK (input)
455 : // The dimension of the array WORK.
456 : // LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
457 : // For good performance, LWORK should generally be larger.
458 : //
459 : // If LWORK = -1, then a workspace query is assumed; the routine
460 : // only calculates the optimal size of the WORK array, returns
461 : // this value as the first entry of the WORK array, and no error
462 : // message related to LWORK is issued by XERBLA.
463 1470 : PetscBLASInt larger = (3*min_MN+max_MN > 5*min_MN) ? 3*min_MN+max_MN : 5*min_MN;
464 1470 : PetscBLASInt LWORK = (larger > 1) ? larger : 1;
465 :
466 :
467 : // WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
468 : // On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
469 : // if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
470 : // superdiagonal elements of an upper bidiagonal matrix B
471 : // whose diagonal is in S (not necessarily sorted). B
472 : // satisfies A = U * B * VT, so it has the same singular values
473 : // as A, and singular vectors related by U and VT.
474 1512 : std::vector<Number> WORK( LWORK );
475 :
476 : // INFO (output)
477 : // = 0: successful exit.
478 : // < 0: if INFO = -i, the i-th argument had an illegal value.
479 : // > 0: if DBDSQR did not converge, INFO specifies how many
480 : // superdiagonals of an intermediate bidiagonal form B
481 : // did not converge to zero. See the description of WORK
482 : // above for details.
483 1470 : PetscBLASInt INFO = 0;
484 :
485 : // Ready to call the actual factorization routine through PETSc's interface.
486 : #ifdef LIBMESH_USE_REAL_NUMBERS
487 : // Note that the call to LAPACKgesvd_ may modify _val
488 1470 : LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, pPS(_val.data()), &LDA,
489 : pPS(sigma_val.data()), pPS(U_val.data()), &LDU,
490 : pPS(VT_val.data()), &LDVT, pPS(WORK.data()), &LWORK,
491 : &INFO);
492 : #else
493 : // When we have LIBMESH_USE_COMPLEX_NUMBERS then we must pass an array of Complex
494 : // numbers to LAPACKgesvd_, but _val may contain Reals so we copy to Number below to
495 : // handle both the real-valued and complex-valued cases.
496 : std::vector<Number> val_copy(_val.size());
497 : for (auto i : make_range(_val.size()))
498 : val_copy[i] = _val[i];
499 :
500 : std::vector<Real> RWORK(5 * min_MN);
501 : LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, val_copy.data(), &LDA, sigma_val.data(), U_val.data(),
502 : &LDU, VT_val.data(), &LDVT, WORK.data(), &LWORK, RWORK.data(), &INFO);
503 : #endif
504 :
505 : // Check return value for errors
506 1470 : libmesh_error_msg_if(INFO != 0, "INFO=" << INFO << ", Error during Lapack SVD calculation!");
507 1470 : }
508 :
509 :
510 : #else
511 :
512 : template<typename T>
513 0 : void DenseMatrix<T>::_svd_helper (char,
514 : char,
515 : std::vector<Real> &,
516 : std::vector<Number> &,
517 : std::vector<Number> &)
518 : {
519 0 : libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
520 : }
521 :
522 : #endif
523 :
524 :
525 :
526 : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
527 :
528 : template<typename T>
529 5654 : void DenseMatrix<T>::_svd_solve_lapack(const DenseVector<T> & rhs,
530 : DenseVector<T> & x,
531 : Real rcond) const
532 : {
533 : // Since BLAS is expecting column-major storage, we first need to
534 : // make a transposed copy of *this, then pass it to the gelss
535 : // routine instead of the original. This extra copy is kind of a
536 : // bummer, it might be better if we could use the full SVD to
537 : // compute the least-squares solution instead... Note that it isn't
538 : // completely terrible either, since A_trans gets overwritten by
539 : // Lapack, and we usually would end up making a copy of A outside
540 : // the function call anyway.
541 6682 : DenseMatrix<T> A_trans;
542 5654 : this->get_transpose(A_trans);
543 :
544 : // M
545 : // The number of rows of the input matrix. M >= 0.
546 : // This is actually the number of *columns* of A_trans.
547 5654 : PetscBLASInt M = A_trans.n();
548 :
549 : // N
550 : // The number of columns of the matrix A. N >= 0.
551 : // This is actually the number of *rows* of A_trans.
552 5654 : PetscBLASInt N = A_trans.m();
553 :
554 : // We'll use the min and max of (M,N) several times below.
555 5654 : PetscBLASInt max_MN = std::max(M,N);
556 5654 : PetscBLASInt min_MN = std::min(M,N);
557 :
558 : // NRHS
559 : // The number of right hand sides, i.e., the number of columns
560 : // of the matrices B and X. NRHS >= 0.
561 : // This could later be generalized to solve for multiple right-hand
562 : // sides...
563 5654 : PetscBLASInt NRHS = 1;
564 :
565 : // A is double precision array, dimension (LDA,N)
566 : // On entry, the M-by-N matrix A.
567 : // On exit, the first min(m,n) rows of A are overwritten with
568 : // its right singular vectors, stored rowwise.
569 : //
570 : // The data vector that will be passed to Lapack.
571 514 : std::vector<T> & A_trans_vals = A_trans.get_values();
572 :
573 : // LDA
574 : // The leading dimension of the array A. LDA >= max(1,M).
575 5654 : PetscBLASInt LDA = M;
576 :
577 : // B is double precision array, dimension (LDB,NRHS)
578 : // On entry, the M-by-NRHS right hand side matrix B.
579 : // On exit, B is overwritten by the N-by-NRHS solution
580 : // matrix X. If m >= n and RANK = n, the residual
581 : // sum-of-squares for the solution in the i-th column is given
582 : // by the sum of squares of elements n+1:m in that column.
583 : //
584 : // Since we don't want the user's rhs vector to be overwritten by
585 : // the solution, we copy the rhs values into the solution vector "x"
586 : // now. x needs to be long enough to hold both the (Nx1) solution
587 : // vector or the (Mx1) rhs, so size it to the max of those.
588 5654 : x.resize(max_MN);
589 35332 : for (auto i : make_range(rhs.size()))
590 32376 : x(i) = rhs(i);
591 :
592 : // Make the syntax below simpler by grabbing a reference to this array.
593 514 : std::vector<T> & B = x.get_values();
594 :
595 : // LDB
596 : // The leading dimension of the array B. LDB >= max(1,max(M,N)).
597 5654 : PetscBLASInt LDB = x.size();
598 :
599 : // S is double precision array, dimension (min(M,N))
600 : // The singular values of A in decreasing order.
601 : // The condition number of A in the 2-norm = S(1)/S(min(m,n)).
602 6168 : std::vector<T> S(min_MN);
603 :
604 : // RCOND
605 : // Used to determine the effective rank of A. Singular values
606 : // S(i) <= RCOND*S(1) are treated as zero. If RCOND < 0, machine
607 : // precision is used instead.
608 5654 : PetscScalar RCOND = PS(rcond);
609 :
610 : // RANK
611 : // The effective rank of A, i.e., the number of singular values
612 : // which are greater than RCOND*S(1).
613 5654 : PetscBLASInt RANK = 0;
614 :
615 : // LWORK
616 : // The dimension of the array WORK. LWORK >= 1, and also:
617 : // LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
618 : // For good performance, LWORK should generally be larger.
619 : //
620 : // If LWORK = -1, then a workspace query is assumed; the routine
621 : // only calculates the optimal size of the WORK array, returns
622 : // this value as the first entry of the WORK array, and no error
623 : // message related to LWORK is issued by XERBLA.
624 : //
625 : // The factor of 3/2 is arbitrary and is used to satisfy the "should
626 : // generally be larger" clause.
627 5654 : PetscBLASInt LWORK = (3*min_MN + std::max(2*min_MN, std::max(max_MN, NRHS))) * 3/2;
628 :
629 : // WORK is double precision array, dimension (MAX(1,LWORK))
630 : // On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
631 6168 : std::vector<T> WORK(LWORK);
632 :
633 : // INFO
634 : // = 0: successful exit
635 : // < 0: if INFO = -i, the i-th argument had an illegal value.
636 : // > 0: the algorithm for computing the SVD failed to converge;
637 : // if INFO = i, i off-diagonal elements of an intermediate
638 : // bidiagonal form did not converge to zero.
639 5654 : PetscBLASInt INFO = 0;
640 :
641 : // LAPACKgelss_(const PetscBLASInt *, // M
642 : // const PetscBLASInt *, // N
643 : // const PetscBLASInt *, // NRHS
644 : // PetscScalar *, // A
645 : // const PetscBLASInt *, // LDA
646 : // PetscScalar *, // B
647 : // const PetscBLASInt *, // LDB
648 : // PetscReal *, // S(out) = singular values of A in increasing order
649 : // const PetscReal *, // RCOND = tolerance for singular values
650 : // PetscBLASInt *, // RANK(out) = number of "non-zero" singular values
651 : // PetscScalar *, // WORK
652 : // const PetscBLASInt *, // LWORK
653 : // PetscBLASInt *); // INFO
654 5654 : LAPACKgelss_(&M, &N, &NRHS, pPS(A_trans_vals.data()), &LDA,
655 : pPS(B.data()), &LDB, pPS(S.data()), &RCOND, &RANK,
656 : pPS(WORK.data()), &LWORK, &INFO);
657 :
658 : // Check for errors in the Lapack call
659 5654 : libmesh_error_msg_if(INFO < 0, "Error, argument " << -INFO << " to LAPACKgelss_ had an illegal value.");
660 5654 : libmesh_error_msg_if(INFO > 0, "The algorithm for computing the SVD failed to converge!");
661 :
662 : // Debugging: print singular values and information about condition number:
663 : // libMesh::err << "RCOND=" << RCOND << std::endl;
664 : // libMesh::err << "Singular values: " << std::endl;
665 : // for (auto i : index_range(S))
666 : // libMesh::err << S[i] << std::endl;
667 : // libMesh::err << "The condition number of A is approximately: " << S[0]/S.back() << std::endl;
668 :
669 : // Lapack has already written the solution into B, but it will be
670 : // the wrong size for non-square problems, so we need to resize it
671 : // correctly. The size of the solution vector should be the number
672 : // of columns of the original A matrix. Unfortunately, resizing a
673 : // DenseVector currently also zeros it out (unlike a std::vector) so
674 : // we'll resize the underlying storage directly (the size is not
675 : // stored independently elsewhere).
676 5654 : x.get_values().resize(this->n());
677 5654 : }
678 :
679 : #else
680 :
681 : template<typename T>
682 0 : void DenseMatrix<T>::_svd_solve_lapack(const DenseVector<T>& /*rhs*/,
683 : DenseVector<T> & /*x*/,
684 : Real /*rcond*/) const
685 : {
686 0 : libmesh_not_implemented();
687 : }
688 : #endif // (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
689 :
690 :
691 :
692 : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
693 :
694 : template<typename T>
695 140 : void DenseMatrix<T>::_evd_lapack (DenseVector<T> & lambda_real,
696 : DenseVector<T> & lambda_imag,
697 : DenseMatrix<T> * VL,
698 : DenseMatrix<T> * VR)
699 : {
700 : // This algorithm only works for square matrices, so verify this up front.
701 140 : libmesh_error_msg_if(this->m() != this->n(), "Can only compute eigen-decompositions for square matrices.");
702 :
703 : // If the user requests left or right eigenvectors, we have to make
704 : // sure and pass the transpose of this matrix to Lapack, otherwise
705 : // it will compute the inverse transpose of what we are
706 : // after... since we know the matrix is square, we can just swap
707 : // entries in place. If the user does not request eigenvectors, we
708 : // can skip this extra step, since the eigenvalues for A and A^T are
709 : // the same.
710 140 : if (VL || VR)
711 : {
712 560 : for (auto i : make_range(this->_m))
713 840 : for (auto j : make_range(i))
714 12 : std::swap((*this)(i,j), (*this)(j,i));
715 : }
716 :
717 : // The calling sequence for dgeev is:
718 : // DGEEV (character JOBVL,
719 : // character JOBVR,
720 : // integer N,
721 : // double precision, dimension( lda, * ) A,
722 : // integer LDA,
723 : // double precision, dimension( * ) WR,
724 : // double precision, dimension( * ) WI,
725 : // double precision, dimension( ldvl, * ) VL,
726 : // integer LDVL,
727 : // double precision, dimension( ldvr, * ) VR,
728 : // integer LDVR,
729 : // double precision, dimension( * ) WORK,
730 : // integer LWORK,
731 : // integer INFO)
732 :
733 : // JOBVL (input)
734 : // = 'N': left eigenvectors of A are not computed;
735 : // = 'V': left eigenvectors of A are computed.
736 140 : char JOBVL = VL ? 'V' : 'N';
737 :
738 : // JOBVR (input)
739 : // = 'N': right eigenvectors of A are not computed;
740 : // = 'V': right eigenvectors of A are computed.
741 140 : char JOBVR = VR ? 'V' : 'N';
742 :
743 : // N (input)
744 : // The number of rows/cols of the matrix A. N >= 0.
745 140 : PetscBLASInt N = this->m();
746 :
747 : // A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
748 : // On entry, the N-by-N matrix A.
749 : // On exit, A has been overwritten.
750 :
751 : // LDA (input)
752 : // The leading dimension of the array A. LDA >= max(1,N).
753 140 : PetscBLASInt LDA = N;
754 :
755 : // WR (output) double precision array, dimension (N)
756 : // WI (output) double precision array, dimension (N)
757 : // WR and WI contain the real and imaginary parts,
758 : // respectively, of the computed eigenvalues. Complex
759 : // conjugate pairs of eigenvalues appear consecutively
760 : // with the eigenvalue having the positive imaginary part
761 : // first.
762 136 : lambda_real.resize(N);
763 140 : lambda_imag.resize(N);
764 :
765 : // VL (output) double precision array, dimension (LDVL,N)
766 : // If JOBVL = 'V', the left eigenvectors u(j) are stored one
767 : // after another in the columns of VL, in the same order
768 : // as their eigenvalues.
769 : // If JOBVL = 'N', VL is not referenced.
770 : // If the j-th eigenvalue is real, then u(j) = VL(:,j),
771 : // the j-th column of VL.
772 : // If the j-th and (j+1)-st eigenvalues form a complex
773 : // conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
774 : // u(j+1) = VL(:,j) - i*VL(:,j+1).
775 : // Will be set below if needed.
776 :
777 : // LDVL (input)
778 : // The leading dimension of the array VL. LDVL >= 1; if
779 : // JOBVL = 'V', LDVL >= N.
780 140 : PetscBLASInt LDVL = VL ? N : 1;
781 :
782 : // VR (output) DOUBLE PRECISION array, dimension (LDVR,N)
783 : // If JOBVR = 'V', the right eigenvectors v(j) are stored one
784 : // after another in the columns of VR, in the same order
785 : // as their eigenvalues.
786 : // If JOBVR = 'N', VR is not referenced.
787 : // If the j-th eigenvalue is real, then v(j) = VR(:,j),
788 : // the j-th column of VR.
789 : // If the j-th and (j+1)-st eigenvalues form a complex
790 : // conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
791 : // v(j+1) = VR(:,j) - i*VR(:,j+1).
792 : // Will be set below if needed.
793 :
794 : // LDVR (input)
795 : // The leading dimension of the array VR. LDVR >= 1; if
796 : // JOBVR = 'V', LDVR >= N.
797 140 : PetscBLASInt LDVR = VR ? N : 1;
798 :
799 : // WORK (workspace/output) double precision array, dimension (MAX(1,LWORK))
800 : // On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
801 : //
802 : // LWORK (input)
803 : // The dimension of the array WORK. LWORK >= max(1,3*N), and
804 : // if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good
805 : // performance, LWORK must generally be larger.
806 : //
807 : // If LWORK = -1, then a workspace query is assumed; the routine
808 : // only calculates the optimal size of the WORK array, returns
809 : // this value as the first entry of the WORK array, and no error
810 : // message related to LWORK is issued by XERBLA.
811 140 : PetscBLASInt LWORK = (VR || VL) ? 4*N : 3*N;
812 144 : std::vector<T> WORK(LWORK);
813 :
814 : // INFO (output)
815 : // = 0: successful exit
816 : // < 0: if INFO = -i, the i-th argument had an illegal value.
817 : // > 0: if INFO = i, the QR algorithm failed to compute all the
818 : // eigenvalues, and no eigenvectors or condition numbers
819 : // have been computed; elements 1:ILO-1 and i+1:N of WR
820 : // and WI contain eigenvalues which have converged.
821 140 : PetscBLASInt INFO = 0;
822 :
823 : // Get references to raw data
824 4 : std::vector<T> & lambda_real_val = lambda_real.get_values();
825 4 : std::vector<T> & lambda_imag_val = lambda_imag.get_values();
826 :
827 : // Set up eigenvector storage if necessary.
828 4 : T * VR_ptr = nullptr;
829 140 : if (VR)
830 : {
831 140 : VR->resize(N, N);
832 4 : VR_ptr = VR->get_values().data();
833 : }
834 :
835 4 : T * VL_ptr = nullptr;
836 140 : if (VL)
837 : {
838 140 : VL->resize(N, N);
839 4 : VL_ptr = VL->get_values().data();
840 : }
841 :
842 : // Ready to call the Lapack routine through PETSc's interface
843 140 : LAPACKgeev_(&JOBVL,
844 : &JOBVR,
845 : &N,
846 : pPS(_val.data()),
847 : &LDA,
848 : pPS(lambda_real_val.data()),
849 : pPS(lambda_imag_val.data()),
850 : pPS(VL_ptr),
851 : &LDVL,
852 : pPS(VR_ptr),
853 : &LDVR,
854 : pPS(WORK.data()),
855 : &LWORK,
856 : &INFO);
857 :
858 : // Check return value for errors
859 140 : libmesh_error_msg_if(INFO != 0, "INFO=" << INFO << ", Error during Lapack eigenvalue calculation!");
860 :
861 : // If the user requested either right or left eigenvectors, LAPACK
862 : // has now computed the transpose of the desired matrix, i.e. V^T
863 : // instead of V. We could leave this up to user code to handle, but
864 : // rather than risking getting very unexpected results, we'll just
865 : // transpose it in place before handing it back.
866 140 : if (VR)
867 : {
868 560 : for (auto i : make_range(N))
869 840 : for (auto j : make_range(i))
870 420 : std::swap((*VR)(i,j), (*VR)(j,i));
871 : }
872 :
873 140 : if (VL)
874 : {
875 560 : for (auto i : make_range(N))
876 840 : for (auto j : make_range(i))
877 420 : std::swap((*VL)(i,j), (*VL)(j,i));
878 : }
879 140 : }
880 :
881 : #else
882 :
883 : template<typename T>
884 0 : void DenseMatrix<T>::_evd_lapack (DenseVector<T> &,
885 : DenseVector<T> &,
886 : DenseMatrix<T> *,
887 : DenseMatrix<T> *)
888 : {
889 0 : libmesh_error_msg("_evd_lapack is currently only available when LIBMESH_USE_REAL_NUMBERS is defined!");
890 : }
891 :
892 : #endif
893 :
894 :
895 :
896 :
897 :
898 : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
899 :
900 : template<typename T>
901 2875030 : void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T> & b,
902 : DenseVector<T> & x)
903 : {
904 : // The calling sequence for getrs is:
905 : // dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)
906 :
907 : // trans (input)
908 : // 'n' for no transpose, 't' for transpose
909 2875030 : char TRANS[] = "t";
910 :
911 : // N (input)
912 : // The order of the matrix A. N >= 0.
913 2875030 : PetscBLASInt N = this->m();
914 :
915 :
916 : // NRHS (input)
917 : // The number of right hand sides, i.e., the number of columns
918 : // of the matrix B. NRHS >= 0.
919 2875030 : PetscBLASInt NRHS = 1;
920 :
921 : // A (input) double precision array, dimension (LDA,N)
922 : // The factors L and U from the factorization A = P*L*U
923 : // as computed by dgetrf.
924 :
925 : // LDA (input)
926 : // The leading dimension of the array A. LDA >= max(1,N).
927 2875030 : PetscBLASInt LDA = N;
928 :
929 : // ipiv (input) int array, dimension (N)
930 : // The pivot indices from DGETRF; for 1<=i<=N, row i of the
931 : // matrix was interchanged with row IPIV(i).
932 : // Here, we pass _pivots.data() which was computed in _lu_decompose_lapack
933 :
934 : // B (input/output) double precision array, dimension (LDB,NRHS)
935 : // On entry, the right hand side matrix B.
936 : // On exit, the solution matrix X.
937 : // Here, we pass a copy of the rhs vector's data array in x, so that the
938 : // passed right-hand side b is unmodified. I don't see a way around this
939 : // copy if we want to maintain an unmodified rhs in LibMesh.
940 253013 : x = b;
941 253013 : std::vector<T> & x_vec = x.get_values();
942 :
943 : // We can avoid the copy if we don't care about overwriting the RHS: just
944 : // pass b to the Lapack routine and then swap with x before exiting
945 : // std::vector<T> & x_vec = b.get_values();
946 :
947 : // LDB (input)
948 : // The leading dimension of the array B. LDB >= max(1,N).
949 2875030 : PetscBLASInt LDB = N;
950 :
951 : // INFO (output)
952 : // = 0: successful exit
953 : // < 0: if INFO = -i, the i-th argument had an illegal value
954 2875030 : PetscBLASInt INFO = 0;
955 :
956 : // Finally, ready to call the Lapack getrs function
957 2875030 : LAPACKgetrs_(TRANS, &N, &NRHS, pPS(_val.data()), &LDA,
958 : _pivots.data(), pPS(x_vec.data()), &LDB, &INFO);
959 :
960 : // Check return value for errors
961 2875030 : libmesh_error_msg_if(INFO != 0, "INFO=" << INFO << ", Error during Lapack LU solve!");
962 :
963 : // Don't do this if you already made a copy of b above
964 : // Swap b and x. The solution will then be in x, and whatever was originally
965 : // in x, maybe garbage, maybe nothing, will be in b.
966 : // FIXME: Rewrite the LU and Cholesky solves to just take one input, and overwrite
967 : // the input. This *should* make user code simpler, as they don't have to create
968 : // an extra vector just to pass it in to the solve function!
969 : // b.swap(x);
970 2875030 : }
971 :
972 : #else
973 :
974 : template<typename T>
975 0 : void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T> &,
976 : DenseVector<T> &)
977 : {
978 0 : libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
979 : }
980 :
981 : #endif
982 :
983 :
984 :
985 :
986 :
987 : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
988 :
989 : template<typename T>
990 28208966 : void DenseMatrix<T>::_matvec_blas(T alpha,
991 : T beta,
992 : DenseVector<T> & dest,
993 : const DenseVector<T> & arg,
994 : bool trans) const
995 : {
996 : // Ensure that dest and arg sizes are compatible
997 28208966 : if (!trans)
998 : {
999 : // dest ~ A * arg
1000 : // (mx1) (mxn) * (nx1)
1001 25633116 : libmesh_error_msg_if((dest.size() != this->m()) || (arg.size() != this->n()),
1002 : "Improper input argument sizes!");
1003 : }
1004 :
1005 : else // trans == true
1006 : {
1007 : // Ensure that dest and arg are proper size
1008 : // dest ~ A^T * arg
1009 : // (nx1) (nxm) * (mx1)
1010 2575850 : libmesh_error_msg_if((dest.size() != this->n()) || (arg.size() != this->m()),
1011 : "Improper input argument sizes!");
1012 : }
1013 :
1014 : // Calling sequence for dgemv:
1015 : //
1016 : // dgemv(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
1017 :
1018 : // TRANS (input)
1019 : // 't' for transpose, 'n' for non-transpose multiply
1020 : // We store everything in row-major order, so pass the transpose flag for
1021 : // non-transposed matvecs and the 'n' flag for transposed matvecs
1022 28208966 : char TRANS[] = "t";
1023 28208966 : if (trans)
1024 2575850 : TRANS[0] = 'n';
1025 :
1026 : // M (input)
1027 : // On entry, M specifies the number of rows of the matrix A.
1028 : // In C/C++, pass the number of *cols* of A
1029 28208966 : PetscBLASInt M = this->n();
1030 :
1031 : // N (input)
1032 : // On entry, N specifies the number of columns of the matrix A.
1033 : // In C/C++, pass the number of *rows* of A
1034 28208966 : PetscBLASInt N = this->m();
1035 :
1036 : // ALPHA (input)
1037 : // The scalar constant passed to this function
1038 :
1039 : // A (input) double precision array of DIMENSION ( LDA, n ).
1040 : // Before entry, the leading m by n part of the array A must
1041 : // contain the matrix of coefficients.
1042 : // The matrix, *this. Note that _matvec_blas is called from
1043 : // a const function, vector_mult(), and so we have made this function const
1044 : // as well. Since BLAS knows nothing about const, we have to cast it away
1045 : // now.
1046 2532207 : DenseMatrix<T> & a_ref = const_cast<DenseMatrix<T> &> ( *this );
1047 2532207 : std::vector<T> & a = a_ref.get_values();
1048 :
1049 : // LDA (input)
1050 : // On entry, LDA specifies the first dimension of A as declared
1051 : // in the calling (sub) program. LDA must be at least
1052 : // max( 1, m ).
1053 28208966 : PetscBLASInt LDA = M;
1054 :
1055 : // X (input) double precision array of DIMENSION at least
1056 : // ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
1057 : // and at least
1058 : // ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
1059 : // Before entry, the incremented array X must contain the
1060 : // vector x.
1061 : // Here, we must cast away the const-ness of "arg" since BLAS knows
1062 : // nothing about const
1063 2532207 : DenseVector<T> & x_ref = const_cast<DenseVector<T> &> ( arg );
1064 2532207 : std::vector<T> & x = x_ref.get_values();
1065 :
1066 : // INCX (input)
1067 : // On entry, INCX specifies the increment for the elements of
1068 : // X. INCX must not be zero.
1069 28208966 : PetscBLASInt INCX = 1;
1070 :
1071 : // BETA (input)
1072 : // On entry, BETA specifies the scalar beta. When BETA is
1073 : // supplied as zero then Y need not be set on input.
1074 : // The second scalar constant passed to this function
1075 :
1076 : // Y (input) double precision array of DIMENSION at least
1077 : // ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
1078 : // and at least
1079 : // ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
1080 : // Before entry with BETA non-zero, the incremented array Y
1081 : // must contain the vector y. On exit, Y is overwritten by the
1082 : // updated vector y.
1083 : // The input vector "dest"
1084 2532207 : std::vector<T> & y = dest.get_values();
1085 :
1086 : // INCY (input)
1087 : // On entry, INCY specifies the increment for the elements of
1088 : // Y. INCY must not be zero.
1089 28208966 : PetscBLASInt INCY = 1;
1090 :
1091 : // Finally, ready to call the BLAS function
1092 33273172 : BLASgemv_(TRANS, &M, &N, pPS(&alpha), pPS(a.data()), &LDA,
1093 5064206 : pPS(x.data()), &INCX, pPS(&beta), pPS(y.data()), &INCY);
1094 28208966 : }
1095 :
1096 :
1097 : #else
1098 :
1099 :
1100 : template<typename T>
1101 0 : void DenseMatrix<T>::_matvec_blas(T,
1102 : T,
1103 : DenseVector<T> &,
1104 : const DenseVector<T> &,
1105 : bool) const
1106 : {
1107 0 : libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
1108 : }
1109 :
1110 :
1111 : #endif
1112 :
1113 :
1114 : //--------------------------------------------------------------
1115 : // Explicit instantiations
1116 : template LIBMESH_EXPORT void DenseMatrix<Real>::_multiply_blas(const DenseMatrixBase<Real> &, _BLAS_Multiply_Flag);
1117 : template LIBMESH_EXPORT void DenseMatrix<Real>::_lu_decompose_lapack();
1118 : template LIBMESH_EXPORT void DenseMatrix<Real>::_lu_back_substitute_lapack(const DenseVector<Real> &,
1119 : DenseVector<Real> &);
1120 : template LIBMESH_EXPORT void DenseMatrix<Real>::_matvec_blas(Real,
1121 : Real,
1122 : DenseVector<Real> &,
1123 : const DenseVector<Real> &,
1124 : bool) const;
1125 : template LIBMESH_EXPORT void DenseMatrix<Real>::_svd_lapack(DenseVector<Real> &);
1126 : template LIBMESH_EXPORT void DenseMatrix<Real>::_svd_lapack(DenseVector<Real> &,
1127 : DenseMatrix<Number> &,
1128 : DenseMatrix<Number> &);
1129 : template LIBMESH_EXPORT void DenseMatrix<Real>::_svd_helper (char,
1130 : char,
1131 : std::vector<Real> &,
1132 : std::vector<Number> &,
1133 : std::vector<Number> &);
1134 : template LIBMESH_EXPORT void DenseMatrix<Real>::_svd_solve_lapack (const DenseVector<Real> &, DenseVector<Real> &, Real) const;
1135 : template LIBMESH_EXPORT void DenseMatrix<Real>::_evd_lapack(DenseVector<Real> &,
1136 : DenseVector<Real> &,
1137 : DenseMatrix<Real> *,
1138 : DenseMatrix<Real> *);
1139 :
1140 : #if !(LIBMESH_USE_REAL_NUMBERS)
1141 : template LIBMESH_EXPORT void DenseMatrix<Number>::_multiply_blas(const DenseMatrixBase<Number> &, _BLAS_Multiply_Flag);
1142 : template LIBMESH_EXPORT void DenseMatrix<Number>::_lu_decompose_lapack();
1143 : template LIBMESH_EXPORT void DenseMatrix<Number>::_lu_back_substitute_lapack(const DenseVector<Number> &,
1144 : DenseVector<Number> &);
1145 : template LIBMESH_EXPORT void DenseMatrix<Number>::_matvec_blas(Number,
1146 : Number,
1147 : DenseVector<Number> &,
1148 : const DenseVector<Number> &,
1149 : bool) const;
1150 : template LIBMESH_EXPORT void DenseMatrix<Number>::_svd_lapack(DenseVector<Real> &);
1151 : template LIBMESH_EXPORT void DenseMatrix<Number>::_svd_lapack(DenseVector<Real> &,
1152 : DenseMatrix<Number> &,
1153 : DenseMatrix<Number> &);
1154 : template LIBMESH_EXPORT void DenseMatrix<Number>::_svd_helper (char,
1155 : char,
1156 : std::vector<Real> &,
1157 : std::vector<Number> &,
1158 : std::vector<Number> &);
1159 : template LIBMESH_EXPORT void DenseMatrix<Number>::_svd_solve_lapack (const DenseVector<Number> &, DenseVector<Number> &, Real) const;
1160 : template LIBMESH_EXPORT void DenseMatrix<Number>::_evd_lapack(DenseVector<Number> &,
1161 : DenseVector<Number> &,
1162 : DenseMatrix<Number> *,
1163 : DenseMatrix<Number> *);
1164 :
1165 : #endif
1166 :
1167 : } // namespace libMesh
|