LCOV - code coverage report
Current view: top level - src/numerics - dense_matrix_blas_lapack.C (source / functions) Hit Total Coverage
Test: libMesh/libmesh: #4229 (6a9aeb) with base 727f46 Lines: 184 220 83.6 %
Date: 2025-08-19 19:27:09 Functions: 8 20 40.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // The libMesh Finite Element Library.
       2             : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
       3             : 
       4             : // This library is free software; you can redistribute it and/or
       5             : // modify it under the terms of the GNU Lesser General Public
       6             : // License as published by the Free Software Foundation; either
       7             : // version 2.1 of the License, or (at your option) any later version.
       8             : 
       9             : // This library is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12             : // Lesser General Public License for more details.
      13             : 
      14             : // You should have received a copy of the GNU Lesser General Public
      15             : // License along with this library; if not, write to the Free Software
      16             : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      17             : 
      18             : 
      19             : // Local Includes
      20             : #include "libmesh/dense_matrix.h"
      21             : #include "libmesh/dense_vector.h"
      22             : #include "libmesh/int_range.h"
      23             : 
      24             : #if (LIBMESH_HAVE_PETSC)
      25             : # include "libmesh/petsc_macro.h"
      26             : # include "libmesh/ignore_warnings.h"
      27             : # include <petscblaslapack.h>
      28             : # include "libmesh/restore_warnings.h"
      29             : #endif
      30             : 
      31             : namespace libMesh
      32             : {
      33             : 
      34             : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
      35             : 
      36             : template<typename T>
      37     5545974 : void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T> & other,
      38             :                                     _BLAS_Multiply_Flag flag)
      39             : {
      40      272480 :   int result_size = 0;
      41             : 
      42             :   // For each case, determine the size of the final result make sure
      43             :   // that the inner dimensions match
      44     5545974 :   switch (flag)
      45             :     {
      46           0 :     case LEFT_MULTIPLY:
      47             :       {
      48           0 :         result_size = other.m() * this->n();
      49           0 :         if (other.n() == this->m())
      50           0 :           break;
      51             :       }
      52             :       libmesh_fallthrough();
      53             :     case RIGHT_MULTIPLY:
      54             :       {
      55     2773009 :         result_size = other.n() * this->m();
      56     2773009 :         if (other.m() == this->n())
      57      136242 :           break;
      58             :       }
      59             :       libmesh_fallthrough();
      60             :     case LEFT_MULTIPLY_TRANSPOSE:
      61             :       {
      62     1298405 :         result_size = other.n() * this->n();
      63     1298405 :         if (other.m() == this->m())
      64      136238 :           break;
      65             :       }
      66             :       libmesh_fallthrough();
      67             :     case RIGHT_MULTIPLY_TRANSPOSE:
      68             :       {
      69     1474560 :         result_size = other.m() * this->m();
      70     1474560 :         if (other.n() == this->n())
      71           0 :           break;
      72             :       }
      73             :       libmesh_fallthrough();
      74             :     default:
      75           0 :       libmesh_error_msg("Unknown flag selected or matrices are incompatible for multiplication.");
      76             :     }
      77             : 
      78             :   // For this to work, the passed arg. must actually be a DenseMatrix<T>
      79      272480 :   const DenseMatrix<T> * const_that = cast_ptr<const DenseMatrix<T> *>(&other);
      80             : 
      81             :   // Also, although 'that' is logically const in this BLAS routine,
      82             :   // the PETSc BLAS interface does not specify that any of the inputs are
      83             :   // const.  To use it, I must cast away const-ness.
      84      272480 :   DenseMatrix<T> * that = const_cast<DenseMatrix<T> *> (const_that);
      85             : 
      86             :   // Initialize A, B pointers for LEFT_MULTIPLY* cases
      87      272480 :   DenseMatrix<T> * A = this;
      88      272480 :   DenseMatrix<T> * B = that;
      89             : 
      90             :   // For RIGHT_MULTIPLY* cases, swap the meaning of A and B.
      91             :   // Here is a full table of combinations we can pass to BLASgemm, and what the answer is when finished:
      92             :   // pass A B   -> (Fortran) -> A^T B^T -> (C++) -> (A^T B^T)^T -> (identity) -> B A   "lt multiply"
      93             :   // pass B A   -> (Fortran) -> B^T A^T -> (C++) -> (B^T A^T)^T -> (identity) -> A B   "rt multiply"
      94             :   // pass A B^T -> (Fortran) -> A^T B   -> (C++) -> (A^T B)^T   -> (identity) -> B^T A "lt multiply t"
      95             :   // pass B^T A -> (Fortran) -> B A^T   -> (C++) -> (B A^T)^T   -> (identity) -> A B^T "rt multiply t"
      96     5545974 :   if (flag==RIGHT_MULTIPLY || flag==RIGHT_MULTIPLY_TRANSPOSE)
      97      136242 :     std::swap(A,B);
      98             : 
      99             :   // transa, transb values to pass to blas
     100             :   char
     101     5545974 :     transa[] = "n",
     102     5545974 :     transb[] = "n";
     103             : 
     104             :   // Integer values to pass to BLAS:
     105             :   //
     106             :   // M
     107             :   // In Fortran, the number of rows of op(A),
     108             :   // In the BLAS documentation, typically known as 'M'.
     109             :   //
     110             :   // In C/C++, we set:
     111             :   // M = n_cols(A) if (transa='n')
     112             :   //     n_rows(A) if (transa='t')
     113     5545974 :   PetscBLASInt M = static_cast<PetscBLASInt>( A->n() );
     114             : 
     115             :   // N
     116             :   // In Fortran, the number of cols of op(B), and also the number of cols of C.
     117             :   // In the BLAS documentation, typically known as 'N'.
     118             :   //
     119             :   // In C/C++, we set:
     120             :   // N = n_rows(B) if (transb='n')
     121             :   //     n_cols(B) if (transb='t')
     122     5545974 :   PetscBLASInt N = static_cast<PetscBLASInt>( B->m() );
     123             : 
     124             :   // K
     125             :   // In Fortran, the number of cols of op(A), and also
     126             :   // the number of rows of op(B). In the BLAS documentation,
     127             :   // typically known as 'K'.
     128             :   //
     129             :   // In C/C++, we set:
     130             :   // K = n_rows(A) if (transa='n')
     131             :   //     n_cols(A) if (transa='t')
     132     5545974 :   PetscBLASInt K = static_cast<PetscBLASInt>( A->m() );
     133             : 
     134             :   // LDA (leading dimension of A). In our cases,
     135             :   // LDA is always the number of columns of A.
     136     5545974 :   PetscBLASInt LDA = static_cast<PetscBLASInt>( A->n() );
     137             : 
     138             :   // LDB (leading dimension of B).  In our cases,
     139             :   // LDB is always the number of columns of B.
     140     5545974 :   PetscBLASInt LDB = static_cast<PetscBLASInt>( B->n() );
     141             : 
     142     5545974 :   if (flag == LEFT_MULTIPLY_TRANSPOSE)
     143             :     {
     144     1298405 :       transb[0] = 't';
     145     1298405 :       N = static_cast<PetscBLASInt>( B->n() );
     146             :     }
     147             : 
     148     4247569 :   else if (flag == RIGHT_MULTIPLY_TRANSPOSE)
     149             :     {
     150     1474560 :       transa[0] = 't';
     151           0 :       std::swap(M,K);
     152             :     }
     153             : 
     154             :   // LDC (leading dimension of C).  LDC is the
     155             :   // number of columns in the solution matrix.
     156     5545974 :   PetscBLASInt LDC = M;
     157             : 
     158             :   // Scalar values to pass to BLAS
     159             :   //
     160             :   // scalar multiplying the whole product AB
     161     5545974 :   T alpha = 1.;
     162             : 
     163             :   // scalar multiplying C, which is the original matrix.
     164     5545974 :   T beta  = 0.;
     165             : 
     166             :   // Storage for the result
     167     5818454 :   std::vector<T> result (result_size);
     168             : 
     169             :   // Finally ready to call the BLAS
     170     6090930 :   BLASgemm_(transa, transb, &M, &N, &K, pPS(&alpha),
     171      544956 :             pPS(A->get_values().data()), &LDA,
     172      544956 :             pPS(B->get_values().data()), &LDB, pPS(&beta),
     173             :             pPS(result.data()), &LDC);
     174             : 
     175             :   // Update the relevant dimension for this matrix.
     176     5545974 :   switch (flag)
     177             :     {
     178           0 :     case LEFT_MULTIPLY:            { this->_m = other.m(); break; }
     179     2773009 :     case RIGHT_MULTIPLY:           { this->_n = other.n(); break; }
     180     1298405 :     case LEFT_MULTIPLY_TRANSPOSE:  { this->_m = other.n(); break; }
     181     1474560 :     case RIGHT_MULTIPLY_TRANSPOSE: { this->_n = other.m(); break; }
     182           0 :     default:
     183           0 :       libmesh_error_msg("Unknown flag selected.");
     184             :     }
     185             : 
     186             :   // Swap my data vector with the result
     187      272480 :   this->_val.swap(result);
     188     5545974 : }
     189             : 
     190             : #else
     191             : 
     192             : template<typename T>
     193           0 : void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T> &,
     194             :                                     _BLAS_Multiply_Flag)
     195             : {
     196           0 :   libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
     197             : }
     198             : 
     199             : #endif
     200             : 
     201             : 
     202             : 
     203             : 
     204             : 
     205             : 
     206             : 
     207             : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
     208             : 
     209             : template<typename T>
     210      758098 : void DenseMatrix<T>::_lu_decompose_lapack ()
     211             : {
     212             :   // If this function was called, there better not be any
     213             :   // previous decomposition of the matrix.
     214       60509 :   libmesh_assert_equal_to (this->_decomposition_type, NONE);
     215             : 
     216             :   // The calling sequence for dgetrf is:
     217             :   // dgetrf(M, N, A, lda, ipiv, info)
     218             : 
     219             :   // M (input)
     220             :   //   The number of rows of the matrix A.  M >= 0.
     221             :   // In C/C++, pass the number of *cols* of A
     222      758098 :   PetscBLASInt M = this->n();
     223             : 
     224             :   // N (input)
     225             :   //   The number of columns of the matrix A.  N >= 0.
     226             :   // In C/C++, pass the number of *rows* of A
     227      758098 :   PetscBLASInt N = this->m();
     228             : 
     229             :   // A (input/output) double precision array, dimension (LDA,N)
     230             :   //   On entry, the M-by-N matrix to be factored.
     231             :   //   On exit, the factors L and U from the factorization
     232             :   //   A = P*L*U; the unit diagonal elements of L are not stored.
     233             : 
     234             :   // LDA (input)
     235             :   //     The leading dimension of the array A.  LDA >= max(1,M).
     236      758098 :   PetscBLASInt LDA = M;
     237             : 
     238             :   // ipiv (output) integer array, dimension (min(m,n))
     239             :   //      The pivot indices; for 1 <= i <= min(m,n), row i of the
     240             :   //      matrix was interchanged with row IPIV(i).
     241             :   // Here, we pass _pivots.data(), a private class member used to store pivots
     242      758098 :   this->_pivots.resize( std::min(M,N) );
     243             : 
     244             :   // info (output)
     245             :   //      = 0:  successful exit
     246             :   //      < 0:  if INFO = -i, the i-th argument had an illegal value
     247             :   //      > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
     248             :   //            has been completed, but the factor U is exactly
     249             :   //            singular, and division by zero will occur if it is used
     250             :   //            to solve a system of equations.
     251      758098 :   PetscBLASInt INFO = 0;
     252             : 
     253             :   // Ready to call the actual factorization routine through PETSc's interface
     254      758098 :   LAPACKgetrf_(&M, &N, pPS(this->_val.data()), &LDA, _pivots.data(),
     255             :                &INFO);
     256             : 
     257             :   // Check return value for errors
     258      758098 :   libmesh_error_msg_if(INFO != 0, "INFO=" << INFO << ", Error during Lapack LU factorization!");
     259             : 
     260             :   // Set the flag for LU decomposition
     261      758098 :   this->_decomposition_type = LU_BLAS_LAPACK;
     262      758098 : }
     263             : 
     264             : #else
     265             : 
     266             : template<typename T>
     267           0 : void DenseMatrix<T>::_lu_decompose_lapack ()
     268             : {
     269           0 :   libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
     270             : }
     271             : 
     272             : #endif
     273             : 
     274             : 
     275             : 
     276             : template<typename T>
     277           0 : void DenseMatrix<T>::_svd_lapack (DenseVector<Real> & sigma)
     278             : {
     279             :   // The calling sequence for dgetrf is:
     280             :   // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
     281             : 
     282             :   // JOBU (input)
     283             :   //      Specifies options for computing all or part of the matrix U:
     284             :   //      = 'A':  all M columns of U are returned in array U:
     285             :   //      = 'S':  the first min(m,n) columns of U (the left singular
     286             :   //              vectors) are returned in the array U;
     287             :   //      = 'O':  the first min(m,n) columns of U (the left singular
     288             :   //              vectors) are overwritten on the array A;
     289             :   //      = 'N':  no columns of U (no left singular vectors) are
     290             :   //              computed.
     291           0 :   char JOBU = 'N';
     292             : 
     293             :   // JOBVT (input)
     294             :   //       Specifies options for computing all or part of the matrix
     295             :   //       V**T:
     296             :   //       = 'A':  all N rows of V**T are returned in the array VT;
     297             :   //       = 'S':  the first min(m,n) rows of V**T (the right singular
     298             :   //               vectors) are returned in the array VT;
     299             :   //       = 'O':  the first min(m,n) rows of V**T (the right singular
     300             :   //               vectors) are overwritten on the array A;
     301             :   //       = 'N':  no rows of V**T (no right singular vectors) are
     302             :   //               computed.
     303           0 :   char JOBVT = 'N';
     304             : 
     305           0 :   std::vector<Real> sigma_val;
     306           0 :   std::vector<Number> U_val;
     307           0 :   std::vector<Number> VT_val;
     308             : 
     309           0 :   _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val);
     310             : 
     311             :   // Copy the singular values into sigma, ignore U_val and VT_val
     312           0 :   sigma.resize(cast_int<unsigned int>(sigma_val.size()));
     313           0 :   for (auto i : make_range(sigma.size()))
     314           0 :     sigma(i) = sigma_val[i];
     315           0 : }
     316             : 
     317             : template<typename T>
     318        1470 : void DenseMatrix<T>::_svd_lapack (DenseVector<Real> & sigma,
     319             :                                   DenseMatrix<Number> & U,
     320             :                                   DenseMatrix<Number> & VT)
     321             : {
     322             :   // The calling sequence for dgetrf is:
     323             :   // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )
     324             : 
     325             :   // JOBU (input)
     326             :   //      Specifies options for computing all or part of the matrix U:
     327             :   //      = 'A':  all M columns of U are returned in array U:
     328             :   //      = 'S':  the first min(m,n) columns of U (the left singular
     329             :   //              vectors) are returned in the array U;
     330             :   //      = 'O':  the first min(m,n) columns of U (the left singular
     331             :   //              vectors) are overwritten on the array A;
     332             :   //      = 'N':  no columns of U (no left singular vectors) are
     333             :   //              computed.
     334          42 :   char JOBU = 'S';
     335             : 
     336             :   // JOBVT (input)
     337             :   //       Specifies options for computing all or part of the matrix
     338             :   //       V**T:
     339             :   //       = 'A':  all N rows of V**T are returned in the array VT;
     340             :   //       = 'S':  the first min(m,n) rows of V**T (the right singular
     341             :   //               vectors) are returned in the array VT;
     342             :   //       = 'O':  the first min(m,n) rows of V**T (the right singular
     343             :   //               vectors) are overwritten on the array A;
     344             :   //       = 'N':  no rows of V**T (no right singular vectors) are
     345             :   //               computed.
     346          42 :   char JOBVT = 'S';
     347             : 
     348             :   // Note: Lapack is going to compute the singular values of A^T.  If
     349             :   // A=U * S * V^T, then A^T = V * S * U^T, which means that the
     350             :   // values returned in the "U_val" array actually correspond to the
     351             :   // entries of the V matrix, and the values returned in the VT_val
     352             :   // array actually correspond to the entries of U^T.  Therefore, we
     353             :   // pass VT in the place of U and U in the place of VT below!
     354          84 :   std::vector<Real> sigma_val;
     355        1470 :   int M = this->n();
     356        1470 :   int N = this->m();
     357          84 :   int min_MN = (M < N) ? M : N;
     358             : 
     359             :   // Size user-provided storage appropriately. Inside svd_helper:
     360             :   // U_val is sized to (M x min_MN)
     361             :   // VT_val is sized to (min_MN x N)
     362             :   // So, we set up U to have the shape of "VT_val^T", and VT to
     363             :   // have the shape of "U_val^T".
     364             :   //
     365             :   // Finally, since the results are stored in column-major order by
     366             :   // Lapack, but we actually want the transpose of what Lapack
     367             :   // returns, this means (conveniently) that we don't even have to
     368             :   // copy anything after the call to _svd_helper, it should already be
     369             :   // in the correct order!
     370        1470 :   U.resize(N, min_MN);
     371        1428 :   VT.resize(min_MN, M);
     372             : 
     373        1470 :   _svd_helper(JOBU, JOBVT, sigma_val, VT.get_values(), U.get_values());
     374             : 
     375             :   // Copy the singular values into sigma.
     376        1470 :   sigma.resize(cast_int<unsigned int>(sigma_val.size()));
     377      143010 :   for (auto i : make_range(sigma.size()))
     378      149628 :     sigma(i) = sigma_val[i];
     379        1470 : }
     380             : 
     381             : #if (LIBMESH_HAVE_PETSC)
     382             : 
     383             : template<typename T>
     384        1470 : void DenseMatrix<T>::_svd_helper (char JOBU,
     385             :                                   char JOBVT,
     386             :                                   std::vector<Real> & sigma_val,
     387             :                                   std::vector<Number> & U_val,
     388             :                                   std::vector<Number> & VT_val)
     389             : {
     390             : 
     391             :   // M (input)
     392             :   //   The number of rows of the matrix A.  M >= 0.
     393             :   // In C/C++, pass the number of *cols* of A
     394        1470 :   PetscBLASInt M = this->n();
     395             : 
     396             :   // N (input)
     397             :   //   The number of columns of the matrix A.  N >= 0.
     398             :   // In C/C++, pass the number of *rows* of A
     399        1470 :   PetscBLASInt N = this->m();
     400             : 
     401          84 :   PetscBLASInt min_MN = (M < N) ? M : N;
     402          84 :   PetscBLASInt max_MN = (M > N) ? M : N;
     403             : 
     404             :   // A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
     405             :   //   On entry, the M-by-N matrix A.
     406             :   //   On exit,
     407             :   //   if JOBU  = 'O', A is overwritten with the first min(m,n)
     408             :   //                   columns of U (the left singular vectors,
     409             :   //                   stored columnwise);
     410             :   //   if JOBVT = 'O', A is overwritten with the first min(m,n)
     411             :   //                   rows of V**T (the right singular vectors,
     412             :   //                   stored rowwise);
     413             :   //   if JOBU != 'O' and JOBVT != 'O', the contents of A are destroyed.
     414             : 
     415             :   // LDA (input)
     416             :   //     The leading dimension of the array A.  LDA >= max(1,M).
     417        1470 :   PetscBLASInt LDA = M;
     418             : 
     419             :   // S (output) DOUBLE PRECISION array, dimension (min(M,N))
     420             :   //   The singular values of A, sorted so that S(i) >= S(i+1).
     421        1470 :   sigma_val.resize( min_MN );
     422             : 
     423             :   // LDU (input)
     424             :   //     The leading dimension of the array U.  LDU >= 1; if
     425             :   //     JOBU = 'S' or 'A', LDU >= M.
     426        1470 :   PetscBLASInt LDU = M;
     427             : 
     428             :   // U (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
     429             :   //   (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
     430             :   //   If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
     431             :   //   if JOBU = 'S', U contains the first min(m,n) columns of U
     432             :   //   (the left singular vectors, stored columnwise);
     433             :   //   if JOBU = 'N' or 'O', U is not referenced.
     434        1470 :   if (JOBU == 'S')
     435        1470 :     U_val.resize( LDU*min_MN );
     436             :   else
     437           0 :     U_val.resize( LDU*M );
     438             : 
     439             :   // LDVT (input)
     440             :   //      The leading dimension of the array VT.  LDVT >= 1; if
     441             :   //      JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
     442        1470 :   PetscBLASInt LDVT = N;
     443        1470 :   if (JOBVT == 'S')
     444        1470 :     LDVT = min_MN;
     445             : 
     446             :   // VT (output) DOUBLE PRECISION array, dimension (LDVT,N)
     447             :   //    If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
     448             :   //    V**T;
     449             :   //    if JOBVT = 'S', VT contains the first min(m,n) rows of
     450             :   //    V**T (the right singular vectors, stored rowwise);
     451             :   //    if JOBVT = 'N' or 'O', VT is not referenced.
     452        1470 :   VT_val.resize( LDVT*N );
     453             : 
     454             :   // LWORK (input)
     455             :   //       The dimension of the array WORK.
     456             :   //       LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
     457             :   //       For good performance, LWORK should generally be larger.
     458             :   //
     459             :   //       If LWORK = -1, then a workspace query is assumed; the routine
     460             :   //       only calculates the optimal size of the WORK array, returns
     461             :   //       this value as the first entry of the WORK array, and no error
     462             :   //       message related to LWORK is issued by XERBLA.
     463        1470 :   PetscBLASInt larger = (3*min_MN+max_MN > 5*min_MN) ? 3*min_MN+max_MN : 5*min_MN;
     464        1470 :   PetscBLASInt LWORK  = (larger > 1) ? larger : 1;
     465             : 
     466             : 
     467             :   // WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
     468             :   //      On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
     469             :   //      if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
     470             :   //      superdiagonal elements of an upper bidiagonal matrix B
     471             :   //      whose diagonal is in S (not necessarily sorted). B
     472             :   //      satisfies A = U * B * VT, so it has the same singular values
     473             :   //      as A, and singular vectors related by U and VT.
     474        1512 :   std::vector<Number> WORK( LWORK );
     475             : 
     476             :   // INFO (output)
     477             :   //      = 0:  successful exit.
     478             :   //      < 0:  if INFO = -i, the i-th argument had an illegal value.
     479             :   //      > 0:  if DBDSQR did not converge, INFO specifies how many
     480             :   //            superdiagonals of an intermediate bidiagonal form B
     481             :   //            did not converge to zero. See the description of WORK
     482             :   //            above for details.
     483        1470 :   PetscBLASInt INFO = 0;
     484             : 
     485             :   // Ready to call the actual factorization routine through PETSc's interface.
     486             : #ifdef LIBMESH_USE_REAL_NUMBERS
     487             :   // Note that the call to LAPACKgesvd_ may modify _val
     488        1470 :   LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, pPS(_val.data()), &LDA,
     489             :                pPS(sigma_val.data()), pPS(U_val.data()), &LDU,
     490             :                pPS(VT_val.data()), &LDVT, pPS(WORK.data()), &LWORK,
     491             :                &INFO);
     492             : #else
     493             :   // When we have LIBMESH_USE_COMPLEX_NUMBERS then we must pass an array of Complex
     494             :   // numbers to LAPACKgesvd_, but _val may contain Reals so we copy to Number below to
     495             :   // handle both the real-valued and complex-valued cases.
     496             :   std::vector<Number> val_copy(_val.size());
     497             :   for (auto i : make_range(_val.size()))
     498             :     val_copy[i] = _val[i];
     499             : 
     500             :   std::vector<Real> RWORK(5 * min_MN);
     501             :   LAPACKgesvd_(&JOBU, &JOBVT, &M, &N, val_copy.data(), &LDA, sigma_val.data(), U_val.data(),
     502             :                &LDU, VT_val.data(), &LDVT, WORK.data(), &LWORK, RWORK.data(), &INFO);
     503             : #endif
     504             : 
     505             :   // Check return value for errors
     506        1470 :   libmesh_error_msg_if(INFO != 0, "INFO=" << INFO << ", Error during Lapack SVD calculation!");
     507        1470 : }
     508             : 
     509             : 
     510             : #else
     511             : 
     512             : template<typename T>
     513           0 : void DenseMatrix<T>::_svd_helper (char,
     514             :                                   char,
     515             :                                   std::vector<Real> &,
     516             :                                   std::vector<Number> &,
     517             :                                   std::vector<Number> &)
     518             : {
     519           0 :   libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
     520             : }
     521             : 
     522             : #endif
     523             : 
     524             : 
     525             : 
     526             : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
     527             : 
     528             : template<typename T>
     529        5654 : void DenseMatrix<T>::_svd_solve_lapack(const DenseVector<T> & rhs,
     530             :                                        DenseVector<T> & x,
     531             :                                        Real rcond) const
     532             : {
     533             :   // Since BLAS is expecting column-major storage, we first need to
     534             :   // make a transposed copy of *this, then pass it to the gelss
     535             :   // routine instead of the original.  This extra copy is kind of a
     536             :   // bummer, it might be better if we could use the full SVD to
     537             :   // compute the least-squares solution instead...  Note that it isn't
     538             :   // completely terrible either, since A_trans gets overwritten by
     539             :   // Lapack, and we usually would end up making a copy of A outside
     540             :   // the function call anyway.
     541        6682 :   DenseMatrix<T> A_trans;
     542        5654 :   this->get_transpose(A_trans);
     543             : 
     544             :   // M
     545             :   // The number of rows of the input matrix. M >= 0.
     546             :   // This is actually the number of *columns* of A_trans.
     547        5654 :   PetscBLASInt M = A_trans.n();
     548             : 
     549             :   // N
     550             :   // The number of columns of the matrix A. N >= 0.
     551             :   // This is actually the number of *rows* of A_trans.
     552        5654 :   PetscBLASInt N = A_trans.m();
     553             : 
     554             :   // We'll use the min and max of (M,N) several times below.
     555        5654 :   PetscBLASInt max_MN = std::max(M,N);
     556        5654 :   PetscBLASInt min_MN = std::min(M,N);
     557             : 
     558             :   // NRHS
     559             :   // The number of right hand sides, i.e., the number of columns
     560             :   // of the matrices B and X. NRHS >= 0.
     561             :   // This could later be generalized to solve for multiple right-hand
     562             :   // sides...
     563        5654 :   PetscBLASInt NRHS = 1;
     564             : 
     565             :   // A is double precision array, dimension (LDA,N)
     566             :   // On entry, the M-by-N matrix A.
     567             :   // On exit, the first min(m,n) rows of A are overwritten with
     568             :   // its right singular vectors, stored rowwise.
     569             :   //
     570             :   // The data vector that will be passed to Lapack.
     571         514 :   std::vector<T> & A_trans_vals = A_trans.get_values();
     572             : 
     573             :   // LDA
     574             :   // The leading dimension of the array A.  LDA >= max(1,M).
     575        5654 :   PetscBLASInt LDA = M;
     576             : 
     577             :   // B is double precision array, dimension (LDB,NRHS)
     578             :   // On entry, the M-by-NRHS right hand side matrix B.
     579             :   // On exit, B is overwritten by the N-by-NRHS solution
     580             :   // matrix X.  If m >= n and RANK = n, the residual
     581             :   // sum-of-squares for the solution in the i-th column is given
     582             :   // by the sum of squares of elements n+1:m in that column.
     583             :   //
     584             :   // Since we don't want the user's rhs vector to be overwritten by
     585             :   // the solution, we copy the rhs values into the solution vector "x"
     586             :   // now.  x needs to be long enough to hold both the (Nx1) solution
     587             :   // vector or the (Mx1) rhs, so size it to the max of those.
     588        5654 :   x.resize(max_MN);
     589       35332 :   for (auto i : make_range(rhs.size()))
     590       32376 :     x(i) = rhs(i);
     591             : 
     592             :   // Make the syntax below simpler by grabbing a reference to this array.
     593         514 :   std::vector<T> & B = x.get_values();
     594             : 
     595             :   // LDB
     596             :   // The leading dimension of the array B. LDB >= max(1,max(M,N)).
     597        5654 :   PetscBLASInt LDB = x.size();
     598             : 
     599             :   // S is double precision array, dimension (min(M,N))
     600             :   // The singular values of A in decreasing order.
     601             :   // The condition number of A in the 2-norm = S(1)/S(min(m,n)).
     602        6168 :   std::vector<T> S(min_MN);
     603             : 
     604             :   // RCOND
     605             :   // Used to determine the effective rank of A.  Singular values
     606             :   // S(i) <= RCOND*S(1) are treated as zero.  If RCOND < 0, machine
     607             :   // precision is used instead.
     608        5654 :   PetscScalar RCOND = PS(rcond);
     609             : 
     610             :   // RANK
     611             :   // The effective rank of A, i.e., the number of singular values
     612             :   // which are greater than RCOND*S(1).
     613        5654 :   PetscBLASInt RANK = 0;
     614             : 
     615             :   // LWORK
     616             :   // The dimension of the array WORK. LWORK >= 1, and also:
     617             :   // LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
     618             :   // For good performance, LWORK should generally be larger.
     619             :   //
     620             :   // If LWORK = -1, then a workspace query is assumed; the routine
     621             :   // only calculates the optimal size of the WORK array, returns
     622             :   // this value as the first entry of the WORK array, and no error
     623             :   // message related to LWORK is issued by XERBLA.
     624             :   //
     625             :   // The factor of 3/2 is arbitrary and is used to satisfy the "should
     626             :   // generally be larger" clause.
     627        5654 :   PetscBLASInt LWORK = (3*min_MN + std::max(2*min_MN, std::max(max_MN, NRHS))) * 3/2;
     628             : 
     629             :   // WORK is double precision array, dimension (MAX(1,LWORK))
     630             :   // On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
     631        6168 :   std::vector<T> WORK(LWORK);
     632             : 
     633             :   // INFO
     634             :   // = 0:  successful exit
     635             :   // < 0:  if INFO = -i, the i-th argument had an illegal value.
     636             :   // > 0:  the algorithm for computing the SVD failed to converge;
     637             :   //       if INFO = i, i off-diagonal elements of an intermediate
     638             :   //       bidiagonal form did not converge to zero.
     639        5654 :   PetscBLASInt INFO = 0;
     640             : 
     641             :   // LAPACKgelss_(const PetscBLASInt *, // M
     642             :   //              const PetscBLASInt *, // N
     643             :   //              const PetscBLASInt *, // NRHS
     644             :   //              PetscScalar *,        // A
     645             :   //              const PetscBLASInt *, // LDA
     646             :   //              PetscScalar *,        // B
     647             :   //              const PetscBLASInt *, // LDB
     648             :   //              PetscReal *,          // S(out) = singular values of A in increasing order
     649             :   //              const PetscReal *,    // RCOND = tolerance for singular values
     650             :   //              PetscBLASInt *,       // RANK(out) = number of "non-zero" singular values
     651             :   //              PetscScalar *,        // WORK
     652             :   //              const PetscBLASInt *, // LWORK
     653             :   //              PetscBLASInt *);      // INFO
     654        5654 :   LAPACKgelss_(&M, &N, &NRHS, pPS(A_trans_vals.data()), &LDA,
     655             :                pPS(B.data()), &LDB, pPS(S.data()), &RCOND, &RANK,
     656             :                pPS(WORK.data()), &LWORK, &INFO);
     657             : 
     658             :   // Check for errors in the Lapack call
     659        5654 :   libmesh_error_msg_if(INFO < 0, "Error, argument " << -INFO << " to LAPACKgelss_ had an illegal value.");
     660        5654 :   libmesh_error_msg_if(INFO > 0, "The algorithm for computing the SVD failed to converge!");
     661             : 
     662             :   // Debugging: print singular values and information about condition number:
     663             :   // libMesh::err << "RCOND=" << RCOND << std::endl;
     664             :   // libMesh::err << "Singular values: " << std::endl;
     665             :   // for (auto i : index_range(S))
     666             :   //   libMesh::err << S[i] << std::endl;
     667             :   // libMesh::err << "The condition number of A is approximately: " << S[0]/S.back() << std::endl;
     668             : 
     669             :   // Lapack has already written the solution into B, but it will be
     670             :   // the wrong size for non-square problems, so we need to resize it
     671             :   // correctly.  The size of the solution vector should be the number
     672             :   // of columns of the original A matrix.  Unfortunately, resizing a
     673             :   // DenseVector currently also zeros it out (unlike a std::vector) so
     674             :   // we'll resize the underlying storage directly (the size is not
     675             :   // stored independently elsewhere).
     676        5654 :   x.get_values().resize(this->n());
     677        5654 : }
     678             : 
     679             : #else
     680             : 
     681             : template<typename T>
     682           0 : void DenseMatrix<T>::_svd_solve_lapack(const DenseVector<T>& /*rhs*/,
     683             :                                        DenseVector<T> & /*x*/,
     684             :                                        Real /*rcond*/) const
     685             : {
     686           0 :   libmesh_not_implemented();
     687             : }
     688             : #endif // (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
     689             : 
     690             : 
     691             : 
     692             : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
     693             : 
     694             : template<typename T>
     695         140 : void DenseMatrix<T>::_evd_lapack (DenseVector<T> & lambda_real,
     696             :                                   DenseVector<T> & lambda_imag,
     697             :                                   DenseMatrix<T> * VL,
     698             :                                   DenseMatrix<T> * VR)
     699             : {
     700             :   // This algorithm only works for square matrices, so verify this up front.
     701         140 :   libmesh_error_msg_if(this->m() != this->n(), "Can only compute eigen-decompositions for square matrices.");
     702             : 
     703             :   // If the user requests left or right eigenvectors, we have to make
     704             :   // sure and pass the transpose of this matrix to Lapack, otherwise
     705             :   // it will compute the inverse transpose of what we are
     706             :   // after... since we know the matrix is square, we can just swap
     707             :   // entries in place.  If the user does not request eigenvectors, we
     708             :   // can skip this extra step, since the eigenvalues for A and A^T are
     709             :   // the same.
     710         140 :   if (VL || VR)
     711             :     {
     712         560 :       for (auto i : make_range(this->_m))
     713         840 :         for (auto j : make_range(i))
     714          12 :           std::swap((*this)(i,j), (*this)(j,i));
     715             :     }
     716             : 
     717             :   // The calling sequence for dgeev is:
     718             :   // DGEEV (character  JOBVL,
     719             :   //        character  JOBVR,
     720             :   //        integer N,
     721             :   //        double precision, dimension( lda, * )  A,
     722             :   //        integer  LDA,
     723             :   //        double precision, dimension( * )  WR,
     724             :   //        double precision, dimension( * )  WI,
     725             :   //        double precision, dimension( ldvl, * )  VL,
     726             :   //        integer  LDVL,
     727             :   //        double precision, dimension( ldvr, * )  VR,
     728             :   //        integer  LDVR,
     729             :   //        double precision, dimension( * )  WORK,
     730             :   //        integer  LWORK,
     731             :   //        integer  INFO)
     732             : 
     733             :   // JOBVL (input)
     734             :   //       = 'N': left eigenvectors of A are not computed;
     735             :   //       = 'V': left eigenvectors of A are computed.
     736         140 :   char JOBVL = VL ? 'V' : 'N';
     737             : 
     738             :   // JOBVR (input)
     739             :   //       = 'N': right eigenvectors of A are not computed;
     740             :   //       = 'V': right eigenvectors of A are computed.
     741         140 :   char JOBVR = VR ? 'V' : 'N';
     742             : 
     743             :   // N (input)
     744             :   //   The number of rows/cols of the matrix A.  N >= 0.
     745         140 :   PetscBLASInt N = this->m();
     746             : 
     747             :   // A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
     748             :   //   On entry, the N-by-N matrix A.
     749             :   //   On exit, A has been overwritten.
     750             : 
     751             :   // LDA (input)
     752             :   //     The leading dimension of the array A.  LDA >= max(1,N).
     753         140 :   PetscBLASInt LDA = N;
     754             : 
     755             :   // WR (output) double precision array, dimension (N)
     756             :   // WI (output) double precision array, dimension (N)
     757             :   //    WR and WI contain the real and imaginary parts,
     758             :   //    respectively, of the computed eigenvalues.  Complex
     759             :   //    conjugate pairs of eigenvalues appear consecutively
     760             :   //    with the eigenvalue having the positive imaginary part
     761             :   //    first.
     762         136 :   lambda_real.resize(N);
     763         140 :   lambda_imag.resize(N);
     764             : 
     765             :   // VL (output) double precision array, dimension (LDVL,N)
     766             :   //    If JOBVL = 'V', the left eigenvectors u(j) are stored one
     767             :   //    after another in the columns of VL, in the same order
     768             :   //    as their eigenvalues.
     769             :   //    If JOBVL = 'N', VL is not referenced.
     770             :   //    If the j-th eigenvalue is real, then u(j) = VL(:,j),
     771             :   //    the j-th column of VL.
     772             :   //    If the j-th and (j+1)-st eigenvalues form a complex
     773             :   //    conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
     774             :   //    u(j+1) = VL(:,j) - i*VL(:,j+1).
     775             :   // Will be set below if needed.
     776             : 
     777             :   // LDVL (input)
     778             :   //      The leading dimension of the array VL.  LDVL >= 1; if
     779             :   //      JOBVL = 'V', LDVL >= N.
     780         140 :   PetscBLASInt LDVL = VL ? N : 1;
     781             : 
     782             :   // VR (output) DOUBLE PRECISION array, dimension (LDVR,N)
     783             :   //    If JOBVR = 'V', the right eigenvectors v(j) are stored one
     784             :   //    after another in the columns of VR, in the same order
     785             :   //    as their eigenvalues.
     786             :   //    If JOBVR = 'N', VR is not referenced.
     787             :   //    If the j-th eigenvalue is real, then v(j) = VR(:,j),
     788             :   //    the j-th column of VR.
     789             :   //    If the j-th and (j+1)-st eigenvalues form a complex
     790             :   //    conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
     791             :   //    v(j+1) = VR(:,j) - i*VR(:,j+1).
     792             :   // Will be set below if needed.
     793             : 
     794             :   // LDVR (input)
     795             :   //      The leading dimension of the array VR.  LDVR >= 1; if
     796             :   //      JOBVR = 'V', LDVR >= N.
     797         140 :   PetscBLASInt LDVR = VR ? N : 1;
     798             : 
     799             :   // WORK (workspace/output) double precision array, dimension (MAX(1,LWORK))
     800             :   //      On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
     801             :   //
     802             :   // LWORK (input)
     803             :   //       The dimension of the array WORK.  LWORK >= max(1,3*N), and
     804             :   //       if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N.  For good
     805             :   //       performance, LWORK must generally be larger.
     806             :   //
     807             :   //       If LWORK = -1, then a workspace query is assumed; the routine
     808             :   //       only calculates the optimal size of the WORK array, returns
     809             :   //       this value as the first entry of the WORK array, and no error
     810             :   //       message related to LWORK is issued by XERBLA.
     811         140 :   PetscBLASInt LWORK = (VR || VL) ? 4*N : 3*N;
     812         144 :   std::vector<T> WORK(LWORK);
     813             : 
     814             :   // INFO (output)
     815             :   //      = 0:  successful exit
     816             :   //      < 0:  if INFO = -i, the i-th argument had an illegal value.
     817             :   //      > 0:  if INFO = i, the QR algorithm failed to compute all the
     818             :   //            eigenvalues, and no eigenvectors or condition numbers
     819             :   //            have been computed; elements 1:ILO-1 and i+1:N of WR
     820             :   //            and WI contain eigenvalues which have converged.
     821         140 :   PetscBLASInt INFO = 0;
     822             : 
     823             :   // Get references to raw data
     824           4 :   std::vector<T> & lambda_real_val = lambda_real.get_values();
     825           4 :   std::vector<T> & lambda_imag_val = lambda_imag.get_values();
     826             : 
     827             :   // Set up eigenvector storage if necessary.
     828           4 :   T * VR_ptr = nullptr;
     829         140 :   if (VR)
     830             :     {
     831         140 :       VR->resize(N, N);
     832           4 :       VR_ptr = VR->get_values().data();
     833             :     }
     834             : 
     835           4 :   T * VL_ptr = nullptr;
     836         140 :   if (VL)
     837             :     {
     838         140 :       VL->resize(N, N);
     839           4 :       VL_ptr = VL->get_values().data();
     840             :     }
     841             : 
     842             :   // Ready to call the Lapack routine through PETSc's interface
     843         140 :   LAPACKgeev_(&JOBVL,
     844             :               &JOBVR,
     845             :               &N,
     846             :               pPS(_val.data()),
     847             :               &LDA,
     848             :               pPS(lambda_real_val.data()),
     849             :               pPS(lambda_imag_val.data()),
     850             :               pPS(VL_ptr),
     851             :               &LDVL,
     852             :               pPS(VR_ptr),
     853             :               &LDVR,
     854             :               pPS(WORK.data()),
     855             :               &LWORK,
     856             :               &INFO);
     857             : 
     858             :   // Check return value for errors
     859         140 :   libmesh_error_msg_if(INFO != 0, "INFO=" << INFO << ", Error during Lapack eigenvalue calculation!");
     860             : 
     861             :   // If the user requested either right or left eigenvectors, LAPACK
     862             :   // has now computed the transpose of the desired matrix, i.e. V^T
     863             :   // instead of V.  We could leave this up to user code to handle, but
     864             :   // rather than risking getting very unexpected results, we'll just
     865             :   // transpose it in place before handing it back.
     866         140 :   if (VR)
     867             :     {
     868         560 :       for (auto i : make_range(N))
     869         840 :         for (auto j : make_range(i))
     870         420 :           std::swap((*VR)(i,j), (*VR)(j,i));
     871             :     }
     872             : 
     873         140 :   if (VL)
     874             :     {
     875         560 :       for (auto i : make_range(N))
     876         840 :         for (auto j : make_range(i))
     877         420 :           std::swap((*VL)(i,j), (*VL)(j,i));
     878             :     }
     879         140 : }
     880             : 
     881             : #else
     882             : 
     883             : template<typename T>
     884           0 : void DenseMatrix<T>::_evd_lapack (DenseVector<T> &,
     885             :                                   DenseVector<T> &,
     886             :                                   DenseMatrix<T> *,
     887             :                                   DenseMatrix<T> *)
     888             : {
     889           0 :   libmesh_error_msg("_evd_lapack is currently only available when LIBMESH_USE_REAL_NUMBERS is defined!");
     890             : }
     891             : 
     892             : #endif
     893             : 
     894             : 
     895             : 
     896             : 
     897             : 
     898             : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
     899             : 
     900             : template<typename T>
     901     2875030 : void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T> & b,
     902             :                                                  DenseVector<T> & x)
     903             : {
     904             :   // The calling sequence for getrs is:
     905             :   // dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)
     906             : 
     907             :   // trans (input)
     908             :   //       'n' for no transpose, 't' for transpose
     909     2875030 :   char TRANS[] = "t";
     910             : 
     911             :   // N (input)
     912             :   //   The order of the matrix A.  N >= 0.
     913     2875030 :   PetscBLASInt N = this->m();
     914             : 
     915             : 
     916             :   // NRHS (input)
     917             :   //      The number of right hand sides, i.e., the number of columns
     918             :   //      of the matrix B.  NRHS >= 0.
     919     2875030 :   PetscBLASInt NRHS = 1;
     920             : 
     921             :   // A (input) double precision array, dimension (LDA,N)
     922             :   //   The factors L and U from the factorization A = P*L*U
     923             :   //   as computed by dgetrf.
     924             : 
     925             :   // LDA (input)
     926             :   //     The leading dimension of the array A.  LDA >= max(1,N).
     927     2875030 :   PetscBLASInt LDA = N;
     928             : 
     929             :   // ipiv (input) int array, dimension (N)
     930             :   //      The pivot indices from DGETRF; for 1<=i<=N, row i of the
     931             :   //      matrix was interchanged with row IPIV(i).
     932             :   // Here, we pass _pivots.data() which was computed in _lu_decompose_lapack
     933             : 
     934             :   // B (input/output) double precision array, dimension (LDB,NRHS)
     935             :   //   On entry, the right hand side matrix B.
     936             :   //   On exit, the solution matrix X.
     937             :   // Here, we pass a copy of the rhs vector's data array in x, so that the
     938             :   // passed right-hand side b is unmodified.  I don't see a way around this
     939             :   // copy if we want to maintain an unmodified rhs in LibMesh.
     940      253013 :   x = b;
     941      253013 :   std::vector<T> & x_vec = x.get_values();
     942             : 
     943             :   // We can avoid the copy if we don't care about overwriting the RHS: just
     944             :   // pass b to the Lapack routine and then swap with x before exiting
     945             :   // std::vector<T> & x_vec = b.get_values();
     946             : 
     947             :   // LDB (input)
     948             :   //     The leading dimension of the array B.  LDB >= max(1,N).
     949     2875030 :   PetscBLASInt LDB = N;
     950             : 
     951             :   // INFO (output)
     952             :   //      = 0:  successful exit
     953             :   //      < 0:  if INFO = -i, the i-th argument had an illegal value
     954     2875030 :   PetscBLASInt INFO = 0;
     955             : 
     956             :   // Finally, ready to call the Lapack getrs function
     957     2875030 :   LAPACKgetrs_(TRANS, &N, &NRHS, pPS(_val.data()), &LDA,
     958             :                _pivots.data(), pPS(x_vec.data()), &LDB, &INFO);
     959             : 
     960             :   // Check return value for errors
     961     2875030 :   libmesh_error_msg_if(INFO != 0, "INFO=" << INFO << ", Error during Lapack LU solve!");
     962             : 
     963             :   // Don't do this if you already made a copy of b above
     964             :   // Swap b and x.  The solution will then be in x, and whatever was originally
     965             :   // in x, maybe garbage, maybe nothing, will be in b.
     966             :   // FIXME: Rewrite the LU and Cholesky solves to just take one input, and overwrite
     967             :   // the input.  This *should* make user code simpler, as they don't have to create
     968             :   // an extra vector just to pass it in to the solve function!
     969             :   // b.swap(x);
     970     2875030 : }
     971             : 
     972             : #else
     973             : 
     974             : template<typename T>
     975           0 : void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T> &,
     976             :                                                  DenseVector<T> &)
     977             : {
     978           0 :   libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
     979             : }
     980             : 
     981             : #endif
     982             : 
     983             : 
     984             : 
     985             : 
     986             : 
     987             : #if (LIBMESH_HAVE_PETSC && LIBMESH_USE_REAL_NUMBERS)
     988             : 
     989             : template<typename T>
     990    28208966 : void DenseMatrix<T>::_matvec_blas(T alpha,
     991             :                                   T beta,
     992             :                                   DenseVector<T> & dest,
     993             :                                   const DenseVector<T> & arg,
     994             :                                   bool trans) const
     995             : {
     996             :   // Ensure that dest and arg sizes are compatible
     997    28208966 :   if (!trans)
     998             :     {
     999             :       // dest  ~ A     * arg
    1000             :       // (mx1)   (mxn) * (nx1)
    1001    25633116 :       libmesh_error_msg_if((dest.size() != this->m()) || (arg.size() != this->n()),
    1002             :                            "Improper input argument sizes!");
    1003             :     }
    1004             : 
    1005             :   else // trans == true
    1006             :     {
    1007             :       // Ensure that dest and arg are proper size
    1008             :       // dest  ~ A^T   * arg
    1009             :       // (nx1)   (nxm) * (mx1)
    1010     2575850 :       libmesh_error_msg_if((dest.size() != this->n()) || (arg.size() != this->m()),
    1011             :                            "Improper input argument sizes!");
    1012             :     }
    1013             : 
    1014             :   // Calling sequence for dgemv:
    1015             :   //
    1016             :   // dgemv(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
    1017             : 
    1018             :   // TRANS (input)
    1019             :   //       't' for transpose, 'n' for non-transpose multiply
    1020             :   // We store everything in row-major order, so pass the transpose flag for
    1021             :   // non-transposed matvecs and the 'n' flag for transposed matvecs
    1022    28208966 :   char TRANS[] = "t";
    1023    28208966 :   if (trans)
    1024     2575850 :     TRANS[0] = 'n';
    1025             : 
    1026             :   // M (input)
    1027             :   //   On entry, M specifies the number of rows of the matrix A.
    1028             :   // In C/C++, pass the number of *cols* of A
    1029    28208966 :   PetscBLASInt M = this->n();
    1030             : 
    1031             :   // N (input)
    1032             :   //   On entry, N specifies the number of columns of the matrix A.
    1033             :   // In C/C++, pass the number of *rows* of A
    1034    28208966 :   PetscBLASInt N = this->m();
    1035             : 
    1036             :   // ALPHA (input)
    1037             :   // The scalar constant passed to this function
    1038             : 
    1039             :   // A (input) double precision array of DIMENSION ( LDA, n ).
    1040             :   //   Before entry, the leading m by n part of the array A must
    1041             :   //   contain the matrix of coefficients.
    1042             :   // The matrix, *this.  Note that _matvec_blas is called from
    1043             :   // a const function, vector_mult(), and so we have made this function const
    1044             :   // as well.  Since BLAS knows nothing about const, we have to cast it away
    1045             :   // now.
    1046     2532207 :   DenseMatrix<T> & a_ref = const_cast<DenseMatrix<T> &> ( *this );
    1047     2532207 :   std::vector<T> & a = a_ref.get_values();
    1048             : 
    1049             :   // LDA (input)
    1050             :   //     On entry, LDA specifies the first dimension of A as declared
    1051             :   //     in the calling (sub) program. LDA must be at least
    1052             :   //     max( 1, m ).
    1053    28208966 :   PetscBLASInt LDA = M;
    1054             : 
    1055             :   // X (input) double precision array of DIMENSION at least
    1056             :   //   ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
    1057             :   //   and at least
    1058             :   //   ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
    1059             :   //   Before entry, the incremented array X must contain the
    1060             :   //   vector x.
    1061             :   // Here, we must cast away the const-ness of "arg" since BLAS knows
    1062             :   // nothing about const
    1063     2532207 :   DenseVector<T> & x_ref = const_cast<DenseVector<T> &> ( arg );
    1064     2532207 :   std::vector<T> & x = x_ref.get_values();
    1065             : 
    1066             :   // INCX (input)
    1067             :   //      On entry, INCX specifies the increment for the elements of
    1068             :   //      X. INCX must not be zero.
    1069    28208966 :   PetscBLASInt INCX = 1;
    1070             : 
    1071             :   // BETA (input)
    1072             :   //      On entry, BETA specifies the scalar beta. When BETA is
    1073             :   //      supplied as zero then Y need not be set on input.
    1074             :   // The second scalar constant passed to this function
    1075             : 
    1076             :   // Y (input) double precision array of DIMENSION at least
    1077             :   //   ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
    1078             :   //   and at least
    1079             :   //   ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
    1080             :   //   Before entry with BETA non-zero, the incremented array Y
    1081             :   //   must contain the vector y. On exit, Y is overwritten by the
    1082             :   //   updated vector y.
    1083             :   // The input vector "dest"
    1084     2532207 :   std::vector<T> & y = dest.get_values();
    1085             : 
    1086             :   // INCY (input)
    1087             :   //      On entry, INCY specifies the increment for the elements of
    1088             :   //      Y. INCY must not be zero.
    1089    28208966 :   PetscBLASInt INCY = 1;
    1090             : 
    1091             :   // Finally, ready to call the BLAS function
    1092    33273172 :   BLASgemv_(TRANS, &M, &N, pPS(&alpha), pPS(a.data()), &LDA,
    1093     5064206 :             pPS(x.data()), &INCX, pPS(&beta), pPS(y.data()), &INCY);
    1094    28208966 : }
    1095             : 
    1096             : 
    1097             : #else
    1098             : 
    1099             : 
    1100             : template<typename T>
    1101           0 : void DenseMatrix<T>::_matvec_blas(T,
    1102             :                                   T,
    1103             :                                   DenseVector<T> &,
    1104             :                                   const DenseVector<T> &,
    1105             :                                   bool) const
    1106             : {
    1107           0 :   libmesh_error_msg("No PETSc-provided BLAS/LAPACK available!");
    1108             : }
    1109             : 
    1110             : 
    1111             : #endif
    1112             : 
    1113             : 
    1114             : //--------------------------------------------------------------
    1115             : // Explicit instantiations
    1116             : template LIBMESH_EXPORT void DenseMatrix<Real>::_multiply_blas(const DenseMatrixBase<Real> &, _BLAS_Multiply_Flag);
    1117             : template LIBMESH_EXPORT void DenseMatrix<Real>::_lu_decompose_lapack();
    1118             : template LIBMESH_EXPORT void DenseMatrix<Real>::_lu_back_substitute_lapack(const DenseVector<Real> &,
    1119             :                                                             DenseVector<Real> &);
    1120             : template LIBMESH_EXPORT void DenseMatrix<Real>::_matvec_blas(Real,
    1121             :                                               Real,
    1122             :                                               DenseVector<Real> &,
    1123             :                                               const DenseVector<Real> &,
    1124             :                                               bool) const;
    1125             : template LIBMESH_EXPORT void DenseMatrix<Real>::_svd_lapack(DenseVector<Real> &);
    1126             : template LIBMESH_EXPORT void DenseMatrix<Real>::_svd_lapack(DenseVector<Real> &,
    1127             :                                              DenseMatrix<Number> &,
    1128             :                                              DenseMatrix<Number> &);
    1129             : template LIBMESH_EXPORT void DenseMatrix<Real>::_svd_helper (char,
    1130             :                                               char,
    1131             :                                               std::vector<Real> &,
    1132             :                                               std::vector<Number> &,
    1133             :                                               std::vector<Number> &);
    1134             : template LIBMESH_EXPORT void DenseMatrix<Real>::_svd_solve_lapack (const DenseVector<Real> &, DenseVector<Real> &, Real) const;
    1135             : template LIBMESH_EXPORT void DenseMatrix<Real>::_evd_lapack(DenseVector<Real> &,
    1136             :                                              DenseVector<Real> &,
    1137             :                                              DenseMatrix<Real> *,
    1138             :                                              DenseMatrix<Real> *);
    1139             : 
    1140             : #if !(LIBMESH_USE_REAL_NUMBERS)
    1141             : template LIBMESH_EXPORT void DenseMatrix<Number>::_multiply_blas(const DenseMatrixBase<Number> &, _BLAS_Multiply_Flag);
    1142             : template LIBMESH_EXPORT void DenseMatrix<Number>::_lu_decompose_lapack();
    1143             : template LIBMESH_EXPORT void DenseMatrix<Number>::_lu_back_substitute_lapack(const DenseVector<Number> &,
    1144             :                                                               DenseVector<Number> &);
    1145             : template LIBMESH_EXPORT void DenseMatrix<Number>::_matvec_blas(Number,
    1146             :                                                 Number,
    1147             :                                                 DenseVector<Number> &,
    1148             :                                                 const DenseVector<Number> &,
    1149             :                                                 bool) const;
    1150             : template LIBMESH_EXPORT void DenseMatrix<Number>::_svd_lapack(DenseVector<Real> &);
    1151             : template LIBMESH_EXPORT void DenseMatrix<Number>::_svd_lapack(DenseVector<Real> &,
    1152             :                                                DenseMatrix<Number> &,
    1153             :                                                DenseMatrix<Number> &);
    1154             : template LIBMESH_EXPORT void DenseMatrix<Number>::_svd_helper (char,
    1155             :                                                 char,
    1156             :                                                 std::vector<Real> &,
    1157             :                                                 std::vector<Number> &,
    1158             :                                                 std::vector<Number> &);
    1159             : template LIBMESH_EXPORT void DenseMatrix<Number>::_svd_solve_lapack (const DenseVector<Number> &, DenseVector<Number> &, Real) const;
    1160             : template LIBMESH_EXPORT void DenseMatrix<Number>::_evd_lapack(DenseVector<Number> &,
    1161             :                                                DenseVector<Number> &,
    1162             :                                                DenseMatrix<Number> *,
    1163             :                                                DenseMatrix<Number> *);
    1164             : 
    1165             : #endif
    1166             : 
    1167             : } // namespace libMesh

Generated by: LCOV version 1.14