LCOV - code coverage report
Current view: top level - include/numerics - dense_matrix_impl.h (source / functions) Hit Total Coverage
Test: libMesh/libmesh: #4229 (6a9aeb) with base 727f46 Lines: 174 306 56.9 %
Date: 2025-08-19 19:27:09 Functions: 33 80 41.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // The libMesh Finite Element Library.
       2             : // Copyright (C) 2002-2025 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
       3             : 
       4             : // This library is free software; you can redistribute it and/or
       5             : // modify it under the terms of the GNU Lesser General Public
       6             : // License as published by the Free Software Foundation; either
       7             : // version 2.1 of the License, or (at your option) any later version.
       8             : 
       9             : // This library is distributed in the hope that it will be useful,
      10             : // but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      12             : // Lesser General Public License for more details.
      13             : 
      14             : // You should have received a copy of the GNU Lesser General Public
      15             : // License along with this library; if not, write to the Free Software
      16             : // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
      17             : 
      18             : 
      19             : #ifndef LIBMESH_DENSE_MATRIX_IMPL_H
      20             : #define LIBMESH_DENSE_MATRIX_IMPL_H
      21             : 
      22             : // C++ Includes
      23             : #include <cstdlib> // *must* precede <cmath> for proper std:abs() on PGI, Sun Studio CC
      24             : #include <cmath> // for sqrt
      25             : 
      26             : // Local Includes
      27             : #include "libmesh/dense_matrix.h"
      28             : #include "libmesh/dense_vector.h"
      29             : #include "libmesh/int_range.h"
      30             : #include "libmesh/libmesh.h"
      31             : 
      32             : #ifdef LIBMESH_HAVE_METAPHYSICL
      33             : #include "metaphysicl/dualnumber_decl.h"
      34             : #endif
      35             : 
      36             : namespace libMesh
      37             : {
      38             : 
      39             : 
      40             : 
      41             : // ------------------------------------------------------------
      42             : // Dense Matrix member functions
      43             : 
      44             : template<typename T>
      45           0 : void DenseMatrix<T>::left_multiply (const DenseMatrixBase<T> & M2)
      46             : {
      47           0 :   if (this->use_blas_lapack)
      48           0 :     this->_multiply_blas(M2, LEFT_MULTIPLY);
      49             :   else
      50             :     {
      51             :       // (*this) <- M2 * (*this)
      52             :       // Where:
      53             :       // (*this) = (m x n),
      54             :       // M2      = (m x p),
      55             :       // M3      = (p x n)
      56             : 
      57             :       // M3 is a copy of *this before it gets resize()d
      58           0 :       DenseMatrix<T> M3(*this);
      59             : 
      60             :       // Resize *this so that the result can fit
      61           0 :       this->resize (M2.m(), M3.n());
      62             : 
      63             :       // Call the multiply function in the base class
      64           0 :       this->multiply(*this, M2, M3);
      65           0 :     }
      66           0 : }
      67             : 
      68             : 
      69             : 
      70             : template<typename T>
      71             : template<typename T2>
      72             : void DenseMatrix<T>::left_multiply (const DenseMatrixBase<T2> & M2)
      73             : {
      74             :   // (*this) <- M2 * (*this)
      75             :   // Where:
      76             :   // (*this) = (m x n),
      77             :   // M2      = (m x p),
      78             :   // M3      = (p x n)
      79             : 
      80             :   // M3 is a copy of *this before it gets resize()d
      81             :   DenseMatrix<T> M3(*this);
      82             : 
      83             :   // Resize *this so that the result can fit
      84             :   this->resize (M2.m(), M3.n());
      85             : 
      86             :   // Call the multiply function in the base class
      87             :   this->multiply(*this, M2, M3);
      88             : }
      89             : 
      90             : 
      91             : 
      92             : template<typename T>
      93     1381732 : void DenseMatrix<T>::left_multiply_transpose(const DenseMatrix<T> & A)
      94             : {
      95     1381732 :   if (this->use_blas_lapack)
      96     1298405 :     this->_multiply_blas(A, LEFT_MULTIPLY_TRANSPOSE);
      97             :   else
      98       83327 :     this->_left_multiply_transpose(A);
      99     1381732 : }
     100             : 
     101             : 
     102             : template<typename T>
     103             : template<typename T2>
     104             : void DenseMatrix<T>::left_multiply_transpose(const DenseMatrix<T2> & A)
     105             : {
     106             :   this->_left_multiply_transpose(A);
     107             : }
     108             : 
     109             : 
     110             : template<typename T>
     111             : template<typename T2>
     112       83327 : void DenseMatrix<T>::_left_multiply_transpose(const DenseMatrix<T2> & A)
     113             : {
     114             :   //Check to see if we are doing (A^T)*A
     115       83327 :   if (this == &A)
     116             :     {
     117             :       //libmesh_here();
     118           0 :       DenseMatrix<T> B(*this);
     119             : 
     120             :       // Simple but inefficient way
     121             :       // return this->left_multiply_transpose(B);
     122             : 
     123             :       // More efficient, but more code way
     124             :       // If A is mxn, the result will be a square matrix of Size n x n.
     125           0 :       const unsigned int n_rows = A.m();
     126           0 :       const unsigned int n_cols = A.n();
     127             : 
     128             :       // resize() *this and also zero out all entries.
     129           0 :       this->resize(n_cols,n_cols);
     130             : 
     131             :       // Compute the lower-triangular part
     132           0 :       for (unsigned int i=0; i<n_cols; ++i)
     133           0 :         for (unsigned int j=0; j<=i; ++j)
     134           0 :           for (unsigned int k=0; k<n_rows; ++k) // inner products are over n_rows
     135           0 :             (*this)(i,j) += B(k,i)*B(k,j);
     136             : 
     137             :       // Copy lower-triangular part into upper-triangular part
     138           0 :       for (unsigned int i=0; i<n_cols; ++i)
     139           0 :         for (unsigned int j=i+1; j<n_cols; ++j)
     140           0 :           (*this)(i,j) = (*this)(j,i);
     141           0 :     }
     142             : 
     143             :   else
     144             :     {
     145       83327 :       DenseMatrix<T> B(*this);
     146             : 
     147       83327 :       this->resize (A.n(), B.n());
     148             : 
     149           0 :       libmesh_assert_equal_to (A.m(), B.m());
     150           0 :       libmesh_assert_equal_to (this->m(), A.n());
     151           0 :       libmesh_assert_equal_to (this->n(), B.n());
     152             : 
     153           0 :       const unsigned int m_s = A.n();
     154           0 :       const unsigned int p_s = A.m();
     155           0 :       const unsigned int n_s = this->n();
     156             : 
     157             :       // Do it this way because there is a
     158             :       // decent chance (at least for constraint matrices)
     159             :       // that A.transpose(i,k) = 0.
     160     1199062 :       for (unsigned int i=0; i<m_s; i++)
     161    31704622 :         for (unsigned int k=0; k<p_s; k++)
     162           0 :           if (A.transpose(i,k) != 0.)
     163    27829299 :             for (unsigned int j=0; j<n_s; j++)
     164           0 :               (*this)(i,j) += A.transpose(i,k)*B(k,j);
     165       83327 :     }
     166       83327 : }
     167             : 
     168             : 
     169             : 
     170             : template<typename T>
     171     3040660 : void DenseMatrix<T>::right_multiply (const DenseMatrixBase<T> & M3)
     172             : {
     173     3040660 :   if (this->use_blas_lapack)
     174     2773009 :     this->_multiply_blas(M3, RIGHT_MULTIPLY);
     175             :   else
     176             :     {
     177             :       // (*this) <- (*this) * M3
     178             :       // Where:
     179             :       // (*this) = (m x n),
     180             :       // M2      = (m x p),
     181             :       // M3      = (p x n)
     182             : 
     183             :       // M2 is a copy of *this before it gets resize()d
     184      267651 :       DenseMatrix<T> M2(*this);
     185             : 
     186             :       // Resize *this so that the result can fit
     187      267651 :       this->resize (M2.m(), M3.n());
     188             : 
     189      267651 :       this->multiply(*this, M2, M3);
     190      267651 :     }
     191     3040660 : }
     192             : 
     193             : 
     194             : 
     195             : template<typename T>
     196             : template<typename T2>
     197             : void DenseMatrix<T>::right_multiply (const DenseMatrixBase<T2> & M3)
     198             : {
     199             :   // (*this) <- M3 * (*this)
     200             :   // Where:
     201             :   // (*this) = (m x n),
     202             :   // M2      = (m x p),
     203             :   // M3      = (p x n)
     204             : 
     205             :   // M2 is a copy of *this before it gets resize()d
     206             :   DenseMatrix<T> M2(*this);
     207             : 
     208             :   // Resize *this so that the result can fit
     209             :   this->resize (M2.m(), M3.n());
     210             : 
     211             :   this->multiply(*this, M2, M3);
     212             : }
     213             : 
     214             : 
     215             : 
     216             : 
     217             : template<typename T>
     218     1658880 : void DenseMatrix<T>::right_multiply_transpose (const DenseMatrix<T> & B)
     219             : {
     220     1658880 :   if (this->use_blas_lapack)
     221     1474560 :     this->_multiply_blas(B, RIGHT_MULTIPLY_TRANSPOSE);
     222             :   else
     223      184320 :     this->_right_multiply_transpose(B);
     224     1658880 : }
     225             : 
     226             : 
     227             : 
     228             : template<typename T>
     229             : template<typename T2>
     230             : void DenseMatrix<T>::right_multiply_transpose (const DenseMatrix<T2> & B)
     231             : {
     232             :   this->_right_multiply_transpose(B);
     233             : }
     234             : 
     235             : 
     236             : 
     237             : template<typename T>
     238             : template<typename T2>
     239      184320 : void DenseMatrix<T>::_right_multiply_transpose (const DenseMatrix<T2> & B)
     240             : {
     241             :   //Check to see if we are doing B*(B^T)
     242      184320 :   if (this == &B)
     243             :     {
     244             :       //libmesh_here();
     245           0 :       DenseMatrix<T> A(*this);
     246             : 
     247             :       // Simple but inefficient way
     248             :       // return this->right_multiply_transpose(A);
     249             : 
     250             :       // More efficient, more code
     251             :       // If B is mxn, the result will be a square matrix of Size m x m.
     252           0 :       const unsigned int n_rows = B.m();
     253           0 :       const unsigned int n_cols = B.n();
     254             : 
     255             :       // resize() *this and also zero out all entries.
     256           0 :       this->resize(n_rows,n_rows);
     257             : 
     258             :       // Compute the lower-triangular part
     259           0 :       for (unsigned int i=0; i<n_rows; ++i)
     260           0 :         for (unsigned int j=0; j<=i; ++j)
     261           0 :           for (unsigned int k=0; k<n_cols; ++k) // inner products are over n_cols
     262           0 :             (*this)(i,j) += A(i,k)*A(j,k);
     263             : 
     264             :       // Copy lower-triangular part into upper-triangular part
     265           0 :       for (unsigned int i=0; i<n_rows; ++i)
     266           0 :         for (unsigned int j=i+1; j<n_rows; ++j)
     267           0 :           (*this)(i,j) = (*this)(j,i);
     268           0 :     }
     269             : 
     270             :   else
     271             :     {
     272      184320 :       DenseMatrix<T> A(*this);
     273             : 
     274      184320 :       this->resize (A.m(), B.m());
     275             : 
     276           0 :       libmesh_assert_equal_to (A.n(), B.n());
     277           0 :       libmesh_assert_equal_to (this->m(), A.m());
     278           0 :       libmesh_assert_equal_to (this->n(), B.m());
     279             : 
     280           0 :       const unsigned int m_s = A.m();
     281           0 :       const unsigned int p_s = A.n();
     282           0 :       const unsigned int n_s = this->n();
     283             : 
     284             :       // Do it this way because there is a
     285             :       // decent chance (at least for constraint matrices)
     286             :       // that B.transpose(k,j) = 0.
     287      737280 :       for (unsigned int j=0; j<n_s; j++)
     288     3870720 :         for (unsigned int k=0; k<p_s; k++)
     289     3317760 :           if (B.transpose(k,j) != 0.)
     290     6635520 :             for (unsigned int i=0; i<m_s; i++)
     291     4976640 :               (*this)(i,j) += A(i,k)*B.transpose(k,j);
     292      184320 :     }
     293      184320 : }
     294             : 
     295             : 
     296             : 
     297             : 
     298             : template<typename T>
     299    27557982 : void DenseMatrix<T>::vector_mult (DenseVector<T> & dest,
     300             :                                   const DenseVector<T> & arg) const
     301             : {
     302             :   // Make sure the input sizes are compatible
     303     2290782 :   libmesh_assert_equal_to (this->n(), arg.size());
     304             : 
     305             :   // Resize and clear dest.
     306             :   // Note: DenseVector::resize() also zeros the vector.
     307    27557982 :   dest.resize(this->m());
     308             : 
     309             :   // Short-circuit if the matrix is empty
     310    27557982 :   if(this->m() == 0 || this->n() == 0)
     311       10000 :     return;
     312             : 
     313    27447982 :   if (this->use_blas_lapack)
     314    25633116 :     this->_matvec_blas(1., 0., dest, arg);
     315             :   else
     316             :     {
     317           0 :       const unsigned int n_rows = this->m();
     318           0 :       const unsigned int n_cols = this->n();
     319             : 
     320     6282190 :       for (unsigned int i=0; i<n_rows; i++)
     321    19415104 :         for (unsigned int j=0; j<n_cols; j++)
     322    10648800 :           dest(i) += (*this)(i,j)*arg(j);
     323             :     }
     324             : }
     325             : 
     326             : 
     327             : 
     328             : template<typename T>
     329             : template<typename T2>
     330           0 : void DenseMatrix<T>::vector_mult (DenseVector<typename CompareTypes<T,T2>::supertype> & dest,
     331             :                                   const DenseVector<T2> & arg) const
     332             : {
     333             :   // Make sure the input sizes are compatible
     334             :   libmesh_assert_equal_to (this->n(), arg.size());
     335             : 
     336             :   // Resize and clear dest.
     337             :   // Note: DenseVector::resize() also zeros the vector.
     338           0 :   dest.resize(this->m());
     339             : 
     340             :   // Short-circuit if the matrix is empty
     341           0 :   if (this->m() == 0 || this->n() == 0)
     342             :     return;
     343             : 
     344             :   const unsigned int n_rows = this->m();
     345             :   const unsigned int n_cols = this->n();
     346             : 
     347           0 :   for (unsigned int i=0; i<n_rows; i++)
     348           0 :     for (unsigned int j=0; j<n_cols; j++)
     349             :       dest(i) += (*this)(i,j)*arg(j);
     350             : }
     351             : 
     352             : 
     353             : 
     354             : template<typename T>
     355     2701351 : void DenseMatrix<T>::vector_mult_transpose (DenseVector<T> & dest,
     356             :                                             const DenseVector<T> & arg) const
     357             : {
     358             :   // Make sure the input sizes are compatible
     359      251425 :   libmesh_assert_equal_to (this->m(), arg.size());
     360             : 
     361             :   // Resize and clear dest.
     362             :   // Note: DenseVector::resize() also zeros the vector.
     363     2701351 :   dest.resize(this->n());
     364             : 
     365             :   // Short-circuit if the matrix is empty
     366     2701351 :   if (this->m() == 0)
     367           0 :     return;
     368             : 
     369     2701351 :   if (this->use_blas_lapack)
     370             :     {
     371     2575850 :       this->_matvec_blas(1., 0., dest, arg, /*trans=*/true);
     372             :     }
     373             :   else
     374             :     {
     375           0 :       const unsigned int n_rows = this->m();
     376           0 :       const unsigned int n_cols = this->n();
     377             : 
     378             :       // WORKS
     379             :       // for (unsigned int j=0; j<n_cols; j++)
     380             :       //   for (unsigned int i=0; i<n_rows; i++)
     381             :       //     dest(j) += (*this)(i,j)*arg(i);
     382             : 
     383             :       // ALSO WORKS, (i,j) just swapped
     384     2364298 :       for (unsigned int i=0; i<n_cols; i++)
     385    68843440 :         for (unsigned int j=0; j<n_rows; j++)
     386           0 :           dest(i) += (*this)(j,i)*arg(j);
     387             :     }
     388             : }
     389             : 
     390             : 
     391             : 
     392             : template<typename T>
     393             : template<typename T2>
     394           0 : void DenseMatrix<T>::vector_mult_transpose (DenseVector<typename CompareTypes<T,T2>::supertype> & dest,
     395             :                                             const DenseVector<T2> & arg) const
     396             : {
     397             :   // Make sure the input sizes are compatible
     398             :   libmesh_assert_equal_to (this->m(), arg.size());
     399             : 
     400             :   // Resize and clear dest.
     401             :   // Note: DenseVector::resize() also zeros the vector.
     402           0 :   dest.resize(this->n());
     403             : 
     404             :   // Short-circuit if the matrix is empty
     405           0 :   if (this->m() == 0)
     406             :     return;
     407             : 
     408             :   const unsigned int n_rows = this->m();
     409             :   const unsigned int n_cols = this->n();
     410             : 
     411             :   // WORKS
     412             :   // for (unsigned int j=0; j<n_cols; j++)
     413             :   //   for (unsigned int i=0; i<n_rows; i++)
     414             :   //     dest(j) += (*this)(i,j)*arg(i);
     415             : 
     416             :   // ALSO WORKS, (i,j) just swapped
     417           0 :   for (unsigned int i=0; i<n_cols; i++)
     418           0 :     for (unsigned int j=0; j<n_rows; j++)
     419             :       dest(i) += (*this)(j,i)*arg(j);
     420             : }
     421             : 
     422             : 
     423             : 
     424             : template<typename T>
     425           0 : void DenseMatrix<T>::vector_mult_add (DenseVector<T> & dest,
     426             :                                       const T factor,
     427             :                                       const DenseVector<T> & arg) const
     428             : {
     429             :   // Short-circuit if the matrix is empty
     430           0 :   if (this->m() == 0)
     431             :     {
     432           0 :       dest.resize(0);
     433           0 :       return;
     434             :     }
     435             : 
     436           0 :   if (this->use_blas_lapack)
     437           0 :     this->_matvec_blas(factor, 1., dest, arg);
     438             :   else
     439             :     {
     440           0 :       DenseVector<T> temp(arg.size());
     441           0 :       this->vector_mult(temp, arg);
     442           0 :       dest.add(factor, temp);
     443             :     }
     444             : }
     445             : 
     446             : 
     447             : 
     448             : template<typename T>
     449             : template<typename T2, typename T3>
     450           0 : void DenseMatrix<T>::vector_mult_add (DenseVector<typename CompareTypes<T, typename CompareTypes<T2,T3>::supertype>::supertype> & dest,
     451             :                                       const T2 factor,
     452             :                                       const DenseVector<T3> & arg) const
     453             : {
     454             :   // Short-circuit if the matrix is empty
     455           0 :   if (this->m() == 0)
     456             :     {
     457           0 :       dest.resize(0);
     458           0 :       return;
     459             :     }
     460             : 
     461             :   DenseVector<typename CompareTypes<T,T3>::supertype>
     462           0 :     temp(arg.size());
     463           0 :   this->vector_mult(temp, arg);
     464           0 :   dest.add(factor, temp);
     465             : }
     466             : 
     467             : 
     468             : 
     469             : template<typename T>
     470     1244422 : void DenseMatrix<T>::get_principal_submatrix (unsigned int sub_m,
     471             :                                               unsigned int sub_n,
     472             :                                               DenseMatrix<T> & dest) const
     473             : {
     474      104066 :   libmesh_assert( (sub_m <= this->m()) && (sub_n <= this->n()) );
     475             : 
     476     1140356 :   dest.resize(sub_m, sub_n);
     477    10361690 :   for (unsigned int i=0; i<sub_m; i++)
     478   102478626 :     for (unsigned int j=0; j<sub_n; j++)
     479    93361358 :       dest(i,j) = (*this)(i,j);
     480     1244422 : }
     481             : 
     482             : 
     483             : 
     484             : template<typename T>
     485          70 : void DenseMatrix<T>::outer_product (const DenseVector<T> & a,
     486             :                                     const DenseVector<T> & b)
     487             : {
     488           2 :   const unsigned int m = a.size();
     489           2 :   const unsigned int n = b.size();
     490             : 
     491          68 :   this->resize(m, n);
     492         210 :   for (unsigned int i = 0; i < m; ++i)
     493         560 :     for (unsigned int j = 0; j < n; ++j)
     494         444 :       (*this)(i,j) = a(i) * libmesh_conj(b(j));
     495          70 : }
     496             : 
     497             : 
     498             : 
     499             : template<typename T>
     500     1244422 : void DenseMatrix<T>::get_principal_submatrix (unsigned int sub_m, DenseMatrix<T> & dest) const
     501             : {
     502     1244422 :   get_principal_submatrix(sub_m, sub_m, dest);
     503     1244422 : }
     504             : 
     505             : 
     506             : 
     507             : template<typename T>
     508       35459 : void DenseMatrix<T>::get_transpose (DenseMatrix<T> & dest) const
     509             : {
     510       35459 :   dest.resize(this->n(), this->m());
     511             : 
     512      180745 :   for (auto i : make_range(dest.m()))
     513      825328 :     for (auto j : make_range(dest.n()))
     514      680042 :       dest(i,j) = (*this)(j,i);
     515       35459 : }
     516             : 
     517             : 
     518             : 
     519             : 
     520             : template<typename T>
     521     2925411 : void DenseMatrix<T>::lu_solve (const DenseVector<T> & b,
     522             :                                DenseVector<T> & x)
     523             : {
     524             :   // Check to be sure that the matrix is square before attempting
     525             :   // an LU-solve.  In general, one can compute the LU factorization of
     526             :   // a non-square matrix, but:
     527             :   //
     528             :   // Overdetermined systems (m>n) have a solution only if enough of
     529             :   // the equations are linearly-dependent.
     530             :   //
     531             :   // Underdetermined systems (m<n) typically have infinitely many
     532             :   // solutions.
     533             :   //
     534             :   // We don't want to deal with either of these ambiguous cases here...
     535      253013 :   libmesh_assert_equal_to (this->m(), this->n());
     536             : 
     537     2925411 :   switch(this->_decomposition_type)
     538             :     {
     539      807027 :     case NONE:
     540             :       {
     541      807027 :         if (this->use_blas_lapack)
     542      758098 :           this->_lu_decompose_lapack();
     543             :         else
     544       48929 :           this->_lu_decompose ();
     545       60509 :         break;
     546             :       }
     547             : 
     548     2116932 :     case LU_BLAS_LAPACK:
     549             :       {
     550             :         // Already factored, just need to call back_substitute.
     551     2116932 :         if (this->use_blas_lapack)
     552      192504 :           break;
     553             :       }
     554             :       libmesh_fallthrough();
     555             : 
     556             :     case LU:
     557             :       {
     558             :         // Already factored, just need to call back_substitute.
     559        1452 :         if (!(this->use_blas_lapack))
     560           0 :           break;
     561             :       }
     562             :       libmesh_fallthrough();
     563             : 
     564             :     default:
     565           0 :       libmesh_error_msg("Error! This matrix already has a different decomposition...");
     566             :     }
     567             : 
     568     2925411 :   if (this->use_blas_lapack)
     569     2875030 :     this->_lu_back_substitute_lapack (b, x);
     570             :   else
     571       50381 :     this->_lu_back_substitute (b, x);
     572     2925411 : }
     573             : 
     574             : 
     575             : 
     576             : 
     577             : 
     578             : 
     579             : template<typename T>
     580       50381 : void DenseMatrix<T>::_lu_back_substitute (const DenseVector<T> & b,
     581             :                                           DenseVector<T> & x ) const
     582             : {
     583             :   const unsigned int
     584           0 :     n_cols = this->n();
     585             : 
     586           0 :   libmesh_assert_equal_to (this->m(), n_cols);
     587           0 :   libmesh_assert_equal_to (this->m(), b.size());
     588             : 
     589       50381 :   x.resize (n_cols);
     590             : 
     591             :   // A convenient reference to *this
     592           0 :   const DenseMatrix<T> & A = *this;
     593             : 
     594             :   // Temporary vector storage.  We use this instead of
     595             :   // modifying the RHS.
     596           0 :   DenseVector<T> z = b;
     597             : 
     598             :   // Lower-triangular "top to bottom" solve step, taking into account pivots
     599      460467 :   for (unsigned int i=0; i<n_cols; ++i)
     600             :     {
     601             :       // Swap
     602      410086 :       if (_pivots[i] != static_cast<pivot_index_t>(i))
     603      167765 :         std::swap( z(i), z(_pivots[i]) );
     604             : 
     605      410086 :       x(i) = z(i);
     606             : 
     607     2242650 :       for (unsigned int j=0; j<i; ++j)
     608           0 :         x(i) -= A(i,j)*x(j);
     609             : 
     610           0 :       x(i) /= A(i,i);
     611             :     }
     612             : 
     613             :   // Upper-triangular "bottom to top" solve step
     614       50381 :   const unsigned int last_row = n_cols-1;
     615             : 
     616      460467 :   for (int i=last_row; i>=0; --i)
     617             :     {
     618     2242650 :       for (int j=i+1; j<static_cast<int>(n_cols); ++j)
     619     1832564 :         x(i) -= A(i,j)*x(j);
     620             :     }
     621       50381 : }
     622             : 
     623             : 
     624             : 
     625             : 
     626             : 
     627             : 
     628             : 
     629             : 
     630             : template<typename T>
     631       48929 : void DenseMatrix<T>::_lu_decompose ()
     632             : {
     633             :   // If this function was called, there better not be any
     634             :   // previous decomposition of the matrix.
     635           0 :   libmesh_assert_equal_to (this->_decomposition_type, NONE);
     636             : 
     637             :   // Get the matrix size and make sure it is square
     638             :   const unsigned int
     639           0 :     n_rows = this->m();
     640             : 
     641             :   // A convenient reference to *this
     642           0 :   DenseMatrix<T> & A = *this;
     643             : 
     644       48929 :   _pivots.resize(n_rows);
     645             : 
     646      450303 :   for (unsigned int i=0; i<n_rows; ++i)
     647             :     {
     648             :       // Find the pivot row by searching down the i'th column
     649      401374 :       _pivots[i] = i;
     650             : 
     651             :       // std::abs(complex) must return a Real!
     652           0 :       auto the_max = std::abs( A(i,i) );
     653     2212158 :       for (unsigned int j=i+1; j<n_rows; ++j)
     654             :         {
     655           0 :           auto candidate_max = std::abs( A(j,i) );
     656     1810784 :           if (the_max < candidate_max)
     657             :             {
     658           0 :               the_max = candidate_max;
     659      234548 :               _pivots[i] = j;
     660             :             }
     661             :         }
     662             : 
     663             :       // libMesh::out << "the_max=" << the_max << " found at row " << _pivots[i] << std::endl;
     664             : 
     665             :       // If the max was found in a different row, interchange rows.
     666             :       // Here we interchange the *entire* row, in Gaussian elimination
     667             :       // you would only interchange the subrows A(i,j) and A(p(i),j), for j>i
     668      401374 :       if (_pivots[i] != static_cast<pivot_index_t>(i))
     669             :         {
     670     1764815 :           for (unsigned int j=0; j<n_rows; ++j)
     671     1598400 :             std::swap( A(i,j), A(_pivots[i], j) );
     672             :         }
     673             : 
     674             :       // If the max abs entry found is zero, the matrix is singular
     675           0 :       libmesh_error_msg_if(A(i,i) == libMesh::zero, "Matrix A is singular!");
     676             : 
     677             :       // Scale upper triangle entries of row i by the diagonal entry
     678             :       // Note: don't scale the diagonal entry itself!
     679           0 :       const T diag_inv = 1. / A(i,i);
     680     2212158 :       for (unsigned int j=i+1; j<n_rows; ++j)
     681           0 :         A(i,j) *= diag_inv;
     682             : 
     683             :       // Update the remaining sub-matrix A[i+1:m][i+1:m]
     684             :       // by subtracting off (the diagonal-scaled)
     685             :       // upper-triangular part of row i, scaled by the
     686             :       // i'th column entry of each row.  In terms of
     687             :       // row operations, this is:
     688             :       // for each r > i
     689             :       //   SubRow(r) = SubRow(r) - A(r,i)*SubRow(i)
     690             :       //
     691             :       // If we were scaling the i'th column as well, like
     692             :       // in Gaussian elimination, this would 'zero' the
     693             :       // entry in the i'th column.
     694     2212158 :       for (unsigned int row=i+1; row<n_rows; ++row)
     695    14771792 :         for (unsigned int col=i+1; col<n_rows; ++col)
     696           0 :           A(row,col) -= A(row,i) * A(i,col);
     697             : 
     698             :     } // end i loop
     699             : 
     700             :   // Set the flag for LU decomposition
     701       48929 :   this->_decomposition_type = LU;
     702       48929 : }
     703             : 
     704             : 
     705             : 
     706             : template<typename T>
     707           0 : void DenseMatrix<T>::svd (DenseVector<Real> & sigma)
     708             : {
     709             :   // We use the LAPACK svd implementation
     710           0 :   _svd_lapack(sigma);
     711           0 : }
     712             : 
     713             : 
     714             : template<typename T>
     715        1470 : void DenseMatrix<T>::svd (DenseVector<Real> & sigma,
     716             :                           DenseMatrix<Number> & U,
     717             :                           DenseMatrix<Number> & VT)
     718             : {
     719             :   // We use the LAPACK svd implementation
     720        1470 :   _svd_lapack(sigma, U, VT);
     721        1470 : }
     722             : 
     723             : 
     724             : 
     725             : template<typename T>
     726        5654 : void DenseMatrix<T>::svd_solve(const DenseVector<T> & rhs,
     727             :                                DenseVector<T> & x,
     728             :                                Real rcond) const
     729             : {
     730        5654 :   _svd_solve_lapack(rhs, x, rcond);
     731        5654 : }
     732             : 
     733             : 
     734             : 
     735             : template<typename T>
     736           0 : void DenseMatrix<T>::evd (DenseVector<T> & lambda_real,
     737             :                           DenseVector<T> & lambda_imag)
     738             : {
     739             :   // We use the LAPACK eigenvalue problem implementation
     740           0 :   _evd_lapack(lambda_real, lambda_imag);
     741           0 : }
     742             : 
     743             : 
     744             : 
     745             : template<typename T>
     746           0 : void DenseMatrix<T>::evd_left(DenseVector<T> & lambda_real,
     747             :                               DenseVector<T> & lambda_imag,
     748             :                               DenseMatrix<T> & VL)
     749             : {
     750             :   // We use the LAPACK eigenvalue problem implementation
     751           0 :   _evd_lapack(lambda_real, lambda_imag, &VL, nullptr);
     752           0 : }
     753             : 
     754             : 
     755             : 
     756             : template<typename T>
     757           0 : void DenseMatrix<T>::evd_right(DenseVector<T> & lambda_real,
     758             :                                DenseVector<T> & lambda_imag,
     759             :                                DenseMatrix<T> & VR)
     760             : {
     761             :   // We use the LAPACK eigenvalue problem implementation
     762           0 :   _evd_lapack(lambda_real, lambda_imag, nullptr, &VR);
     763           0 : }
     764             : 
     765             : 
     766             : 
     767             : template<typename T>
     768         140 : void DenseMatrix<T>::evd_left_and_right(DenseVector<T> & lambda_real,
     769             :                                         DenseVector<T> & lambda_imag,
     770             :                                         DenseMatrix<T> & VL,
     771             :                                         DenseMatrix<T> & VR)
     772             : {
     773             :   // We use the LAPACK eigenvalue problem implementation
     774         140 :   _evd_lapack(lambda_real, lambda_imag, &VL, &VR);
     775         140 : }
     776             : 
     777             : 
     778             : 
     779             : template<typename T>
     780           0 : T DenseMatrix<T>::det ()
     781             : {
     782           0 :   switch(this->_decomposition_type)
     783             :     {
     784           0 :     case NONE:
     785             :       {
     786             :         // First LU decompose the matrix.
     787             :         // Note that the lu_decompose routine will check to see if the
     788             :         // matrix is square so we don't worry about it.
     789           0 :         if (this->use_blas_lapack)
     790           0 :           this->_lu_decompose_lapack();
     791             :         else
     792           0 :           this->_lu_decompose ();
     793             :       }
     794             :     case LU:
     795             :     case LU_BLAS_LAPACK:
     796             :       {
     797             :         // Already decomposed, don't do anything
     798           0 :         break;
     799             :       }
     800           0 :     default:
     801           0 :       libmesh_error_msg("Error! Can't compute the determinant under the current decomposition.");
     802             :     }
     803             : 
     804             :   // A variable to keep track of the running product of diagonal terms.
     805           0 :   T determinant = 1.;
     806             : 
     807             :   // Loop over diagonal terms, computing the product.  In practice,
     808             :   // be careful because this value could easily become too large to
     809             :   // fit in a double or float.  To be safe, one should keep track of
     810             :   // the power (of 10) of the determinant in a separate variable
     811             :   // and maintain an order 1 value for the determinant itself.
     812           0 :   unsigned int n_interchanges = 0;
     813           0 :   for (auto i : make_range(this->m()))
     814             :     {
     815           0 :       if (this->_decomposition_type==LU)
     816           0 :         if (_pivots[i] != static_cast<pivot_index_t>(i))
     817           0 :           n_interchanges++;
     818             : 
     819             :       // Lapack pivots are 1-based!
     820           0 :       if (this->_decomposition_type==LU_BLAS_LAPACK)
     821           0 :         if (_pivots[i] != static_cast<pivot_index_t>(i+1))
     822           0 :           n_interchanges++;
     823             : 
     824           0 :       determinant *= (*this)(i,i);
     825             :     }
     826             : 
     827             :   // Compute sign of determinant, depends on number of row interchanges!
     828             :   // The sign should be (-1)^{n}, where n is the number of interchanges.
     829           0 :   Real sign = n_interchanges % 2 == 0 ? 1. : -1.;
     830             : 
     831           0 :   return sign*determinant;
     832             : }
     833             : 
     834             : 
     835             : 
     836             : // The cholesky solve function first decomposes the matrix
     837             : // with cholesky_decompose and then uses the cholesky_back_substitute
     838             : // routine to find the solution x.
     839             : template <typename T>
     840             : template <typename T2>
     841     1030654 : void DenseMatrix<T>::cholesky_solve (const DenseVector<T2> & b,
     842             :                                      DenseVector<T2> & x)
     843             : {
     844             :   // Check for a previous decomposition
     845     1030654 :   switch(this->_decomposition_type)
     846             :     {
     847      771942 :     case NONE:
     848             :       {
     849      771942 :         this->_cholesky_decompose ();
     850      771942 :         break;
     851             :       }
     852             : 
     853       21459 :     case CHOLESKY:
     854             :       {
     855             :         // Already factored, just need to call back_substitute.
     856       21459 :         break;
     857             :       }
     858             : 
     859           0 :     default:
     860           0 :       libmesh_error_msg("Error! This matrix already has a different decomposition...");
     861             :     }
     862             : 
     863             :   // Perform back substitution
     864     1030654 :   this->_cholesky_back_substitute (b, x);
     865     1030654 : }
     866             : 
     867             : 
     868             : 
     869             : 
     870             : // This algorithm is based on the Cholesky decomposition in
     871             : // the Numerical Recipes in C book.
     872             : template<typename T>
     873      771942 : void DenseMatrix<T>::_cholesky_decompose ()
     874             : {
     875             :   // If we called this function, there better not be any
     876             :   // previous decomposition of the matrix.
     877       62300 :   libmesh_assert_equal_to (this->_decomposition_type, NONE);
     878             : 
     879             :   // Shorthand notation for number of rows and columns.
     880             :   const unsigned int
     881      124603 :     n_rows = this->m(),
     882      124603 :     n_cols = this->n();
     883             : 
     884             :   // Just to be really sure...
     885       62300 :   libmesh_assert_equal_to (n_rows, n_cols);
     886             : 
     887             :   // A convenient reference to *this
     888       62300 :   DenseMatrix<T> & A = *this;
     889             : 
     890     5675487 :   for (unsigned int i=0; i<n_rows; ++i)
     891             :     {
     892    49404460 :       for (unsigned int j=i; j<n_cols; ++j)
     893             :         {
     894   541780581 :           for (unsigned int k=0; k<i; ++k)
     895   579266408 :             A(i,j) -= A(i,k) * A(j,k);
     896             : 
     897    44500915 :           if (i == j)
     898             :             {
     899             : #ifndef LIBMESH_USE_COMPLEX_NUMBERS
     900     4520276 :               libmesh_error_msg_if(A(i,j) <= 0.0,
     901             :                                    "Error! Can only use Cholesky decomposition with symmetric positive definite matrices.");
     902             : #endif
     903             : 
     904     5299322 :               A(i,i) = std::sqrt(A(i,j));
     905             :             }
     906             :           else
     907    46046486 :             A(j,i) = A(i,j) / A(i,i);
     908             :         }
     909             :     }
     910             : 
     911             :   // Set the flag for CHOLESKY decomposition
     912      771942 :   this->_decomposition_type = CHOLESKY;
     913      771942 : }
     914             : 
     915             : 
     916             : 
     917             : template <typename T>
     918             : template <typename T2>
     919     1030654 : void DenseMatrix<T>::_cholesky_back_substitute (const DenseVector<T2> & b,
     920             :                                                 DenseVector<T2> & x) const
     921             : {
     922             :   // Shorthand notation for number of rows and columns.
     923             :   const unsigned int
     924      167521 :     n_rows = this->m(),
     925      167521 :     n_cols = this->n();
     926             : 
     927             :   // Just to be really sure...
     928       83759 :   libmesh_assert_equal_to (n_rows, n_cols);
     929             : 
     930             :   // A convenient reference to *this
     931       83759 :   const DenseMatrix<T> & A = *this;
     932             : 
     933             :   // Now compute the solution to Ax =b using the factorization.
     934      946892 :   x.resize(n_rows);
     935             : 
     936             :   // Solve for Ly=b
     937    10352851 :   for (unsigned int i=0; i<n_cols; ++i)
     938             :     {
     939     9322197 :       T2 temp = b(i);
     940             : 
     941   147704947 :       for (unsigned int k=0; k<i; ++k)
     942   135372260 :         temp -= A(i,k)*x(k);
     943             : 
     944    10054410 :       x(i) = temp / A(i,i);
     945             :     }
     946             : 
     947             :   // Solve for L^T x = y
     948    10352851 :   for (unsigned int i=0; i<n_cols; ++i)
     949             :     {
     950     9322197 :       const unsigned int ib = (n_cols-1)-i;
     951             : 
     952   147704947 :       for (unsigned int k=(ib+1); k<n_cols; ++k)
     953   146369313 :         x(ib) -= A(k,ib) * x(k);
     954             : 
     955     8963391 :       x(ib) /= A(ib,ib);
     956             :     }
     957     1030654 : }
     958             : 
     959             : 
     960             : 
     961             : 
     962             : 
     963             : 
     964             : 
     965             : 
     966             : // This routine is commented out since it is not really a memory
     967             : // efficient implementation.  Also, you don't *need* the inverse
     968             : // for anything, instead just use lu_solve to solve Ax=b.
     969             : // template<typename T>
     970             : // void DenseMatrix<T>::inverse ()
     971             : // {
     972             : //   // First LU decompose the matrix
     973             : //   // Note that the lu_decompose routine will check to see if the
     974             : //   // matrix is square so we don't worry about it.
     975             : //   if (!this->_lu_decomposed)
     976             : //     this->_lu_decompose();
     977             : 
     978             : //   // A unit vector which will be used as a rhs
     979             : //   // to pick off a single value each time.
     980             : //   DenseVector<T> e;
     981             : //   e.resize(this->m());
     982             : 
     983             : //   // An empty vector which will be used to hold the solution
     984             : //   // to the back substitutions.
     985             : //   DenseVector<T> x;
     986             : //   x.resize(this->m());
     987             : 
     988             : //   // An empty dense matrix to store the resulting inverse
     989             : //   // temporarily until we can overwrite A.
     990             : //   DenseMatrix<T> inv;
     991             : //   inv.resize(this->m(), this->n());
     992             : 
     993             : //   // Resize the passed in matrix to hold the inverse
     994             : //   inv.resize(this->m(), this->n());
     995             : 
     996             : //   for (unsigned int j=0; j<this->n(); ++j)
     997             : //     {
     998             : //       e.zero();
     999             : //       e(j) = 1.;
    1000             : //       this->_lu_back_substitute(e, x, false);
    1001             : //       for (unsigned int i=0; i<this->n(); ++i)
    1002             : // inv(i,j) = x(i);
    1003             : //     }
    1004             : 
    1005             : //   // Now overwrite all the entries
    1006             : //   *this = inv;
    1007             : // }
    1008             : 
    1009             : 
    1010             : } // namespace libMesh
    1011             : 
    1012             : #endif // LIBMESH_DENSE_MATRIX_IMPL_H

Generated by: LCOV version 1.14