// doxygen/libmesh/dense__matrix__impl_8h_source.html

// The libMesh Finite Element Library.

// Copyright (C) 2002-2019 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner


// This library is free software; you can redistribute it and/or

// modify it under the terms of the GNU Lesser General Public

// License as published by the Free Software Foundation; either

// version 2.1 of the License, or (at your option) any later version.


// This library is distributed in the hope that it will be useful,

// but WITHOUT ANY WARRANTY; without even the implied warranty of

// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

// Lesser General Public License for more details.


// You should have received a copy of the GNU Lesser General Public

// License along with this library; if not, write to the Free Software

// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


#ifndef LIBMESH_DENSE_MATRIX_IMPL_H

#define LIBMESH_DENSE_MATRIX_IMPL_H


// C++ Includes

#include <cstdlib> // *must* precede <cmath> for proper std:abs() on PGI, Sun Studio CC

#include <cmath> // for sqrt


// Local Includes

#include "libmesh/dense_matrix.h"

#include "libmesh/dense_vector.h"

#include "libmesh/int_range.h"

#include "libmesh/libmesh.h"


#ifdef LIBMESH_HAVE_METAPHYSICL

// Forward declaration of the DualNumber class template, so that the
// std::sqrt overloads below can be declared in terms of it.
namespace MetaPhysicL

{

template <typename, typename>

class DualNumber;

}

namespace std

{

// When instantiating a DenseMatrix<DualNumber> we need these declarations visible

// in order to compile the DenseMatrix<T>::_cholesky_decompose method

// (that method calls std::sqrt on a matrix entry).  These are declarations
// only.  NOTE(review): the matching definitions are presumably supplied by
// MetaPhysicL's own headers -- confirm.

// Overload taking the argument by const reference.
template <typename T, typename D>

MetaPhysicL::DualNumber<T, D> sqrt(const MetaPhysicL::DualNumber<T, D> & in);

// Overload taking the argument by rvalue reference.
template <typename T, typename D>

MetaPhysicL::DualNumber<T, D> sqrt(MetaPhysicL::DualNumber<T, D> && in);

}

#endif


namespace libMesh

{


// ------------------------------------------------------------

// Dense Matrix member functions


// Overwrites *this with the product M2 * (*this).
//
// If *this is (p x n) on entry and M2 is (m x p), then *this is
// (m x n) on exit.
template<typename T>
void DenseMatrix<T>::left_multiply (const DenseMatrixBase<T> & M2)
{
  // Hand the whole operation to the BLAS-backed routine when requested.
  if (this->use_blas_lapack)
    {
      this->_multiply_blas(M2, LEFT_MULTIPLY);
      return;
    }

  // Take a snapshot of the current entries: *this is about to be
  // resize()d to hold the result, so the right-hand factor has to be
  // read from the copy.
  DenseMatrix<T> old_self(*this);

  // Make room for the (M2.m() x old_self.n()) result.
  this->resize (M2.m(), old_self.n());

  // The base-class multiply() computes the product into its first argument.
  this->multiply(*this, M2, old_self);
}


// Mixed-type variant: overwrites *this with the product M2 * (*this),
// where M2 holds entries of type T2.
//
// If *this is (p x n) on entry and M2 is (m x p), then *this is
// (m x n) on exit.
template<typename T>
template<typename T2>
void DenseMatrix<T>::left_multiply (const DenseMatrixBase<T2> & M2)
{
  // Snapshot of the entries *this holds on entry; it serves as the
  // right-hand factor once *this has been resize()d for the result.
  DenseMatrix<T> old_self(*this);

  // Make room for the (M2.m() x old_self.n()) result.
  this->resize (M2.m(), old_self.n());

  // The base-class multiply() computes the product into its first argument.
  this->multiply(*this, M2, old_self);
}


// Overwrites *this with A^T * (*this).
//
// Uses the BLAS-backed routine when use_blas_lapack is set.  Otherwise
// the product is accumulated by hand, with a dedicated path for the
// self-product (A^T)*A, which only computes the lower triangle and
// then mirrors it.
template<typename T>
void DenseMatrix<T>::left_multiply_transpose(const DenseMatrix<T> & A)
{
  if (this->use_blas_lapack)
    {
      this->_multiply_blas(A, LEFT_MULTIPLY_TRANSPOSE);
      return;
    }

  // B holds the entries *this had on entry.  Both paths below resize()
  // *this before accumulating into it, so the old values must be read
  // from this copy.
  DenseMatrix<T> B(*this);

  // Are we doing (A^T)*A?
  if (this == &A)
    {
      // A is (inner_dim x result_dim), so the product is a square
      // (result_dim x result_dim) matrix.  These sizes must be read
      // before the resize() because A aliases *this.
      const unsigned int inner_dim  = A.m();
      const unsigned int result_dim = A.n();

      // resize() *this and also zero out all entries.
      this->resize(result_dim, result_dim);

      // Lower triangle: entry (row,col) is the inner product of
      // columns row and col of the original matrix.
      for (unsigned int row=0; row<result_dim; ++row)
        for (unsigned int col=0; col<=row; ++col)
          for (unsigned int k=0; k<inner_dim; ++k)
            (*this)(row,col) += B(k,row)*B(k,col);

      // Mirror the lower triangle into the upper triangle.
      for (unsigned int row=0; row<result_dim; ++row)
        for (unsigned int col=row+1; col<result_dim; ++col)
          (*this)(row,col) = (*this)(col,row);

      return;
    }

  // General case: the result is (A.n() x B.n()).
  this->resize (A.n(), B.n());

  libmesh_assert_equal_to (A.m(), B.m());
  libmesh_assert_equal_to (this->m(), A.n());
  libmesh_assert_equal_to (this->n(), B.n());

  const unsigned int out_rows  = A.n();
  const unsigned int inner_dim = A.m();
  const unsigned int out_cols  = this->n();

  // Loop order chosen so that a zero A.transpose(row,k) -- reasonably
  // likely, at least for constraint matrices -- lets us skip the
  // whole innermost loop.
  for (unsigned int row=0; row<out_rows; row++)
    for (unsigned int k=0; k<inner_dim; k++)
      if (A.transpose(row,k) != 0.)
        for (unsigned int col=0; col<out_cols; col++)
          (*this)(row,col) += A.transpose(row,k)*B(k,col);
}


// Mixed-type variant: overwrites *this with A^T * (*this), where A
// holds entries of type T2.
//
// The self-product (A^T)*A gets a dedicated path that only computes
// the lower triangle and then mirrors it.
template<typename T>
template<typename T2>
void DenseMatrix<T>::left_multiply_transpose(const DenseMatrix<T2> & A)
{
  // Check to see if we are doing (A^T)*A.  The addresses are compared
  // as untyped pointers: DenseMatrix<T>* and const DenseMatrix<T2>*
  // are unrelated pointer types whenever T2 differs from T, so
  // comparing them directly is ill-formed and would prevent this
  // overload from being instantiated with mixed types.
  const bool is_self_product =
    (static_cast<const void *>(this) == static_cast<const void *>(&A));

  if (is_self_product)
    {
      // Copy of the original entries; *this is resize()d below.
      DenseMatrix<T> B(*this);

      // If A is mxn, the result will be a square matrix of size n x n.
      // Read the sizes before the resize() because A aliases *this.
      const unsigned int n_rows = A.m();
      const unsigned int n_cols = A.n();

      // resize() *this and also zero out all entries.
      this->resize(n_cols,n_cols);

      // Compute the lower-triangular part
      for (unsigned int i=0; i<n_cols; ++i)
        for (unsigned int j=0; j<=i; ++j)
          for (unsigned int k=0; k<n_rows; ++k) // inner products are over n_rows
            (*this)(i,j) += B(k,i)*B(k,j);

      // Copy lower-triangular part into upper-triangular part
      for (unsigned int i=0; i<n_cols; ++i)
        for (unsigned int j=i+1; j<n_cols; ++j)
          (*this)(i,j) = (*this)(j,i);
    }
  else
    {
      // Copy of the original entries; *this is resize()d below.
      DenseMatrix<T> B(*this);

      // The result is (A.n() x B.n()).
      this->resize (A.n(), B.n());

      libmesh_assert_equal_to (A.m(), B.m());
      libmesh_assert_equal_to (this->m(), A.n());
      libmesh_assert_equal_to (this->n(), B.n());

      const unsigned int m_s = A.n();
      const unsigned int p_s = A.m();
      const unsigned int n_s = this->n();

      // Do it this way because there is a
      // decent chance (at least for constraint matrices)
      // that A.transpose(i,k) = 0.
      for (unsigned int i=0; i<m_s; i++)
        for (unsigned int k=0; k<p_s; k++)
          if (A.transpose(i,k) != 0.)
            for (unsigned int j=0; j<n_s; j++)
              (*this)(i,j) += A.transpose(i,k)*B(k,j);
    }
}


// Overwrites *this with the product (*this) * M3.
//
// If *this is (m x p) on entry and M3 is (p x n), then *this is
// (m x n) on exit.
template<typename T>
void DenseMatrix<T>::right_multiply (const DenseMatrixBase<T> & M3)
{
  // Hand the whole operation to the BLAS-backed routine when requested.
  if (this->use_blas_lapack)
    {
      this->_multiply_blas(M3, RIGHT_MULTIPLY);
      return;
    }

  // Take a snapshot of the current entries: *this is about to be
  // resize()d to hold the result, so the left-hand factor has to be
  // read from the copy.
  DenseMatrix<T> old_self(*this);

  // Make room for the (old_self.m() x M3.n()) result.
  this->resize (old_self.m(), M3.n());

  // The base-class multiply() computes the product into its first argument.
  this->multiply(*this, old_self, M3);
}


// Mixed-type variant: overwrites *this with the product (*this) * M3,
// where M3 holds entries of type T2.
//
// If *this is (m x p) on entry and M3 is (p x n), then *this is
// (m x n) on exit.
template<typename T>
template<typename T2>
void DenseMatrix<T>::right_multiply (const DenseMatrixBase<T2> & M3)
{
  // Snapshot of the entries *this holds on entry; it serves as the
  // left-hand factor once *this has been resize()d for the result.
  DenseMatrix<T> old_self(*this);

  // Make room for the (old_self.m() x M3.n()) result.
  this->resize (old_self.m(), M3.n());

  // The base-class multiply() computes the product into its first argument.
  this->multiply(*this, old_self, M3);
}


// Overwrites *this with (*this) * B^T.
//
// Uses the BLAS-backed routine when use_blas_lapack is set.  Otherwise
// the product is accumulated by hand, with a dedicated path for the
// self-product B*(B^T), which only computes the lower triangle and
// then mirrors it.
template<typename T>
void DenseMatrix<T>::right_multiply_transpose (const DenseMatrix<T> & B)
{
  if (this->use_blas_lapack)
    {
      this->_multiply_blas(B, RIGHT_MULTIPLY_TRANSPOSE);
      return;
    }

  // A holds the entries *this had on entry.  Both paths below resize()
  // *this before accumulating into it, so the old values must be read
  // from this copy.
  DenseMatrix<T> A(*this);

  // Are we doing B*(B^T)?
  if (this == &B)
    {
      // B is (result_dim x inner_dim), so the product is a square
      // (result_dim x result_dim) matrix.  These sizes must be read
      // before the resize() because B aliases *this.
      const unsigned int result_dim = B.m();
      const unsigned int inner_dim  = B.n();

      // resize() *this and also zero out all entries.
      this->resize(result_dim, result_dim);

      // Lower triangle: entry (row,col) is the inner product of
      // rows row and col of the original matrix.
      for (unsigned int row=0; row<result_dim; ++row)
        for (unsigned int col=0; col<=row; ++col)
          for (unsigned int k=0; k<inner_dim; ++k)
            (*this)(row,col) += A(row,k)*A(col,k);

      // Mirror the lower triangle into the upper triangle.
      for (unsigned int row=0; row<result_dim; ++row)
        for (unsigned int col=row+1; col<result_dim; ++col)
          (*this)(row,col) = (*this)(col,row);

      return;
    }

  // General case: the result is (A.m() x B.m()).
  this->resize (A.m(), B.m());

  libmesh_assert_equal_to (A.n(), B.n());
  libmesh_assert_equal_to (this->m(), A.m());
  libmesh_assert_equal_to (this->n(), B.m());

  const unsigned int out_rows  = A.m();
  const unsigned int inner_dim = A.n();
  const unsigned int out_cols  = this->n();

  // Loop order chosen so that a zero B.transpose(k,col) -- reasonably
  // likely, at least for constraint matrices -- lets us skip the
  // whole innermost loop.
  for (unsigned int col=0; col<out_cols; col++)
    for (unsigned int k=0; k<inner_dim; k++)
      if (B.transpose(k,col) != 0.)
        for (unsigned int row=0; row<out_rows; row++)
          (*this)(row,col) += A(row,k)*B.transpose(k,col);
}


// Mixed-type variant: overwrites *this with (*this) * B^T, where B
// holds entries of type T2.
//
// The self-product B*(B^T) gets a dedicated path that only computes
// the lower triangle and then mirrors it.
template<typename T>
template<typename T2>
void DenseMatrix<T>::right_multiply_transpose (const DenseMatrix<T2> & B)
{
  // Check to see if we are doing B*(B^T).  The addresses are compared
  // as untyped pointers: DenseMatrix<T>* and const DenseMatrix<T2>*
  // are unrelated pointer types whenever T2 differs from T, so
  // comparing them directly is ill-formed and would prevent this
  // overload from being instantiated with mixed types.
  const bool is_self_product =
    (static_cast<const void *>(this) == static_cast<const void *>(&B));

  if (is_self_product)
    {
      // Copy of the original entries; *this is resize()d below.
      DenseMatrix<T> A(*this);

      // If B is mxn, the result will be a square matrix of size m x m.
      // Read the sizes before the resize() because B aliases *this.
      const unsigned int n_rows = B.m();
      const unsigned int n_cols = B.n();

      // resize() *this and also zero out all entries.
      this->resize(n_rows,n_rows);

      // Compute the lower-triangular part
      for (unsigned int i=0; i<n_rows; ++i)
        for (unsigned int j=0; j<=i; ++j)
          for (unsigned int k=0; k<n_cols; ++k) // inner products are over n_cols
            (*this)(i,j) += A(i,k)*A(j,k);

      // Copy lower-triangular part into upper-triangular part
      for (unsigned int i=0; i<n_rows; ++i)
        for (unsigned int j=i+1; j<n_rows; ++j)
          (*this)(i,j) = (*this)(j,i);
    }
  else
    {
      // Copy of the original entries; *this is resize()d below.
      DenseMatrix<T> A(*this);

      // The result is (A.m() x B.m()).
      this->resize (A.m(), B.m());

      libmesh_assert_equal_to (A.n(), B.n());
      libmesh_assert_equal_to (this->m(), A.m());
      libmesh_assert_equal_to (this->n(), B.m());

      const unsigned int m_s = A.m();
      const unsigned int p_s = A.n();
      const unsigned int n_s = this->n();

      // Do it this way because there is a
      // decent chance (at least for constraint matrices)
      // that B.transpose(k,j) = 0.
      for (unsigned int j=0; j<n_s; j++)
        for (unsigned int k=0; k<p_s; k++)
          if (B.transpose(k,j) != 0.)
            for (unsigned int i=0; i<m_s; i++)
              (*this)(i,j) += A(i,k)*B.transpose(k,j);
    }
}


// Computes dest = (*this) * arg.
//
// dest is resized to this->m() entries; arg must have this->n() entries.
template<typename T>
void DenseMatrix<T>::vector_mult (DenseVector<T> & dest,
                                  const DenseVector<T> & arg) const
{
  // The vector length has to match the number of matrix columns.
  libmesh_assert_equal_to (this->n(), arg.size());

  // Size the output; DenseVector::resize() also zeros the vector,
  // which the accumulation below relies on.
  dest.resize(this->m());

  const unsigned int row_count = this->m();
  const unsigned int col_count = this->n();

  // Nothing to multiply for an empty matrix.
  if (row_count == 0 || col_count == 0)
    return;

  if (this->use_blas_lapack)
    {
      // BLAS-backed matrix-vector product, called with scalars 1 and 0.
      this->_matvec_blas(1., 0., dest, arg);
      return;
    }

  // Plain row-by-row accumulation.
  for (unsigned int row=0; row<row_count; row++)
    for (unsigned int col=0; col<col_count; col++)
      dest(row) += (*this)(row,col)*arg(col);
}


// Mixed-type variant: computes dest = (*this) * arg, where arg holds
// entries of type T2 and dest holds the CompareTypes supertype of T
// and T2.
//
// dest is resized to this->m() entries; arg must have this->n() entries.
template<typename T>
template<typename T2>
void DenseMatrix<T>::vector_mult (DenseVector<typename CompareTypes<T,T2>::supertype> & dest,
                                  const DenseVector<T2> & arg) const
{
  // The vector length has to match the number of matrix columns.
  libmesh_assert_equal_to (this->n(), arg.size());

  // Size the output; DenseVector::resize() also zeros the vector,
  // which the accumulation below relies on.
  dest.resize(this->m());

  const unsigned int row_count = this->m();
  const unsigned int col_count = this->n();

  // Nothing to multiply for an empty matrix.
  if (row_count == 0 || col_count == 0)
    return;

  // Plain row-by-row accumulation.
  for (unsigned int row=0; row<row_count; row++)
    for (unsigned int col=0; col<col_count; col++)
      dest(row) += (*this)(row,col)*arg(col);
}


// Computes dest = (*this)^T * arg.
//
// dest is resized to this->n() entries; arg must have this->m() entries.
template<typename T>
void DenseMatrix<T>::vector_mult_transpose (DenseVector<T> & dest,
                                            const DenseVector<T> & arg) const
{
  // The vector length has to match the number of matrix rows.
  libmesh_assert_equal_to (this->m(), arg.size());

  // Size the output; DenseVector::resize() also zeros the vector,
  // which the accumulation below relies on.
  dest.resize(this->n());

  // Nothing to multiply for a matrix with no rows.
  if (this->m() == 0)
    return;

  if (this->use_blas_lapack)
    {
      // BLAS-backed product with the transpose flag set.
      this->_matvec_blas(1., 0., dest, arg, /*trans=*/true);
      return;
    }

  const unsigned int row_count = this->m();
  const unsigned int col_count = this->n();

  // Entry "out" of the result is the dot product of column "out" of
  // the matrix with arg.
  for (unsigned int out=0; out<col_count; out++)
    for (unsigned int row=0; row<row_count; row++)
      dest(out) += (*this)(row,out)*arg(row);
}


// Mixed-type variant: computes dest = (*this)^T * arg, where arg holds
// entries of type T2 and dest holds the CompareTypes supertype of T
// and T2.
//
// dest is resized to this->n() entries; arg must have this->m() entries.
template<typename T>
template<typename T2>
void DenseMatrix<T>::vector_mult_transpose (DenseVector<typename CompareTypes<T,T2>::supertype> & dest,
                                            const DenseVector<T2> & arg) const
{
  // The vector length has to match the number of matrix rows.
  libmesh_assert_equal_to (this->m(), arg.size());

  // Size the output; DenseVector::resize() also zeros the vector,
  // which the accumulation below relies on.
  dest.resize(this->n());

  // Nothing to multiply for a matrix with no rows.
  if (this->m() == 0)
    return;

  const unsigned int row_count = this->m();
  const unsigned int col_count = this->n();

  // Entry "out" of the result is the dot product of column "out" of
  // the matrix with arg.
  for (unsigned int out=0; out<col_count; out++)
    for (unsigned int row=0; row<row_count; row++)
      dest(out) += (*this)(row,out)*arg(row);
}


// Computes dest += factor * (*this) * arg.
//
// For a matrix with no rows, dest is resized to length zero instead.
template<typename T>
void DenseMatrix<T>::vector_mult_add (DenseVector<T> & dest,
                                      const T factor,
                                      const DenseVector<T> & arg) const
{
  // Empty matrix: there is nothing to add.
  if (this->m() == 0)
    {
      dest.resize(0);
      return;
    }

  if (!this->use_blas_lapack)
    {
      // Form the plain product in a scratch vector, then add the
      // scaled result onto dest.  (vector_mult() resizes the scratch
      // vector itself.)
      DenseVector<T> product(arg.size());
      this->vector_mult(product, arg);
      dest.add(factor, product);
      return;
    }

  // BLAS-backed update, called with scalars factor and 1.
  this->_matvec_blas(factor, 1., dest, arg);
}


// Mixed-type variant: computes dest += factor * (*this) * arg, where
// factor has type T2 and arg holds entries of type T3.
//
// For a matrix with no rows, dest is resized to length zero instead.
template<typename T>
template<typename T2, typename T3>
void DenseMatrix<T>::vector_mult_add (DenseVector<typename CompareTypes<T, typename CompareTypes<T2,T3>::supertype>::supertype> & dest,
                                      const T2 factor,
                                      const DenseVector<T3> & arg) const
{
  // Empty matrix: there is nothing to add.
  if (this->m() == 0)
    {
      dest.resize(0);
      return;
    }

  // Form the plain product in a scratch vector of the T/T3 supertype,
  // then add the scaled result onto dest.  (vector_mult() resizes the
  // scratch vector itself.)
  DenseVector<typename CompareTypes<T,T3>::supertype> product(arg.size());
  this->vector_mult(product, arg);
  dest.add(factor, product);
}


// Copies the leading (sub_m x sub_n) block of *this -- rows
// [0, sub_m) and columns [0, sub_n) -- into dest, which is resized
// to fit.
template<typename T>
void DenseMatrix<T>::get_principal_submatrix (unsigned int sub_m,
                                              unsigned int sub_n,
                                              DenseMatrix<T> & dest) const
{
  // The requested block must fit inside this matrix.
  libmesh_assert( (sub_m <= this->m()) && (sub_n <= this->n()) );

  dest.resize(sub_m, sub_n);

  for (unsigned int row=0; row<sub_m; row++)
    {
      for (unsigned int col=0; col<sub_n; col++)
        dest(row,col) = (*this)(row,col);
    }
}


// Sets *this to the outer product of a and b: after the call,
// entry (i,j) equals a(i) * libmesh_conj(b(j)).  *this is resized
// to (a.size() x b.size()).
template<typename T>
void DenseMatrix<T>::outer_product (const DenseVector<T> & a,
                                    const DenseVector<T> & b)
{
  const unsigned int row_count = a.size();
  const unsigned int col_count = b.size();

  this->resize(row_count, col_count);

  for (unsigned int row = 0; row < row_count; ++row)
    {
      for (unsigned int col = 0; col < col_count; ++col)
        (*this)(row,col) = a(row) * libmesh_conj(b(col));
    }
}


// Convenience overload for a square block: copies the leading
// (sub_m x sub_m) block of *this into dest.
template<typename T>
void DenseMatrix<T>::get_principal_submatrix (unsigned int sub_m, DenseMatrix<T> & dest) const
{
  // Forward to the general overload with equal row and column counts.
  this->get_principal_submatrix(sub_m, sub_m, dest);
}


// Stores the transpose of *this in dest, which is resized to
// (this->n() x this->m()).
template<typename T>
void DenseMatrix<T>::get_transpose (DenseMatrix<T> & dest) const
{
  dest.resize(this->n(), this->m());

  // Walk the destination entry by entry and pull each value from the
  // mirrored position in the source.
  const unsigned int dest_rows = dest.m();
  const unsigned int dest_cols = dest.n();

  for (unsigned int row = 0; row < dest_rows; ++row)
    for (unsigned int col = 0; col < dest_cols; ++col)
      dest(row,col) = (*this)(col,row);
}


// Solves A x = b via LU factorization, where A is *this.
//
// If the matrix has not been decomposed yet, it is factored in place
// first (with LAPACK when use_blas_lapack is set, otherwise with the
// built-in _lu_decompose()).  An existing LU factorization is reused,
// provided it was produced by the same backend that is currently
// selected.  Any other decomposition state is an error.
//
// \param b  right-hand side
// \param x  receives the solution
template<typename T>
void DenseMatrix<T>::lu_solve (const DenseVector<T> & b,
                               DenseVector<T> & x)
{
  // Check to be sure that the matrix is square before attempting
  // an LU-solve.  In general, one can compute the LU factorization of
  // a non-square matrix, but:
  //
  // Overdetermined systems (m>n) have a solution only if enough of
  // the equations are linearly-dependent.
  //
  // Underdetermined systems (m<n) typically have infinitely many
  // solutions.
  //
  // We don't want to deal with either of these ambiguous cases here...
  libmesh_assert_equal_to (this->m(), this->n());

  switch(this->_decomposition_type)
    {
    case NONE:
      {
        // Not factored yet: factor with the selected backend.
        if (this->use_blas_lapack)
          this->_lu_decompose_lapack();
        else
          this->_lu_decompose ();
        break;
      }

    case LU_BLAS_LAPACK:
      {
        // Already factored by LAPACK.  That factorization can only be
        // consumed by the LAPACK back-substitution: elsewhere in this
        // file LAPACK pivots are treated as 1-based while the built-in
        // routines use 0-based pivots.  If the backend flag has been
        // switched off in the meantime, refuse instead of silently
        // running the built-in back-substitution on LAPACK data.
        if (!this->use_blas_lapack)
          libmesh_error_msg("Error! This matrix already has a different decomposition...");
        break;
      }

    case LU:
      {
        // Already factored by the built-in routine; likewise it can
        // only be consumed by the built-in back-substitution.
        if (this->use_blas_lapack)
          libmesh_error_msg("Error! This matrix already has a different decomposition...");
        break;
      }

    default:
      libmesh_error_msg("Error! This matrix already has a different decomposition...");
    }

  // Back-substitute with the backend matching the factorization.
  if (this->use_blas_lapack)
    this->_lu_back_substitute_lapack (b, x);
  else
    this->_lu_back_substitute (b, x);
}


// Solves the factored system for x given right-hand side b, using the
// factors and pivots stored in *this.
//
// The forward sweep divides by the diagonal entry while the backward
// sweep does not, i.e. the diagonal is treated as belonging to the
// lower-triangular factor.
template<typename T>
void DenseMatrix<T>::_lu_back_substitute (const DenseVector<T> & b,
                                          DenseVector<T> & x ) const
{
  const unsigned int
    n_cols = this->n();

  libmesh_assert_equal_to (this->m(), n_cols);
  libmesh_assert_equal_to (this->m(), b.size());

  x.resize (n_cols);

  // Work on a copy of the right-hand side, since the pivoting swaps
  // below would otherwise modify the caller's vector.
  DenseVector<T> rhs = b;

  // Forward sweep (top to bottom) through the lower-triangular factor,
  // applying the recorded row interchanges as we go.
  for (unsigned int row=0; row<n_cols; ++row)
    {
      // Apply the interchange recorded for this row, if any.
      if (_pivots[row] != static_cast<pivot_index_t>(row))
        std::swap( rhs(row), rhs(_pivots[row]) );

      x(row) = rhs(row);

      for (unsigned int col=0; col<row; ++col)
        x(row) -= (*this)(row,col)*x(col);

      x(row) /= (*this)(row,row);
    }

  // Backward sweep (bottom to top) through the upper-triangular
  // factor.  There is no division here.
  for (unsigned int row=n_cols; row-- > 0; )
    {
      for (unsigned int col=row+1; col<n_cols; ++col)
        x(row) -= (*this)(row,col)*x(col);
    }
}


// Factors *this in place into LU form with partial (row) pivoting.
//
// On exit the strict upper triangle holds the upper factor scaled by
// the diagonal (its unit diagonal is implied), the lower triangle
// including the diagonal holds the lower factor, _pivots records the
// row chosen at each step (0-based), and _decomposition_type is LU.
//
// Requires a square matrix with no previous decomposition.  Raises an
// error if a zero pivot is encountered.
template<typename T>
void DenseMatrix<T>::_lu_decompose ()
{
  // If this function was called, there better not be any
  // previous decomposition of the matrix.
  libmesh_assert_equal_to (this->_decomposition_type, NONE);

  // Get the matrix size and make sure it is square
  const unsigned int
    n_rows = this->m();

  // The loops below index columns by n_rows, so the matrix really
  // must be square.
  libmesh_assert_equal_to (n_rows, this->n());

  // A convenient reference to *this
  DenseMatrix<T> & A = *this;

  _pivots.resize(n_rows);

  for (unsigned int i=0; i<n_rows; ++i)
    {
      // Find the pivot row by searching down the i'th column
      _pivots[i] = i;

      // std::abs(complex) must return a Real!
      auto the_max = std::abs( A(i,i) );
      for (unsigned int j=i+1; j<n_rows; ++j)
        {
          auto candidate_max = std::abs( A(j,i) );
          if (the_max < candidate_max)
            {
              the_max = candidate_max;
              _pivots[i] = j;
            }
        }

      // libMesh::out << "the_max=" << the_max << " found at row " << _pivots[i] << std::endl;

      // If the max was found in a different row, interchange rows.
      // Here we interchange the *entire* row, in Gaussian elimination
      // you would only interchange the subrows A(i,j) and A(p(i),j), for j>i
      if (_pivots[i] != static_cast<pivot_index_t>(i))
        {
          for (unsigned int j=0; j<n_rows; ++j)
            std::swap( A(i,j), A(_pivots[i], j) );
        }

      // If the max abs entry found is zero, the matrix is singular
      if (A(i,i) == libMesh::zero)
        libmesh_error_msg("Matrix A is singular!");

      // Scale upper triangle entries of row i by the diagonal entry
      // Note: don't scale the diagonal entry itself!
      const T diag_inv = 1. / A(i,i);
      for (unsigned int j=i+1; j<n_rows; ++j)
        A(i,j) *= diag_inv;

      // Update the remaining sub-matrix A[i+1:m][i+1:m]
      // by subtracting off (the diagonal-scaled)
      // upper-triangular part of row i, scaled by the
      // i'th column entry of each row.  In terms of
      // row operations, this is:
      // for each r > i
      //   SubRow(r) = SubRow(r) - A(r,i)*SubRow(i)
      //
      // If we were scaling the i'th column as well, like
      // in Gaussian elimination, this would 'zero' the
      // entry in the i'th column.
      for (unsigned int row=i+1; row<n_rows; ++row)
        for (unsigned int col=i+1; col<n_rows; ++col)
          A(row,col) -= A(row,i) * A(i,col);

    } // end i loop

  // Set the flag for LU decomposition
  this->_decomposition_type = LU;
}


// Computes the singular values of the matrix into sigma.
template<typename T>
void DenseMatrix<T>::svd (DenseVector<Real> & sigma)
{
  // All of the work is delegated to the LAPACK-backed implementation.
  this->_svd_lapack(sigma);
}


// Computes the singular value decomposition of the matrix: singular
// values go into sigma, the accompanying factors into U and VT.
template<typename T>
void DenseMatrix<T>::svd (DenseVector<Real> & sigma,
                          DenseMatrix<Number> & U,
                          DenseMatrix<Number> & VT)
{
  // All of the work is delegated to the LAPACK-backed implementation.
  this->_svd_lapack(sigma, U, VT);
}


// Solves the system with right-hand side rhs for x using an
// SVD-based solver; rcond is forwarded unchanged to that solver.
template<typename T>
void DenseMatrix<T>::svd_solve(const DenseVector<T> & rhs,
                               DenseVector<T> & x,
                               Real rcond) const
{
  // All of the work is delegated to the LAPACK-backed implementation.
  this->_svd_solve_lapack(rhs, x, rcond);
}


// Computes the eigenvalues of the matrix; real and imaginary parts
// are returned separately in lambda_real and lambda_imag.
template<typename T>
void DenseMatrix<T>::evd (DenseVector<T> & lambda_real,
                          DenseVector<T> & lambda_imag)
{
  // Delegate to the LAPACK-backed eigenvalue routine; no eigenvector
  // outputs are passed.
  this->_evd_lapack(lambda_real, lambda_imag);
}


// Computes the eigenvalues of the matrix (into lambda_real and
// lambda_imag) together with the left eigenvectors (into VL).
template<typename T>
void DenseMatrix<T>::evd_left(DenseVector<T> & lambda_real,
                              DenseVector<T> & lambda_imag,
                              DenseMatrix<T> & VL)
{
  // Delegate to the LAPACK-backed eigenvalue routine, passing only
  // the left-eigenvector output and a null right-eigenvector output.
  this->_evd_lapack(lambda_real, lambda_imag, &VL, nullptr);
}


// Computes the eigenvalues of the matrix (into lambda_real and
// lambda_imag) together with the right eigenvectors (into VR).
template<typename T>
void DenseMatrix<T>::evd_right(DenseVector<T> & lambda_real,
                               DenseVector<T> & lambda_imag,
                               DenseMatrix<T> & VR)
{
  // Delegate to the LAPACK-backed eigenvalue routine, passing a null
  // left-eigenvector output and only the right-eigenvector output.
  this->_evd_lapack(lambda_real, lambda_imag, nullptr, &VR);
}


// Computes the eigenvalues of the matrix (into lambda_real and
// lambda_imag) together with both the left (VL) and right (VR)
// eigenvectors.
template<typename T>
void DenseMatrix<T>::evd_left_and_right(DenseVector<T> & lambda_real,
                                        DenseVector<T> & lambda_imag,
                                        DenseMatrix<T> & VL,
                                        DenseMatrix<T> & VR)
{
  // Delegate to the LAPACK-backed eigenvalue routine with both
  // eigenvector outputs supplied.
  this->_evd_lapack(lambda_real, lambda_imag, &VL, &VR);
}


// Returns the determinant of the matrix, computed from its LU
// factorization.
//
// If the matrix has not been decomposed yet it is LU-factored in
// place first (so the original entries are overwritten).  An existing
// LU factorization from either backend is reused.  Any other
// decomposition state is an error.
template<typename T>
T DenseMatrix<T>::det ()
{
  switch(this->_decomposition_type)
    {
    case NONE:
      {
        // First LU decompose the matrix.
        // NOTE(review): this relies on the decomposition routine to
        // validate that the matrix is square -- confirm that it does.
        if (this->use_blas_lapack)
          this->_lu_decompose_lapack();
        else
          this->_lu_decompose ();

        // Explicit break rather than silently falling into the next
        // case (which itself only breaks).
        break;
      }

    case LU:
    case LU_BLAS_LAPACK:
      {
        // Already decomposed, don't do anything
        break;
      }

    default:
      libmesh_error_msg("Error! Can't compute the determinant under the current decomposition.");
    }

  // A variable to keep track of the running product of diagonal terms.
  T determinant = 1.;

  // Loop over diagonal terms, computing the product.  In practice,
  // be careful because this value could easily become too large to
  // fit in a double or float.  To be safe, one should keep track of
  // the power (of 10) of the determinant in a separate variable
  // and maintain an order 1 value for the determinant itself.
  unsigned int n_interchanges = 0;
  for (auto i : IntRange<unsigned int>(0, this->m()))
    {
      // Built-in factorization: pivots are 0-based, so a pivot that
      // differs from its own row index marks a row interchange.
      if (this->_decomposition_type==LU)
        if (_pivots[i] != static_cast<pivot_index_t>(i))
          n_interchanges++;

      // Lapack pivots are 1-based!
      if (this->_decomposition_type==LU_BLAS_LAPACK)
        if (_pivots[i] != static_cast<pivot_index_t>(i+1))
          n_interchanges++;

      determinant *= (*this)(i,i);
    }

  // Compute sign of determinant, depends on number of row interchanges!
  // The sign should be (-1)^{n}, where n is the number of interchanges.
  Real sign = n_interchanges % 2 == 0 ? 1. : -1.;

  return sign*determinant;
}


// Solves A x = b using a Cholesky factorization of A (*this).
//
// A matrix that has not been decomposed yet is factored in place
// with _cholesky_decompose(); an existing Cholesky factorization is
// reused; any other decomposition state is an error.  The solution
// is then obtained with _cholesky_back_substitute().
template <typename T>
template <typename T2>
void DenseMatrix<T>::cholesky_solve (const DenseVector<T2> & b,
                                     DenseVector<T2> & x)
{
  // Make sure a Cholesky factorization is available.
  if (this->_decomposition_type == NONE)
    {
      // Not factored yet: do it now.
      this->_cholesky_decompose ();
    }
  else if (this->_decomposition_type != CHOLESKY)
    {
      // Factored, but not by Cholesky: the stored data is unusable here.
      libmesh_error_msg("Error! This matrix already has a different decomposition...");
    }

  // The factors are in place; all that is left is the substitution.
  this->_cholesky_back_substitute (b, x);
}


// In-place Cholesky factorization, following the algorithm in the
// Numerical Recipes in C book.
//
// On exit the lower triangle (including the diagonal) of *this holds
// the Cholesky factor and _decomposition_type is CHOLESKY.  In
// builds without complex-number support, a non-positive value where
// a diagonal square root is about to be taken raises an error.
template<typename T>
void DenseMatrix<T>::_cholesky_decompose ()
{
  // This must only be called on a matrix that has not been
  // decomposed before.
  libmesh_assert_equal_to (this->_decomposition_type, NONE);

  // Matrix dimensions.
  const unsigned int
    n_rows = this->m(),
    n_cols = this->n();

  // The factorization is only defined for square matrices.
  libmesh_assert_equal_to (n_rows, n_cols);

  // Shorthand for *this.
  DenseMatrix<T> & A = *this;

  for (unsigned int r=0; r<n_rows; ++r)
    for (unsigned int c=r; c<n_cols; ++c)
      {
        // Subtract the contributions of the already-computed columns.
        for (unsigned int k=0; k<r; ++k)
          A(r,c) -= A(r,k) * A(c,k);

        if (r != c)
          {
            // Off-diagonal: store the factor entry below the diagonal.
            A(c,r) = A(r,c) / A(r,r);
            continue;
          }

        // Diagonal entry: it must be positive before taking the root.
#ifndef LIBMESH_USE_COMPLEX_NUMBERS
        if (A(r,r) <= 0.0)
          libmesh_error_msg("Error! Can only use Cholesky decomposition with symmetric positive definite matrices.");
#endif

        A(r,r) = std::sqrt(A(r,r));
      }

  // Record that *this now holds a Cholesky factorization.
  this->_decomposition_type = CHOLESKY;
}


// Solves for x given right-hand side b, using the Cholesky factor L
// stored in the lower triangle of *this: first L y = b (forward
// sweep), then L^T x = y (backward sweep).  x is resized to the
// matrix dimension and is used to hold y in between.
template <typename T>
template <typename T2>
void DenseMatrix<T>::_cholesky_back_substitute (const DenseVector<T2> & b,
                                                DenseVector<T2> & x) const
{
  // Matrix dimensions.
  const unsigned int
    n_rows = this->m(),
    n_cols = this->n();

  // The factorization is only defined for square matrices.
  libmesh_assert_equal_to (n_rows, n_cols);

  // Shorthand for *this.
  const DenseMatrix<T> & A = *this;

  x.resize(n_rows);

  // Forward sweep: solve L y = b, storing y in x.
  for (unsigned int row=0; row<n_cols; ++row)
    {
      T2 accum = b(row);

      for (unsigned int k=0; k<row; ++k)
        accum -= A(row,k)*x(k);

      x(row) = accum / A(row,row);
    }

  // Backward sweep: solve L^T x = y, walking the rows from last to
  // first.  Entries of L^T are read from the lower triangle as A(k,row).
  for (unsigned int row=n_cols; row-- > 0; )
    {
      for (unsigned int k=row+1; k<n_cols; ++k)
        x(row) -= A(k,row) * x(k);

      x(row) /= A(row,row);
    }
}


// This routine is commented out since it is not really a memory

// efficient implementation.  Also, you don't *need* the inverse

// for anything, instead just use lu_solve to solve Ax=b.

// template<typename T>

// void DenseMatrix<T>::inverse ()

// {

//   // First LU decompose the matrix

//   // Note that the lu_decompose routine will check to see if the

//   // matrix is square so we don't worry about it.

//   if (!this->_lu_decomposed)

//     this->_lu_decompose();


//   // A unit vector which will be used as a rhs

//   // to pick off a single value each time.

//   DenseVector<T> e;

//   e.resize(this->m());


//   // An empty vector which will be used to hold the solution

//   // to the back substitutions.

//   DenseVector<T> x;

//   x.resize(this->m());


//   // An empty dense matrix to store the resulting inverse

//   // temporarily until we can overwrite A.

//   DenseMatrix<T> inv;

//   inv.resize(this->m(), this->n());


//   // Resize the passed in matrix to hold the inverse

//   inv.resize(this->m(), this->n());


//   for (unsigned int j=0; j<this->n(); ++j)

//     {

//       e.zero();

//       e(j) = 1.;

//       this->_lu_back_substitute(e, x, false);

//       for (unsigned int i=0; i<this->n(); ++i)

// inv(i,j) = x(i);

//     }


//   // Now overwrite all the entries

//   *this = inv;

// }


} // namespace libMesh


#endif // LIBMESH_DENSE_MATRIX_IMPL_H