Actions

icon Post
text/html Subscribe
text/html Unsubscribe

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[patch] LU solver


  • To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
  • Subject: [patch] LU solver
  • From: Jules Bergmann <jules@xxxxxxxxxxxxxxxx>
  • Date: Fri, 30 Sep 2005 11:45:51 -0400

This patch implements & tests the LU solver. It uses LAPACK to perform the actual work.

More work went into writing the tests than the actual code! I found a nice on-line book on "Numerical Computing in Matlab" by Cleve Moler (the MATLAB founder) that I cribbed the residual error bound from. I put a link to the book on the VSIPL++ resource wiki.

				-- Jules
Index: ChangeLog
===================================================================
RCS file: /home/cvs/Repository/vpp/ChangeLog,v
retrieving revision 1.283
diff -c -p -r1.283 ChangeLog
*** ChangeLog	30 Sep 2005 15:19:16 -0000	1.283
--- ChangeLog	30 Sep 2005 15:37:21 -0000
***************
*** 1,5 ****
--- 1,18 ----
  2005-09-30  Jules Bergmann  <jules@xxxxxxxxxxxxxxxx>
  
+ 	Implement LU linear system solver.
+ 	* src/vsip/impl/solver-lu.hpp: New file, LU solver.
+ 	* src/vsip/solvers.hpp: Include solver-lu.
+ 	* src/vsip/impl/lapack.hpp: Add LAPACK routines for LU solver
+ 	  (getrf and getrs).
+ 	* tests/solver-lu.cpp: New file, unit tests for LU solver.
+ 
+ 	* src/vsip/impl/solver-cholesky.hpp: Use stride to determine
+ 	  leading dimension.
+ 
+ 2005-09-30  Jules Bergmann  <jules@xxxxxxxxxxxxxxxx>
+ 
  	Implement toeplitz linear system solver.
  	* src/vsip/solvers.hpp: Include solver-toepsol.
  	* src/vsip/impl/fns_scalar.hpp: Implement impl_conj, impl_real,
Index: src/vsip/solvers.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/solvers.hpp,v
retrieving revision 1.5
diff -c -p -r1.5 solvers.hpp
*** src/vsip/solvers.hpp	30 Sep 2005 15:19:16 -0000	1.5
--- src/vsip/solvers.hpp	30 Sep 2005 15:37:21 -0000
***************
*** 17,22 ****
--- 17,23 ----
  #include <vsip/impl/solver-qr.hpp>
  #include <vsip/impl/solver-covsol.hpp>
  #include <vsip/impl/solver-llsqsol.hpp>
+ #include <vsip/impl/solver-lu.hpp>
  #include <vsip/impl/solver-cholesky.hpp>
  #include <vsip/impl/solver-svd.hpp>
  #include <vsip/impl/solver-toepsol.hpp>
Index: src/vsip/impl/lapack.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/lapack.hpp,v
retrieving revision 1.6
diff -c -p -r1.6 lapack.hpp
*** src/vsip/impl/lapack.hpp	27 Sep 2005 21:30:17 -0000	1.6
--- src/vsip/impl/lapack.hpp	30 Sep 2005 15:37:21 -0000
*************** extern "C"
*** 179,184 ****
--- 179,194 ----
    void cpotrs_(char*, I, I, C, I, C, I, I);
    void zpotrs_(char*, I, I, Z, I, Z, I, I);
  
+   void sgetrf_(I, I, S, I, I, I);
+   void dgetrf_(I, I, D, I, I, I);
+   void cgetrf_(I, I, C, I, I, I);
+   void zgetrf_(I, I, Z, I, I, I);
+ 
+   void sgetrs_(char*, I, I, S, I, I, S, I, I);
+   void dgetrs_(char*, I, I, D, I, I, D, I, I);
+   void cgetrs_(char*, I, I, C, I, I, C, I, I);
+   void zgetrs_(char*, I, I, Z, I, I, Z, I, I);
+ 
  #if VSIP_IMPL_USE_LAPACK_ILAENV
    int ilaenv_(I, char*, char*, I, I, I, I);
  #endif
*************** VSIP_IMPL_LAPACK_POTRS(std::complex<doub
*** 484,489 ****
--- 494,558 ----
  
  
  
+ /// GETRF - compute LU factorization of a general matrix.
+ 
+ /// Returns:
+ ///   true  if info == 0
+ ///   false if info > 0,
+ ///     (When info > 0, this indicates the factorization has been
+ ///     completed, but U is exactly singular.  Division by 0 will
+ ///     occur if factor U is used to solve a system of linear eq.
+ 
+ #define VSIP_IMPL_LAPACK_GETRF(T, FCN)					\
+ inline bool								\
+ getrf(int m, int n, T* a, int lda, int* ipiv)				\
+ {									\
+   int info;								\
+   FCN(&m, &n, a, &lda, ipiv, &info);					\
+   if (info < 0)								\
+   {									\
+     char msg[256];							\
+     sprintf(msg, "lapack::getrf -- illegal arg (info=%d)", info);	\
+     VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));			\
+   }									\
+   return (info == 0);							\
+ }
+ 
+ VSIP_IMPL_LAPACK_GETRF(float,                sgetrf_)
+ VSIP_IMPL_LAPACK_GETRF(double,               dgetrf_)
+ VSIP_IMPL_LAPACK_GETRF(std::complex<float>,  cgetrf_)
+ VSIP_IMPL_LAPACK_GETRF(std::complex<double>, zgetrf_)
+ 
+ #undef VSIP_IMPL_LAPACK_GETRF
+ 
+ 
+ 
+ /// GETRS - Solves a system of linear equations with a LU-factored
+ /// square matrix, with multiple right-hand sides.
+ 
+ #define VSIP_IMPL_LAPACK_GETRS(T, FCN)					\
+ inline void								\
+ getrs(char trans, int n, int nhrs, T* a, int lda, int* ipiv, T* b, int ldb) \
+ {									\
+   int info;								\
+   FCN(&trans, &n, &nhrs, a, &lda, ipiv, b, &ldb, &info);		\
+   if (info != 0)							\
+   {									\
+     char msg[256];							\
+     sprintf(msg, "lapack::getrs -- illegal arg (info=%d)", info);	\
+     VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));			\
+   }									\
+ }
+ 
+ VSIP_IMPL_LAPACK_GETRS(float,                sgetrs_)
+ VSIP_IMPL_LAPACK_GETRS(double,               dgetrs_)
+ VSIP_IMPL_LAPACK_GETRS(std::complex<float>,  cgetrs_)
+ VSIP_IMPL_LAPACK_GETRS(std::complex<double>, zgetrs_)
+ 
+ #undef VSIP_IMPL_LAPACK_GETRS
+ 
+ 
+ 
  } // namespace vsip::impl::lapack
  } // namespace vsip::impl
  } // namespace vsip
Index: src/vsip/impl/solver-cholesky.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/solver-cholesky.hpp,v
retrieving revision 1.1
diff -c -p -r1.1 solver-cholesky.hpp
*** src/vsip/impl/solver-cholesky.hpp	10 Sep 2005 17:41:16 -0000	1.1
--- src/vsip/impl/solver-cholesky.hpp	30 Sep 2005 15:37:21 -0000
*************** Chold_impl<T>::decompose(Matrix<T, Block
*** 219,225 ****
  		uplo_ == upper ? 'U' : 'L', // A upper/lower lower triangular
  		length_,		    // order of matrix A
  		ext.data(),		    // matrix A
! 		length_);		    // lda - first dim of A
  
    return success;
  }
--- 219,225 ----
  		uplo_ == upper ? 'U' : 'L', // A upper/lower lower triangular
  		length_,		    // order of matrix A
  		ext.data(),		    // matrix A
! 		ext.stride(1));		    // lda - first dim of A
  
    return success;
  }
*************** Chold_impl<T>::impl_solve(
*** 250,257 ****
      lapack::potrs(uplo_ == upper ? 'U' : 'L',
  		  length_,
  		  b.size(1),		    // number of RHS systems
! 		  a_ext.data(), length_,    // A, lda
! 		  b_ext.data(), b.size(0)); // B, ldb
    }
    x = b_int;
  
--- 250,257 ----
      lapack::potrs(uplo_ == upper ? 'U' : 'L',
  		  length_,
  		  b.size(1),		    // number of RHS systems
! 		  a_ext.data(), a_ext.stride(1), // A, lda
! 		  b_ext.data(), b_ext.stride(1));  // B, ldb
    }
    x = b_int;
  
Index: src/vsip/impl/solver-lu.hpp
===================================================================
RCS file: src/vsip/impl/solver-lu.hpp
diff -N src/vsip/impl/solver-lu.hpp
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- src/vsip/impl/solver-lu.hpp	30 Sep 2005 15:37:21 -0000
***************
*** 0 ****
--- 1,293 ----
+ /* Copyright (c) 2005 by CodeSourcery, LLC.  All rights reserved. */
+ 
+ /** @file    vsip/impl/solver-lu.hpp
+     @author  Jules Bergmann
+     @date    2005-09-29
+     @brief   VSIPL++ Library: LU linear system solver.
+ 
+ */
+ 
+ #ifndef VSIP_IMPL_SOLVER_LU_HPP
+ #define VSIP_IMPL_SOLVER_LU_HPP
+ 
+ /***********************************************************************
+   Included Files
+ ***********************************************************************/
+ 
+ #include <algorithm>
+ 
+ #include <vsip/support.hpp>
+ #include <vsip/matrix.hpp>
+ #include <vsip/impl/math-enum.hpp>
+ #include <vsip/impl/lapack.hpp>
+ #include <vsip/impl/temp_buffer.hpp>
+ 
+ 
+ 
+ /***********************************************************************
+   Declarations
+ ***********************************************************************/
+ 
+ namespace vsip
+ {
+ 
+ namespace impl
+ {
+ 
+ /// Cholesky factorization implementation class.  Common functionality
+ /// for lud by-value and by-reference classes.
+ 
+ template <typename T>
+ class Lud_impl
+   : Compile_time_assert<blas::Blas_traits<T>::valid>
+ {
+   typedef Dense<2, T, col2_type> data_block_type;
+ 
+   // Constructors, copies, assignments, and destructors.
+ public:
+   Lud_impl(length_type)
+     VSIP_THROW((std::bad_alloc));
+   Lud_impl(Lud_impl const&)
+     VSIP_THROW((std::bad_alloc));
+ 
+   Lud_impl& operator=(Lud_impl const&) VSIP_NOTHROW;
+   ~Lud_impl() VSIP_NOTHROW;
+ 
+   // Accessors.
+ public:
+   length_type length()const VSIP_NOTHROW { return length_; }
+ 
+   // Solve systems.
+ public:
+   template <typename Block>
+   bool decompose(Matrix<T, Block>) VSIP_NOTHROW;
+ 
+ protected:
+   template <mat_op_type tr,
+ 	    typename    Block0,
+ 	    typename    Block1>
+   bool impl_solve(const_Matrix<T, Block0>, Matrix<T, Block1>)
+     VSIP_NOTHROW;
+ 
+   // Member data.
+ private:
+   typedef std::vector<int, Aligned_allocator<int> > vector_type;
+ 
+   length_type  length_;			// Order of A.
+   vector_type  ipiv_;			// Additional info on Q
+ 
+   Matrix<T, data_block_type> data_;	// Factorized Cholesky matrix (A)
+ };
+ 
+ } // namespace vsip::impl
+ 
+ 
+ 
+ /// LU solver object.
+ 
+ template <typename              T               = VSIP_DEFAULT_VALUE_TYPE,
+ 	  return_mechanism_type ReturnMechanism = by_value>
+ class lud;
+ 
+ 
+ 
+ /// LU solver object (by-reference).
+ 
+ template <typename T>
+ class lud<T, by_reference>
+   : public impl::Lud_impl<T>
+ {
+   typedef impl::Lud_impl<T> base_type;
+ 
+   // Constructors, copies, assignments, and destructors.
+ public:
+   lud(length_type length)
+     VSIP_THROW((std::bad_alloc))
+       : base_type(length)
+     {}
+ 
+   ~lud() VSIP_NOTHROW {}
+ 
+   // By-reference solvers.
+ public:
+   template <mat_op_type tr,
+ 	    typename    Block0,
+ 	    typename    Block1>
+   bool solve(const_Matrix<T, Block0> b, Matrix<T, Block1> x)
+     VSIP_NOTHROW
+   { return this->template impl_solve<tr, Block0, Block1>(b, x); }
+ };
+ 
+ 
+ 
+ /// LU solver object (by-value).
+ 
+ template <typename T>
+ class lud<T, by_value>
+   : public impl::Lud_impl<T>
+ {
+   typedef impl::Lud_impl<T> base_type;
+ 
+   // Constructors, copies, assignments, and destructors.
+ public:
+   lud(length_type length)
+     VSIP_THROW((std::bad_alloc))
+       : base_type(length)
+     {}
+ 
+   ~lud() VSIP_NOTHROW {}
+ 
+   // By-value solvers.
+ public:
+   template <mat_op_type tr,
+ 	    typename    Block0>
+   Matrix<T>
+   solve(const_Matrix<T, Block0> b)
+     VSIP_NOTHROW
+   {
+     Matrix<T> x(b.size(0), b.size(1));
+     this->template impl_solve<tr>(b, x); 
+     return x;
+   }
+ };
+ 
+ 
+ 
+ /***********************************************************************
+   Definitions
+ ***********************************************************************/
+ 
+ namespace impl
+ {
+ 
+ template <typename T>
+ Lud_impl<T>::Lud_impl(
+   length_type length
+   )
+ VSIP_THROW((std::bad_alloc))
+   : length_ (length),
+     ipiv_   (length_),
+     data_   (length_, length_)
+ {
+   assert(length_ > 0);
+ }
+ 
+ 
+ 
+ template <typename T>
+ Lud_impl<T>::Lud_impl(Lud_impl const& lu)
+ VSIP_THROW((std::bad_alloc))
+   : length_ (lu.length_),
+     ipiv_   (length_),
+     data_   (length_, length_)
+ {
+   data_ = lu.data_;
+   for (index_type i=0; i<length_; ++i)
+     ipiv_[i] = lu.ipiv_[i];
+ }
+ 
+ 
+ 
+ template <typename T>
+ Lud_impl<T>::~Lud_impl()
+   VSIP_NOTHROW
+ {
+ }
+ 
+ 
+ 
+ /// Form LU factorization of matrix A
+ ///
+ /// Requires
+ ///   A to be a square matrix, either
+ ///
+ /// FLOPS:
+ ///   real   : UPDATE
+ ///   complex: UPDATE
+ 
+ template <typename T>
+ template <typename Block>
+ bool
+ Lud_impl<T>::decompose(Matrix<T, Block> m)
+   VSIP_NOTHROW
+ {
+   assert(m.size(0) == length_ && m.size(1) == length_);
+ 
+   data_ = m;
+ 
+   Ext_data<data_block_type> ext(data_.block());
+ 
+   bool success = lapack::getrf(
+ 		length_, length_,
+ 		ext.data(), ext.stride(1),	// matrix A, ldA
+ 		&ipiv_[0]);			// pivots
+ 
+   return success;
+ }
+ 
+ 
+ 
+ /// Solve Op(A) x = b (where A previously given to decompose)
+ ///
+ /// Op(A) is
+ ///   A   if tr == mat_ntrans
+ ///   A^T if tr == mat_trans
+ ///   A'  if tr == mat_herm (valid for T complex only)
+ ///
+ /// Requires
+ ///   B to be a (length, P) matrix
+ ///   X to be a (length, P) matrix
+ ///
+ /// Effects:
+ ///   X contains solution to Op(A) X = B
+ 
+ template <typename T>
+ template <mat_op_type tr,
+ 	  typename    Block0,
+ 	  typename    Block1>
+ bool
+ Lud_impl<T>::impl_solve(
+   const_Matrix<T, Block0> b,
+   Matrix<T, Block1>       x)
+   VSIP_NOTHROW
+ {
+   assert(b.size(0) == length_);
+   assert(b.size(0) == x.size(0) && b.size(1) == x.size(1));
+ 
+   char trans;
+ 
+   Matrix<T, Dense<2, T, col2_type> > b_int(b.size(0), b.size(1));
+   b_int = b;
+ 
+   if (tr == mat_ntrans)
+     trans = 'N';
+   else if (tr == mat_trans)
+     trans = 'T';
+   else if (tr == mat_herm)
+   {
+     assert(Is_complex<T>::value);
+     trans = 'C';
+   }
+ 
+   {
+     Ext_data<Dense<2, T, col2_type> > b_ext(b_int.block());
+     Ext_data<data_block_type>         a_ext(data_.block());
+ 
+     lapack::getrs(trans,
+ 		  length_,			  // order of A
+ 		  b.size(1),			  // nrhs: number of RH sides
+ 		  a_ext.data(), a_ext.stride(1),  // A, lda
+ 		  &ipiv_[0],			  // pivots
+ 		  b_ext.data(), b_ext.stride(1)); // B, ldb
+   }
+   x = b_int;
+ 
+   return true;
+ }
+ 
+ } // namespace vsip::impl
+ 
+ } // namespace vsip
+ 
+ 
+ #endif // VSIP_IMPL_SOLVER_LU_HPP
Index: tests/solver-lu.cpp
===================================================================
RCS file: tests/solver-lu.cpp
diff -N tests/solver-lu.cpp
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- tests/solver-lu.cpp	30 Sep 2005 15:37:21 -0000
***************
*** 0 ****
--- 1,346 ----
+ /* Copyright (c) 2005 by CodeSourcery, LLC.  All rights reserved. */
+ 
+ /** @file    tests/solver-lu.cpp
+     @author  Jules Bergmann
+     @date    2005-09-30
+     @brief   VSIPL++ Library: Unit tests for LU solver.
+ */
+ 
+ /***********************************************************************
+   Included Files
+ ***********************************************************************/
+ 
+ #include <cassert>
+ 
+ #include <vsip/initfin.hpp>
+ #include <vsip/support.hpp>
+ #include <vsip/tensor.hpp>
+ #include <vsip/solvers.hpp>
+ 
+ #include "test.hpp"
+ #include "test-precision.hpp"
+ #include "test-random.hpp"
+ #include "solver-common.hpp"
+ #include "load_view.hpp"
+ 
+ #define VERBOSE       0
+ #define DO_SWEEP      0
+ #define DO_BIG        1
+ #define FILE_MATRIX_1 0
+ 
+ #if VERBOSE || 1
+ #  include <iostream>
+ #  include "output.hpp"
+ #  include "extdata-output.hpp"
+ #endif
+ 
+ using namespace std;
+ using namespace vsip;
+ 
+ 
+ 
+ /***********************************************************************
+   Support Definitions
+ ***********************************************************************/
+ 
+ template <typename T,
+ 	  typename Block>
+ typename vsip::impl::Scalar_of<T>::type
+ norm_2(const_Vector<T, Block> v)
+ {
+   return sqrt(sumval(magsq(v)));
+ }
+ 
+ 
+ 
+ /***********************************************************************
+   LU tests
+ ***********************************************************************/
+ 
+ double max_err1 = 0.0;
+ double max_err2 = 0.0;
+ double max_err3 = 0.0;
+ 
+ 
+ 
+ // Chold test w/random matrix.
+ 
+ 
+ template <typename T,
+ 	  typename Block1,
+ 	  typename Block2>
+ void
+ solve_lu(
+   return_mechanism_type rtm,
+   Matrix<T, Block1>     a,
+   Matrix<T, Block2>     b)
+ {
+   length_type n = a.size(0);
+   length_type p = b.size(1);
+ 
+   assert(n == a.size(1));
+   assert(n == b.size(0));
+ 
+   Matrix<T> x1(n, p);
+   Matrix<T> x2(n, p);
+   Matrix<T> x3(n, p);
+ 
+   if (rtm == by_reference)
+   {
+     // 1. Build solver and factor A.
+     lud<T, by_reference> lu(n);
+     assert(lu.length() == n);
+ 
+     bool success = lu.decompose(a);
+     assert(success);
+ 
+     // 2. Solve A X = B.
+     lu.template solve<mat_ntrans>(b, x1);
+     lu.template solve<mat_trans>(b, x2);
+     lu.template solve<Test_traits<T>::trans>(b, x3); // mat_herm if T complex
+   }
+   if (rtm == by_value)
+   {
+     // 1. Build solver and factor A.
+     lud<T, by_value> lu(n);
+     assert(lu.length() == n);
+ 
+     bool success = lu.decompose(a);
+     assert(success);
+ 
+     // 2. Solve A X = B.
+     x1 = lu.template solve<mat_ntrans>(b);
+     x2 = lu.template solve<mat_trans>(b);
+     x3 = lu.template solve<Test_traits<T>::trans>(b); // mat_herm if T complex
+   }
+ 
+ 
+   // 3. Check result.
+ 
+   Matrix<T> chk1(n, p);
+   Matrix<T> chk2(n, p);
+   Matrix<T> chk3(n, p);
+ 
+   prod(a, x1, chk1);
+   prod(trans(a), x2, chk2);
+   prod(trans_or_herm(a), x3, chk3);
+ 
+   typedef typename vsip::impl::Scalar_of<T>::type scalar_type;
+ 
+   Vector<float> sv_s(n);
+   svd<T, by_reference> sv(n, n, svd_uvnos, svd_uvnos);
+   sv.decompose(a, sv_s);
+ 
+   scalar_type a_norm_2 = sv_s(0);
+ 
+ 
+   // Gaussian roundoff error (J.H Wilkinson)
+   // (From Moler, Chapter 2.9, p19)
+   //
+   //  || residual ||
+   // ----------------- <= p eps
+   // || A || || x_* ||
+   //
+   // Where 
+   //   x_* is computed solution (x is true solution)
+   //   residual = b - A x_*
+   //   eps is machine precision
+   //   p is usually less than 10
+ 
+   scalar_type eps     = Precision_traits<scalar_type>::eps;
+   scalar_type p_limit = scalar_type(20);
+ 
+   for (index_type i=0; i<p; ++i)
+   {
+     scalar_type residual_1 = norm_2((b - chk1).col(i));
+     scalar_type err1       = residual_1 / (a_norm_2 * norm_2(x1.col(i)) * eps);
+     scalar_type residual_2 = norm_2((b - chk2).col(i));
+     scalar_type err2       = residual_2 / (a_norm_2 * norm_2(x2.col(i)) * eps);
+     scalar_type residual_3 = norm_2((b - chk3).col(i));
+     scalar_type err3       = residual_3 / (a_norm_2 * norm_2(x3.col(i)) * eps);
+ 
+ #if VERBOSE
+     scalar_type cond = sv_s(0) / sv_s(n-1);
+     cout << "err " << i << " = "
+ 	 << err1 << ", " << err2 << ", " << err3
+ 	 << "  cond = " << cond
+ 	 << endl;
+ #endif
+ 
+     assert(err1 < p_limit);
+     assert(err2 < p_limit);
+     assert(err3 < p_limit);
+ 
+     if (err1 > max_err1) max_err1 = err1;
+     if (err2 > max_err2) max_err2 = err2;
+     if (err3 > max_err3) max_err3 = err3;
+   }
+ 
+ #if VERBOSE
+   cout << "a = " << endl << a << endl;
+   cout << "x1 = " << endl << x1 << endl;
+   cout << "x2 = " << endl << x2 << endl;
+   cout << "b = " << endl << b << endl;
+   cout << "chk1 = " << endl << chk1 << endl;
+   cout << "chk2 = " << endl << chk2 << endl;
+   cout << "chk3 = " << endl << chk3 << endl;
+ #endif
+ }
+ 
+ 
+ 
+ // Simple lud test w/diagonal matrix.
+ 
+ template <typename T>
+ void
+ test_lud_diag(
+   return_mechanism_type rtm,
+   length_type n,
+   length_type p)
+ {
+   Matrix<T> a(n, n);
+   Matrix<T> b(n, p);
+ 
+   a        = T();
+   a.diag() = T(1);
+   if (n > 0) a(0, 0)  = mag(Test_traits<T>::value1());
+   if (n > 2) a(2, 2)  = mag(Test_traits<T>::value2());
+   if (n > 3) a(3, 3)  = mag(Test_traits<T>::value3());
+ 
+   for (index_type i=0; i<p; ++i)
+     b.col(i) = test_ramp(T(1), T(i), n);
+   if (p > 1)
+     b.col(1) += Test_traits<T>::offset();
+ 
+   solve_lu(rtm, a, b);
+ }
+ 
+ 
+ 
+ // Chold test w/random matrix.
+ 
+ template <typename T>
+ void
+ test_lud_random(
+   return_mechanism_type rtm,
+   length_type           n,
+   length_type           p)
+ {
+   Matrix<T> a(n, n);
+   Matrix<T> b(n, p);
+ 
+   randm(a);
+   randm(b);
+ 
+   solve_lu(rtm, a, b);
+ }
+ 
+ 
+ 
+ // Chold test w/matrix from file.
+ 
+ template <typename FileT,
+ 	  typename T>
+ void
+ test_lud_file(
+   return_mechanism_type rtm,
+   char*                 afilename,
+   char*                 bfilename,
+   length_type           n,
+   length_type           p)
+ {
+   Load_view<2, FileT> load_a(afilename, Domain<2>(n, n));
+   Load_view<2, FileT> load_b(bfilename, Domain<2>(n, p));
+ 
+   Matrix<T> a(n, n);
+   Matrix<T> b(n, p);
+ 
+   a = load_a.view();
+   b = load_b.view();
+ 
+   solve_lu(rtm, a, b);
+ }
+ 
+ 
+ 
+ template <typename T>
+ void lud_cases(return_mechanism_type rtm)
+ {
+   for (index_type p=1; p<=3; ++p)
+   {
+     test_lud_diag<T>(rtm, 1, p);
+     test_lud_diag<T>(rtm, 5, p);
+     test_lud_diag<T>(rtm, 6, p);
+     test_lud_diag<T>(rtm, 17, p);
+   }
+ 
+ 
+   for (index_type p=1; p<=3; ++p)
+   {
+     test_lud_random<T>(rtm, 1, p);
+     test_lud_random<T>(rtm, 2, p);
+     test_lud_random<T>(rtm, 5, p);
+     test_lud_random<T>(rtm, 6, p);
+     test_lud_random<T>(rtm, 16, p);
+     test_lud_random<T>(rtm, 17, p);
+   }
+ 
+ #if DO_BIG
+   test_lud_random<T>(rtm, 97,   5+1);
+   test_lud_random<T>(rtm, 97+1, 5);
+   test_lud_random<T>(rtm, 97+2, 5+2);
+ #endif
+ 
+ #if DO_SWEEP
+   for (index_type i=1; i<100; i+= 8)
+     for (index_type j=1; j<10; j += 4)
+     {
+       test_lud_random<T>(rtm, i,   j+1);
+       test_lud_random<T>(rtm, i+1, j);
+       test_lud_random<T>(rtm, i+2, j+2);
+     }
+ #endif
+ }
+ 
+ 
+ 
+ /***********************************************************************
+   Main
+ ***********************************************************************/
+ 
+ template <> float  Precision_traits<float>::eps = 0.0;
+ template <> double Precision_traits<double>::eps = 0.0;
+ 
+ 
+ 
+ int
+ main(int argc, char** argv)
+ {
+   vsipl init(argc, argv);
+ 
+   Precision_traits<float>::compute_eps();
+   Precision_traits<double>::compute_eps();
+ 
+ #if FILE_MATRIX_1
+   test_lud_file<complex<float>, complex<double> >(
+     "lu-a-complex-float-99x99.dat", "lu-b-complex-float-99x7.dat", 99, 7);
+   test_lud_file<complex<float>, complex<float> >(
+     "lu-a-complex-float-99x99.dat", "lu-b-complex-float-99x7.dat", 99, 7);
+ #endif
+ 
+   lud_cases<float>           (by_reference);
+   lud_cases<double>          (by_reference);
+   lud_cases<complex<float> > (by_reference);
+   lud_cases<complex<double> >(by_reference);
+ 
+   lud_cases<float>           (by_value);
+   lud_cases<double>          (by_value);
+   lud_cases<complex<float> > (by_value);
+   lud_cases<complex<double> >(by_value);
+ 
+ #if VERBOSE
+   cout << "max_err1 " << max_err1 << endl;
+   cout << "max_err2 " << max_err2 << endl;
+   cout << "max_err3 " << max_err3 << endl;
+ #endif
+ }