Actions

icon Post
text/html Subscribe
text/html Unsubscribe

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[patch] SVD solver


  • To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
  • Subject: [patch] SVD solver
  • From: Jules Bergmann <jules@xxxxxxxxxxxxxxxx>
  • Date: Tue, 27 Sep 2005 12:29:16 -0400

Implementation (and tests) of the SVD solver object, using LAPACK underneath.

				-- Jules
Index: ChangeLog
===================================================================
RCS file: /home/cvs/Repository/vpp/ChangeLog,v
retrieving revision 1.274
diff -c -p -r1.274 ChangeLog
*** ChangeLog	26 Sep 2005 20:23:28 -0000	1.274
--- ChangeLog	27 Sep 2005 16:28:28 -0000
***************
*** 1,3 ****
--- 1,20 ----
+ 2005-09-27  Jules Bergmann  <jules@xxxxxxxxxxxxxxxx>
+ 
+ 	* configure.ac: Add -lpthread for MKL 5.x.
+ 	* src/vsip/solvers.hpp: Include solver-svd.
+ 	* src/vsip/impl/lapack.hpp: Add LAPACK routines for SVD (gebrd,
+ 	  orgbr/ungbr, sbdsqr).  Replace assertions on LAPACK info with
+ 	  exceptions.
+ 	* src/vsip/impl/matvec.hpp: Add trans_or_herm() function.
+ 	* src/vsip/impl/metaprogramming.hpp: Add Bool_type to encapsulate
+ 	  a bool as a type.
+ 	* src/vsip/impl/solver-svd.hpp: New file, implement SVD solver.
+ 	* src/vsip/impl/subblock.hpp (Diag::size): Check block_d argumment.
+ 	* tests/solver-common.hpp: Add compare_view functions.  Define
+ 	  perferred tranpose for value type (regular or conjugate) in
+ 	  Test_traits.
+ 	* tests/solver-svd.cpp: New file, unit tests for SVD solver.
+ 
  2005-09-26  Jules Bergmann  <jules@xxxxxxxxxxxxxxxx>
  
  	* src/vsip/math.hpp: Include expr_generator_block.hpp
Index: configure.ac
===================================================================
RCS file: /home/cvs/Repository/vpp/configure.ac,v
retrieving revision 1.40
diff -c -p -r1.40 configure.ac
*** configure.ac	21 Sep 2005 06:45:07 -0000	1.40
--- configure.ac	27 Sep 2005 16:28:28 -0000
*************** if test "$enable_lapack" != "no"; then
*** 566,572 ****
        AC_MSG_CHECKING([for LAPACK/MKL 5.x library])
  
        LDFLAGS="$keep_LDFLAGS -L$with_mkl_prefix"
!       LIBS="$keep_LIBS -lmkl_lapack -lmkl -lg2c"
  
        lapack_use_ilaenv=0
      elif test "$trypkg" == "mkl7"; then
--- 566,572 ----
        AC_MSG_CHECKING([for LAPACK/MKL 5.x library])
  
        LDFLAGS="$keep_LDFLAGS -L$with_mkl_prefix"
!       LIBS="$keep_LIBS -lmkl_lapack -lmkl -lg2c -lpthread"
  
        lapack_use_ilaenv=0
      elif test "$trypkg" == "mkl7"; then
Index: src/vsip/solvers.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/solvers.hpp,v
retrieving revision 1.3
diff -c -p -r1.3 solvers.hpp
*** src/vsip/solvers.hpp	10 Sep 2005 17:41:16 -0000	1.3
--- src/vsip/solvers.hpp	27 Sep 2005 16:28:28 -0000
***************
*** 18,22 ****
--- 18,23 ----
  #include <vsip/impl/solver-covsol.hpp>
  #include <vsip/impl/solver-llsqsol.hpp>
  #include <vsip/impl/solver-cholesky.hpp>
+ #include <vsip/impl/solver-svd.hpp>
  
  #endif // VSIP_SOLVERS_HPP
Index: src/vsip/impl/lapack.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/lapack.hpp,v
retrieving revision 1.5
diff -c -p -r1.5 lapack.hpp
*** src/vsip/impl/lapack.hpp	10 Sep 2005 17:41:16 -0000	1.5
--- src/vsip/impl/lapack.hpp	27 Sep 2005 16:28:28 -0000
*************** extern "C"
*** 154,159 ****
--- 154,174 ----
    void cunmqr_(char*, char*, I, I, I, C, I, C, C, I, C, I, I);
    void zunmqr_(char*, char*, I, I, I, Z, I, Z, Z, I, Z, I, I);
  
+   void sgebrd_(I, I, S, I, S, S, S, S, S, I, I);
+   void dgebrd_(I, I, D, I, D, D, D, D, D, I, I);
+   void cgebrd_(I, I, C, I, S, S, C, C, C, I, I);
+   void zgebrd_(I, I, Z, I, D, D, Z, Z, Z, I, I);
+ 
+   void sorgbr_(char*, I, I, I, S, I, S, S, I, I);
+   void dorgbr_(char*, I, I, I, D, I, D, D, I, I);
+   void cungbr_(char*, I, I, I, C, I, C, C, I, I);
+   void zungbr_(char*, I, I, I, Z, I, Z, Z, I, I);
+ 
+   void sbdsqr_(char*, I, I, I, I, S, S, S, I, S, I, S, I, S, I);
+   void dbdsqr_(char*, I, I, I, I, D, D, D, I, D, I, D, I, D, I);
+   void cbdsqr_(char*, I, I, I, I, S, S, C, I, C, I, C, I, C, I);
+   void zbdsqr_(char*, I, I, I, I, D, D, Z, I, Z, I, Z, I, Z, I);
+ 
    void spotrf_(char*, I, S, I, I);
    void dpotrf_(char*, I, D, I, I);
    void cpotrf_(char*, I, C, I, I);
*************** inline void geqrf(int m, int n, T* a, in
*** 191,197 ****
  {									\
    int info;								\
    FCN(&m, &n, a, &lda, tau, work, &lwork, &info);			\
!   assert(info == 0);							\
  }									\
  									\
  template <>								\
--- 206,217 ----
  {									\
    int info;								\
    FCN(&m, &n, a, &lda, tau, work, &lwork, &info);			\
!   if (info != 0)							\
!   {									\
!     char msg[256];							\
!     sprintf(msg, "lapack::geqrf -- illegal arg (info=%d)", info);	\
!     VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));			\
!   }									\
  }									\
  									\
  template <>								\
*************** inline void geqr2(int m, int n, T* a, in
*** 221,227 ****
  {									\
    int info;								\
    FCN(&m, &n, a, &lda, tau, work, &info);				\
!   assert(info == 0);							\
  }									\
  									\
  template <>								\
--- 241,252 ----
  {									\
    int info;								\
    FCN(&m, &n, a, &lda, tau, work, &info);				\
!   if (info != 0)							\
!   {									\
!     char msg[256];							\
!     sprintf(msg, "lapack::geqr2 -- illegal arg (info=%d)", info);	\
!     VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));			\
!   }									\
  }									\
  									\
  template <>								\
*************** inline void mqr(char side, char trans,		
*** 255,261 ****
  {									\
    int info;								\
    FCN(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info); \
!   assert(info == 0);							\
  }									\
  									\
  template <>								\
--- 280,291 ----
  {									\
    int info;								\
    FCN(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info); \
!   if (info != 0)							\
!   {									\
!     char msg[256];							\
!     sprintf(msg, "lapack::mqr -- illegal arg (info=%d)", info);	\
!     VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));			\
!   }									\
  }									\
  									\
  template <>								\
*************** VSIP_IMPL_LAPACK_MQR(std::complex<double
*** 279,284 ****
--- 309,422 ----
  
  
  
+ #define VSIP_IMPL_LAPACK_GEBRD(T, FCN, NAME)				\
+ inline void gebrd(int m, int n, T* a, int lda,				\
+ 		  vsip::impl::Scalar_of<T >::type* d,			\
+ 		  vsip::impl::Scalar_of<T >::type* e,			\
+ 		  T* tauq, T* taup, T* work, int& lwork)		\
+ {									\
+   int info;								\
+   FCN(&m, &n, a, &lda, d, e, tauq, taup, work, &lwork, &info);		\
+   if (info != 0)							\
+   {									\
+     char msg[256];							\
+     sprintf(msg, "lapack::gebrd -- illegal arg (info=%d)", info);	\
+     VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));			\
+   }									\
+ }									\
+ 									\
+ template <>								\
+ inline int								\
+ gebrd_blksize<T >(int m, int n) /* Note [1] */				\
+ {									\
+   return ilaenv(1, NAME, "", m, n, -1, -1);				\
+ }
+ 
+ template <typename T>
+ inline int
+ gebrd_blksize(int m, int n);
+ 
+ 
+ VSIP_IMPL_LAPACK_GEBRD(float,                sgebrd_, "sgebrd")
+ VSIP_IMPL_LAPACK_GEBRD(double,               dgebrd_, "dgebrd")
+ VSIP_IMPL_LAPACK_GEBRD(std::complex<float>,  cgebrd_, "cgebrd")
+ VSIP_IMPL_LAPACK_GEBRD(std::complex<double>, zgebrd_, "zgebrd")
+ 
+ #undef VSIP_IMPL_LAPACK_GEBRD
+ 
+ 
+ 
+ #define VSIP_IMPL_LAPACK_GBR(T, FCN, NAME)				\
+ inline void gbr(char vect,						\
+ 		int m, int n, int k,					\
+ 		T *a, int lda,						\
+ 		T *tau,							\
+ 		T *work, int& lwork)					\
+ {									\
+   int info;								\
+   FCN(&vect, &m, &n, &k, a, &lda, tau, work, &lwork, &info);		\
+   if (info != 0)							\
+   {									\
+     char msg[256];							\
+     sprintf(msg, "lapack::gbr -- illegal arg (info=%d)", info);	\
+     VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));			\
+   }									\
+ }									\
+ 									\
+ template <>								\
+ inline int								\
+ gbr_blksize<T >(char vect, int m, int n, int k) /* Note [1] */		\
+ {									\
+   char arg[2]; arg[0] = vect; arg[1] = 0;				\
+   return ilaenv(1, NAME, arg, m, n, k, -1);				\
+ }
+ 
+ template <typename T>
+ inline int
+ gbr_blksize(char vect, int m, int n, int k);
+ 
+ VSIP_IMPL_LAPACK_GBR(float,                sorgbr_, "sorgbr")
+ VSIP_IMPL_LAPACK_GBR(double,               dorgbr_, "dorgbr")
+ VSIP_IMPL_LAPACK_GBR(std::complex<float>,  cungbr_, "cungbr")
+ VSIP_IMPL_LAPACK_GBR(std::complex<double>, zungbr_, "zungbr")
+ 
+ #undef VSIP_IMPL_LAPACK_GBR
+ 
+ 
+ 
+ /// BDSQR - compute the singular value decomposition of a general matrix
+ ///         that has been reduce to bidiagonal form.
+ 
+ #define VSIP_IMPL_LAPACK_BDSQR(T, FCN)					\
+ inline void bdsqr(char uplo,						\
+ 		  int n, int ncvt, int nru, int ncc,			\
+ 		  vsip::impl::Scalar_of<T >::type* d,			\
+ 		  vsip::impl::Scalar_of<T >::type* e,			\
+ 		  T *vt, int ldvt,					\
+ 		  T *u, int ldu,					\
+ 		  T *c, int ldc,					\
+ 		  T *work)						\
+ {									\
+   int info;								\
+   FCN(&uplo, &n, &ncvt, &nru, &ncc, d, e,				\
+       vt, &ldvt, u, &ldu, c, &ldc, work, &info);			\
+   if (info != 0)							\
+   {									\
+     char msg[256];							\
+     sprintf(msg, "lapack::bdsqr -- illegal arg (info=%d)", info);	\
+     VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));			\
+   }									\
+ }
+ 
+ VSIP_IMPL_LAPACK_BDSQR(float,                sbdsqr_)
+ VSIP_IMPL_LAPACK_BDSQR(double,               dbdsqr_)
+ VSIP_IMPL_LAPACK_BDSQR(std::complex<float>,  cbdsqr_)
+ VSIP_IMPL_LAPACK_BDSQR(std::complex<double>, zbdsqr_)
+ 
+ #undef VSIP_IMPL_LAPACK_BDSQR
+ 
+ 
+ 
  /// POTRF - compute cholesky factorization of a symmetric (hermtian)
  /// postive definite matrix
  
*************** potrf(char uplo, int n, T* a, int lda)		
*** 295,302 ****
  {									\
    int info;								\
    FCN(&uplo, &n, a, &lda, &info);					\
!   assert(info >= 0);							\
!   if (info > 0) printf("POTRF: %d\n", info);				\
    return (info == 0);							\
  }
  
--- 433,444 ----
  {									\
    int info;								\
    FCN(&uplo, &n, a, &lda, &info);					\
!   if (info < 0)								\
!   {									\
!     char msg[256];							\
!     sprintf(msg, "lapack::potrf -- illegal arg (info=%d)", info);	\
!     VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));			\
!   }									\
    return (info == 0);							\
  }
  
*************** potrs(char uplo, int n, int nhrs, T* a, 
*** 325,331 ****
  {									\
    int info;								\
    FCN(&uplo, &n, &nhrs, a, &lda, b, &ldb, &info);			\
!   assert(info == 0);							\
  }
  
  VSIP_IMPL_LAPACK_POTRS(float,                spotrs_)
--- 467,478 ----
  {									\
    int info;								\
    FCN(&uplo, &n, &nhrs, a, &lda, b, &ldb, &info);			\
!   if (info != 0)							\
!   {									\
!     char msg[256];							\
!     sprintf(msg, "lapack::potrs -- illegal arg (info=%d)", info);	\
!     VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));			\
!   }									\
  }
  
  VSIP_IMPL_LAPACK_POTRS(float,                spotrs_)
*************** extern "C"
*** 349,361 ****
  /// LAPACK error handler.  Called by LAPACK functions if illegal
  /// argument is passed.
  
! void xerbla_(char* name, int* /*info*/)
  {
    char copy[8];
    strncpy(copy, name, 6);
    copy[6] = 0;
  
!   VSIP_IMPL_THROW(vsip::impl::unimplemented("lapack -- illegal arg"));
  }
  
  }
--- 496,511 ----
  /// LAPACK error handler.  Called by LAPACK functions if illegal
  /// argument is passed.
  
! void xerbla_(char* name, int* info)
  {
    char copy[8];
+   char msg[256];
+ 
    strncpy(copy, name, 6);
    copy[6] = 0;
+   sprintf(msg, "lapack -- illegal arg (name=%s  info=%d)", copy, *info);
  
!   VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));
  }
  
  }
Index: src/vsip/impl/matvec.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/matvec.hpp,v
retrieving revision 1.1
diff -c -p -r1.1 matvec.hpp
*** src/vsip/impl/matvec.hpp	19 Sep 2005 21:06:46 -0000	1.1
--- src/vsip/impl/matvec.hpp	27 Sep 2005 16:28:28 -0000
*************** kron( T0 alpha, Matrix<T1, Block1> v, Ma
*** 90,95 ****
--- 90,147 ----
    return r;
  }
  
+ 
+ 
+ /// Class to perform transpose or hermetian (conjugate-transpose),
+ /// depending on value type.
+ 
+ /// Primary case - perform transpose.
+ 
+ template <typename T,
+ 	  typename Block>
+ struct Trans_or_herm
+ {
+   typedef typename const_Matrix<T, Block>::transpose_type result_type;
+ 
+   static result_type
+   exec(const_Matrix<T, Block> m) VSIP_NOTHROW
+   {
+     return m.transpose();
+   }
+ };
+ 
+ /// Complex specialization - perform hermetian.
+ 
+ template <typename T,
+ 	  typename Block>
+ struct Trans_or_herm<complex<T>, Block>
+ {
+   typedef typename const_Matrix<complex<T>, Block>::transpose_type 
+       transpose_type;
+   typedef impl::Unary_func_view<impl::conj_functor, transpose_type> 
+       functor_type;
+   typedef typename functor_type::result_type result_type;
+ 
+   static result_type
+   exec(const_Matrix<complex<T>, Block> m) VSIP_NOTHROW
+   {
+     return functor_type::apply(m.transpose());
+   } 
+ };
+ 
+ 
+ 
+ /// Perform transpose or hermetian, depending on value type.
+ 
+ template <typename T,
+ 	  typename Block>
+ inline
+ typename Trans_or_herm<T, Block>::result_type
+ trans_or_herm(const_Matrix<T, Block> m)
+ {
+   return Trans_or_herm<T, Block>::exec(m);
+ };
+ 
  } // namespace impl
  
  
Index: src/vsip/impl/metaprogramming.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/metaprogramming.hpp,v
retrieving revision 1.8
diff -c -p -r1.8 metaprogramming.hpp
*** src/vsip/impl/metaprogramming.hpp	28 Aug 2005 02:15:57 -0000	1.8
--- src/vsip/impl/metaprogramming.hpp	27 Sep 2005 16:28:28 -0000
*************** template <typename T>
*** 108,113 ****
--- 108,118 ----
  struct Is_complex<std::complex<T> >
  { static bool const value = true; };
  
+ 
+ template <bool value>
+ struct Bool_type
+ {};
+ 
  } // namespace impl
  } // namespace vsip
  
Index: src/vsip/impl/solver-svd.hpp
===================================================================
RCS file: src/vsip/impl/solver-svd.hpp
diff -N src/vsip/impl/solver-svd.hpp
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- src/vsip/impl/solver-svd.hpp	27 Sep 2005 16:28:28 -0000
***************
*** 0 ****
--- 1,801 ----
+ /* Copyright (c) 2005 by CodeSourcery, LLC.  All rights reserved. */
+ 
+ /** @file    vsip/impl/solver-svd.hpp
+     @author  Jules Bergmann
+     @date    2005-09-11
+     @brief   VSIPL++ Library: SVD Linear system solver.
+ 
+ */
+ 
+ #ifndef VSIP_IMPL_SOLVER_SVD_HPP
+ #define VSIP_IMPL_SOLVER_SVD_HPP
+ 
+ /***********************************************************************
+   Included Files
+ ***********************************************************************/
+ 
+ #include <algorithm>
+ 
+ #include <vsip/support.hpp>
+ #include <vsip/matrix.hpp>
+ #include <vsip/math.hpp>
+ #include <vsip/impl/math-enum.hpp>
+ #include <vsip/impl/lapack.hpp>
+ #include <vsip/impl/temp_buffer.hpp>
+ 
+ 
+ 
+ /***********************************************************************
+   Declarations
+ ***********************************************************************/
+ 
+ namespace vsip
+ {
+ 
+ namespace impl
+ {
+ 
+ /// SVD decomposition implementation class.  Common functionality for
+ /// svd by-value and by-reference classes.
+ 
+ template <typename T,
+ 	  bool     Blocked = true>
+ class Svd_impl
+   : Compile_time_assert<blas::Blas_traits<T>::valid>
+ {
+   typedef Dense<2, T, col2_type> data_block_type;
+ 
+   // Constructors, copies, assignments, and destructors.
+ public:
+   Svd_impl(length_type, length_type, storage_type, storage_type)
+     VSIP_THROW((std::bad_alloc));
+   Svd_impl(Svd_impl const&)
+     VSIP_THROW((std::bad_alloc));
+ 
+   Svd_impl& operator=(Svd_impl const&) VSIP_NOTHROW;
+   ~Svd_impl() VSIP_NOTHROW;
+ 
+   // Accessors.
+ public:
+   length_type  rows()     const VSIP_NOTHROW { return m_; }
+   length_type  columns()  const VSIP_NOTHROW { return n_; }
+   storage_type ustorage() const VSIP_NOTHROW { return ust_; }
+   storage_type vstorage() const VSIP_NOTHROW { return vst_; }
+ 
+   // Solve systems.
+ protected:
+   template <typename Block0,
+ 	    typename Block1>
+   bool impl_decompose(Matrix<T, Block0>,
+ 		      Vector<scalar_f, Block1>) VSIP_NOTHROW;
+ 
+   template <mat_op_type       tr,
+ 	    product_side_type ps,
+ 	    typename          Block0,
+ 	    typename          Block1>
+   bool impl_produ(const_Matrix<T, Block0>, Matrix<T, Block1>)
+     VSIP_NOTHROW;
+ 
+   template <mat_op_type       tr,
+ 	    product_side_type ps,
+ 	    typename          Block0,
+ 	    typename          Block1>
+   bool impl_prodv(const_Matrix<T, Block0>, Matrix<T, Block1>)
+     VSIP_NOTHROW;
+ 
+   template <typename          Block>
+   bool impl_u(index_type, index_type, Matrix<T, Block>)
+     VSIP_NOTHROW;
+ 
+   template <typename          Block>
+   bool impl_v(index_type, index_type, Matrix<T, Block>)
+     VSIP_NOTHROW;
+ 
+   length_type  impl_order()  const VSIP_NOTHROW { return p_; }
+ 
+   // Member data.
+ private:
+   typedef typename vsip::impl::Scalar_of<T>::type scalar_type;
+   typedef std::vector<T, Aligned_allocator<T> > vector_type;
+   typedef std::vector<scalar_type, Aligned_allocator<scalar_type> >
+ 		svector_type;
+ 
+   length_type  m_;			// Number of rows.
+   length_type  n_;			// Number of cols.
+   length_type  p_;			// min(rows, cols)
+   storage_type ust_;			// U storage type
+   storage_type vst_;			// V storage type
+ 
+   Matrix<T, data_block_type> data_;	// Factorized matrix
+   Matrix<T, data_block_type> q_;	// U matrix
+   Matrix<T, data_block_type> pt_;	// V' matrix
+ 
+   vector_type  tauq_;			// Additional info on Q
+   vector_type  taup_;			// Additional info on P
+   svector_type b_d_;			// Diagonal elements of B
+   svector_type b_e_;			// Off-diagonal elements of B
+ 					//  - gebrd requires min(m, n)-1
+ 					//  - bdsqr requires min(m, n)
+ 
+   length_type lwork_gebrd_;		// size of workspace needed for gebrd
+   vector_type work_gebrd_;		// workspace for gebrd
+   length_type lwork_gbr_;		// size of workspace needed for gebrd
+   vector_type work_gbr_;		// workspace for gebrd
+ };
+ 
+ } // namespace vsip::impl
+ 
+ 
+ 
+ /// SVD solver object.
+ 
+ template <typename              T               = VSIP_DEFAULT_VALUE_TYPE,
+ 	  return_mechanism_type ReturnMechanism = by_value>
+ class svd;
+ 
+ template <typename T>
+ class svd<T, by_reference>
+   : public impl::Svd_impl<T>
+ {
+   typedef impl::Svd_impl<T> base_type;
+ 
+   // Constructors, copies, assignments, and destructors.
+ public:
+   svd(length_type rows, length_type cols, storage_type ust, storage_type vst)
+     VSIP_THROW((std::bad_alloc))
+     : base_type(rows, cols, ust, vst)
+   {}
+ 
+   ~svd() VSIP_NOTHROW {}
+ 
+   // By-reference solvers.
+ public:
+   template <typename Block0,
+ 	    typename Block1>
+   bool decompose(Matrix<T, Block0> m, Vector<scalar_f, Block1> dest)
+     VSIP_NOTHROW
+   { return this->impl_decompose(m, dest); }
+ 
+   template <mat_op_type       tr,
+ 	    product_side_type ps,
+ 	    typename          Block0,
+ 	    typename          Block1>
+   bool produ(const_Matrix<T, Block0> b, Matrix<T, Block1> x)
+     VSIP_NOTHROW
+   // { return true; }
+   { return this->template impl_produ<tr, ps>(b, x); }
+ 
+   template <mat_op_type       tr,
+ 	    product_side_type ps,
+ 	    typename          Block0,
+ 	    typename          Block1>
+   bool prodv(const_Matrix<T, Block0> b, Matrix<T, Block1> x)
+     VSIP_NOTHROW
+   { return this->template impl_prodv<tr, ps>(b, x); }
+ 
+   template <typename Block>
+   bool u(index_type low, index_type high, Matrix<T, Block> dest)
+     VSIP_NOTHROW
+   { return this->template impl_u(low, high, dest); }
+ 
+   template <typename Block>
+   bool v(index_type low, index_type high, Matrix<T, Block> dest)
+     VSIP_NOTHROW
+   { return this->template impl_v(low, high, dest); }
+ };
+ 
+ template <typename T>
+ class svd<T, by_value>
+   : public impl::Svd_impl<T>
+ {
+   typedef impl::Svd_impl<T> base_type;
+ 
+   // Constructors, copies, assignments, and destructors.
+ public:
+   svd(length_type rows, length_type cols, storage_type ust, storage_type vst)
+     VSIP_THROW((std::bad_alloc))
+     : base_type(rows, cols, ust, vst)
+   {}
+ 
+   ~svd() VSIP_NOTHROW {}
+ 
+   // By-value solvers.
+ public:
+   template <typename Block0>
+   Vector<scalar_f>
+   decompose(Matrix<T, Block0> m)
+     VSIP_THROW((std::bad_alloc, computation_error))
+   {
+     Vector<scalar_f> dest(this->impl_order());
+     if (!this->impl_decompose(m, dest))
+       VSIP_IMPL_THROW(computation_error("svd::decompose"));
+     return dest;
+   }
+ 
+   template <mat_op_type       tr,
+ 	    product_side_type ps,
+ 	    typename          Block>
+   Matrix<T>
+   produ(const_Matrix<T, Block> b)
+     VSIP_THROW((std::bad_alloc, computation_error))
+   {
+     length_type q_rows = this->rows();
+     length_type q_cols = this->ustorage() == svd_uvfull ? this->rows() :
+                                                           this->impl_order();
+ 
+     length_type x_rows, x_cols;
+     if (ps == mat_lside)
+     {
+       x_rows = (tr == mat_ntrans) ? q_rows : q_cols;
+       x_cols = b.size(1);
+     }
+     else /* (ps == mat_rside) */
+     {
+       x_rows = b.size(0);
+       x_cols = (tr == mat_ntrans) ? q_cols : q_rows;
+     }
+     Matrix<T> x(x_rows, x_cols);
+     this->template impl_produ<tr, ps>(b, x);
+     return x;
+   }
+ 
+   template <mat_op_type       tr,
+ 	    product_side_type ps,
+ 	    typename          Block>
+   Matrix<T>
+   prodv(const_Matrix<T, Block> b)
+     VSIP_THROW((std::bad_alloc, computation_error))
+   { 
+     length_type vt_rows = this->vstorage() == svd_uvfull ? this->columns() :
+                                                            this->impl_order();
+     length_type vt_cols = this->columns();
+ 
+     length_type x_rows, x_cols;
+     if (ps == mat_lside)
+     {
+       x_rows = (tr == mat_ntrans) ? vt_cols : vt_rows;
+       x_cols = b.size(1);
+     }
+     else /* (ps == mat_rside) */
+     {
+       x_rows = b.size(0);
+       x_cols = (tr == mat_ntrans) ? vt_rows : vt_cols;
+     }
+     Matrix<T> x(x_rows, x_cols);
+     this->template impl_prodv<tr, ps>(b, x);
+     return x;
+   }
+ 
+   Matrix<T>
+   u(index_type low, index_type high)
+     VSIP_THROW((std::bad_alloc, computation_error))
+   {
+     assert(this->ustorage() == svd_uvpart && high <= this->impl_order() ||
+ 	   this->ustorage() == svd_uvfull && high <= this->rows());
+ 
+     Matrix<T> dest(this->rows(), high - low + 1);
+     if (!this->template impl_u(low, high, dest))
+       VSIP_IMPL_THROW(computation_error("svd::u"));
+     return dest;
+   }
+ 
+   Matrix<T>
+   v(index_type low, index_type high)
+     VSIP_THROW((std::bad_alloc, computation_error))
+   {
+     assert(this->vstorage() == svd_uvpart && high <= this->impl_order() ||
+ 	   this->vstorage() == svd_uvfull && high <= this->columns());
+ 
+     Matrix<T> dest(this->columns(), high - low + 1);
+     if (!this->template impl_v(low, high, dest))
+       VSIP_IMPL_THROW(computation_error("svd::v"));
+     return dest;
+   }
+ };
+ 
+ 
+ 
+ /***********************************************************************
+   Definitions
+ ***********************************************************************/
+ 
+ namespace impl
+ {
+ 
+ length_type inline
+ select_dim(storage_type st, length_type full, length_type part)
+ {
+   return (st == svd_uvfull) ? full :
+          (st == svd_uvpart) ? part : 0;
+ }
+ 
+ template <typename T,
+ 	  bool     Blocked>
+ Svd_impl<T, Blocked>::Svd_impl(
+   length_type  rows,
+   length_type  cols,
+   storage_type ust,
+   storage_type vst
+   )
+ VSIP_THROW((std::bad_alloc))
+   : m_          (rows),
+     n_          (cols),
+     p_          (std::min(m_, n_)),
+     ust_        (ust),
+     vst_        (vst),
+ 
+     data_       (m_, n_),
+     q_          (select_dim(ust_, m_, m_), select_dim(ust_, m_, p_)),
+     pt_         (select_dim(vst_, n_, p_), select_dim(vst_, n_, n_)),
+ 
+     tauq_       (p_),
+     taup_       (p_),
+     b_d_        (p_),
+     b_e_        (p_),
+ 
+     lwork_gebrd_((m_ + n_) * lapack::gebrd_blksize<T>(m_, n_)),
+     work_gebrd_ (lwork_gebrd_),
+     lwork_gbr_  (p_ * lapack::gbr_blksize<T>('Q', m_, m_, n_)),
+     work_gbr_   (lwork_gbr_)
+ {
+   assert(m_ > 0 && n_ > 0);
+   assert(ust_ == svd_uvnos || ust_ == svd_uvpart || ust_ == svd_uvfull);
+   assert(vst_ == svd_uvnos || vst_ == svd_uvpart || vst_ == svd_uvfull);
+ }
+ 
+ 
+ 
+ template <typename T,
+ 	  bool     Blocked>
+ Svd_impl<T, Blocked>::Svd_impl(Svd_impl const& sv)
+ VSIP_THROW((std::bad_alloc))
+   : m_          (sv.m_),
+     n_          (sv.n_),
+     p_          (sv.p_),
+     ust_        (sv.ust_),
+     vst_        (sv.vst_),
+ 
+     data_       (m_, n_),
+     q_          (select_dim(ust_, m_, m_), select_dim(ust_, m_, p_)),
+     pt_         (select_dim(vst_, n_, p_), select_dim(vst_, n_, n_)),
+ 
+     tauq_       (p_),
+     taup_       (p_),
+     b_d_        (p_),
+     b_e_        (p_),
+     lwork_gebrd_((m_ + n_) * lapack::gebrd_blksize<T>(m_, n_)),
+     work_gebrd_ (lwork_gebrd_),
+     lwork_gbr_  (p_ * lapack::gbr_blksize<T>('Q', m_, m_, n_)),
+     work_gbr_   (lwork_gbr_)
+ {
+   data_ = sv.data_;
+   q_    = sv.q_;
+   pt_   = sv.pt_;
+   for (index_type i=0; i<p_; ++i)
+   {
+     b_d_[i]  = sv.b_d_[i];
+     b_e_[i]  = sv.b_e_[i];
+     tauq_[i] = sv.tauq_[i];
+     taup_[i] = sv.taup_[i];
+   }
+ }
+ 
+ 
+ 
+ template <typename T,
+ 	  bool     Blocked>
+ Svd_impl<T, Blocked>::~Svd_impl()
+   VSIP_NOTHROW
+ {
+ }
+ 
+ 
+ 
+ /// Decompose matrix M into
+ ///
+ /// Return
+ ///   DEST contains M's singular values.
+ ///
+ /// Requires
+ ///   M to be a full rank, modifiable matrix of ROWS x COLS.
+ 
+ template <typename T,
+ 	  bool     Blocked>
+ template <typename Block0,
+ 	  typename Block1>
+ bool
+ Svd_impl<T, Blocked>::impl_decompose(
+   Matrix<T, Block0>              m,
+   Vector<vsip::scalar_f, Block1> dest)
+   VSIP_NOTHROW
+ {
+   assert(m.size(0) == m_ && m.size(1) == n_);
+   assert(dest.size() == p_);
+ 
+   int lwork   = lwork_gebrd_;
+ 
+   data_ = m;
+ 
+   // Step 1: Reduce general matrix A to bidiagonal form.
+   //
+   // If m >= n, then
+   //   A = Q_1 B_1 P'
+   // Where
+   //   Q_1 (m, n) orthogonal/unitary
+   //   B_1 (n, n) upper diagonal
+   //   P'  (n, n) orthogonal/unitary
+   //
+   // If m < n, then
+   //   A = Q_1 B_1 P'
+   // Where
+   //   Q_1 (m, m) orthogonal/unitary
+   //   B_1 (m, m) lower diagonal
+   //   P'  (m, n) orthogonal/unitary
+   {
+     Ext_data<data_block_type> ext(data_.block());
+ 
+     lapack::gebrd(m_, n_,
+ 		  ext.data(), ext.stride(1),	// A, lda
+ 		  &b_d_[0],			// diagonal of B
+ 		  &b_e_[0],			// off-diagonal of B
+ 		  &tauq_[0],
+ 		  &taup_[0],
+ 		  &work_gebrd_[0], lwork);
+     assert((length_type)lwork <= lwork_gebrd_);
+     // FLOPS:
+     //   scalar : (4/3)*n^2*(3*m-n) for m >= n
+     //          : (4/3)*m^2*(3*n-m) for m <  n
+     //   complex: 4*
+   }
+ 
+   
+   // Step 2: Generate real orthoganol/unitary matrices Q and P'
+ 
+   if (ust_ == svd_uvfull || ust_ == svd_uvpart)
+   {
+     // svd_uvfull: generate whole Q (m_, m_):
+     // svd_uvpart: generate first p_ columns of Q (m_, p_):
+ 
+     length_type cols = (ust_ == svd_uvfull) ? m_ : p_;
+ 
+     if (m_ >= n_)
+       q_(Domain<2>(m_, n_)) = data_;
+     else
+       q_ = data_(Domain<2>(m_, m_));
+ 
+     Ext_data<data_block_type> ext_q(q_.block());
+     lwork   = lwork_gbr_;
+     lapack::gbr('Q', m_, cols, n_,
+ 		ext_q.data(), ext_q.stride(1),	// A, lda
+ 		&tauq_[0],
+ 		&work_gbr_[0], lwork);
+   }
+ 
+ 
+   if (vst_ == svd_uvfull || vst_ == svd_uvpart)
+   {
+     // svd_uvfull: generate whole P' (n_, n_):
+     // svd_uvpart: generate first p_ rows of P' (p_, n_):
+ 
+     length_type rows = (vst_ == svd_uvfull) ? n_ : p_;
+ 
+     if (m_ >= n_)
+       pt_ = data_(Domain<2>(n_, n_));
+     else
+       pt_(Domain<2>(m_, n_)) = data_;
+ 
+     Ext_data<data_block_type> ext_pt(pt_.block());
+     lwork   = lwork_gbr_;
+     lapack::gbr('P', rows, n_, m_,
+ 		ext_pt.data(), ext_pt.stride(1),	// A, lda
+ 		&taup_[0],
+ 		&work_gbr_[0], lwork);
+   }
+ 
+ 
+   {
+     Ext_data<data_block_type> ext_q (q_.block());
+     Ext_data<data_block_type> ext_pt(pt_.block());
+ 
+     length_type nru    = (ust_ != svd_uvnos) ? m_ : 0;
+     T*          q_ptr  = (ust_ != svd_uvnos) ? ext_q.data()    : 0;
+     stride_type q_ld   = (ust_ != svd_uvnos) ? ext_q.stride(1) : 1;
+ 
+     length_type ncvt   = (vst_ != svd_uvnos) ? n_ : 0;
+     T*          pt_ptr = (vst_ != svd_uvnos) ? ext_pt.data()    : 0;
+     stride_type pt_ld  = (vst_ != svd_uvnos) ? ext_pt.stride(1) : 1;
+     
+     // Compute SVD of bidiagonal matrix B.
+ 
+     // Note: MKL says that work-size need only 4*(p_-1), 
+     //       however MKL 5.x needs 4*(p_).
+     vector_type work(4*p_);
+     char uplo = (m_ >= n_) ? 'U' : 'L';
+     lapack::bdsqr(uplo,
+ 		p_,	// Order of matrix B.
+ 		ncvt,	// Number of columns of VT (right singular vectors)
+ 		nru,	// Number of rows of U     (left  singular vectors)
+ 		0,	// Number of columns of C: 0 since no C supplied.
+ 		&b_d_[0],	//
+ 		&b_e_[0],	//
+ 	        pt_ptr, pt_ld,		// [p_ x ncvt]
+ 		q_ptr,  q_ld,		// [nru x p_]
+ 		0, 1,	// Not referenced since ncc = 0
+ 		&work[0]);
+     // Flops (scalar):
+     //  n^2 (singular values)
+     //  6n^2 * nru  (left singular vectors)	(complex 2*)
+     //  6n^2 * ncvt (right singular vectors)	(complex 2*)
+   }
+ 
+   for (index_type i=0; i<p_; ++i)
+     dest(i) = b_d_[i];
+ 
+   return true;
+ }
+ 
+ 
+ 
+ /// prod_uv() is a set of helper routines for produ() and prodv().
+ 
+ /// It is overloaded on Bool_type<Is_complex<T>::value> to handle
+ /// transpose and hermetian properly.  (Tranpose is defined for non-complex
+ /// T, but does not make sense.  Hermetion is only defined for complex
+ /// T).
+ 
+ template <mat_op_type       tr,
+ 	  product_side_type ps,
+ 	  typename          T,
+ 	  typename          Block0,
+ 	  typename          Block1,
+ 	  typename          Block2>
+ inline void
+ prod_uv(
+   const_Matrix<T, Block0> uv,
+   const_Matrix<T, Block1> b,
+   Matrix<T, Block2>       x,
+   Bool_type<false>         /*is_complex*/)
+ {
+   VSIP_IMPL_STATIC_ASSERT(Is_complex<T>::value == false);
+   VSIP_IMPL_STATIC_ASSERT(tr != mat_herm);
+ 
+   if (ps == mat_lside)
+   {
+     if (tr == mat_ntrans)
+     {
+       assert(b.size(0) == uv.size(1));
+       assert(x.size(0) == uv.size(0));
+       assert(b.size(1) == x.size(1));
+       x = prod(uv, b);
+     }
+     else if (tr == mat_trans)
+     {
+       assert(b.size(0) == uv.size(0));
+       assert(x.size(0) == uv.size(1));
+       assert(b.size(1) == x.size(1));
+       x = prod(trans(uv), b);
+     }
+     else if (tr == mat_herm)
+     {
+       assert(false);
+     }
+   }
+   else /* (ps == mat_rside) */
+   {
+     if (tr == mat_ntrans)
+     {
+       assert(b.size(1) == uv.size(0));
+       assert(x.size(1) == uv.size(1));
+       assert(b.size(0) == x.size(0));
+       x = prod(b, uv);
+     }
+     else if (tr == mat_trans)
+     {
+       assert(b.size(1) == uv.size(1));
+       assert(x.size(1) == uv.size(0));
+       assert(b.size(0) == x.size(0));
+       x = prod(b, trans(uv));
+     }
+     else if (tr == mat_herm)
+     {
+       assert(false);
+     }
+   }
+ }
+ 
+ 
+ 
+ template <mat_op_type       tr,
+ 	  product_side_type ps,
+ 	  typename          T,
+ 	  typename          Block0,
+ 	  typename          Block1,
+ 	  typename          Block2>
+ inline void
+ prod_uv(
+   const_Matrix<T, Block0> uv,
+   const_Matrix<T, Block1> b,
+   Matrix<T, Block2>       x,
+   Bool_type<true>         /*is_complex*/)
+ {
+   VSIP_IMPL_STATIC_ASSERT(Is_complex<T>::value == true);
+   VSIP_IMPL_STATIC_ASSERT(tr != mat_trans);
+ 
+   if (ps == mat_lside)
+   {
+     if (tr == mat_ntrans)
+     {
+       assert(b.size(0) == uv.size(1));
+       assert(x.size(0) == uv.size(0));
+       assert(b.size(1) == x.size(1));
+       x = prod(uv, b);
+     }
+     else if (tr == mat_trans)
+     {
+       assert(0);
+     }
+     else if (tr == mat_herm)
+     {
+       assert(b.size(0) == uv.size(0));
+       assert(x.size(0) == uv.size(1));
+       assert(b.size(1) == x.size(1));
+       x = prod(herm(uv), b);
+     }
+   }
+   else /* (ps == mat_rside) */
+   {
+     if (tr == mat_ntrans)
+     {
+       assert(b.size(1) == uv.size(0));
+       assert(x.size(1) == uv.size(1));
+       assert(b.size(0) == x.size(0));
+       x = prod(b, uv);
+     }
+     else if (tr == mat_trans)
+     {
+       assert(0);
+     }
+     else if (tr == mat_herm)
+     {
+       assert(b.size(1) == uv.size(1));
+       assert(x.size(1) == uv.size(0));
+       assert(b.size(0) == x.size(0));
+       x = prod(b, herm(uv));
+     }
+   }
+ }
+ 
+ 
+ 
+ /// Compute product of U and b
+ ///
+ /// If svd_uvpart: U is (m, p)
+ /// If svd_uvfull: U is (m, m)
+ ///
+ /// ustorage   | ps        | tr         | product | b (in) | x (out)
+ /// svd_uvpart | mat_lside | mat_ntrans | U b     | (p, s) | (m, s)
+ /// svd_uvpart | mat_lside | mat_trans  | U' b    | (m, s) | (p, s)
+ /// svd_uvpart | mat_lside | mat_herm   | U* b    | (m, s) | (p, s)
+ ///
+ /// svd_uvpart | mat_rside | mat_ntrans | b U     | (s, m) | (s, p)
+ /// svd_uvpart | mat_rside | mat_trans  | b U'    | (s, p) | (s, m)
+ /// svd_uvpart | mat_rside | mat_herm   | b U*    | (s, p) | (s, m)
+ ///
+ /// svd_uvfull | mat_lside | mat_ntrans | U b     | (m, s) | (m, s)
+ /// svd_uvfull | mat_lside | mat_trans  | U' b    | (m, s) | (m, s)
+ /// svd_uvfull | mat_lside | mat_herm   | U* b    | (m, s) | (m, s)
+ ///
+ /// svd_uvfull | mat_rside | mat_ntrans | b U     | (s, m) | (s, m)
+ /// svd_uvfull | mat_rside | mat_trans  | b U'    | (s, m) | (s, m)
+ /// svd_uvfull | mat_rside | mat_herm   | b U*    | (s, m) | (s, m)
+ 
+ template <typename T,
+ 	  bool     Blocked>
+ template <mat_op_type       tr,
+ 	  product_side_type ps,
+ 	  typename          Block0,
+ 	  typename          Block1>
+ bool
+ Svd_impl<T, Blocked>::impl_produ(
+   const_Matrix<T, Block0> b,
+   Matrix<T, Block1>       x)
+   VSIP_NOTHROW
+ {
+   prod_uv<tr, ps>(this->q_, b, x, Bool_type<Is_complex<T>::value>());
+   return true;
+ }
+ 
+ 
+ 
+ /// Compute product of V and b
+ ///
+ /// Note: product is with V, not V' (unless asked)
+ ///
+ /// If svd_uvpart: V is (n, p)
+ /// If svd_uvfull: V is (n, n)
+ ///
+ /// ustorage   | ps        | tr         | product | b (in) | x (out)
+ /// svd_uvpart | mat_lside | mat_ntrans | V b     | (p, s) | (n, s)
+ /// svd_uvpart | mat_lside | mat_trans  | V' b    | (n, s) | (p, s)
+ /// svd_uvpart | mat_lside | mat_herm   | V* b    | (n, s) | (p, s)
+ ///
+ /// svd_uvpart | mat_rside | mat_ntrans | b V     | (s, n) | (s, p)
+ /// svd_uvpart | mat_rside | mat_trans  | b V'    | (s, p) | (s, n)
+ /// svd_uvpart | mat_rside | mat_herm   | b V*    | (s, p) | (s, n)
+ ///
+ /// svd_uvfull | mat_lside | mat_ntrans | V b     | (n, s) | (n, s)
+ /// svd_uvfull | mat_lside | mat_trans  | V' b    | (n, s) | (n, s)
+ /// svd_uvfull | mat_lside | mat_herm   | V* b    | (n, s) | (n, s)
+ ///
+ /// svd_uvfull | mat_rside | mat_ntrans | b V     | (s, n) | (s, n)
+ /// svd_uvfull | mat_rside | mat_trans  | b V'    | (s, n) | (s, n)
+ /// svd_uvfull | mat_rside | mat_herm   | b V*    | (s, n) | (s, n)
+ 
+ template <typename T,
+ 	  bool     Blocked>
+ template <mat_op_type       tr,
+ 	  product_side_type ps,
+ 	  typename          Block0,
+ 	  typename          Block1>
+ bool
+ Svd_impl<T, Blocked>::impl_prodv(
+   const_Matrix<T, Block0> b,
+   Matrix<T, Block1>       x)
+   VSIP_NOTHROW
+ {
+   prod_uv<tr, ps>(trans_or_herm(this->pt_), b, x,
+ 		  Bool_type<Is_complex<T>::value>());
+   return true;
+ }
+ 
+ 
+ 
+ /// Return the submatrix U containing columns (low .. high) inclusive.
+ 
+ template <typename T,
+ 	  bool     Blocked>
+ template <typename Block>
+ bool
+ Svd_impl<T, Blocked>::impl_u(
+   index_type       low,
+   index_type       high,
+   Matrix<T, Block> u)
+   VSIP_NOTHROW
+ {
+   assert(ust_ == svd_uvpart && high < p_ || ust_ == svd_uvfull && high < m_);
+   assert(u.size(0) == m_);
+   assert(u.size(1) == high - low + 1);
+ 
+   u = q_(Domain<2>(m_, Domain<1>(low, 1, high-low+1)));
+ 
+   return true;
+ }
+ 
+ 
+ 
+ /// Return the submatrix V containing columns (low .. high) inclusive.
+ 
+ template <typename T,
+ 	  bool     Blocked>
+ template <typename Block>
+ bool
+ Svd_impl<T, Blocked>::impl_v(
+   index_type       low,
+   index_type       high,
+   Matrix<T, Block> v)
+   VSIP_NOTHROW
+ {
+   assert(vst_ == svd_uvpart && high < p_ || vst_ == svd_uvfull && high < n_);
+   assert(v.size(0) == n_);
+   assert(v.size(1) == high - low + 1);
+ 
+   v = trans_or_herm(pt_(Domain<2>(Domain<1>(low, 1, high-low+1), n_)));
+ 
+   return true;
+ }
+ 
+ } // namespace vsip::impl
+ 
+ } // namespace vsip
+ 
+ #endif // VSIP_IMPL_SOLVER_SVD_HPP
Index: src/vsip/impl/subblock.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/subblock.hpp,v
retrieving revision 1.33
diff -c -p -r1.33 subblock.hpp
*** src/vsip/impl/subblock.hpp	26 Sep 2005 20:11:05 -0000	1.33
--- src/vsip/impl/subblock.hpp	27 Sep 2005 16:28:28 -0000
*************** class Diag_block 
*** 1541,1547 ****
      }
    length_type size(dimension_type block_d, dimension_type d) const VSIP_NOTHROW
      { 
!       assert(dim == 1 && d == 0);
        return std::min( this->blk_->size(2, 0),
          this->blk_->size(2, 1) );
      }
--- 1541,1547 ----
      }
    length_type size(dimension_type block_d, dimension_type d) const VSIP_NOTHROW
      { 
!       assert(block_d == 1 && dim == 1 && d == 0);
        return std::min( this->blk_->size(2, 0),
          this->blk_->size(2, 1) );
      }
Index: tests/solver-common.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/solver-common.hpp,v
retrieving revision 1.4
diff -c -p -r1.4 solver-common.hpp
*** tests/solver-common.hpp	26 Sep 2005 20:11:05 -0000	1.4
--- tests/solver-common.hpp	27 Sep 2005 16:28:28 -0000
*************** struct Test_traits
*** 50,55 ****
--- 50,57 ----
    static T value2() { return T(0.5);  }
    static T value3() { return T(-0.5); }
    static T conj(T a) { return a; }
+ 
+   static vsip::mat_op_type const trans = vsip::mat_trans;
  };
  
  template <typename T>
*************** struct Test_traits<vsip::complex<T> >
*** 60,65 ****
--- 62,69 ----
    static vsip::complex<T> value2() { return vsip::complex<T>(0.5, 1); }
    static vsip::complex<T> value3() { return vsip::complex<T>(1, -1); }
    static vsip::complex<T> conj(vsip::complex<T> a) { return vsip::conj(a); }
+ 
+   static vsip::mat_op_type const trans = vsip::mat_herm;
  };
  
  
*************** prodh(
*** 174,177 ****
--- 178,236 ----
      }
  }
  
+ 
+ 
+ template <typename T,
+ 	  typename Block1,
+ 	  typename Block2>
+ void
+ compare_view(
+   vsip::const_Vector<T, Block1>           a,
+   vsip::const_Vector<T, Block2>           b,
+   typename vsip::impl::Scalar_of<T>::type thresh
+   )
+ {
+   typedef typename vsip::impl::Scalar_of<T>::type scalar_type;
+ 
+   vsip::Index<1> idx;
+   scalar_type err = vsip::maxval((mag(a - b)
+ 				  / Precision_traits<scalar_type>::eps),
+ 				 idx);
+ 
+   if (err > thresh)
+   {
+     for (vsip::index_type r=0; r<a.size(0); ++r)
+ 	assert(equal(a.get(r), b.get(r)));
+   }
+ }
+ 
+ 
+ 
+ template <typename T,
+ 	  typename Block1,
+ 	  typename Block2>
+ void
+ compare_view(
+   vsip::const_Matrix<T, Block1>           a,
+   vsip::const_Matrix<T, Block2>           b,
+   typename vsip::impl::Scalar_of<T>::type thresh
+   )
+ {
+   typedef typename vsip::impl::Scalar_of<T>::type scalar_type;
+ 
+   vsip::Index<2> idx;
+   scalar_type err = vsip::maxval((mag(a - b)
+ 				  / Precision_traits<scalar_type>::eps),
+ 				 idx);
+ 
+   if (err > thresh)
+   {
+     std::cout << "a = \n" << a;
+     std::cout << "b = \n" << b;
+     for (vsip::index_type r=0; r<a.size(0); ++r)
+       for (vsip::index_type c=0; c<a.size(1); ++c)
+ 	assert(equal(a.get(r, c), b.get(r, c)));
+   }
+ }
+ 
  #endif // VSIP_TESTS_SOLVER_COMMON_HPP
Index: tests/solver-svd.cpp
===================================================================
RCS file: tests/solver-svd.cpp
diff -N tests/solver-svd.cpp
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- tests/solver-svd.cpp	27 Sep 2005 16:28:29 -0000
***************
*** 0 ****
--- 1,545 ----
+ /* Copyright (c) 2005 by CodeSourcery, LLC.  All rights reserved. */
+ 
+ /** @file    tests/solver-svd.cpp
+     @author  Jules Bergmann
+     @date    2005-09-12
+     @brief   VSIPL++ Library: Unit tests SVD solver.
+ */
+ 
+ /***********************************************************************
+   Included Files
+ ***********************************************************************/
+ 
+ #include <cassert>
+ 
+ #include <vsip/initfin.hpp>
+ #include <vsip/support.hpp>
+ #include <vsip/tensor.hpp>
+ #include <vsip/solvers.hpp>
+ 
+ #include "test.hpp"
+ #include "test-precision.hpp"
+ #include "test-random.hpp"
+ #include "solver-common.hpp"
+ 
+ #define VERBOSE  0
+ #define DO_FULL  0
+ 
+ #if VERBOSE
+ #  include <iostream>
+ #  include "output.hpp"
+ #  include "extdata-output.hpp"
+ #endif
+ 
+ using namespace std;
+ using namespace vsip;
+ 
+ 
+ 
+ /***********************************************************************
+   Support
+ ***********************************************************************/
+ 
+ template <typename T,
+ 	  typename Block>
+ typename vsip::impl::Scalar_of<T>::type
+ norm_1(const_Vector<T, Block> v)
+ {
+   return sumval(mag(v));
+ }
+ 
+ 
+ 
+ /// Matrix norm-1
+ 
+ template <typename T,
+ 	  typename Block>
+ typename vsip::impl::Scalar_of<T>::type
+ norm_1(const_Matrix<T, Block> m)
+ {
+   typedef typename vsip::impl::Scalar_of<T>::type scalar_type;
+   scalar_type norm = sumval(mag(m.col(0)));
+ 
+   for (index_type j=1; j<m.size(1); ++j)
+   {
+     norm = std::max(norm, sumval(mag(m.col(j))));
+   }
+ 
+   return norm;
+ }
+ 
+ 
+ 
+ /// Matrix norm-infinity
+ 
+ template <typename T,
+ 	  typename Block>
+ typename vsip::impl::Scalar_of<T>::type
+ norm_inf(const_Matrix<T, Block> m)
+ {
+   return norm_1(m.transpose());
+ }
+ 
+ 
+ 
+ /***********************************************************************
+   svd function tests
+ ***********************************************************************/
+ 
+ template <typename              T,
+ 	  typename              Block0,
+ 	  typename              Block1,
+ 	  typename              Block2,
+ 	  typename              Block3>
+ void
+ apply_svd(
+   svd<T, by_reference>&    sv,
+   Matrix<T, Block0>        a,
+   Vector<scalar_f, Block1> sv_s,
+   Matrix<T, Block2>        sv_u,
+   Matrix<T, Block3>        sv_v)
+ {
+   length_type m = sv.rows();
+   length_type n = sv.columns();
+   length_type p = std::min(m, n);
+   length_type u_columns = sv.ustorage() == svd_uvfull ? m : p;
+   length_type v_rows    = sv.vstorage() == svd_uvfull ? n : p;
+ 
+   sv.decompose(a, sv_s);
+   if (sv.ustorage() != svd_uvnos)
+     sv.u(0, u_columns-1, sv_u);
+   if (sv.vstorage() != svd_uvnos)
+     sv.v(0, v_rows-1,    sv_v);
+ }
+ 
+ 
+ 
+ template <typename              T,
+ 	  typename              Block0,
+ 	  typename              Block1,
+ 	  typename              Block2,
+ 	  typename              Block3>
+ void
+ apply_svd(
+   svd<T, by_value>&        sv,
+   Matrix<T, Block0>        a,
+   Vector<scalar_f, Block1> sv_s,
+   Matrix<T, Block2>        sv_u,
+   Matrix<T, Block3>        sv_v)
+ {
+   length_type m = sv.rows();
+   length_type n = sv.columns();
+   length_type p = std::min(m, n);
+   length_type u_columns = sv.ustorage() == svd_uvfull ? m : p;
+   length_type v_rows    = sv.vstorage() == svd_uvfull ? n : p;
+ 
+   sv_s = sv.decompose(a);
+   if (sv.ustorage() != svd_uvnos)
+     sv_u = sv.u(0, u_columns-1);
+   if (sv.vstorage() != svd_uvnos)
+     sv_v = sv.v(0, v_rows-1);
+ }
+ 
+ 
+ 
+ template <mat_op_type       tr,
+ 	  product_side_type ps,
+ 	  typename          T,
+ 	  typename          Block0,
+ 	  typename          Block1>
+ void
+ apply_svd_produ(
+   svd<T, by_reference>&    sv,
+   const_Matrix<T, Block0>  b,
+   Matrix<T, Block1>        produ)
+ {
+   sv.template produ<tr, ps>(b, produ);
+ }
+ 
+ 
+ 
+ template <mat_op_type       tr,
+ 	  product_side_type ps,
+ 	  typename          T,
+ 	  typename          Block0,
+ 	  typename          Block1>
+ void
+ apply_svd_produ(
+   svd<T, by_value>&        sv,
+   const_Matrix<T, Block0>  b,
+   Matrix<T, Block1>        produ)
+ {
+   produ = sv.template produ<tr, ps>(b);
+ }
+ 
+ 
+ 
+ template <mat_op_type       tr,
+ 	  product_side_type ps,
+ 	  typename          T,
+ 	  typename          Block0,
+ 	  typename          Block1>
+ void
+ apply_svd_prodv(
+   svd<T, by_reference>&    sv,
+   const_Matrix<T, Block0>  b,
+   Matrix<T, Block1>        prodv)
+ {
+   sv.template prodv<tr, ps>(b, prodv);
+ }
+ 
+ 
+ 
+ template <mat_op_type       tr,
+ 	  product_side_type ps,
+ 	  typename          T,
+ 	  typename          Block0,
+ 	  typename          Block1>
+ void
+ apply_svd_prodv(
+   svd<T, by_value>&        sv,
+   const_Matrix<T, Block0>  b,
+   Matrix<T, Block1>        prodv)
+ {
+   prodv = sv.template prodv<tr, ps>(b);
+ }
+ 
+ 
+ 
+ template <return_mechanism_type RtM,
+ 	  typename              T,
+ 	  typename              Block>
+ void
+ test_svd(
+   storage_type     ustorage,
+   storage_type     vstorage,
+   Matrix<T, Block> a,
+   length_type      loop)
+ {
+   using vsip::impl::trans_or_herm;
+   typedef typename vsip::impl::Scalar_of<T>::type scalar_type;
+ 
+   length_type m = a.size(0);
+   length_type n = a.size(1);
+ 
+   length_type p = std::min(m, n);
+   assert(m > 0 && n > 0);
+ 
+   length_type u_cols = ustorage == svd_uvfull ? m : p;
+   length_type v_cols = vstorage == svd_uvfull ? n : p;
+ 
+   Vector<float> sv_s(p);		// singular values
+   Matrix<T>     sv_u(m, u_cols);	// U matrix
+   Matrix<T>     sv_v(n, v_cols);	// V matrix
+ 
+   svd<T, RtM> sv(m, n, ustorage, vstorage);
+ 
+   assert(sv.rows()     == m);
+   assert(sv.columns()  == n);
+   assert(sv.ustorage() == ustorage);
+   assert(sv.vstorage() == vstorage);
+ 
+   for (index_type i=0; i<loop; ++i)
+   {
+     apply_svd(sv, a, sv_s, sv_u, sv_v);
+ 
+     // Check that sv_sv is non-increasing.
+     for (index_type i=0; i<p-1; ++i)
+       assert(sv_s(i) >= sv_s(i+1));
+ 
+     // Check that product of u, s, v equals a.
+     if (ustorage != svd_uvnos && vstorage != svd_uvnos)
+     {
+       Matrix<T> sv_sm(m, n, T());
+       sv_sm.diag() = sv_s;
+ 
+       Matrix<T> chk(m, n);
+       if (ustorage == svd_uvfull && vstorage == svd_uvfull)
+       {
+ 	chk = prod(prod(sv_u, sv_sm), trans_or_herm(sv_v));
+       }
+       else
+       {
+ 	chk = prod(prod(sv_u(Domain<2>(m, p)), sv_sm(Domain<2>(p, p))),
+ 		   trans_or_herm(sv_v(Domain<2>(n, p))));
+       }
+ 
+       Index<2> idx;
+       scalar_type err = maxval((mag(chk - a)
+ 			      / Precision_traits<scalar_type>::eps),
+ 			     idx);
+       scalar_type errx = maxval(mag(chk - a), idx);
+       scalar_type norm_est = std::sqrt(norm_1(a) * norm_inf(a));
+       
+       err  = err / norm_est;
+       errx = errx / norm_est;
+ 
+ #if VERBOSE
+       cout << "a    = " << endl << a  << endl;
+       cout << "sv_s = " << endl << sv_s << endl;
+       cout << "sv_u = " << endl << sv_u << endl;
+       cout << "sv_v = " << endl << sv_v << endl;
+       cout << "chk  = " << endl << chk << endl;
+       cout << "err = " << err << "   "
+ 	   << "norm = " << norm_est << endl;
+       cout << "eps = " << Precision_traits<scalar_type>::eps << endl;
+       cout << "p:" << p << "   "
+ 	   << "err = " << err   << "   "
+ 	   << "errx = " << errx << endl;
+ #endif
+ 
+       if (err > 5.0)
+       {
+ 	for (index_type r=0; r<m; ++r)
+ 	  for (index_type c=0; c<n; ++c)
+ 	    assert(equal(chk(r, c), a(r, c)));
+       }
+     }
+ 
+     const length_type chk_single_uv = 2;
+ 
+     if (ustorage != svd_uvnos)
+     {
+       length_type u_cols = (ustorage == svd_uvfull) ? m : p;
+ 
+       Matrix<T> in_m (m,      m,    T());
+       Matrix<T> in_p (u_cols, u_cols, T());
+ 
+       Matrix<T> pu_nl(m,      u_cols, T());
+       Matrix<T> pu_tl(u_cols, m,      T());
+       Matrix<T> pu_nr(m,      u_cols, T());
+       Matrix<T> pu_tr(u_cols, m,      T());
+ 
+       Vector<T> zero_m(m, T());
+       Vector<T> zero_p(u_cols, T());
+ 
+       index_type pos = 0;
+       for (index_type i=0; i<chk_single_uv; ++i, pos = (17*pos+5)%u_cols)
+       {
+ 	in_m(pos, pos) = T(1);
+ 	in_p(pos, pos) = T(1);
+       
+ 	apply_svd_produ<mat_ntrans,            mat_lside>(sv, in_p, pu_nl);
+ 	apply_svd_produ<Test_traits<T>::trans, mat_lside>(sv, in_m, pu_tl);
+ 	apply_svd_produ<mat_ntrans,            mat_rside>(sv, in_m, pu_nr);
+ 	apply_svd_produ<Test_traits<T>::trans, mat_rside>(sv, in_p, pu_tr);
+ 
+ 	compare_view(pu_nl.col(pos), sv_u.col(pos), 5.0);
+ 	compare_view(pu_tl.col(pos), trans_or_herm(sv_u).col(pos), 5.0);
+ 	compare_view(pu_nr.row(pos), sv_u.row(pos), 5.0);
+ 	compare_view(pu_tr.row(pos), trans_or_herm(sv_u).row(pos), 5.0);
+ 
+ 	for (index_type j=0; j<u_cols; ++j)
+ 	{
+ 	  if (j != pos)
+ 	  {
+ 	    compare_view(pu_nl.col(j), zero_m, 5.0);
+ 	    compare_view(pu_tl.col(j), zero_p, 5.0);
+ 	    compare_view(pu_nr.row(j), zero_p, 5.0);
+ 	    compare_view(pu_tr.row(j), zero_m, 5.0);
+ 	  }
+ 	}
+ 	in_m(pos, pos) = T();
+ 	in_p(pos, pos) = T();
+       }
+     }
+ 
+     if (vstorage != svd_uvnos)
+     {
+       length_type v_cols = (vstorage == svd_uvfull) ? n : p;
+ 
+       Matrix<T> in_p (v_cols, v_cols, T());
+       Matrix<T> in_n (n,      n,      T());
+ 
+       Matrix<T> pv_nl(n,      v_cols, T());
+       Matrix<T> pv_tl(v_cols, n,      T());
+       Matrix<T> pv_nr(n,      v_cols, T());
+       Matrix<T> pv_tr(v_cols, n,      T());
+       
+       Vector<T> zero_n(n,      T());
+       Vector<T> zero_p(v_cols, T());
+       
+       index_type pos = 0;
+       for (index_type i=0; i<chk_single_uv; ++i, pos = (17*pos+5)%v_cols)
+       {
+ 	in_p(pos, pos) = T(1);
+ 	in_n(pos, pos) = T(1);
+       
+ 	apply_svd_prodv<mat_ntrans,            mat_lside>(sv, in_p, pv_nl);
+ 	apply_svd_prodv<Test_traits<T>::trans, mat_lside>(sv, in_n, pv_tl);
+ 	apply_svd_prodv<mat_ntrans,            mat_rside>(sv, in_n, pv_nr);
+ 	apply_svd_prodv<Test_traits<T>::trans, mat_rside>(sv, in_p, pv_tr);
+ 
+ 	compare_view(pv_nl.col(pos), sv_v.col(pos), 5.0);
+ 	compare_view(pv_tl.col(pos), trans_or_herm(sv_v).col(pos), 5.0);
+ 	compare_view(pv_nr.row(pos), sv_v.row(pos), 5.0);
+ 	compare_view(pv_tr.row(pos), trans_or_herm(sv_v).row(pos), 5.0);
+ 	
+ 	for (index_type j=0; j<v_cols; ++j)
+ 	{
+ 	  if (j != pos)
+ 	  {
+ 	    compare_view(pv_nl.col(j), zero_n, 5.0);
+ 	    compare_view(pv_tl.col(j), zero_p, 5.0);
+ 	    compare_view(pv_nr.row(j), zero_p, 5.0);
+ 	    compare_view(pv_tr.row(j), zero_n, 5.0);
+ 	  }
+ 	}
+ 	in_p(pos, pos) = T();
+ 	in_n(pos, pos) = T();
+       }
+     }
+ 
+     // Solver a different problem next iteration.
+     a(0, 0) = a(0, 0) + T(1);
+   }
+ }
+ 
+ 
+ 
+ // Description:
+ 
+ template <return_mechanism_type RtM,
+ 	  typename              T>
+ void
+ test_svd_ident(
+   storage_type ustorage,
+   storage_type vstorage,
+   length_type  m,
+   length_type  n,
+   length_type  loop)
+ {
+   length_type p = std::min(m, n);
+   assert(m > 0 && n > 0);
+ 
+   Matrix<T>     a(m, n);
+   Vector<float> sv_s(p);	// singular values
+   Matrix<T>     sv_u(m, m);	// U matrix
+   Matrix<T>     sv_v(n, n);	// V matrix
+ 
+   // Setup a.
+   a        = T();
+   a.diag() = T(1);
+   if (p > 0) a(0, 0)  = Test_traits<T>::value1();
+   if (p > 2) a(2, 2)  = Test_traits<T>::value2();
+   if (p > 3) a(3, 3)  = Test_traits<T>::value3();
+ 
+   test_svd<RtM>(ustorage, vstorage, a, loop);
+ }
+ 
+ 
+ 
+ template <return_mechanism_type RtM,
+ 	  typename              T>
+ void
+ test_svd_rand(
+   storage_type ustorage,
+   storage_type vstorage,
+   length_type  m,
+   length_type  n,
+   length_type  loop)
+ {
+   typedef typename vsip::impl::Scalar_of<T>::type scalar_type;
+ 
+   length_type p = std::min(m, n);
+   assert(m > 0 && n > 0);
+ 
+   Matrix<T>     a(m, n);
+   Vector<float> sv_s(p);	// singular values
+   Matrix<T>     sv_u(m, m);	// U matrix
+   Matrix<T>     sv_v(n, n);	// U matrix
+ 
+   // Setup a.
+   randm(a);
+ 
+   test_svd<RtM>(ustorage, vstorage, a, loop);
+ }
+ 
+ 
+ 
+ template <return_mechanism_type RtM,
+ 	  typename              T>
+ void svd_cases(
+   storage_type ustorage,
+   storage_type vstorage,
+   length_type  loop)
+ {
+   test_svd_ident<RtM, T>(ustorage, vstorage, 1, 1, loop);
+   test_svd_ident<RtM, T>(ustorage, vstorage, 1, 7, loop);
+   test_svd_ident<RtM, T>(ustorage, vstorage, 9, 1, loop);
+ 
+   test_svd_ident<RtM, T>(ustorage, vstorage, 5,   5, loop);
+   test_svd_ident<RtM, T>(ustorage, vstorage, 16,  5, loop);
+   test_svd_ident<RtM, T>(ustorage, vstorage, 3,  20, loop);
+ 
+   test_svd_rand<RtM, T>(ustorage, vstorage, 5, 5, loop);
+   test_svd_rand<RtM, T>(ustorage, vstorage, 5, 3, loop);
+   test_svd_rand<RtM, T>(ustorage, vstorage, 3, 5, loop);
+ #if DO_FULL
+   test_svd_rand<RtM, T>(ustorage, vstorage, 17, 5, loop);
+   test_svd_rand<RtM, T>(ustorage, vstorage, 5, 17, loop);
+   test_svd_rand<RtM, T>(ustorage, vstorage, 17, 19, loop);
+   test_svd_rand<RtM, T>(ustorage, vstorage, 25, 27, loop);
+   test_svd_rand<RtM, T>(ustorage, vstorage, 32, 32, loop);
+   test_svd_rand<RtM, T>(ustorage, vstorage, 8, 32, loop);
+   test_svd_rand<RtM, T>(ustorage, vstorage, 32, 10, loop);
+ #endif
+ }
+ 
+ 
+ 
+ template <return_mechanism_type RtM>
+ void
+ svd_types(
+   storage_type ustorage,
+   storage_type vstorage,
+   length_type  loop)
+ {
+   svd_cases<RtM, float>(ustorage, vstorage, loop);
+   svd_cases<RtM, double>(ustorage, vstorage, loop);
+   svd_cases<RtM, complex<float> >(ustorage, vstorage, loop);
+   svd_cases<RtM, complex<double> >(ustorage, vstorage, loop);
+ }
+ 
+ 
+ 
+ template <return_mechanism_type RtM>
+ void
+ svd_storage(
+   length_type  loop)
+ {
+   svd_types<RtM>(svd_uvfull, svd_uvfull, loop);
+   svd_types<RtM>(svd_uvpart, svd_uvfull, loop);
+   svd_types<RtM>(svd_uvnos,  svd_uvfull, loop);
+   svd_types<RtM>(svd_uvfull, svd_uvpart, loop);
+   svd_types<RtM>(svd_uvpart, svd_uvpart, loop);
+   svd_types<RtM>(svd_uvnos,  svd_uvpart, loop);
+   svd_types<RtM>(svd_uvfull, svd_uvnos, loop);
+   svd_types<RtM>(svd_uvpart, svd_uvnos, loop);
+   svd_types<RtM>(svd_uvnos,  svd_uvnos, loop);
+ }
+   
+ 
+ 
+ /***********************************************************************
+   Main
+ ***********************************************************************/
+ 
+ template <> float  Precision_traits<float>::eps = 0.0;
+ template <> double Precision_traits<double>::eps = 0.0;
+ 
+ 
+ 
+ int
+ main(int argc, char** argv)
+ {
+   vsipl init(argc, argv);
+ 
+   Precision_traits<float>::compute_eps();
+   Precision_traits<double>::compute_eps();
+ 
+   length_type loop = 2;
+ 
+   svd_storage<by_reference>(loop);
+   svd_storage<by_value>    (loop);
+ }