Actions
|
|
[ Date Prev][ Date Next][ Thread Prev][ Thread Next][ Date Index][ Thread Index]
Re: [vsipl++] [patch] matvec: outer, gem, cumsum
- To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
- Subject: Re: [vsipl++] [patch] matvec: outer, gem, cumsum
- From: Don McCoy <don@xxxxxxxxxxxxxxxx>
- Date: Mon, 03 Oct 2005 23:58:40 -0600
Jules Bergmann wrote:
Don, Looks good. Please check it in, modulo the two comments below.
thanks, -- Jules
Applied and checked in.
Thanks,
--
Don McCoy
CodeSourcery, LLC
Index: ChangeLog
===================================================================
RCS file: /home/cvs/Repository/vpp/ChangeLog,v
retrieving revision 1.285
diff -c -p -r1.285 ChangeLog
*** ChangeLog 3 Oct 2005 12:49:41 -0000 1.285
--- ChangeLog 4 Oct 2005 05:47:27 -0000
***************
*** 1,3 ****
--- 1,10 ----
+ 2005-10-03 Don McCoy <don@xxxxxxxxxxxxxxxx>
+
+ * src/vsip/impl/matvec.hpp: added outer product, gemp,
+ gems and cumsum.
+ * tests/matvec.cpp: added tests for gemp, gems and
+ cumsum which are not covered in ref-impl tests.
+
2005-10-03 Jules Bergmann <jules@xxxxxxxxxxxxxxxx>
Work arounds for ICC 9.0 compilation errors.
Index: src/vsip/impl/matvec.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/matvec.hpp,v
retrieving revision 1.3
diff -c -p -r1.3 matvec.hpp
*** src/vsip/impl/matvec.hpp 27 Sep 2005 22:44:41 -0000 1.3
--- src/vsip/impl/matvec.hpp 4 Oct 2005 05:47:27 -0000
*************** kron( T0 alpha, Matrix<T1, Block1> v, Ma
*** 94,104 ****
/// Class to perform transpose or hermetian (conjugate-transpose),
/// depending on value type.
/// Primary case - perform transpose.
-
template <typename T,
typename Block>
struct Trans_or_herm
--- 94,133 ----
+ template <typename T0, typename T1, typename T2, typename T3, typename T4,
+ typename Block1, typename Block2, typename Block4>
+ void
+ gemp( T0 alpha, const_Matrix<T1, Block1> A,
+ const_Matrix<T2, Block2> B, T3 beta, Matrix<T4, Block4> C)
+ {
+ assert( A.size(0) == C.size(0) );
+ assert( B.size(1) == C.size(1) );
+ assert( A.size(1) == B.size(0) );
+
+ for ( index_type i = A.size(0); i-- > 0; )
+ for ( index_type j = B.size(1); j-- > 0; )
+ C.put(i, j, alpha * dot( A.row(i), B.col(j) ) + beta * C.get(i, j));
+ }
+
+
+ template <typename T0, typename T1, typename T3, typename T4,
+ typename Block1, typename Block4>
+ void
+ gems( T0 alpha, const_Matrix<T1, Block1> A, T3 beta, Matrix<T4, Block4> C)
+ {
+ assert( A.size(0) == C.size(0) );
+ assert( A.size(1) == C.size(1) );
+ C = alpha * A + beta * C;
+ }
+
+
+
+
+
/// Class to perform transpose or hermetian (conjugate-transpose),
/// depending on value type.
/// Primary case - perform transpose.
template <typename T,
typename Block>
struct Trans_or_herm
*************** struct Trans_or_herm
*** 113,119 ****
};
/// Complex specialization - perform hermetian.
-
template <typename T,
typename Block>
struct Trans_or_herm<complex<T>, Block>
--- 142,147 ----
*************** struct Trans_or_herm<complex<T>, Block>
*** 131,140 ****
}
};
-
-
/// Perform transpose or hermetian, depending on value type.
-
template <typename T,
typename Block>
inline
--- 159,165 ----
*************** trans_or_herm(const_Matrix<T, Block> m)
*** 144,149 ****
--- 169,354 ----
return Trans_or_herm<T, Block>::exec(m);
};
+
+
+ // generalized class used to invoke the correct matrix operator
+ template <mat_op_type OpT,
+ typename T,
+ typename Block>
+ struct Apply_mat_op;
+
+ // partial specializations:
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_ntrans, T, Block>
+ {
+ typedef const_Matrix<T, Block> result_type;
+
+ static result_type
+ exec(const_Matrix<T, Block> m) VSIP_NOTHROW
+ {
+ return m;
+ }
+ };
+
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_trans, T, Block>
+ {
+ typedef typename const_Matrix<T, Block>::transpose_type result_type;
+
+ static result_type
+ exec(const_Matrix<T, Block> m) VSIP_NOTHROW
+ {
+ return m.transpose();
+ }
+ };
+
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_herm, T, Block>
+ {
+ typedef typename const_Matrix<T, Block>::transpose_type result_type;
+
+ static result_type
+ exec(const_Matrix<T, Block> m) VSIP_NOTHROW
+ {
+ return impl::trans_or_herm(m);
+ }
+ };
+
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_herm, complex<T>, Block>
+ {
+ typedef typename const_Matrix<complex<T>, Block>::transpose_type
+ transpose_type;
+ typedef impl::Unary_func_view<impl::conj_functor, transpose_type>
+ functor_type;
+ typedef typename functor_type::result_type result_type;
+
+ static result_type
+ exec(const_Matrix<complex<T>, Block> m) VSIP_NOTHROW
+ {
+ return impl::trans_or_herm(m);
+ }
+ };
+
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_conj, T, Block>
+ {
+ typedef const_Matrix<T, Block> result_type;
+
+ static result_type
+ exec(const_Matrix<T, Block> m) VSIP_NOTHROW
+ {
+ return m;
+ }
+ };
+
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_conj, complex<T>, Block>
+ {
+ typedef impl::Unary_func_view<impl::conj_functor,
+ const_Matrix<complex<T>, Block> > functor_type;
+ typedef typename functor_type::result_type result_type;
+
+ static result_type
+ exec(const_Matrix<complex<T>, Block> m) VSIP_NOTHROW
+ {
+ return conj(m);
+ }
+ };
+
+
+ // convenience function to use Apply_mat_op:
+ template <mat_op_type OpT,
+ typename T,
+ typename Block>
+ typename Apply_mat_op<OpT, T, Block>::result_type
+ apply_mat_op(const_Matrix<T, Block> m)
+ {
+ return Apply_mat_op<OpT, T, Block>::exec(m);
+ }
+
+
+
+ template <dimension_type d,
+ typename T0,
+ typename T1,
+ typename Block0,
+ typename Block1>
+ void
+ cumsum(
+ const_Vector<T0, Block0> v,
+ Vector<T1, Block1> w)
+ VSIP_NOTHROW
+ {
+ // Effects: w has values equaling the cumulative sum of values in v.
+ //
+ // If View is Vector, d is ignored and, for
+ // 0 <= i < v.size(),
+ // w.get(i) equals the sum over 0 <= j <= i of v.get(j)
+ assert( v.size() == w.size() );
+
+ T1 sum = T0();
+ for ( index_type i = 0; i < v.size(); ++i )
+ {
+ sum += v.get(i);
+ w.put(i, sum);
+ }
+ }
+
+
+ template <dimension_type d,
+ typename T0,
+ typename T1,
+ typename Block0,
+ typename Block1>
+ void
+ cumsum(
+ const_Matrix<T0, Block0> v,
+ Matrix<T1, Block1> w)
+ VSIP_NOTHROW
+ {
+ if ( d == 0 )
+ {
+ // If View is Matrix and d == 0, then, for
+ // 0 <= m < v.size(0) and 0 <= i < v.size(1),
+ // w.get(m, i) equals the sum over 0 <= j <= i of v.get(m, j).
+
+ for ( index_type m = 0; m < v.size(0); ++m )
+ {
+ T1 sum = T0();
+ for ( index_type i = 0; i < v.size(1); ++i )
+ {
+ sum += v.get(m, i);
+ w.put(m, i, sum);
+ }
+ }
+ }
+ else
+ if ( d == 1 )
+ {
+ // If View is Matrix and d == 1, then, for
+ // 0 <= i < v.size(0) and 0 <= n < v.size(1),
+ // w.get(i, n) equals the sum over 0 <= j <= i of v.get(j, n).
+
+ for ( index_type n = 0; n < v.size(1); ++n )
+ {
+ T1 sum = T0();
+ for ( index_type i = 0; i < v.size(0); ++i )
+ {
+ sum += v.get(i, n);
+ w.put(i, n, sum);
+ }
+ }
+ }
+ }
+
+
} // namespace impl
*************** kron( T0 alpha, const_View<T1, Block1> v
*** 228,233 ****
--- 433,558 ----
}
+ // Outer product [math.matvec.outer]
+
+ /// outer product of two scalar vectors
+ template <typename T0,
+ typename T1,
+ typename T2,
+ typename Block1,
+ typename Block2>
+ const_Matrix<typename Promotion<T0, typename Promotion<T1, T2>::type>::type>
+ outer( T0 alpha, const_Vector<T1, Block1> v, const_Vector<T2, Block2> w )
+ VSIP_NOTHROW
+ {
+ typedef Matrix<typename Promotion<T0,
+ typename Promotion<T1, T2>::type>::type> return_type;
+ return_type r( v.size(), w.size(), alpha );
+
+ for ( index_type i = v.size(); i-- > 0; )
+ for ( index_type j = w.size(); j-- > 0; )
+ r.put( i, j, alpha * v.get(i) * w.get(j) );
+
+ return r;
+ }
+
+ /// outer product of two complex vectors
+ template <typename T0,
+ typename T1,
+ typename T2,
+ typename Block1,
+ typename Block2>
+ const_Matrix<typename Promotion<T0,
+ typename Promotion<std::complex<T1>, std::complex<T2> >::type>::type>
+ outer( T0 alpha, const_Vector<std::complex<T1>, Block1> v,
+ const_Vector<std::complex<T2>, Block2> w )
+ VSIP_NOTHROW
+ {
+ typedef Matrix<typename Promotion<T0,
+ typename Promotion<std::complex<T1>, std::complex<T2> >::type
+ >::type> return_type;
+ return_type r( v.size(), w.size(), alpha );
+
+ for ( index_type i = v.size(); i-- > 0; )
+ for ( index_type j = w.size(); j-- > 0; )
+ r.put( i, j, alpha * v.get(i) * conj(w.get(j)) );
+
+ return r;
+ }
+
+
+ // Generalized Matrix operations [math.matvec.gem]
+
+ /// generalized matrix product
+ template <mat_op_type OpA,
+ mat_op_type OpB,
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename Block1,
+ typename Block2,
+ typename Block4>
+ void
+ gemp(
+ T0 alpha,
+ const_Matrix<T1, Block1> A,
+ const_Matrix<T2, Block2> B,
+ T3 beta,
+ Matrix<T4, Block4> C)
+ VSIP_NOTHROW
+ {
+ // equivalent to C = alpha * OpA(A) * OpB(B) + beta * C
+ impl::gemp( alpha,
+ impl::apply_mat_op<OpA>(A),
+ impl::apply_mat_op<OpB>(B),
+ beta,
+ C );
+ }
+
+
+ /// Generalized matrix sum
+ template <mat_op_type OpA,
+ typename T0,
+ typename T1,
+ typename T3,
+ typename T4,
+ typename Block1,
+ typename Block4>
+ void
+ gems(
+ T0 alpha,
+ const_Matrix<T1, Block1> A,
+ T3 beta,
+ Matrix<T4, Block4> C)
+ VSIP_NOTHROW
+ {
+ impl::gems( alpha,
+ impl::apply_mat_op<OpA>(A),
+ beta,
+ C );
+ }
+
+
+ // Miscellaneous functions [math.matvec.misc]
+
+ /// cumulative sum
+ template <dimension_type d,
+ typename T0,
+ typename T1,
+ template <typename, typename> class const_View,
+ template <typename, typename> class View,
+ typename Block0,
+ typename Block1>
+ void
+ cumsum(
+ const_View<T0, Block0> v,
+ View<T1, Block1> w)
+ VSIP_NOTHROW
+ {
+ impl::cumsum<d>(v, w);
+ }
} // namespace vsip
Index: tests/matvec.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/matvec.cpp,v
retrieving revision 1.1
diff -c -p -r1.1 matvec.cpp
*** tests/matvec.cpp 19 Sep 2005 21:06:46 -0000 1.1
--- tests/matvec.cpp 4 Oct 2005 05:47:27 -0000
***************
*** 11,17 ****
***********************************************************************/
#include <cassert>
- #include <iostream>
#include <vsip/initfin.hpp>
#include <vsip/support.hpp>
--- 11,16 ----
***************
*** 19,43 ****
#include <vsip/math.hpp>
#include "test.hpp"
#include "output.hpp"
using namespace std;
using namespace vsip;
/***********************************************************************
Main
***********************************************************************/
-
int
main(int argc, char** argv)
{
vsipl init(argc, argv);
-
// Test Matrix-Matrix Kronecker
Matrix<scalar_f>
--- 18,349 ----
#include <vsip/math.hpp>
#include "test.hpp"
+ #include "test-random.hpp"
#include "output.hpp"
using namespace std;
using namespace vsip;
+ /***********************************************************************
+ Definitions
+ ***********************************************************************/
+
+
+ template <typename T>
+ void
+ Check_gem_results( Matrix<T> actual, Matrix<T> expected )
+ {
+ for ( index_type row = 0; row < actual.size(0); ++row )
+ for ( index_type col = 0; col < actual.size(1); ++col )
+ assert( equal( actual.get(row, col), expected.get(row, col) ) );
+ }
+
+
+ template <typename T>
+ void
+ Test_gemp( T alpha, T beta, length_type M, length_type P, length_type N )
+ {
+ Matrix<T> a (M, P);
+ Matrix<T> b (P, N);
+ Matrix<T> c (M, N);
+ Matrix<T> d (M, N);
+
+ Matrix<T> a_t (P, M);
+ Matrix<T> b_t (N, P);
+
+ // fill in unique values for each element of a, b and c
+ randm(a);
+ randm(b);
+ randm(c);
+ a_t = a.transpose();
+ b_t = b.transpose();
+
+
+ // compute the expected result for d
+ index_type row;
+ index_type col;
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a.get(row, i) * b.get(i, col);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_ntrans, mat_ntrans>(alpha, a, b, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // re-compute the result with remaining types
+
+ // trans, no trans
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a_t.get(i, row) * b.get(i, col);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_trans, mat_ntrans>(alpha, a_t, b, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // no trans, trans
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a.get(row, i) * b_t.get(col, i);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_ntrans, mat_trans>(alpha, a, b_t, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // trans, trans
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a_t.get(i, row) * b_t.get(col, i);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_trans, mat_trans>(alpha, a_t, b_t, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // herm, trans
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += impl::impl_conj(a_t.get(i, row)) * b_t.get(col, i);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_herm, mat_trans>(alpha, a_t, b_t, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // ntrans, conj
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a.get(row, i) * impl::impl_conj(b.get(i, col));
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_ntrans, mat_conj>(alpha, a, b, beta, c);
+
+ Check_gem_results( c, d );
+ }
+
+
+ template <typename T>
+ void
+ Test_gems( T alpha, T beta, length_type M, length_type P, length_type N )
+ {
+ Matrix<T> a (M, N);
+ Matrix<T> b (M, N);
+ Matrix<T> c (M, N);
+ Matrix<T> d (M, N);
+
+ Matrix<T> a_t (N, M);
+
+
+ // fill in unique values for each element of a and c
+ randm(a);
+ randm(b); // save copy for later
+ c = b;
+
+
+ // without trans
+ // compute the expected result for d
+ for ( index_type row = 0; row < M; ++row )
+ for ( index_type col = 0; col < N; ++col )
+ d.put( row, col, alpha * a.get(row, col) + beta * c(row, col) );
+
+ // compute the actual result (updated in c)
+ gems<mat_ntrans>(alpha, a, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // create the transposes of a and restore c
+ c = b;
+ a_t = a.transpose();
+
+ // with trans
+ // expected result for d will stay the same because now we use
+ // the transpose of a and request that it take the transpose
+ // of that when computing the sum
+
+ // compute the actual result (updated in c)
+ gems<mat_trans>(alpha, a_t, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // restore c
+ c = b;
+
+ // with herm
+ // compute the expected result for d
+ for ( index_type row = 0; row < M; ++row )
+ for ( index_type col = 0; col < N; ++col )
+ d.put( row, col, alpha * impl::impl_conj(a_t.get(col, row)) + beta * c(row, col) );
+
+ // compute the actual result (updated in c)
+ gems<mat_herm>(alpha, a_t, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // restore c
+ c = b;
+
+ // with conj
+ // compute the expected result for d
+ for ( index_type row = 0; row < M; ++row )
+ for ( index_type col = 0; col < N; ++col )
+ d.put( row, col, alpha * impl::impl_conj(a.get(row, col)) + beta * c(row, col) );
+
+ // compute the actual result (updated in c)
+ gems<mat_conj>(alpha, a, beta, c);
+
+ Check_gem_results( c, d );
+ }
+
+
+ template <typename T>
+ void
+ Test_gem_types( T alpha, T beta )
+ {
+ // last 3 params are M, N, P (for M x N and N x P matricies)
+
+ // generalized matrix product
+ Test_gemp<T>( alpha, beta, 7, 3, 5 );
+ Test_gemp<T>( alpha, beta, 7, 9, 5 );
+ Test_gemp<T>( alpha, beta, 5, 9, 7 );
+ Test_gemp<T>( alpha, beta, 5, 3, 7 );
+
+ // generalized matrix sum
+ Test_gems<T>( alpha, beta, 7, 3, 5 );
+ Test_gems<T>( alpha, beta, 7, 9, 5 );
+ Test_gems<T>( alpha, beta, 5, 9, 7 );
+ Test_gems<T>( alpha, beta, 5, 3, 7 );
+ }
+
+
+
+ void
+ Test_cumsum()
+ {
+ // simple sum of a vector containing scalars
+ length_type const len = 5;
+ Vector<scalar_f> v1( len );
+ Vector<scalar_f> v2( len );
+ scalar_f sum = 0;
+
+ for ( index_type i = 0; i < len; ++i )
+ {
+ v1.put( i, i + 1 );
+ sum += i + 1;
+ }
+
+ cumsum<0>( v1, v2 );
+ assert( equal( sum, v2.get(len - 1) ) );
+
+
+ // simple sum of a vector containing complex<scalars>
+ Vector<cscalar_f> cv1( len );
+ Vector<cscalar_f> cv2( len );
+ cscalar_f csum = cscalar_f();
+
+ for ( index_type i = 0; i < len; ++i )
+ {
+ cv1.put( i, complex<float>( i + 1, i + 1 ) );
+ csum += complex<float>( i + 1, i + 1 );
+ }
+
+ cumsum<0>( cv1, cv2 );
+ assert( equal( csum, cv2.get(len - 1) ) );
+
+
+ // sum of a matrix using scalars
+ length_type const rows = 5;
+ length_type const cols = 7;
+ Matrix<scalar_f> m1( rows, cols );
+ Matrix<scalar_f> m2( rows, cols );
+ scalar_f colsum[7];
+ scalar_f rowsum[5];
+
+ for ( index_type i = 0; i < rows; ++i )
+ {
+ rowsum[i] = 0;
+ for ( index_type j = 0; j < cols; ++j )
+ {
+ m1.put( i, j, i + 1 + j * rows );
+ rowsum[i] += i + 1 + j * rows;
+ }
+ }
+
+ for ( index_type j = 0; j < cols; ++j )
+ {
+ colsum[j] = 0;
+ for ( index_type i = 0; i < rows; ++i )
+ colsum[j] += i + 1 + j * rows;
+ }
+
+ // sum across rows of a matrix
+ cumsum<0>( m1, m2 );
+ for ( index_type i = 0; i < rows; ++i )
+ assert( equal( rowsum[i], m2.get(i, cols - 1) ) );
+
+
+ // sum across columns of a matrix
+ cumsum<1>( m1, m2 );
+ for ( index_type j = 0; j < cols; ++j )
+ assert( equal( colsum[j], m2.get(rows - 1, j) ) );
+ }
/***********************************************************************
Main
***********************************************************************/
int
main(int argc, char** argv)
{
vsipl init(argc, argv);
// Test Matrix-Matrix Kronecker
Matrix<scalar_f>
*************** main(int argc, char** argv)
*** 55,59 ****
--- 361,385 ----
assert( equal( kron_mn.get( a, b ),
static_cast<scalar_f>(7 * 11 * 2.0) ) );
+
+ // Test generalized matrix operations
+
+ // params: alpha, beta
+ Test_gem_types<float>( M_E, M_PI );
+
+ Test_gem_types<double>( -M_E, -M_PI );
+
+ Test_gem_types<complex<float> >
+ ( complex<float>(M_LN2, -M_SQRT2), complex<float>(M_LOG2E, M_LN10) );
+
+ Test_gem_types<complex<double> >
+ ( complex<float>(M_LN2, -M_SQRT2), complex<float>(M_LOG2E, M_LN10) );
+
+
+ // misc functions
+
+ Test_cumsum();
+
return 0;
}
+
|