Actions
|
|
[ Date Prev][ Date Next][ Thread Prev][ Thread Next][ Date Index][ Thread Index]
[patch] matvec: outer, gem, cumsum
- To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
- Subject: [patch] matvec: outer, gem, cumsum
- From: Don McCoy <don@xxxxxxxxxxxxxxxx>
- Date: Tue, 27 Sep 2005 12:30:57 -0600
The attached patch rounds out the functionality of [math.matvec] with
the exception of a few of the matrix-vector product functions. Since
those are implemented in a separate file, this patch stands by itself
pretty well.
--
Don McCoy
CodeSourcery, LLC
2005-09-27 Don McCoy <don@xxxxxxxxxxxxxxxx>
* src/vsip/impl/matvec.hpp: added outer product, gemp,
gems and cumsum.
* tests/matvec.cpp: added tests for gemp, gems and
cumsum which are not covered in ref-impl tests.
Index: src/vsip/impl/matvec.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/matvec.hpp,v
retrieving revision 1.1
diff -c -p -r1.1 matvec.hpp
*** src/vsip/impl/matvec.hpp 19 Sep 2005 21:06:46 -0000 1.1
--- src/vsip/impl/matvec.hpp 27 Sep 2005 18:21:07 -0000
*************** kron( T0 alpha, Matrix<T1, Block1> v, Ma
*** 90,95 ****
--- 90,245 ----
return r;
}
+
+
+ template <typename T0, typename T1, typename T2, typename T3, typename T4,
+ typename Block1, typename Block2, typename Block4>
+ void
+ gemp( T0 alpha, const_Matrix<T1, Block1> A,
+ const_Matrix<T2, Block2> B, T3 beta, Matrix<T4, Block4> C)
+ {
+ assert( A.size(0) == C.size(0) );
+ assert( B.size(1) == C.size(1) );
+
+ for ( index_type i = A.size(0); i-- > 0; )
+ for ( index_type j = B.size(1); j-- > 0; )
+ C.put(i, j, alpha * dot( A.row(i), B.col(j) ) + beta * C.get(i, j));
+ }
+
+
+
+ // The general cases...
+ template <mat_op_type OpA, mat_op_type OpB,
+ typename T0, typename T1, typename T2, typename T3, typename T4,
+ typename Block1, typename Block2, typename Block4>
+ struct Gemp
+ {
+ static void apply(T0 alpha, const_Matrix<T1, Block1> A,
+ const_Matrix<T2, Block2> B, T3 beta, Matrix<T4, Block4> C) {}
+ };
+
+ // OpA and OpB are both mat_ntrans
+ template <typename T0, typename T1, typename T2, typename T3, typename T4,
+ typename Block1, typename Block2, typename Block4>
+ struct Gemp<mat_ntrans, mat_ntrans, T0, T1, T2, T3, T4, Block1, Block2, Block4>
+ {
+ static void apply(T0 alpha, const_Matrix<T1, Block1> A,
+ const_Matrix<T2, Block2> B, T3 beta, Matrix<T4, Block4> C)
+ {
+ gemp( alpha, A, B, beta, C );
+ }
+ };
+
+ // OpA and OpB are both mat_trans
+ template <typename T0, typename T1, typename T2, typename T3, typename T4,
+ typename Block1, typename Block2, typename Block4>
+ struct Gemp<mat_trans, mat_trans, T0, T1, T2, T3, T4, Block1, Block2, Block4>
+ {
+ static void apply(T0 alpha, const_Matrix<T1, Block1> A,
+ const_Matrix<T2, Block2> B, T3 beta, Matrix<T4, Block4> C)
+ {
+ gemp( alpha, A.transpose(), B.transpose(), beta, C );
+ }
+ };
+
+ // OpA is mat_trans and OpB is mat_ntrans
+ template <typename T0, typename T1, typename T2, typename T3, typename T4,
+ typename Block1, typename Block2, typename Block4>
+ struct Gemp<mat_trans, mat_ntrans, T0, T1, T2, T3, T4, Block1, Block2, Block4>
+ {
+ static void apply(T0 alpha, const_Matrix<T1, Block1> A,
+ const_Matrix<T2, Block2> B, T3 beta, Matrix<T4, Block4> C)
+ {
+ gemp( alpha, A.transpose(), B, beta, C );
+ }
+ };
+
+ // OpA is mat_ntrans and OpB is mat_trans
+ template <typename T0, typename T1, typename T2, typename T3, typename T4,
+ typename Block1, typename Block2, typename Block4>
+ struct Gemp<mat_ntrans, mat_trans, T0, T1, T2, T3, T4, Block1, Block2, Block4>
+ {
+ static void apply(T0 alpha, const_Matrix<T1, Block1> A,
+ const_Matrix<T2, Block2> B, T3 beta, Matrix<T4, Block4> C)
+ {
+ gemp( alpha, A, B.transpose(), beta, C );
+ }
+ };
+
+
+ template <dimension_type d,
+ typename T0,
+ typename T1,
+ typename Block0,
+ typename Block1>
+ void
+ cumsum(
+ const_Vector<T0, Block0> v,
+ Vector<T1, Block1> w)
+ VSIP_NOTHROW
+ {
+ // Effects: w has values equaling the cumulative sum of values in v.
+ //
+ // If View is Vector, d is ignored and, for
+ // 0 <= i < v.size(),
+ // w.get(i) equals the sum over 0 <= j <= i of v.get(j)
+ assert( v.size() == w.size() );
+
+ for ( index_type i = 0; i < v.size(); ++i )
+ {
+ T1 sum = T0();
+ for ( index_type j = 0; j <= i; ++j )
+ sum += v.get(j);
+ w.put(i, sum);
+ }
+ }
+
+
+ template <dimension_type d,
+ typename T0,
+ typename T1,
+ typename Block0,
+ typename Block1>
+ void
+ cumsum(
+ const_Matrix<T0, Block0> v,
+ Matrix<T1, Block1> w)
+ VSIP_NOTHROW
+ {
+ if ( d == 0 )
+ {
+ // If View is Matrix and d == 0, then, for
+ // 0 <= m < v.size(0) and 0 <= i < v.size(1),
+ // w.get(m, i) equals the sum over 0 <= j <= i of v.get(m, j).
+
+ for ( index_type m = 0; m < v.size(0); ++m )
+ for ( index_type i = 0; i < v.size(1); ++i )
+ {
+ T1 sum = T0();
+ for ( index_type j = 0; j <= i; ++j )
+ sum += v.get(m, j);
+ w.put(m, i, sum);
+ }
+ }
+ else
+ if ( d == 1 )
+ {
+ // If View is Matrix and d == 1, then, for
+ // 0 <= i < v.size(0) and 0 <= n < v.size(1),
+ // w.get(i, n) equals the sum over 0 <= j <= i of v.get(j, n).
+
+ for ( index_type i = 0; i < v.size(0); ++i )
+ for ( index_type n = 0; n < v.size(1); ++n )
+ {
+ T1 sum = T0();
+ for ( index_type j = 0; j <= i; ++j )
+ sum += v.get(j, n);
+ w.put(i, n, sum);
+ }
+ }
+ }
+
+
} // namespace impl
*************** kron( T0 alpha, const_View<T1, Block1> v
*** 174,179 ****
--- 324,455 ----
}
+ // Outer product [math.matvec.outer]
+
+ /// outer product of two scalar vectors
+ template <typename T0,
+ typename T1,
+ typename T2,
+ typename Block1,
+ typename Block2>
+ const_Matrix<typename Promotion<T0, typename Promotion<T1, T2>::type>::type>
+ outer( T0 alpha, const_Vector<T1, Block1> v, const_Vector<T2, Block2> w )
+ VSIP_NOTHROW
+ {
+ typedef Matrix<typename Promotion<T0,
+ typename Promotion<T1, T2>::type>::type> return_type;
+ return_type r( v.size(), w.size(), alpha );
+
+ for ( index_type i = v.size(); i-- > 0; )
+ for ( index_type j = w.size(); j-- > 0; )
+ r.put( i, j, alpha * v.get(i) * w.get(j) );
+
+ return r;
+ }
+
+ /// outer product of two complex vectors
+ template <typename T0,
+ typename T1,
+ typename T2,
+ typename Block1,
+ typename Block2>
+ const_Matrix<typename Promotion<T0, typename Promotion<T1, T2>::type>::type>
+ outer( T0 alpha, const_Vector<std::complex<T1>, Block1> v,
+ const_Vector<std::complex<T2>, Block2> w )
+ VSIP_NOTHROW
+ {
+ typedef Matrix<typename Promotion<T0,
+ typename Promotion<T1, T2>::type>::type> return_type;
+ return_type r( v.size(), w.size(), alpha );
+
+ for ( index_type i = v.size(); i-- > 0; )
+ for ( index_type j = w.size(); j-- > 0; )
+ r.put( i, j, alpha * v.get(i) * conj(w.get(j)) );
+
+ return r;
+ }
+
+
+
+ // Generalized Matrix operations [math.matvec.gem]
+
+ /// generalized matrix product
+ template <mat_op_type OpA,
+ mat_op_type OpB,
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename Block1,
+ typename Block2,
+ typename Block4>
+ void
+ gemp(
+ T0 alpha,
+ const_Matrix<T1, Block1> A,
+ const_Matrix<T2, Block2> B,
+ T3 beta,
+ Matrix<T4, Block4> C)
+ VSIP_NOTHROW
+ {
+ // equivalent to C = alpha * OpA(A) * OpB(B) + beta * C
+ impl::Gemp<OpA, OpB, T0, T1, T2, T3, T4,
+ Block1, Block2, Block4>::apply(alpha, A, B, beta, C);
+ }
+
+
+
+ /// Generalized matrix sum
+ template <mat_op_type OpA,
+ typename T0,
+ typename T1,
+ typename T3,
+ typename T4,
+ typename Block1,
+ typename Block4>
+ void
+ gems(
+ T0 alpha,
+ const_Matrix<T1, Block1> A,
+ T3 beta,
+ Matrix<T4, Block4> C)
+ VSIP_NOTHROW
+ {
+ // equivalent to C = alpha * OpA(A) + beta * C
+ if ( OpA == mat_trans )
+ {
+ assert( A.size(1) == C.size(0) );
+ assert( A.size(0) == C.size(1) );
+ C = alpha * A.transpose() + beta * C;
+ }
+ else
+ {
+ assert( A.size(0) == C.size(0) );
+ assert( A.size(1) == C.size(1) );
+ C = alpha * A + beta * C;
+ }
+ }
+
+
+ // Miscellaneous functions [math.matvec.misc]
+
+ /// cumulative sum
+ template <dimension_type d,
+ typename T0,
+ typename T1,
+ template <typename, typename> class const_View,
+ template <typename, typename> class View,
+ typename Block0,
+ typename Block1>
+ void
+ cumsum(
+ const_View<T0, Block0> v,
+ View<T1, Block1> w)
+ VSIP_NOTHROW
+ {
+ impl::cumsum<d>(v, w);
+ }
} // namespace vsip
Index: tests/matvec.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/matvec.cpp,v
retrieving revision 1.1
diff -c -p -r1.1 matvec.cpp
*** tests/matvec.cpp 19 Sep 2005 21:06:46 -0000 1.1
--- tests/matvec.cpp 27 Sep 2005 18:21:07 -0000
*************** using namespace std;
*** 25,43 ****
using namespace vsip;
/***********************************************************************
Main
***********************************************************************/
-
int
main(int argc, char** argv)
{
vsipl init(argc, argv);
-
// Test Matrix-Matrix Kronecker
Matrix<scalar_f>
--- 25,219 ----
using namespace vsip;
+ /***********************************************************************
+ Definitions
+ ***********************************************************************/
+
+ #define M 4
+ #define N 5
+ #define P 3
+
+ template <typename T>
+ void
+ Check_gem_results( Matrix<T> actual, Matrix<T> expected )
+ {
+ for ( index_type row = 0; row < M; ++row )
+ for ( index_type col = 0; col < N; ++col )
+ assert( equal( actual.get(row, col), expected.get(row, col) ) );
+ }
+
+
+ template <typename T>
+ void
+ Test_gemp( T alpha, T beta, T a_base, T b_base, T c_base )
+ {
+ Matrix<T> a (M, P);
+ Matrix<T> b (P, N);
+ Matrix<T> c (M, N);
+ Matrix<T> d (M, N);
+
+ Matrix<T> a_t (P, M);
+ Matrix<T> b_t (N, P);
+
+ // fill in unique values for each element of a, b and c
+ index_type row, col;
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < P; ++col )
+ a.put( row, col, vsip::sqrt( T(col * M + row) + a_base ) );
+
+ for ( row = 0; row < P; ++row )
+ for ( col = 0; col < N; ++col )
+ b.put( row, col, vsip::log( T((P + row) * (N + col)) + b_base ) );
+
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ c.put( row, col, vsip::cos( T(row * N + col) + c_base ) );
+
+ for ( row = 0; row < P; ++row )
+ for ( col = 0; col < M; ++col )
+ a_t.put( row, col, vsip::sqrt( T(col * M + row) + a_base ) );
+
+ for ( row = 0; row < N; ++row )
+ for ( col = 0; col < P; ++col )
+ b_t.put( row, col, vsip::log( T((P + row) * (N + col)) + b_base ) );
+
+
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a.get(row, i) * b.get(i, col);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_ntrans, mat_ntrans>(alpha, a, b, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // re-compute the result with remaining types
+
+ // trans, no trans
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a_t.get(i, row) * b.get(i, col);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_trans, mat_ntrans>(alpha, a_t, b, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // no trans, trans
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a.get(row, i) * b_t.get(col, i);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_ntrans, mat_trans>(alpha, a, b_t, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // trans, trans
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a_t.get(i, row) * b_t.get(col, i);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_trans, mat_trans>(alpha, a_t, b_t, beta, c);
+
+ Check_gem_results( c, d );
+ }
+
+
+ template <typename T>
+ void
+ Test_gems( T alpha, T beta, T a_base, T c_base )
+ {
+ Matrix<T> a (M, N);
+ Matrix<T> c (M, N);
+ Matrix<T> d (M, N);
+
+ Matrix<T> a_t (N, M);
+
+ // first check gems without trans
+
+ // fill in unique values for each element of a and c
+ index_type row, col;
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ a.put( row, col, vsip::sqrt( T(col * M + row) + a_base ) );
+
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ c.put( row, col, vsip::cos( T(row * N + col) + c_base ) );
+
+
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ d.put( row, col, alpha * a.get(row, col) + beta * c(row, col) );
+
+ // compute the actual result (updated in c)
+ gems<mat_ntrans>(alpha, a, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // second check gems with trans
+
+ // create the transposes of a and restore c
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ a_t.put( col, row, vsip::sqrt( T(col * M + row) + a_base ) );
+
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ c.put( row, col, vsip::cos( T(row * N + col) + c_base ) );
+
+ // expected result for d will stay the same because now we use
+ // the transpose of a and request that it take the transpose
+ // of that when computing the sum
+
+ // compute the actual result (updated in c)
+ gems<mat_trans>(alpha, a_t, beta, c);
+
+ Check_gem_results( c, d );
+ }
+
/***********************************************************************
Main
***********************************************************************/
int
main(int argc, char** argv)
{
vsipl init(argc, argv);
// Test Matrix-Matrix Kronecker
Matrix<scalar_f>
*************** main(int argc, char** argv)
*** 55,59 ****
--- 231,296 ----
assert( equal( kron_mn.get( a, b ),
static_cast<scalar_f>(7 * 11 * 2.0) ) );
+
+ // Test generalized matrix product
+ // (alpha, beta, a_base, b_base, c_base)
+
+ Test_gemp<scalar_f>
+ ( 2.0, 3.5, 1.1, 2.2, 3.3 );
+
+ Test_gemp<double>
+ ( 2.0, 3.5, 1.1, 2.2, 3.3 );
+
+ Test_gemp<cscalar_f>
+ ( cscalar_f(2.0, -1.0), cscalar_f(3.5, 1),
+ cscalar_f(1.1, -1.1), cscalar_f(-2.2, 2.2),
+ cscalar_f(-3.3, -3.3) );
+
+ Test_gemp<complex<double> >
+ ( complex<double>(2.0, -1.0), complex<double>(3.5, 1),
+ complex<double>(1.1, -1.1), complex<double>(-2.2, 2.2),
+ complex<double>(-3.3, -3.3) );
+
+
+ Test_gems<scalar_f>
+ ( 2.0, 3.5, 1.1, 3.3 );
+
+
+ // misc functions
+
+ // simple sum of a vector containing scalars
+ scalar_f const val = 1.0;
+ length_type const len = 5;
+ const_Vector<scalar_f> v1( len, val );
+ Vector<scalar_f> v2( len );
+
+ cumsum<0>( v1, v2 );
+ assert( equal( len * val, v2.get(len - 1) ) );
+
+ // simple sum of a vector containing complex<scalars>
+ const_Vector<cscalar_f> cv1( len, cscalar_f(val, val) );
+ Vector<cscalar_f> cv2( len );
+
+ cumsum<0>( cv1, cv2 );
+ assert( equal( cscalar_f(len * val, len * val),
+ cv2.get(len - 1) ) );
+
+ // sum across rows of a matrix
+ length_type const rows = 5;
+ length_type const cols = 7;
+ const_Matrix<scalar_f> m1( rows, cols, val );
+ Matrix<scalar_f> m2( rows, cols );
+
+ cumsum<0>( m1, m2 );
+ for ( index_type i = 0; i < rows; ++i )
+ assert( equal( cols * val, m2.get(i, cols - 1) ) );
+
+ // sum across columns of a matrix
+ cumsum<1>( m1, m2 );
+ for ( index_type j = 0; j < cols; ++j )
+ assert( equal( rows * val, m2.get(rows - 1, j) ) );
+
+
return 0;
}
+
|