Actions
|
|
[ Date Prev][ Date Next][ Thread Prev][ Thread Next][ Date Index][ Thread Index]
Re: [vsipl++] [patch] matvec: outer, gem, cumsum
- To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
- Subject: Re: [vsipl++] [patch] matvec: outer, gem, cumsum
- From: Don McCoy <don@xxxxxxxxxxxxxxxx>
- Date: Fri, 30 Sep 2005 13:01:35 -0600
Suggested changes applied. Using a modified approach that applies the
'mat_op_type' makes the code more readable and it was easier to extend
to include op types mat_herm and mat_conj. Also includes
specializations that allow herm and conj to be performed on real types
(by doing transpose and nothing respectively).
Tested under GCC 3.4 successfully. ICPC 8.0 and 9.0 caused failures
related to handling of complex types.
--
Don McCoy
CodeSourcery, LLC
2005-09-28 Don McCoy <don@xxxxxxxxxxxxxxxx>
* src/vsip/impl/matvec.hpp: added outer product, gemp,
gems and cumsum.
* tests/matvec.cpp: added tests for gemp, gems and
cumsum which are not covered in ref-impl tests.
Index: src/vsip/impl/matvec.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/matvec.hpp,v
retrieving revision 1.3
diff -c -p -r1.3 matvec.hpp
*** src/vsip/impl/matvec.hpp 27 Sep 2005 22:44:41 -0000 1.3
--- src/vsip/impl/matvec.hpp 30 Sep 2005 18:50:25 -0000
*************** kron( T0 alpha, Matrix<T1, Block1> v, Ma
*** 94,104 ****
/// Class to perform transpose or hermetian (conjugate-transpose),
/// depending on value type.
/// Primary case - perform transpose.
-
template <typename T,
typename Block>
struct Trans_or_herm
--- 94,132 ----
+ template <typename T0, typename T1, typename T2, typename T3, typename T4,
+ typename Block1, typename Block2, typename Block4>
+ void
+ gemp( T0 alpha, const_Matrix<T1, Block1> A,
+ const_Matrix<T2, Block2> B, T3 beta, Matrix<T4, Block4> C)
+ {
+ assert( A.size(0) == C.size(0) );
+ assert( B.size(1) == C.size(1) );
+
+ for ( index_type i = A.size(0); i-- > 0; )
+ for ( index_type j = B.size(1); j-- > 0; )
+ C.put(i, j, alpha * dot( A.row(i), B.col(j) ) + beta * C.get(i, j));
+ }
+
+
+ template <typename T0, typename T1, typename T3, typename T4,
+ typename Block1, typename Block4>
+ void
+ gems( T0 alpha, const_Matrix<T1, Block1> A, T3 beta, Matrix<T4, Block4> C)
+ {
+ assert( A.size(0) == C.size(0) );
+ assert( A.size(1) == C.size(1) );
+ C = alpha * A + beta * C;
+ }
+
+
+
+
+
/// Class to perform transpose or hermetian (conjugate-transpose),
/// depending on value type.
/// Primary case - perform transpose.
template <typename T,
typename Block>
struct Trans_or_herm
*************** struct Trans_or_herm
*** 113,119 ****
};
/// Complex specialization - perform hermetian.
-
template <typename T,
typename Block>
struct Trans_or_herm<complex<T>, Block>
--- 141,146 ----
*************** struct Trans_or_herm<complex<T>, Block>
*** 131,140 ****
}
};
-
-
/// Perform transpose or hermetian, depending on value type.
-
template <typename T,
typename Block>
inline
--- 158,164 ----
*************** trans_or_herm(const_Matrix<T, Block> m)
*** 144,149 ****
--- 168,353 ----
return Trans_or_herm<T, Block>::exec(m);
};
+
+
+ // generalized class used to invoke the correct matrix operator
+ template <mat_op_type OpT,
+ typename T,
+ typename Block>
+ struct Apply_mat_op;
+
+ // partial specializations:
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_ntrans, T, Block>
+ {
+ typedef const_Matrix<T, Block> result_type;
+
+ static result_type
+ exec(const_Matrix<T, Block> m) VSIP_NOTHROW
+ {
+ return m;
+ }
+ };
+
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_trans, T, Block>
+ {
+ typedef typename const_Matrix<T, Block>::transpose_type result_type;
+
+ static result_type
+ exec(const_Matrix<T, Block> m) VSIP_NOTHROW
+ {
+ return m.transpose();
+ }
+ };
+
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_herm, T, Block>
+ {
+ typedef typename const_Matrix<T, Block>::transpose_type result_type;
+
+ static result_type
+ exec(const_Matrix<T, Block> m) VSIP_NOTHROW
+ {
+ return impl::trans_or_herm(m);
+ }
+ };
+
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_herm, complex<T>, Block>
+ {
+ typedef typename const_Matrix<complex<T>, Block>::transpose_type
+ transpose_type;
+ typedef impl::Unary_func_view<impl::conj_functor, transpose_type>
+ functor_type;
+ typedef typename functor_type::result_type result_type;
+
+ static result_type
+ exec(const_Matrix<complex<T>, Block> m) VSIP_NOTHROW
+ {
+ return impl::trans_or_herm(m);
+ }
+ };
+
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_conj, T, Block>
+ {
+ typedef const_Matrix<T, Block> result_type;
+
+ static result_type
+ exec(const_Matrix<T, Block> m) VSIP_NOTHROW
+ {
+ return m;
+ }
+ };
+
+ template <typename T,
+ typename Block>
+ struct Apply_mat_op<mat_conj, complex<T>, Block>
+ {
+ typedef impl::Unary_func_view<impl::conj_functor,
+ const_Matrix<complex<T>, Block> > functor_type;
+ typedef typename functor_type::result_type result_type;
+
+ static result_type
+ exec(const_Matrix<complex<T>, Block> m) VSIP_NOTHROW
+ {
+ return conj(m);
+ }
+ };
+
+
+ // convenience function to use Apply_mat_op:
+ template <mat_op_type OpT,
+ typename T,
+ typename Block>
+ typename Apply_mat_op<OpT, T, Block>::result_type
+ apply_mat_op(const_Matrix<T, Block> m)
+ {
+ return Apply_mat_op<OpT, T, Block>::exec(m);
+ }
+
+
+
+ template <dimension_type d,
+ typename T0,
+ typename T1,
+ typename Block0,
+ typename Block1>
+ void
+ cumsum(
+ const_Vector<T0, Block0> v,
+ Vector<T1, Block1> w)
+ VSIP_NOTHROW
+ {
+ // Effects: w has values equaling the cumulative sum of values in v.
+ //
+ // If View is Vector, d is ignored and, for
+ // 0 <= i < v.size(),
+ // w.get(i) equals the sum over 0 <= j <= i of v.get(j)
+ assert( v.size() == w.size() );
+
+ T1 sum = T0();
+ for ( index_type i = 0; i < v.size(); ++i )
+ {
+ sum += v.get(i);
+ w.put(i, sum);
+ }
+ }
+
+
+ template <dimension_type d,
+ typename T0,
+ typename T1,
+ typename Block0,
+ typename Block1>
+ void
+ cumsum(
+ const_Matrix<T0, Block0> v,
+ Matrix<T1, Block1> w)
+ VSIP_NOTHROW
+ {
+ if ( d == 0 )
+ {
+ // If View is Matrix and d == 0, then, for
+ // 0 <= m < v.size(0) and 0 <= i < v.size(1),
+ // w.get(m, i) equals the sum over 0 <= j <= i of v.get(m, j).
+
+ for ( index_type m = 0; m < v.size(0); ++m )
+ {
+ T1 sum = T0();
+ for ( index_type i = 0; i < v.size(1); ++i )
+ {
+ sum += v.get(m, i);
+ w.put(m, i, sum);
+ }
+ }
+ }
+ else
+ if ( d == 1 )
+ {
+ // If View is Matrix and d == 1, then, for
+ // 0 <= i < v.size(0) and 0 <= n < v.size(1),
+ // w.get(i, n) equals the sum over 0 <= j <= i of v.get(j, n).
+
+ for ( index_type n = 0; n < v.size(1); ++n )
+ {
+ T1 sum = T0();
+ for ( index_type i = 0; i < v.size(0); ++i )
+ {
+ sum += v.get(i, n);
+ w.put(i, n, sum);
+ }
+ }
+ }
+ }
+
+
} // namespace impl
*************** kron( T0 alpha, const_View<T1, Block1> v
*** 228,233 ****
--- 432,555 ----
}
+ // Outer product [math.matvec.outer]
+
+ /// outer product of two scalar vectors
+ template <typename T0,
+ typename T1,
+ typename T2,
+ typename Block1,
+ typename Block2>
+ const_Matrix<typename Promotion<T0, typename Promotion<T1, T2>::type>::type>
+ outer( T0 alpha, const_Vector<T1, Block1> v, const_Vector<T2, Block2> w )
+ VSIP_NOTHROW
+ {
+ typedef Matrix<typename Promotion<T0,
+ typename Promotion<T1, T2>::type>::type> return_type;
+ return_type r( v.size(), w.size(), alpha );
+
+ for ( index_type i = v.size(); i-- > 0; )
+ for ( index_type j = w.size(); j-- > 0; )
+ r.put( i, j, alpha * v.get(i) * w.get(j) );
+
+ return r;
+ }
+
+ /// outer product of two complex vectors
+ template <typename T0,
+ typename T1,
+ typename T2,
+ typename Block1,
+ typename Block2>
+ const_Matrix<typename Promotion<T0, typename Promotion<T1, T2>::type>::type>
+ outer( T0 alpha, const_Vector<std::complex<T1>, Block1> v,
+ const_Vector<std::complex<T2>, Block2> w )
+ VSIP_NOTHROW
+ {
+ typedef Matrix<typename Promotion<T0,
+ typename Promotion<T1, T2>::type>::type> return_type;
+ return_type r( v.size(), w.size(), alpha );
+
+ for ( index_type i = v.size(); i-- > 0; )
+ for ( index_type j = w.size(); j-- > 0; )
+ r.put( i, j, alpha * v.get(i) * conj(w.get(j)) );
+
+ return r;
+ }
+
+
+ // Generalized Matrix operations [math.matvec.gem]
+
+ /// generalized matrix product
+ template <mat_op_type OpA,
+ mat_op_type OpB,
+ typename T0,
+ typename T1,
+ typename T2,
+ typename T3,
+ typename T4,
+ typename Block1,
+ typename Block2,
+ typename Block4>
+ void
+ gemp(
+ T0 alpha,
+ const_Matrix<T1, Block1> A,
+ const_Matrix<T2, Block2> B,
+ T3 beta,
+ Matrix<T4, Block4> C)
+ VSIP_NOTHROW
+ {
+ // equivalent to C = alpha * OpA(A) * OpB(B) + beta * C
+ impl::gemp( alpha,
+ impl::apply_mat_op<OpA>(A),
+ impl::apply_mat_op<OpB>(B),
+ beta,
+ C );
+ }
+
+
+ /// Generalized matrix sum
+ template <mat_op_type OpA,
+ typename T0,
+ typename T1,
+ typename T3,
+ typename T4,
+ typename Block1,
+ typename Block4>
+ void
+ gems(
+ T0 alpha,
+ const_Matrix<T1, Block1> A,
+ T3 beta,
+ Matrix<T4, Block4> C)
+ VSIP_NOTHROW
+ {
+ impl::gems( alpha,
+ impl::apply_mat_op<OpA>(A),
+ beta,
+ C );
+ }
+
+
+ // Miscellaneous functions [math.matvec.misc]
+
+ /// cumulative sum
+ template <dimension_type d,
+ typename T0,
+ typename T1,
+ template <typename, typename> class const_View,
+ template <typename, typename> class View,
+ typename Block0,
+ typename Block1>
+ void
+ cumsum(
+ const_View<T0, Block0> v,
+ View<T1, Block1> w)
+ VSIP_NOTHROW
+ {
+ impl::cumsum<d>(v, w);
+ }
} // namespace vsip
Index: tests/matvec.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/matvec.cpp,v
retrieving revision 1.1
diff -c -p -r1.1 matvec.cpp
*** tests/matvec.cpp 19 Sep 2005 21:06:46 -0000 1.1
--- tests/matvec.cpp 30 Sep 2005 18:50:25 -0000
***************
*** 11,17 ****
***********************************************************************/
#include <cassert>
- #include <iostream>
#include <vsip/initfin.hpp>
#include <vsip/support.hpp>
--- 11,16 ----
***************
*** 19,43 ****
#include <vsip/math.hpp>
#include "test.hpp"
#include "output.hpp"
using namespace std;
using namespace vsip;
/***********************************************************************
Main
***********************************************************************/
-
int
main(int argc, char** argv)
{
vsipl init(argc, argv);
-
// Test Matrix-Matrix Kronecker
Matrix<scalar_f>
--- 18,349 ----
#include <vsip/math.hpp>
#include "test.hpp"
+ #include "test-random.hpp"
#include "output.hpp"
using namespace std;
using namespace vsip;
+ /***********************************************************************
+ Definitions
+ ***********************************************************************/
+
+
+ template <typename T>
+ void
+ Check_gem_results( Matrix<T> actual, Matrix<T> expected )
+ {
+ for ( index_type row = 0; row < actual.size(0); ++row )
+ for ( index_type col = 0; col < actual.size(1); ++col )
+ assert( equal( actual.get(row, col), expected.get(row, col) ) );
+ }
+
+
+ template <typename T>
+ void
+ Test_gemp( T alpha, T beta, length_type M, length_type P, length_type N )
+ {
+ Matrix<T> a (M, P);
+ Matrix<T> b (P, N);
+ Matrix<T> c (M, N);
+ Matrix<T> d (M, N);
+
+ Matrix<T> a_t (P, M);
+ Matrix<T> b_t (N, P);
+
+ // fill in unique values for each element of a, b and c
+ randm(a);
+ randm(b);
+ randm(c);
+ a_t = a.transpose();
+ b_t = b.transpose();
+
+
+ // compute the expected result for d
+ index_type row;
+ index_type col;
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a.get(row, i) * b.get(i, col);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_ntrans, mat_ntrans>(alpha, a, b, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // re-compute the result with remaining types
+
+ // trans, no trans
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a_t.get(i, row) * b.get(i, col);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_trans, mat_ntrans>(alpha, a_t, b, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // no trans, trans
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a.get(row, i) * b_t.get(col, i);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_ntrans, mat_trans>(alpha, a, b_t, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // trans, trans
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a_t.get(i, row) * b_t.get(col, i);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_trans, mat_trans>(alpha, a_t, b_t, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // herm, trans
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += impl::impl_conj(a_t.get(i, row)) * b_t.get(col, i);
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_herm, mat_trans>(alpha, a_t, b_t, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // ntrans, conj
+ // compute the expected result for d
+ for ( row = 0; row < M; ++row )
+ for ( col = 0; col < N; ++col )
+ {
+ T dot = 0;
+ for ( index_type i = 0; i < P; ++i )
+ dot += a.get(row, i) * impl::impl_conj(b.get(i, col));
+ d.put( row, col, alpha * dot + beta * c(row, col) );
+ }
+
+ // compute the actual result (updated in c)
+ gemp<mat_ntrans, mat_conj>(alpha, a, b, beta, c);
+
+ Check_gem_results( c, d );
+ }
+
+
+ template <typename T>
+ void
+ Test_gems( T alpha, T beta, length_type M, length_type P, length_type N )
+ {
+ Matrix<T> a (M, N);
+ Matrix<T> b (M, N);
+ Matrix<T> c (M, N);
+ Matrix<T> d (M, N);
+
+ Matrix<T> a_t (N, M);
+
+
+ // fill in unique values for each element of a and c
+ randm(a);
+ randm(b); // save copy for later
+ c = b;
+
+
+ // without trans
+ // compute the expected result for d
+ for ( index_type row = 0; row < M; ++row )
+ for ( index_type col = 0; col < N; ++col )
+ d.put( row, col, alpha * a.get(row, col) + beta * c(row, col) );
+
+ // compute the actual result (updated in c)
+ gems<mat_ntrans>(alpha, a, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // create the transposes of a and restore c
+ c = b;
+ a_t = a.transpose();
+
+ // with trans
+ // expected result for d will stay the same because now we use
+ // the transpose of a and request that it take the transpose
+ // of that when computing the sum
+
+ // compute the actual result (updated in c)
+ gems<mat_trans>(alpha, a_t, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // restore c
+ c = b;
+
+ // with herm
+ // compute the expected result for d
+ for ( index_type row = 0; row < M; ++row )
+ for ( index_type col = 0; col < N; ++col )
+ d.put( row, col, alpha * impl::impl_conj(a_t.get(col, row)) + beta * c(row, col) );
+
+ // compute the actual result (updated in c)
+ gems<mat_herm>(alpha, a_t, beta, c);
+
+ Check_gem_results( c, d );
+
+
+ // restore c
+ c = b;
+
+ // with conj
+ // compute the expected result for d
+ for ( index_type row = 0; row < M; ++row )
+ for ( index_type col = 0; col < N; ++col )
+ d.put( row, col, alpha * impl::impl_conj(a.get(row, col)) + beta * c(row, col) );
+
+ // compute the actual result (updated in c)
+ gems<mat_conj>(alpha, a, beta, c);
+
+ Check_gem_results( c, d );
+ }
+
+
+ template <typename T>
+ void
+ Test_gem_types( T alpha, T beta )
+ {
+ // last 3 params are M, N, P (for M x N and N x P matricies)
+
+ // generalized matrix product
+ Test_gemp<T>( alpha, beta, 7, 3, 5 );
+ Test_gemp<T>( alpha, beta, 7, 9, 5 );
+ Test_gemp<T>( alpha, beta, 5, 9, 7 );
+ Test_gemp<T>( alpha, beta, 5, 3, 7 );
+
+ // generalized matrix sum
+ Test_gems<T>( alpha, beta, 7, 3, 5 );
+ Test_gems<T>( alpha, beta, 7, 9, 5 );
+ Test_gems<T>( alpha, beta, 5, 9, 7 );
+ Test_gems<T>( alpha, beta, 5, 3, 7 );
+ }
+
+
+
+ void
+ Test_cumsum()
+ {
+ // simple sum of a vector containing scalars
+ length_type const len = 5;
+ Vector<scalar_f> v1( len );
+ Vector<scalar_f> v2( len );
+ scalar_f sum = 0;
+
+ for ( index_type i = 0; i < len; ++i )
+ {
+ v1.put( i, i + 1 );
+ sum += i + 1;
+ }
+
+ cumsum<0>( v1, v2 );
+ assert( equal( sum, v2.get(len - 1) ) );
+
+
+ // simple sum of a vector containing complex<scalars>
+ Vector<cscalar_f> cv1( len );
+ Vector<cscalar_f> cv2( len );
+ cscalar_f csum = cscalar_f();
+
+ for ( index_type i = 0; i < len; ++i )
+ {
+ cv1.put( i, complex<float>( i + 1, i + 1 ) );
+ csum += complex<float>( i + 1, i + 1 );
+ }
+
+ cumsum<0>( cv1, cv2 );
+ assert( equal( csum, cv2.get(len - 1) ) );
+
+
+ // sum of a matrix using scalars
+ length_type const rows = 5;
+ length_type const cols = 7;
+ Matrix<scalar_f> m1( rows, cols );
+ Matrix<scalar_f> m2( rows, cols );
+ scalar_f colsum[7];
+ scalar_f rowsum[5];
+
+ for ( index_type i = 0; i < rows; ++i )
+ {
+ rowsum[i] = 0;
+ for ( index_type j = 0; j < cols; ++j )
+ {
+ m1.put( i, j, i + 1 + j * rows );
+ rowsum[i] += i + 1 + j * rows;
+ }
+ }
+
+ for ( index_type j = 0; j < cols; ++j )
+ {
+ colsum[j] = 0;
+ for ( index_type i = 0; i < rows; ++i )
+ colsum[j] += i + 1 + j * rows;
+ }
+
+ // sum across rows of a matrix
+ cumsum<0>( m1, m2 );
+ for ( index_type i = 0; i < rows; ++i )
+ assert( equal( rowsum[i], m2.get(i, cols - 1) ) );
+
+
+ // sum across columns of a matrix
+ cumsum<1>( m1, m2 );
+ for ( index_type j = 0; j < cols; ++j )
+ assert( equal( colsum[j], m2.get(rows - 1, j) ) );
+ }
/***********************************************************************
Main
***********************************************************************/
int
main(int argc, char** argv)
{
vsipl init(argc, argv);
// Test Matrix-Matrix Kronecker
Matrix<scalar_f>
*************** main(int argc, char** argv)
*** 55,59 ****
--- 361,385 ----
assert( equal( kron_mn.get( a, b ),
static_cast<scalar_f>(7 * 11 * 2.0) ) );
+
+ // Test generalized matrix operations
+
+ // params: alpha, beta
+ Test_gem_types<float>( M_E, M_PI );
+
+ Test_gem_types<double>( -M_E, -M_PI );
+
+ Test_gem_types<complex<float> >
+ ( complex<float>(M_LN2, -M_SQRT2), complex<float>(M_LOG2E, M_LN10) );
+
+ Test_gem_types<complex<double> >
+ ( complex<float>(M_LN2, -M_SQRT2), complex<float>(M_LOG2E, M_LN10) );
+
+
+ // misc functions
+
+ Test_cumsum();
+
return 0;
}
+
|