Actions

icon Post
text/html Subscribe
text/html Unsubscribe

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [vsipl++] [patch] matrix-vector and vector-matrix product dispatch to CML


  • To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
  • Subject: Re: [vsipl++] [patch] matrix-vector and vector-matrix product dispatch to CML
  • From: Don McCoy <don@xxxxxxxxxxxxxxxx>
  • Date: Tue, 27 May 2008 16:16:04 -0600

Don McCoy wrote:
> This patch required adding a new pair of tests to the existing
> matrix-vector product tests to cover cases of non-unit strides allowed
> by CML.
>
>   
Bindings for split-complex versions were omitted from the previous
version of this patch. 

Ok to commit now?

-- 
Don McCoy
don (at) CodeSourcery
(888) 776-0262 / (650) 331-3385, x712

2008-05-24  Don McCoy  <don@xxxxxxxxxxxxxxxx>

	* src/vsip/opt/cbe/cml/matvec.hpp: Added evaluators for matrix-vector
	  and vector-matrix product dispatch to CML.
	* src/vsip/opt/cbe/cml/prod.hpp: Bindings for mv- and vm-products.
	* tests/matvec-prodmv.cpp: Added tests using subviews to cover cases
	  where vector strides are not one and matrix rows are not dense
	  (although rows must be unit stride).
Index: src/vsip/opt/cbe/cml/matvec.hpp
===================================================================
--- src/vsip/opt/cbe/cml/matvec.hpp	(revision 209326)
+++ src/vsip/opt/cbe/cml/matvec.hpp	(working copy)
@@ -37,7 +37,7 @@
 {
 
 
-/// CML evaluator for matrix-matrix products.
+/// CML evaluator for matrix-matrix products
 
 template <typename Block0,
           typename Block1,
@@ -122,7 +122,7 @@
 };
 
 
-/// CML evaluator for matrix-matrix conjugate products.
+/// CML evaluator for matrix-matrix conjugate products
 
 template <typename Block0,
           typename Block1,
@@ -207,6 +207,159 @@
 };
 
 
+/// CML evaluator for matrix-vector products
+
+template <typename Block0,
+          typename Block1,
+          typename Block2>
+struct Evaluator<Op_prod_mv, Block0, Op_list_2<Block1, Block2>,
+                 Cml_tag>
+{
+  typedef typename Block0::value_type T;
+  typedef typename Block_layout<Block0>::order_type order0_type;
+  typedef typename Block_layout<Block1>::order_type order1_type;
+  typedef typename Block_layout<Block2>::order_type order2_type;
+
+  static bool const ct_valid = 
+    // check that CML supports this data type and/or layout
+    impl::cml::Cml_supports_block<Block0>::valid &&
+    impl::cml::Cml_supports_block<Block1>::valid &&
+    impl::cml::Cml_supports_block<Block2>::valid &&
+    // check that all data types are equal
+    Type_equal<T, typename Block1::value_type>::value &&
+    Type_equal<T, typename Block2::value_type>::value &&
+    // check that the complex layouts are equal
+    Is_split_block<Block0>::value == Is_split_block<Block1>::value &&
+    Is_split_block<Block0>::value == Is_split_block<Block2>::value &&
+    // check that direct access is supported
+    Ext_data_cost<Block0>::value == 0 &&
+    Ext_data_cost<Block1>::value == 0 &&
+    Ext_data_cost<Block2>::value == 0;
+
+  static bool rt_valid(Block0& r, Block1 const& a, Block2 const& b)
+  {
+    Ext_data<Block1> ext_a(const_cast<Block1&>(a));
+
+    // For 'a', the dimension with the smallest stride must be one,
+    // which depends on whether it is row- or column-major.
+    bool is_a_row = Type_equal<order1_type, row2_type>::value;
+    stride_type a_stride = is_a_row ? ext_a.stride(1) : ext_a.stride(0);
+
+    return (a_stride == 1);
+  }
+
+  static void exec(Block0& r, Block1 const& a, Block2 const& b)
+  {
+    Ext_data<Block0> ext_r(const_cast<Block0&>(r));
+    Ext_data<Block1> ext_a(const_cast<Block1&>(a));
+    Ext_data<Block2> ext_b(const_cast<Block2&>(b));
+
+    // Either row- or column-major layouts are supported for the input 
+    // matrix by using the identity:
+    //   trans(r) = trans(b) * trans(a)
+    // or just
+    //   r = b * trans(a)  (since r and b are vectors)
+    if (Type_equal<order1_type, row2_type>::value)
+    {
+      cml::mvprod(
+        ext_a.data(), ext_a.stride(0),
+        ext_b.data(), ext_b.stride(0),
+        ext_r.data(), ext_r.stride(0),
+        a.size(2, 0),   // M
+        a.size(2, 1) ); // N
+    }
+    else if (Type_equal<order1_type, col2_type>::value)
+    {
+      cml::vmprod(
+        ext_b.data(), ext_b.stride(0),
+        ext_a.data(), ext_a.stride(1),
+        ext_r.data(), ext_r.stride(0),
+        a.size(2, 1),   // N
+        a.size(2, 0) ); // M
+    }
+    else
+      assert(0);
+  }
+};
+
+
+/// CML evaluator for vector-matrix products
+
+template <typename Block0,
+          typename Block1,
+          typename Block2>
+struct Evaluator<Op_prod_vm, Block0, Op_list_2<Block1, Block2>,
+                 Cml_tag>
+{
+  typedef typename Block0::value_type T;
+  typedef typename Block_layout<Block0>::order_type order0_type;
+  typedef typename Block_layout<Block1>::order_type order1_type;
+  typedef typename Block_layout<Block2>::order_type order2_type;
+
+  static bool const ct_valid = 
+    // check that CML supports this data type and/or layout
+    impl::cml::Cml_supports_block<Block0>::valid &&
+    impl::cml::Cml_supports_block<Block1>::valid &&
+    impl::cml::Cml_supports_block<Block2>::valid &&
+    // check that all data types are equal
+    Type_equal<T, typename Block1::value_type>::value &&
+    Type_equal<T, typename Block2::value_type>::value &&
+    // check that the complex layouts are equal
+    Is_split_block<Block0>::value == Is_split_block<Block1>::value &&
+    Is_split_block<Block0>::value == Is_split_block<Block2>::value &&
+    // check that direct access is supported
+    Ext_data_cost<Block0>::value == 0 &&
+    Ext_data_cost<Block1>::value == 0 &&
+    Ext_data_cost<Block2>::value == 0;
+
+  static bool rt_valid(Block0& r, Block1 const& a, Block2 const& b)
+  {
+    Ext_data<Block2> ext_b(const_cast<Block2&>(b));
+
+    // For 'b', the dimension with the smallest stride must be one,
+    // which depends on whether it is row- or column-major.
+    bool is_b_row = Type_equal<order2_type, row2_type>::value;
+    stride_type b_stride = is_b_row ? ext_b.stride(1) : ext_b.stride(0);
+
+    return (b_stride == 1);
+  }
+
+  static void exec(Block0& r, Block1 const& a, Block2 const& b)
+  {
+    Ext_data<Block0> ext_r(const_cast<Block0&>(r));
+    Ext_data<Block1> ext_a(const_cast<Block1&>(a));
+    Ext_data<Block2> ext_b(const_cast<Block2&>(b));
+
+    // Either row- or column-major layouts are supported for the input 
+    // matrix by using the identity:
+    //   trans(r) = trans(b) * trans(a)
+    // or just
+    //   r = b * trans(a)  (since r and b are vectors)
+    if (Type_equal<order2_type, row2_type>::value)
+    {
+      cml::vmprod(
+        ext_a.data(), ext_a.stride(0),
+        ext_b.data(), ext_b.stride(0),
+        ext_r.data(), ext_r.stride(0),
+        b.size(2, 0),   // M
+        b.size(2, 1) ); // N
+    }
+    else if (Type_equal<order2_type, col2_type>::value)
+    {
+      cml::mvprod(
+        ext_b.data(), ext_b.stride(1),
+        ext_a.data(), ext_a.stride(0),
+        ext_r.data(), ext_r.stride(0),
+        b.size(2, 1),   // N
+        b.size(2, 0) ); // M
+    }
+    else
+      assert(0);
+  }
+};
+
+
+
 } // namespace vsip::impl
 
 } // namespace vsip
Index: src/vsip/opt/cbe/cml/prod.hpp
===================================================================
--- src/vsip/opt/cbe/cml/prod.hpp	(revision 209326)
+++ src/vsip/opt/cbe/cml/prod.hpp	(working copy)
@@ -105,6 +105,63 @@
 #undef VSIP_IMPL_CML_ZMPROD
 
 
+// This macro supports scalar and interleaved complex types
+
+#define VSIP_IMPL_CML_MVPROD(T, FCN, CML_FCN)   \
+  inline void                                   \
+  FCN(                                          \
+    T *a, int lda,                              \
+    T *b, int ldb,                              \
+    T *z, int ldz,                              \
+    int m, int n)                               \
+  {                                             \
+    typedef Scalar_of<T>::type CML_T;           \
+    CML_FCN(                                    \
+      reinterpret_cast<CML_T*>(a),              \
+      static_cast<ptrdiff_t>(lda),              \
+      reinterpret_cast<CML_T*>(b),              \
+      static_cast<ptrdiff_t>(ldb),              \
+      reinterpret_cast<CML_T*>(z),              \
+      static_cast<ptrdiff_t>(ldz),              \
+      static_cast<size_t>(m),                   \
+      static_cast<size_t>(n) );                 \
+  }
+
+VSIP_IMPL_CML_MVPROD(float,               mvprod,  cml_mvprod1_f)
+VSIP_IMPL_CML_MVPROD(std::complex<float>, mvprod,  cml_cmvprod1_f)
+VSIP_IMPL_CML_MVPROD(float,               vmprod,  cml_vmprod1_f)
+VSIP_IMPL_CML_MVPROD(std::complex<float>, vmprod,  cml_cvmprod1_f)
+#undef VSIP_IMPL_CML_MVPROD
+
+
+// This version is for split complex only.
+
+#define VSIP_IMPL_CML_ZMVPROD(T, FCN, CML_FCN)  \
+  inline void                                   \
+  FCN(                                          \
+    std::pair<T*, T*> a, int lda,               \
+    std::pair<T*, T*> b, int ldb,               \
+    std::pair<T*, T*> z, int ldz,               \
+    int m, int n)                               \
+  {                                             \
+    typedef Scalar_of<T>::type CML_T;           \
+    CML_FCN(                                    \
+      a.first, a.second,                        \
+      static_cast<ptrdiff_t>(lda),              \
+      b.first, b.second,                        \
+      static_cast<ptrdiff_t>(ldb),              \
+      z.first, z.second,                        \
+      static_cast<ptrdiff_t>(ldz),              \
+      static_cast<size_t>(m),                   \
+      static_cast<size_t>(n) );                 \
+  }
+
+VSIP_IMPL_CML_ZMVPROD(float, mvprod, cml_zmvprod1_f)
+VSIP_IMPL_CML_ZMVPROD(float, vmprod, cml_zvmprod1_f)
+#undef VSIP_IMPL_CML_ZMVPROD
+
+
+
 } // namespace vsip::impl::cml
 
 } // namespace vsip::impl
Index: tests/matvec-prodmv.cpp
===================================================================
--- tests/matvec-prodmv.cpp	(revision 209326)
+++ tests/matvec-prodmv.cpp	(working copy)
@@ -18,6 +18,7 @@
 
 #include <vsip/initfin.hpp>
 #include <vsip/support.hpp>
+#include <vsip/matrix.hpp>
 #include <vsip/tensor.hpp>
 #include <vsip/math.hpp>
 
@@ -144,7 +145,138 @@
 }
 
 
+/// Test matrix-matrix products using sub-views
 
+template <typename T>
+void
+test_mv_prod_subview( const length_type m, 
+                      const length_type n )
+{
+  typedef typename Matrix<T>::subview_type matrix_subview_type;
+  typedef typename Vector<T>::subview_type vector_subview_type;
+  typedef typename vsip::impl::Scalar_of<T>::type scalar_type;
+
+  {
+    // non-unit strides - non-dense rows/vectors, dense columns
+    Matrix<T> aa(m*2, n, T());
+    Vector<T> bb(n*3, T());
+    matrix_subview_type a = aa(Domain<2>(
+                                Domain<1>(0, 2, m), Domain<1>(0, 1, n)));
+    vector_subview_type b = bb(Domain<1>(0, 3, n));
+
+    randm(a);
+    randv(b);
+
+    Vector<T> res(m);
+    Vector<T> chk(m);
+    Vector<scalar_type> gauge(m);
+
+    res = prod( a, b );
+    chk = ref::prod( a, b );
+    gauge = ref::prod(mag(a), mag(b));
+
+    for (index_type i=0; i<gauge.size(0); ++i)
+      if (!(gauge(i) > scalar_type()))
+        gauge(i) = scalar_type(1);
+
+    check_prod( res, chk, gauge );
+  }
+
+  {
+    // non-unit strides - dense rows, non-dense columns/vectors
+    Matrix<T> aa(m*2, n, T());
+    Vector<T> bb(m*3, T());
+    matrix_subview_type a = aa(Domain<2>(
+                                Domain<1>(0, 2, m), Domain<1>(0, 1, n)));
+    vector_subview_type b = bb(Domain<1>(0, 3, m));
+
+    randm(a);
+    randv(b);
+
+    Vector<T> res(n);
+    Vector<T> chk(n);
+    Vector<scalar_type> gauge(n);
+
+    res = prod( trans(a), b );
+    chk = ref::prod( trans(a), b );
+    gauge = ref::prod(mag(trans(a)), mag(b));
+
+    for (index_type i=0; i<gauge.size(0); ++i)
+      if (!(gauge(i) > scalar_type()))
+        gauge(i) = scalar_type(1);
+
+    check_prod( res, chk, gauge );
+  }
+}
+
+
+/// Test matrix-matrix products using sub-views
+
+template <typename T>
+void
+test_vm_prod_subview( const length_type m, 
+                      const length_type n )
+{
+  typedef typename Matrix<T>::subview_type matrix_subview_type;
+  typedef typename Vector<T>::subview_type vector_subview_type;
+  typedef typename vsip::impl::Scalar_of<T>::type scalar_type;
+
+  {
+    // non-unit strides - non-dense rows/vectors, dense columns
+    Vector<T> aa(m*3, T());
+    Matrix<T> bb(m*2, n, T());
+    vector_subview_type a = aa(Domain<1>(0, 3, m));
+    matrix_subview_type b = bb(Domain<2>(
+                                Domain<1>(0, 2, m), Domain<1>(0, 1, n)));
+
+    randv(a);
+    randm(b);
+
+    Vector<T> res(n);
+    Vector<T> chk(n);
+    Vector<scalar_type> gauge(n);
+
+    res = prod( a, b );
+    chk = ref::prod( a, b );
+    gauge = ref::prod(mag(a), mag(b));
+
+    for (index_type i=0; i<gauge.size(0); ++i)
+      if (!(gauge(i) > scalar_type()))
+        gauge(i) = scalar_type(1);
+
+    check_prod( res, chk, gauge );
+  }
+
+  {
+    // non-unit strides - dense rows, non-dense columns/vectors
+    Vector<T> aa(n*3, T());
+    Matrix<T> bb(m*2, n, T());
+    vector_subview_type a = aa(Domain<1>(0, 3, n));
+    matrix_subview_type b = bb(Domain<2>(
+                                Domain<1>(0, 2, m), Domain<1>(0, 1, n)));
+
+    randv(a);
+    randm(b);
+
+    Vector<T> res(m);
+    Vector<T> chk(m);
+    Vector<scalar_type> gauge(m);
+
+    res = prod( a, trans(b) );
+    chk = ref::prod( a, trans(b) );
+    gauge = ref::prod(mag(a), mag(trans(b)));
+
+    for (index_type i=0; i<gauge.size(0); ++i)
+      if (!(gauge(i) > scalar_type()))
+        gauge(i) = scalar_type(1);
+
+    check_prod( res, chk, gauge );
+  }
+}
+
+
+
+
 template <typename T0,
 	  typename T1>
 void
@@ -155,7 +287,16 @@
 }
 
 
+template <typename T>
+void
+prod_subview_cases()
+{
+  test_mv_prod_subview<T>(5, 7);
+  test_vm_prod_subview<T>(5, 7);
+}
 
+
+
 /***********************************************************************
   Main
 ***********************************************************************/
@@ -175,6 +316,9 @@
   prod_cases<float,          complex<float> >();
   prod_cases<complex<float>, float          >();
 
+  prod_subview_cases<float>();
+  prod_subview_cases<complex<float> >();
+
 #if VSIP_IMPL_TEST_DOUBLE
   prod_cases<double, double>();
   prod_cases<float,  double>();