Actions

icon Post
text/html Subscribe
text/html Unsubscribe

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[vsipl++] [patch] Misc fixes and work arounds


  • To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
  • Subject: [vsipl++] [patch] Misc fixes and work arounds
  • From: Jules Bergmann <jules@xxxxxxxxxxxxxxxx>
  • Date: Fri, 11 Jul 2008 03:05:45 -0400

This patch fixes several test failures when using CML, and works around two cases where CML breaks: transposes whose source or destination block is not both unit-stride and dense, and FIR filters with a decimation other than 1.

Patch applied.

--
Jules Bergmann
CodeSourcery
jules@xxxxxxxxxxxxxxxx
(650) 331-3385 x705
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 214460)
+++ ChangeLog	(working copy)
@@ -1,3 +1,24 @@
+2008-07-10  Jules Bergmann  <jules@xxxxxxxxxxxxxxxx>
+
+	* src/vsip/opt/cbe/spu/alf_pwarp_ub.cpp: Put ALF decls in extern "C"
+	  block.
+	* src/vsip/opt/cbe/spu/GNUmakefile.inc.in: Turn off unnecessary
+	  C++ features for C++ ALF kernels.
+	* src/vsip/opt/cbe/spu/alf_fconvm_c.c: Add missing ALF EXPORT API.
+	* src/vsip/opt/cbe/cml/transpose.hpp: Disable transpose for
+	  non-unit-stride.  Fix unit-stride+dense check.  Fix Wall warnings.
+	* src/vsip/opt/cbe/cml/matvec.hpp: Fix Wall warnings.
+	* src/vsip/opt/cbe/cml/fir.hpp: Throw on copy constructor.
+	  Disable BE if decimation != 1.
+	* src/vsip/opt/cbe/cml/conv.hpp: Fix Wall warnings.
+	* src/vsip/opt/cbe/cml/corr.hpp: Fix Wall warnings.
+	* src/vsip_csl/stencil/boundary_factory.hpp: Fix Wall warnings.
+	* tests/fft_be.cpp: XFail CML for double precision FFT.
+	* tests/test-prod.hpp: Add adjustable threshold.
+	* tests/ref-impl/signal-fir.cpp: Use equal.
+	* tests/regressions/par_transpose.cpp: Fix conversion.
+	* tests/matvec-prod.cpp: Adjust threshold on CBE.
+
 2008-07-10  Brooks Moses  <brooks@xxxxxxxxxxxxxxxx>
 
 	* README.cbe: New file.
Index: src/vsip/opt/cbe/spu/alf_pwarp_ub.cpp
===================================================================
--- src/vsip/opt/cbe/spu/alf_pwarp_ub.cpp	(revision 214460)
+++ src/vsip/opt/cbe/spu/alf_pwarp_ub.cpp	(working copy)
@@ -716,8 +716,11 @@
   return 0;
 }
 
+extern "C"
+{
 ALF_ACCEL_EXPORT_API_LIST_BEGIN
   ALF_ACCEL_EXPORT_API ("input", input);
   ALF_ACCEL_EXPORT_API ("output", output); 
   ALF_ACCEL_EXPORT_API ("kernel", kernel);
 ALF_ACCEL_EXPORT_API_LIST_END
+}
Index: src/vsip/opt/cbe/spu/GNUmakefile.inc.in
===================================================================
--- src/vsip/opt/cbe/spu/GNUmakefile.inc.in	(revision 214460)
+++ src/vsip/opt/cbe/spu/GNUmakefile.inc.in	(working copy)
@@ -45,7 +45,7 @@
 CPP_SPU_FLAGS += -I $(CBE_SDK_SYSROOT)/usr/spu/include
 CPP_SPU_FLAGS += -I $(CBE_SDK_SYSROOT)/opt/cell/sdk/usr/spu/include
 C_SPU_FLAGS := -O3
-CXX_SPU_FLAGS := -O3
+CXX_SPU_FLAGS := -O3 -fno-threadsafe-statics -fno-rtti -fno-exceptions
 LD_SPU_FLAGS += -Wl,-N -L$(CBE_SDK_SYSROOT)/usr/spu/lib
 LD_SPU_FLAGS += -L$(CBE_SDK_SYSROOT)/opt/cell/sdk/usr/spu/lib
 
Index: src/vsip/opt/cbe/spu/alf_fconvm_c.c
===================================================================
--- src/vsip/opt/cbe/spu/alf_fconvm_c.c	(revision 214460)
+++ src/vsip/opt/cbe/spu/alf_fconvm_c.c	(working copy)
@@ -131,3 +131,9 @@
 
   return 0;
 }
+
+ALF_ACCEL_EXPORT_API_LIST_BEGIN
+  ALF_ACCEL_EXPORT_API ("input", input);
+  ALF_ACCEL_EXPORT_API ("output", output); 
+  ALF_ACCEL_EXPORT_API ("kernel", kernel);
+ALF_ACCEL_EXPORT_API_LIST_END
Index: src/vsip/opt/cbe/cml/transpose.hpp
===================================================================
--- src/vsip/opt/cbe/cml/transpose.hpp	(revision 214460)
+++ src/vsip/opt/cbe/cml/transpose.hpp	(working copy)
@@ -191,7 +191,7 @@
   static int const  lhs_cost      = Ext_data_cost<DstBlock>::value;
   static int const  rhs_cost      = Ext_data_cost<SrcBlock>::value;
 
-  static bool const ct_valid = 
+  static bool const ct_valid =
     // check that CML supports this data type and/or layout
     cml::Cml_supports_block<SrcBlock>::valid &&
     cml::Cml_supports_block<DstBlock>::valid &&
@@ -210,16 +210,25 @@
 
     // If performing a copy, both source and destination blocks
     // must be unit stride and dense.
-    if (Type_equal<src_order_type, dst_order_type>::value)
+    //
+    // 080710: CML cannot handle non-unit stride transpose
+    //         (regressions/transpose-nonunit.cpp fails),
+    //         temporarily enforce unit-stride requirement.
+    //
+    // if (Type_equal<src_order_type, dst_order_type>::value)
     {
       Ext_data<DstBlock> dst_ext(dst, SYNC_OUT);
       Ext_data<SrcBlock> src_ext(src, SYNC_IN);
 
+      dimension_type const s_dim0 = src_order_type::impl_dim0;
       dimension_type const s_dim1 = src_order_type::impl_dim1;
+      dimension_type const d_dim0 = dst_order_type::impl_dim0;
       dimension_type const d_dim1 = dst_order_type::impl_dim1;
 
-      if (dst_ext.stride(d_dim1) != 1 || dst_ext.stride(0) != dst.size(2, 1) ||
-          src_ext.stride(s_dim1) != 1 || src_ext.stride(0) != src.size(2, 1))
+      if (dst_ext.stride(d_dim1) != 1 ||
+	  dst_ext.stride(d_dim0) != static_cast<stride_type>(dst.size(2, d_dim1)) ||
+	  src_ext.stride(s_dim1) != 1 ||
+	  src_ext.stride(s_dim0) != static_cast<stride_type>(src.size(2, s_dim1)))
         rt = false;
     }
 
@@ -233,8 +242,8 @@
 
     if (dst_ext.stride(1) == 1 && src_ext.stride(1) == 1)
     {
-      assert(dst_ext.stride(0) == dst.size(2, 1));
-      assert(src_ext.stride(0) == src.size(2, 1));
+      assert(dst_ext.stride(0) == static_cast<stride_type>(dst.size(2, 1)));
+      assert(src_ext.stride(0) == static_cast<stride_type>(src.size(2, 1)));
 
       cml::vcopy(
         src_ext.data(), 1,
@@ -252,8 +261,8 @@
 
     if (dst_ext.stride(0) == 1 && src_ext.stride(0) == 1)
     {
-      assert(dst_ext.stride(1) == dst.size(2, 0));
-      assert(src_ext.stride(1) == src.size(2, 0));
+      assert(dst_ext.stride(1) == static_cast<stride_type>(dst.size(2, 0)));
+      assert(src_ext.stride(1) == static_cast<stride_type>(src.size(2, 0)));
 
       cml::vcopy(
         src_ext.data(), 1,
Index: src/vsip/opt/cbe/cml/matvec.hpp
===================================================================
--- src/vsip/opt/cbe/cml/matvec.hpp	(revision 214460)
+++ src/vsip/opt/cbe/cml/matvec.hpp	(working copy)
@@ -362,7 +362,7 @@
     Ext_data_cost<Block1>::value == 0 &&
     Ext_data_cost<Block2>::value == 0;
 
-  static bool rt_valid(Block0& r, Block1 const& a, Block2 const& b)
+  static bool rt_valid(Block0& /*r*/, Block1 const& /*a*/, Block2 const& b)
   {
     Ext_data<Block2> ext_b(const_cast<Block2&>(b));
 
Index: src/vsip/opt/cbe/cml/fir.hpp
===================================================================
--- src/vsip/opt/cbe/cml/fir.hpp	(revision 214460)
+++ src/vsip/opt/cbe/cml/fir.hpp	(working copy)
@@ -129,6 +129,8 @@
       fir_obj_ptr_(NULL),
       filter_state_(fir.filter_state_)
   {
+    VSIP_IMPL_THROW(vsip::impl::unimplemented
+                    ("CML BE copy-construction broken."));
     fir_create(
       &fir_obj_ptr_,
       fir.fir_obj_ptr_->K,
@@ -200,6 +202,9 @@
     assert(o + 1 > d); // M >= decimation
     assert(i >= o);    // input_size >= M 
 
+    // 080710: CML BE has trouble with D != 1 (fir.cpp)
+    if (d != 1) return false;
+
     // CML FIR objects have fixed output size, whereas VSIPL++ FIR objects
     // have fixed input size.  If input size is not a multiple of the
     // decimation, output size will vary from frame to frame.  The 
Index: src/vsip/opt/cbe/cml/conv.hpp
===================================================================
--- src/vsip/opt/cbe/cml/conv.hpp	(revision 214460)
+++ src/vsip/opt/cbe/cml/conv.hpp	(working copy)
@@ -56,7 +56,7 @@
 void
 conv(
   float const* coeff, length_type c_size,
-  float const* in,    length_type i_size, stride_type s_in,
+  float const* in,    length_type /*i_size*/, stride_type s_in,
   float*       out,   length_type o_size, stride_type s_out,
   length_type decimation)
 {
@@ -67,7 +67,7 @@
 void
 conv(
   std::complex<float> const* coeff, length_type c_size,
-  std::complex<float> const* in,    length_type i_size, stride_type s_in,
+  std::complex<float> const* in,    length_type /*i_size*/, stride_type s_in,
   std::complex<float>*       out,   length_type o_size, stride_type s_out,
   length_type decimation)
 {
@@ -81,7 +81,7 @@
 void
 conv(
   std::pair<float*,float*> coeff, length_type c_size,
-  std::pair<float*,float*> in,    length_type i_size, stride_type s_in,
+  std::pair<float*,float*> in,    length_type /*i_size*/, stride_type s_in,
   std::pair<float*,float*> out,   length_type o_size, stride_type s_out,
   length_type decimation)
 {
Index: src/vsip/opt/cbe/cml/corr.hpp
===================================================================
--- src/vsip/opt/cbe/cml/corr.hpp	(revision 214460)
+++ src/vsip/opt/cbe/cml/corr.hpp	(working copy)
@@ -56,7 +56,7 @@
 void
 corr(
   float const* coeff, length_type c_size,
-  float const* in,    length_type i_size, stride_type s_in,
+  float const* in,    length_type /*i_size*/, stride_type s_in,
   float*       out,   length_type o_size, stride_type s_out,
   length_type decimation)
 {
@@ -67,7 +67,7 @@
 void
 corr(
   std::complex<float> const* coeff, length_type c_size,
-  std::complex<float> const* in,    length_type i_size, stride_type s_in,
+  std::complex<float> const* in,    length_type /*i_size*/, stride_type s_in,
   std::complex<float>*       out,   length_type o_size, stride_type s_out,
   length_type decimation)
 {
@@ -81,7 +81,7 @@
 void
 corr(
   std::pair<float*,float*> coeff, length_type c_size,
-  std::pair<float*,float*> in,    length_type i_size, stride_type s_in,
+  std::pair<float*,float*> in,    length_type /*i_size*/, stride_type s_in,
   std::pair<float*,float*> out,   length_type o_size, stride_type s_out,
   length_type decimation)
 {
Index: src/vsip_csl/stencil/boundary_factory.hpp
===================================================================
--- src/vsip_csl/stencil/boundary_factory.hpp	(revision 214460)
+++ src/vsip_csl/stencil/boundary_factory.hpp	(working copy)
@@ -241,7 +241,7 @@
        vsip::Domain<1>(b.size(2, 1) - k.size(1) + 1, 1, k.size(1) - 1));
   }
   // the destination subblock containing the mirror.
-  static vsip::Domain<2> dst_sub_domain(B const &b, K const &k)
+  static vsip::Domain<2> dst_sub_domain(B const& /*b*/, K const &k)
   {
     return vsip::Domain<2>(k.size(0) - 1, k.size(1) - 1);
   }
Index: tests/fft_be.cpp
===================================================================
--- tests/fft_be.cpp	(revision 214460)
+++ tests/fft_be.cpp	(working copy)
@@ -305,6 +305,12 @@
 template <typename F, return_mechanism_type R, typename O, unsigned int S>
 struct XFail<ipp, F, 2, R, std::complex<double>, O, S> { static bool const value = true;};
 
+// CBE doesn't support double FFTs
+template <typename F, return_mechanism_type R, typename O, unsigned int S>
+struct XFail<cbe, F, 1, R, double, O, S> { static bool const value = true;};
+template <typename F, return_mechanism_type R, typename O, unsigned int S>
+struct XFail<cbe, F, 1, R, complex<double>, O, S> { static bool const value = true;};
+
 bool has_errors = false;
 
 template <typename T, typename B, dimension_type D>
Index: tests/test-prod.hpp
===================================================================
--- tests/test-prod.hpp	(revision 214460)
+++ tests/test-prod.hpp	(working copy)
@@ -45,7 +45,8 @@
 check_prod(
   vsip::Matrix<T0, Block0> test,
   vsip::Matrix<T1, Block1> chk,
-  vsip::Matrix<T2, Block2> gauge)
+  vsip::Matrix<T2, Block2> gauge,
+  float                    threshold = 10.0)
 {
   typedef typename vsip::Promotion<T0, T1>::type return_type;
   typedef typename vsip::impl::Scalar_of<return_type>::type scalar_type;
@@ -56,14 +57,17 @@
 			    / gauge),
 			   idx);
 
-#if VERBOSE
-  std::cout << "test  =\n" << test;
-  std::cout << "chk   =\n" << chk;
-  std::cout << "gauge =\n" << gauge;
-  std::cout << "err = " << err << std::endl;
+#if 1 || VERBOSE
+  if (err >= threshold)
+  {
+    std::cout << "test  =\n" << test;
+    std::cout << "chk   =\n" << chk;
+    std::cout << "gauge =\n" << gauge;
+    std::cout << "err = " << err << std::endl;
+  }
 #endif
 
-  test_assert(err < 10.0);
+  test_assert(err < threshold);
 }
 
 
Index: tests/ref-impl/signal-fir.cpp
===================================================================
--- tests/ref-impl/signal-fir.cpp	(revision 214460)
+++ tests/ref-impl/signal-fir.cpp	(working copy)
@@ -65,7 +65,7 @@
     return false;
 
   for (vsip::index_type idx = 0; idx < vec.size (); ++idx)
-    if (answer.get (idx) != vec.get (idx))
+    if (!equal(answer.get (idx), vec.get (idx)))
       return false;
 
   return true;
Index: tests/regressions/par_transpose.cpp
===================================================================
--- tests/regressions/par_transpose.cpp	(revision 214460)
+++ tests/regressions/par_transpose.cpp	(working copy)
@@ -89,10 +89,10 @@
   for (row = 0; row < rows; row++)
     for (col = 0; col < cols; col++)
     {
-      test_assert(in1.get(row, col).real() == +(100*row + 1*col));
-      test_assert(in1.get(row, col).imag() == -(100*row + 1*col));
-      test_assert(in2.get(row, col).real() == +(1*row + 100*col));
-      test_assert(in2.get(row, col).imag() == -(1*row + 100*col));
+      test_assert(in1.get(row, col).real() == +(100.0*row + 1.0*col));
+      test_assert(in1.get(row, col).imag() == -(100.0*row + 1.0*col));
+      test_assert(in2.get(row, col).real() == +(1.0*row + 100.0*col));
+      test_assert(in2.get(row, col).imag() == -(1.0*row + 100.0*col));
 
       test_assert(tp1.get(col, row) == in1.get(row, col));
       test_assert(tp2.get(col, row) == 
Index: tests/matvec-prod.cpp
===================================================================
--- tests/matvec-prod.cpp	(revision 214460)
+++ tests/matvec-prod.cpp	(working copy)
@@ -33,7 +33,14 @@
 using namespace vsip;
 using namespace vsip_csl;
 
+// 080710: CML has larger precision differences for large matrices.
+#ifdef VSIP_IMPL_CBE_SDK
+#  define THRESHOLD 15.0
+#else
+#  define THRESHOLD 10.0
+#endif
 
+
 /***********************************************************************
   Test Definitions
 ***********************************************************************/
@@ -92,9 +99,9 @@
   cout << "b     =\n" << b;
 #endif
 
-  check_prod( res1, chk, gauge );
-  check_prod( res2, chk, gauge );
-  check_prod( res3, chk, gauge );
+  check_prod( res1, chk, gauge, THRESHOLD );
+  check_prod( res2, chk, gauge, THRESHOLD );
+  check_prod( res3, chk, gauge, THRESHOLD );
 }
 
 
@@ -140,7 +147,6 @@
   Precision_traits<float>::compute_eps();
   Precision_traits<double>::compute_eps();
 
-
   prod_cases_with_order<float,  float>();
 
   prod_cases_with_order<complex<float>, complex<float> >();