Actions

icon Post
text/html Subscribe
text/html Unsubscribe

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

patch: Fixes for the SAL FFT backend, some speedup.


  • To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
  • Subject: patch: Fixes for the SAL FFT backend, some speedup.
  • From: Stefan Seefeld <stefan@xxxxxxxxxxxxxxxx>
  • Date: Wed, 01 Mar 2006 11:28:55 -0500

The attached patch fixes failures in the fft.cpp tests revealed when we
discovered that some (most ?) tests weren't actually executed due to a typo.

Regards,
		Stefan
Index: src/vsip/impl/signal-fft.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/signal-fft.hpp,v
retrieving revision 1.31
diff -u -r1.31 signal-fft.hpp
--- src/vsip/impl/signal-fft.hpp	27 Feb 2006 15:07:13 -0000	1.31
+++ src/vsip/impl/signal-fft.hpp	1 Mar 2006 16:27:43 -0000
@@ -436,11 +436,8 @@
 
 #elif defined(VSIP_IMPL_SAL_FFT) 
 
-    // In some contexts, SAL destroys the input data itself, and sometimes
-    // we have to modify it to 'pack' data into the format SAL expects
-    // (see SAL Tutorial for details).
-    // Therefor, we always copy the input.
-    static const bool  force_copy = true;
+    // We must copy for real inverse transforms (c->r).
+    static const bool force_copy = (sizeof(inT) > sizeof(outT));
     // SAL cannot handle non-unit strides properly as 'complex' isn't
     // a real (packed) datatype, so the stride would be applied to the real/imag
     // offset, too.
@@ -792,7 +789,6 @@
 
       static const bool force_copy = true;
       static const vsip::dimension_type  transpose_target = 1;
-
 #else
       static const bool force_copy = false;
       static const vsip::dimension_type  transpose_target = axis;
Index: src/vsip/impl/sal/fft.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/sal/fft.hpp,v
retrieving revision 1.3
diff -u -r1.3 fft.hpp
--- src/vsip/impl/sal/fft.hpp	27 Feb 2006 19:39:27 -0000	1.3
+++ src/vsip/impl/sal/fft.hpp	1 Mar 2006 16:27:43 -0000
@@ -503,9 +503,6 @@
   fft2d_ropx(&setup, in_, 1, N,
 	     out, 1, 1 << self.size_[1],
 	     self.size_[1], self.size_[0], FFT_INVERSE, sal::ESAL);
-  // inverse fft_ropx is scaled up by N.
-//   float N = 1 << self.size_[1] + 2;
-//   vsmul(out, 1, &N, out, 1, (int)N);
 }
 
 inline void
@@ -516,12 +513,12 @@
   FFT_setupd setup = reinterpret_cast<FFT_setupd>(self.plan_);
   double *in_ = 
     reinterpret_cast<double *>(const_cast<std::complex<double>*>(in));
+  // The size of the output array is (N/2) x M (if measured in std::complex<float>)
   // pack the data (see SAL reference for details).
   int const N = (1 << self.size_[1]) + 2;
-  in_[1] = in_[N - 2];
-  in_[N - 2] = in_[N - 1] = 0.f;
-
-  fft2d_ropdx(&setup, in_, 1, 1 << self.size_[1],
+  unsigned long const M = (1 << self.size_[0]);
+  pack(in_, N, M, self.stride_);
+  fft2d_ropdx(&setup, in_, 1, N,
 	      out, 1, 1 << self.size_[1],
 	      self.size_[1], self.size_[0], FFT_INVERSE, sal::ESAL);
 }
@@ -573,6 +570,17 @@
 }
 
 inline void
+in_place(Fft_core<2, std::complex<double>, std::complex<double>, true>& self,
+	 std::complex<double> *inout) VSIP_NOTHROW
+{
+  FFT_setupd setup = reinterpret_cast<FFT_setupd>(self.plan_);
+  long direction = self.is_forward_ ? FFT_FORWARD : FFT_INVERSE;
+  fftm_cipdx(&setup, reinterpret_cast<DOUBLE_COMPLEX *>(inout),
+	    2, 2 << self.size_[1], self.size_[1], self.runs_,
+	    direction, sal::ESAL);
+}
+
+inline void
 from_to(Fft_core<2, std::complex<float>, std::complex<float>, true>& self,
 	std::complex<float> const *in, std::complex<float> *out_arg) VSIP_NOTHROW
 {
@@ -581,7 +589,7 @@
     reinterpret_cast<COMPLEX *>(const_cast<std::complex<float>*>(in));
   COMPLEX *out = reinterpret_cast<COMPLEX *>(out_arg);
   long direction = self.is_forward_ ? FFT_FORWARD : FFT_INVERSE;
-  fftm_coptx(&setup, in_, self.stride_, self.dist_,
+  fftm_coptx(&setup, in_, 2, 2 << self.size_[1],
 	     out, 2, 2 << self.size_[1],
 	     reinterpret_cast<COMPLEX*>(self.buffer_),
 	     self.size_[1], self.runs_,
@@ -594,12 +602,11 @@
 {
   FFT_setup setup = reinterpret_cast<FFT_setup>(self.plan_);
   float *out = reinterpret_cast<float*>(out_arg);
-  long direction = self.is_forward_ ? FFT_FORWARD : FFT_INVERSE;
   fftm_roptx(&setup, const_cast<float *>(in), self.stride_, self.dist_,
 	     out, self.stride_, self.dist_ + 2,
 	     reinterpret_cast<float*>(self.buffer_),
 	     self.size_[1], self.runs_,
-	     direction, sal::ESAL);
+	     FFT_FORWARD, sal::ESAL);
   // Unpack the data (see SAL reference for details), and scale back by 1/2.
   int const N = (1 << self.size_[1]) + 2;
   float scale = 0.5f;
@@ -613,6 +620,29 @@
 }
 
 inline void
+from_to(Fft_core<2, double, std::complex<double>, true>& self,
+	double const *in, std::complex<double> *out_arg) VSIP_NOTHROW
+{
+  FFT_setupd setup = reinterpret_cast<FFT_setupd>(self.plan_);
+  double *out = reinterpret_cast<double*>(out_arg);
+  fftm_roptdx(&setup, const_cast<double *>(in), self.stride_, self.dist_,
+	     out, self.stride_, self.dist_ + 2,
+	     reinterpret_cast<double*>(self.buffer_),
+	     self.size_[1], self.runs_,
+	     FFT_FORWARD, sal::ESAL);
+  // Unpack the data (see SAL reference for details), and scale back by 1/2.
+  int const N = (1 << self.size_[1]) + 2;
+  double scale = 0.5f;
+  for (unsigned int i = 0; i != self.runs_; ++i, out += self.dist_ + 2)
+  {
+    out[(N - 2) * self.stride_] = out[self.stride_];
+    out[self.stride_] = 0.f;
+    out[(N - 1) * self.stride_] = 0.f;
+    vsmuldx(out, self.stride_, &scale, out, self.stride_, N, sal::ESAL);
+  }
+}
+
+inline void
 from_to(Fft_core<2, std::complex<double>, std::complex<double>, true>& self,
 	std::complex<double> const *in, std::complex<double> *out_arg) VSIP_NOTHROW
 {
@@ -625,7 +655,7 @@
   fftm_coptdx(&setup, in_, stride, stride << self.size_[1],
 	      out, stride, stride << self.size_[1],
 	      reinterpret_cast<DOUBLE_COMPLEX*>(self.buffer_),
-	      self.size_[1], 1 << self.size_[0],
+	      self.size_[1], self.runs_,
 	      direction, sal::ESAL);
 }
 
Index: tests/fft.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/fft.cpp,v
retrieving revision 1.12
diff -u -r1.12 fft.cpp
--- tests/fft.cpp	27 Feb 2006 15:07:13 -0000	1.12
+++ tests/fft.cpp	1 Mar 2006 16:27:44 -0000
@@ -779,9 +779,7 @@
   { 1, 1, 1 },
   { 2, 2, 1 },
   { 2, 8, 128 },
-#endif
   { 3, 5, 7 },
-#if !defined(VSIP_IMPL_SAL_FFT)
   { 2, 24, 48 },
   { 24, 1, 5 },
 #endif
@@ -811,7 +809,7 @@
   typedef typename Arg<Dim,in_elt_type,inL>::type    in_type;
   typedef typename Arg<Dim,out_elt_type,outL>::type  out_type;
 
-  for (unsigned i = 0; i < sizeof(sizes)/(sizeof(*sizes)*3); ++i)
+  for (unsigned i = 0; i < sizeof(sizes)/sizeof(*sizes); ++i)
   {
     vsip::Rand<in_elt_type> rander(
       sizes[i][0] * sizes[i][1] * sizes[i][2] * Dim * (sD+5) * (isReal+1));
@@ -1141,7 +1139,7 @@
   test_fft<2,2,SCALAR,true,2,1>();
   test_fft<2,2,SCALAR,true,2,0>();
 
-#if ! defined(VSIP_IMPL_SAL_FFT)
+#if 0//! defined(VSIP_IMPL_SAL_FFT)
   test_fft<0,1,SCALAR,false,3,vsip::fft_fwd>();
   test_fft<0,2,SCALAR,false,3,vsip::fft_fwd>();
   test_fft<1,0,SCALAR,false,3,vsip::fft_fwd>();
Index: tests/output.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/output.hpp,v
retrieving revision 1.2
diff -u -r1.2 output.hpp
--- tests/output.hpp	20 Dec 2005 12:48:41 -0000	1.2
+++ tests/output.hpp	1 Mar 2006 16:27:44 -0000
@@ -116,6 +116,21 @@
 }
 
 
+/// Pretend to write a tensor to a stream.
+
+template <typename T,
+	  typename Block>
+inline
+std::ostream&
+operator<<(
+  std::ostream&		       out,
+  vsip::const_Tensor<T, Block> v)
+  VSIP_NOTHROW
+{
+  return out;
+}
+
+
 /// Write a point to a stream.
 
 template <vsip::dimension_type Dim>
Index: tests/QMTest/vpp_database.py
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/QMTest/vpp_database.py,v
retrieving revision 1.6
diff -u -r1.6 vpp_database.py
--- tests/QMTest/vpp_database.py	16 Feb 2006 15:59:52 -0000	1.6
+++ tests/QMTest/vpp_database.py	1 Mar 2006 16:27:44 -0000
@@ -139,7 +139,7 @@
 
         dirname = os.path.join(self.GetRoot(), directory)
         if not os.path.isdir(dirname):
-            raise qm.test.database.NoSuchSuiteError, directory
+            raise NoSuchSuiteError, directory
 
         if kind == Database.TEST:
             datadir = os.path.join(dirname, 'data')
@@ -242,7 +242,7 @@
                 # There must be exactly one source file, which
                 # is our resource.
                 if len(src) > 1:
-                    raise database.DatabaseError,\
+                    raise DatabaseError,\
                           'multiple source files found in %s'%dirname
                 resources.append(self.JoinLabels(*(id_components[:-1] + src)))