Actions

icon Post
text/html Subscribe
text/html Unsubscribe

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[patch] Some enhancements to the FFT code.


  • To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
  • Subject: [patch] Some enhancements to the FFT code.
  • From: Stefan Seefeld <stefan@xxxxxxxxxxxxxxxx>
  • Date: Fri, 24 Feb 2006 17:26:58 -0500

Please find attached two patches (folded into one):

The SAL FFT backend uses a number of different
functions that take temporary workspace, to avoid
the input array to be clobbered. This is a first
step to allow the frontend not to copy data in
some (the most frequent ?) cases.

The second part of the patch is the addition of
tests to tests/fft.cpp that specifically targets
the Complex_split_fmt. It passes with builtin fft
(i.e. FFTW), and I'm planning to add support for
split format to the SAR backend in the coming
days.

Regards,
		Stefan
Index: src/vsip/impl/signal-fft.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/signal-fft.hpp,v
retrieving revision 1.30
diff -u -r1.30 signal-fft.hpp
--- src/vsip/impl/signal-fft.hpp	17 Feb 2006 20:23:44 -0000	1.30
+++ src/vsip/impl/signal-fft.hpp	24 Feb 2006 22:24:26 -0000
@@ -92,6 +92,7 @@
 
   int  stride_; // 1 for sd_ == 0, length of row for sd_ == 1.
   int  dist_;   // 1 for sd_ == 1, length of column for sd_ == 0.
+  void *buffer_;
   // used only for Fftm
   int  sd_;     // 0: compute FFTs of rows; 1: of columns
   int  runs_;   // number of 1D FFTs to perform; varies by map
Index: src/vsip/impl/sal/fft.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/src/vsip/impl/sal/fft.hpp,v
retrieving revision 1.1
diff -u -r1.1 fft.hpp
--- src/vsip/impl/sal/fft.hpp	17 Feb 2006 20:23:44 -0000	1.1
+++ src/vsip/impl/sal/fft.hpp	24 Feb 2006 22:24:26 -0000
@@ -193,6 +193,7 @@
   VSIP_THROW((std::bad_alloc))
 {
   self.is_forward_ = (expn == -1);
+  self.buffer_ = new Complex_of<inT>::type[dom.size()];
   unsigned long max = sal::log2n<D>::translate(dom, sd, self.size_);
   sal::fft_planner<D, inT, outT>::create(self.plan_, max);
 }
@@ -202,6 +203,7 @@
 destroy(Fft_core<D, inT, outT, doFftm>& self) VSIP_THROW((std::bad_alloc))
 {
   sal::fft_planner<D, inT, outT>::destroy(self.plan_);
+  delete [] self.buffer_;
 }
 
 inline void
@@ -266,8 +268,9 @@
 {
   FFT_setup setup = reinterpret_cast<FFT_setup>(self.plan_);
   float *out = reinterpret_cast<float*>(out_arg);
-  fft_ropx(&setup, const_cast<float*>(in), 1, out, 1,
-	   self.size_[0], FFT_FORWARD, sal::ESAL);
+  fft_roptx(&setup, const_cast<float*>(in), 1, out, 1,
+	    reinterpret_cast<float*>(self.buffer_),
+	    self.size_[0], FFT_FORWARD, sal::ESAL);
   // unpack the data (see SAL reference for details).
   int const N = (1 << self.size_[0]) + 2;
   out[N - 2] = out[1];
@@ -306,8 +309,9 @@
 {
   FFT_setupd setup = reinterpret_cast<FFT_setupd>(self.plan_);
   double *out = reinterpret_cast<double*>(out_arg);
-  fft_ropdx(&setup, const_cast<double*>(in), 1, out, 1,
-	    self.size_[0], FFT_FORWARD, sal::ESAL);
+  fft_ropdtx(&setup, const_cast<double*>(in), 1, out, 1,
+	     reinterpret_cast<double*>(self.buffer_),
+	     self.size_[0], FFT_FORWARD, sal::ESAL);
   // unpack the data (see SAL reference for details).
   int const N = (1 << self.size_[0]) + 2;
   out[N - 2] = out[1];
@@ -346,8 +350,9 @@
   COMPLEX *out = reinterpret_cast<COMPLEX *>(out_arg);
   long stride = 2;
   long direction = self.is_forward_ ? FFT_FORWARD : FFT_INVERSE;
-  fft_copx(&setup, in_, stride, out, stride, self.size_[0],
-	   direction, sal::ESAL);
+  fft_coptx(&setup, in_, stride, out, stride, 
+	    reinterpret_cast<COMPLEX*>(self.buffer_),
+	    self.size_[0], direction, sal::ESAL);
 }
 
 inline void
@@ -360,8 +365,9 @@
   DOUBLE_COMPLEX *out = reinterpret_cast<DOUBLE_COMPLEX *>(out_arg);
   long stride = 2;
   long direction = self.is_forward_ ? FFT_FORWARD : FFT_INVERSE;
-  fft_copdx(&setup, in_, stride, out, stride, self.size_[0],
-	    direction, sal::ESAL);
+  fft_copdtx(&setup, in_, stride, out, stride,
+	     reinterpret_cast<DOUBLE_COMPLEX*>(self.buffer_),
+	     self.size_[0], direction, sal::ESAL);
 }
 
 // 2D real -> complex forward fft
@@ -418,9 +424,10 @@
   // The size of the output array is (N/2) x M (if measured in std::complex<float>)
   unsigned long const N = (1 << self.size_[1]) + 2;
   unsigned long const M = (1 << self.size_[0]);
-  fft2d_ropx(&setup, const_cast<float*>(in), self.stride_, self.dist_,
-	     out, self.stride_, N,
-	     self.size_[1], self.size_[0], FFT_FORWARD, sal::ESAL);
+  fft2d_roptx(&setup, const_cast<float*>(in), self.stride_, self.dist_,
+	      out, self.stride_, N,
+	      reinterpret_cast<float*>(self.buffer_),
+	      self.size_[1], self.size_[0], FFT_FORWARD, sal::ESAL);
 
   // unpack the data (see SAL reference, figure 3.6, for details).
   unpack(out, N, M, self.stride_);
@@ -440,9 +447,10 @@
   // The size of the output array is (N/2) x M (if measured in std::complex<float>)
   unsigned long const N = (1 << self.size_[1]) + 2;
   unsigned long const M = (1 << self.size_[0]);
-  fft2d_ropdx(&setup, const_cast<double*>(in), self.stride_, self.dist_,
-	      out, self.stride_, N,
-	      self.size_[1], self.size_[0], FFT_FORWARD, sal::ESAL);
+  fft2d_ropdtx(&setup, const_cast<double*>(in), self.stride_, self.dist_,
+	       out, self.stride_, N,
+	       reinterpret_cast<double*>(self.buffer_),
+	       self.size_[1], self.size_[0], FFT_FORWARD, sal::ESAL);
 
   // unpack the data (see SAL reference, figure 3.6, for details).
   unpack(out, N, M, self.stride_);
@@ -527,10 +535,11 @@
   COMPLEX *out = reinterpret_cast<COMPLEX *>(out_arg);
   long stride = 2;
   long direction = self.is_forward_ ? FFT_FORWARD : FFT_INVERSE;
-  fft2d_copx(&setup, in_, stride, 2 << self.size_[1],
-	     out, stride, 2 << self.size_[1],
-	     self.size_[1], self.size_[0],
-	     direction, sal::ESAL);
+  fft2d_coptx(&setup, in_, stride, 2 << self.size_[1],
+	      out, stride, 2 << self.size_[1],
+	      reinterpret_cast<COMPLEX*>(self.buffer_),
+	      self.size_[1], self.size_[0],
+	      direction, sal::ESAL);
 }
 
 inline void
@@ -545,6 +554,7 @@
   long direction = self.is_forward_ ? FFT_FORWARD : FFT_INVERSE;
   fft2d_copdx(&setup, in_, stride, stride << self.size_[1],
 	      out, stride, stride << self.size_[1],
+	      reinterpret_cast<LONG_COMPLEX*>(self.buffer_),
 	      self.size_[1], self.size_[0],
 	      direction, sal::ESAL);
 }
@@ -571,9 +581,11 @@
     reinterpret_cast<COMPLEX *>(const_cast<std::complex<float>*>(in));
   COMPLEX *out = reinterpret_cast<COMPLEX *>(out_arg);
   long direction = self.is_forward_ ? FFT_FORWARD : FFT_INVERSE;
-  fftm_copx(&setup, in_, self.stride_, self.dist_,
-	    out, 2, 2 << self.size_[1], self.size_[1], self.runs_,
-	    direction, sal::ESAL);
+  fftm_coptx(&setup, in_, self.stride_, self.dist_,
+	     out, 2, 2 << self.size_[1],
+	     reinterpret_cast<COMPLEX*>(self.buffer_),
+	     self.size_[1], self.runs_,
+	     direction, sal::ESAL);
 }
 
 inline void
@@ -583,10 +595,11 @@
   FFT_setup setup = reinterpret_cast<FFT_setup>(self.plan_);
   float *out = reinterpret_cast<float*>(out_arg);
   long direction = self.is_forward_ ? FFT_FORWARD : FFT_INVERSE;
-  fftm_ropx(&setup, const_cast<float *>(in), self.stride_, self.dist_,
- 	    out, self.stride_, self.dist_ + 2,
- 	    self.size_[1], self.runs_,
- 	    direction, sal::ESAL);
+  fftm_roptx(&setup, const_cast<float *>(in), self.stride_, self.dist_,
+	     out, self.stride_, self.dist_ + 2,
+	     reinterpret_cast<float*>(self.buffer_),
+	     self.size_[1], self.runs_,
+	     direction, sal::ESAL);
   // Unpack the data (see SAL reference for details), and scale back by 1/2.
   int const N = (1 << self.size_[1]) + 2;
   float scale = 0.5f;
@@ -609,10 +622,11 @@
   DOUBLE_COMPLEX *out = reinterpret_cast<DOUBLE_COMPLEX *>(out_arg);
   long stride = 2;
   long direction = self.is_forward_ ? FFT_FORWARD : FFT_INVERSE;
-  fftm_copdx(&setup, in_, stride, stride << self.size_[1],
-	     out, stride, stride << self.size_[1],
-	     self.size_[1], 1 << self.size_[0],
-	     direction, sal::ESAL);
+  fftm_copdtx(&setup, in_, stride, stride << self.size_[1],
+	      out, stride, stride << self.size_[1],
+	      reinterpret_cast<DOUBLE_COMPLEX*>(self.buffer_),
+	      self.size_[1], 1 << self.size_[0],
+	      direction, sal::ESAL);
 }
 
 } // namespace vsip::impl
Index: tests/fft.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/fft.cpp,v
retrieving revision 1.11
diff -u -r1.11 fft.cpp
--- tests/fft.cpp	17 Feb 2006 20:23:44 -0000	1.11
+++ tests/fft.cpp	24 Feb 2006 22:24:27 -0000
@@ -219,15 +219,16 @@
 
 
 
-/// Test by-reference Fft (out-of-place and in-place).
+/// Test complex by-reference Fft (out-of-place and in-place).
 
-template <typename T>
+template <typename T, typename Complex_format>
 void
-test_by_ref(int set, length_type N)
+test_complex_by_ref(int set, length_type N)
 {
-  typedef Fft<const_Vector, T, T, fft_fwd, by_reference, 1, alg_space>
+  typedef std::complex<T> CT;
+  typedef Fft<const_Vector, CT, CT, fft_fwd, by_reference, 1, alg_space>
 	f_fft_type;
-  typedef Fft<const_Vector, T, T, fft_inv, by_reference, 1, alg_space>
+  typedef Fft<const_Vector, CT, CT, fft_inv, by_reference, 1, alg_space>
 	i_fft_type;
 
   f_fft_type f_fft(Domain<1>(N), 1.0);
@@ -239,10 +240,14 @@
   test_assert(i_fft.input_size().size() == N);
   test_assert(i_fft.output_size().size() == N);
 
-  Vector<T> in(N, T());
-  Vector<T> out(N);
-  Vector<T> ref(N);
-  Vector<T> inv(N);
+  typedef impl::Fast_block<1, CT,
+    impl::Layout<1, row1_type, impl::Stride_unit_dense, Complex_format> >
+    block_type;
+
+  Vector<CT, block_type> in(N, CT());
+  Vector<CT, block_type> out(N);
+  Vector<CT, block_type> ref(N);
+  Vector<CT, block_type> inv(N);
 
   setup_data(set, in);
 
@@ -262,15 +267,16 @@
 
 
 
-/// Test by-value Fft.
+/// Test complex by-value Fft.
 
-template <typename T>
+template <typename T, typename Complex_format>
 void
-test_by_val(int set, length_type N)
+test_complex_by_val(int set, length_type N)
 {
-  typedef Fft<const_Vector, T, T, fft_fwd, by_value, 1, alg_space>
+  typedef std::complex<T> CT;
+  typedef Fft<const_Vector, CT, CT, fft_fwd, by_value, 1, alg_space>
 	f_fft_type;
-  typedef Fft<const_Vector, T, T, fft_inv, by_value, 1, alg_space>
+  typedef Fft<const_Vector, CT, CT, fft_inv, by_value, 1, alg_space>
 	i_fft_type;
 
   f_fft_type f_fft(Domain<1>(N), 1.0);
@@ -282,10 +288,14 @@
   test_assert(i_fft.input_size().size() == N);
   test_assert(i_fft.output_size().size() == N);
 
-  Vector<T> in(N, T());
-  Vector<T> out(N);
-  Vector<T> ref(N);
-  Vector<T> inv(N);
+  typedef impl::Fast_block<1, CT,
+    impl::Layout<1, row1_type, impl::Stride_unit_dense, Complex_format> >
+    block_type;
+
+  Vector<CT, block_type> in(N, CT());
+  Vector<CT, block_type> out(N);
+  Vector<CT, block_type> ref(N);
+  Vector<CT, block_type> inv(N);
 
   setup_data(set, in);
 
@@ -347,7 +357,6 @@
 
   ref = out;
   inv = i_fft(out);
-
   test_assert(error_db(inv, in) < -100);
 
   // make sure out has not been scribbled in during the conversion.
@@ -929,19 +938,27 @@
 //
 #if defined(VSIP_IMPL_FFT_USE_FLOAT)
 
-  test_by_ref<complex<float> >(2, 64);
+  test_complex_by_ref<float, impl::Cmplx_inter_fmt>(2, 64);
+  test_complex_by_ref<float, impl::Cmplx_split_fmt>(2, 64);
 #if !defined(VSIP_IMPL_SAL_FFT)
-  test_by_ref<complex<float> >(1, 68);
+  test_complex_by_ref<float, impl::Cmplx_inter_fmt>(1, 68);
+  test_complex_by_ref<float, impl::Cmplx_split_fmt>(1, 68);
 #endif
-  test_by_ref<complex<float> >(2, 256);
+  test_complex_by_ref<float, impl::Cmplx_inter_fmt>(2, 256);
+  test_complex_by_ref<float, impl::Cmplx_split_fmt>(2, 256);
 #if !defined(VSIP_IMPL_SAL_FFT)
-  test_by_ref<complex<float> >(2, 252);
-  test_by_ref<complex<float> >(3, 17);
-#endif
-
-  test_by_val<complex<float> >(1, 128);
-  test_by_val<complex<float> >(2, 256);
-  test_by_val<complex<float> >(3, 512);
+  test_complex_by_ref<float, impl::Cmplx_inter_fmt>(2, 252);
+  test_complex_by_ref<float, impl::Cmplx_split_fmt>(2, 252);
+  test_complex_by_ref<float, impl::Cmplx_inter_fmt>(3, 17);
+  test_complex_by_ref<float, impl::Cmplx_split_fmt>(3, 17);
+#endif
+
+  test_complex_by_val<float, impl::Cmplx_inter_fmt>(1, 128);
+  test_complex_by_val<float, impl::Cmplx_split_fmt>(1, 128);
+  test_complex_by_val<float, impl::Cmplx_inter_fmt>(2, 256);
+  test_complex_by_val<float, impl::Cmplx_split_fmt>(2, 256);
+  test_complex_by_val<float, impl::Cmplx_inter_fmt>(3, 512);
+  test_complex_by_val<float, impl::Cmplx_split_fmt>(3, 512);
 
   test_real<float>(1, 128);
 #if !defined(VSIP_IMPL_SAL_FFT)
@@ -953,19 +970,27 @@
 
 #if defined(VSIP_IMPL_FFT_USE_DOUBLE)
 
-  test_by_ref<complex<double> >(2, 64);
+  test_complex_by_ref<double, impl::Cmplx_inter_fmt>(2, 64);
+  test_complex_by_ref<double, impl::Cmplx_split_fmt>(2, 64);
 #if !defined(VSIP_IMPL_SAL_FFT)
-  test_by_ref<complex<double> >(1, 68);
+  test_complex_by_ref<double, impl::Cmplx_inter_fmt>(1, 68);
+  test_complex_by_ref<double, impl::Cmplx_split_fmt>(1, 68);
 #endif
-  test_by_ref<complex<double> >(2, 256);
+  test_complex_by_ref<double, impl::Cmplx_inter_fmt>(2, 256);
+  test_complex_by_ref<double, impl::Cmplx_split_fmt>(2, 256);
 #if !defined(VSIP_IMPL_SAL_FFT)
-  test_by_ref<complex<double> >(2, 252);
-  test_by_ref<complex<double> >(3, 17);
-#endif
-
-  test_by_val<complex<double> >(1, 128);
-  test_by_val<complex<double> >(2, 256);
-  test_by_val<complex<double> >(3, 512);
+  test_complex_by_ref<double, impl::Cmplx_inter_fmt>(2, 252);
+  test_complex_by_ref<double, impl::Cmplx_split_fmt>(2, 252);
+  test_complex_by_ref<double, impl::Cmplx_inter_fmt>(3, 17);
+  test_complex_by_ref<double, impl::Cmplx_split_fmt>(3, 17);
+#endif
+
+  test_complex_by_val<double, impl::Cmplx_inter_fmt>(1, 128);
+  test_complex_by_val<double, impl::Cmplx_split_fmt>(1, 128);
+  test_complex_by_val<double, impl::Cmplx_inter_fmt>(2, 256);
+  test_complex_by_val<double, impl::Cmplx_split_fmt>(2, 256);
+  test_complex_by_val<double, impl::Cmplx_inter_fmt>(3, 512);
+  test_complex_by_val<double, impl::Cmplx_split_fmt>(3, 512);
 
   test_real<double>(1, 128);
 #if !defined(VSIP_IMPL_SAL_FFT)
@@ -979,18 +1004,26 @@
 
 #if ! defined(VSIP_IMPL_IPP_FFT)
 #if !defined(VSIP_IMPL_SAL_FFT)
-  test_by_ref<complex<long double> >(2, 64);
+  test_complex_by_ref<long double, impl::Cmplx_inter_fmt>(2, 64);
+  test_complex_by_ref<long double, impl::Cmplx_split_fmt>(2, 64);
 #endif 
-  test_by_ref<complex<long double> >(1, 68);
-  test_by_ref<complex<long double> >(2, 256);
-#if !defined(VSIP_IMPL_SAL_FFT)
-  test_by_ref<complex<long double> >(2, 252);
-  test_by_ref<complex<long double> >(3, 17);
+  test_complex_by_ref<long double, impl::Cmplx_inter_fmt>(1, 68);
+  test_complex_by_ref<long double, impl::Cmplx_split_fmt>(1, 68);
+  test_complex_by_ref<long double, impl::Cmplx_inter_fmt>(2, 256);
+  test_complex_by_ref<long double, impl::Cmplx_split_fmt>(2, 256);
+#if !defined(VSIP_IMPL_SAL_FFT)
+  test_complex_by_ref<long double, impl::Cmplx_inter_fmt>(2, 252);
+  test_complex_by_ref<long double, impl::Cmplx_split_fmt>(2, 252);
+  test_complex_by_ref<long double, impl::Cmplx_inter_fmt>(3, 17);
+  test_complex_by_ref<long double, impl::Cmplx_split_fmt>(3, 17);
 #endif 
 
-  test_by_val<complex<long double> >(1, 128);
-  test_by_val<complex<long double> >(2, 256);
-  test_by_val<complex<long double> >(3, 512);
+  test_complex_by_val<long double, impl::Cmplx_inter_fmt>(1, 128);
+  test_complex_by_val<long double, impl::Cmplx_split_fmt>(1, 128);
+  test_complex_by_val<long double, impl::Cmplx_inter_fmt>(2, 256);
+  test_complex_by_val<long double, impl::Cmplx_split_fmt>(2, 256);
+  test_complex_by_val<long double, impl::Cmplx_inter_fmt>(3, 512);
+  test_complex_by_val<long double, impl::Cmplx_split_fmt>(3, 512);
 
   test_real<long double>(1, 128);
 #if !defined(VSIP_IMPL_SAL_FFT)