Actions

icon Post
text/html Subscribe
text/html Unsubscribe

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [vsipl++] patch: fix merge conflicts


  • To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
  • Subject: Re: [vsipl++] patch: fix merge conflicts
  • From: Stefan Seefeld <stefan@xxxxxxxxxxxxxxxx>
  • Date: Tue, 12 Jun 2007 17:20:48 -0400

Jules Bergmann wrote:
> 
>>
>> Indeed. Should I add my suggested change above to the patch before
>> checking
>> it in ?
> 
> Yes, that sounds good.  I suspect we'll have to do something different
> if people ever start using multi-dim FFTs, but for now let's avoid the
> copy.  -- Jules

Here is a new patch, incorporating the changes we discussed. 1D FFT as
well as FFTM now use / require aligned blocks if the block size is a multiple
of the alignment size (and thus individual row operations can be vectorized).

(Since the patch is slightly more involved than I originally assumed, I'd
 prefer another round of review.)

Thanks,
		Stefan

-- 
Stefan Seefeld
CodeSourcery
stefan@xxxxxxxxxxxxxxxx
(650) 331-3385 x718
Index: src/vsip/opt/fftw3/fft_impl.cpp
===================================================================
--- src/vsip/opt/fftw3/fft_impl.cpp	(revision 173873)
+++ src/vsip/opt/fftw3/fft_impl.cpp	(working copy)
@@ -38,11 +38,13 @@
 template <dimension_type D>
 struct Fft_base<D, std::complex<SCALAR_TYPE>, std::complex<SCALAR_TYPE> >
 {
-  Fft_base(Domain<D> const& dom, int exp, int flags)
+  Fft_base(Domain<D> const& dom, int exp, int flags, bool aligned = false)
     VSIP_THROW((std::bad_alloc))
       : in_buffer_(dom.size()),
-	out_buffer_(dom.size())
+	out_buffer_(dom.size()),
+        aligned_(aligned)
   {
+    if (!aligned) flags |= FFTW_UNALIGNED;
     // For multi-dimensional transforms, these plans assume both
     // input and output data is dense, row-major, interleave-complex
     // format.
@@ -76,15 +78,17 @@
   FFTW(plan) plan_in_place_;
   FFTW(plan) plan_by_reference_;
   int size_[D];
+  bool aligned_;
 };
 
 template <vsip::dimension_type D>
 struct Fft_base<D, SCALAR_TYPE, std::complex<SCALAR_TYPE> >
 {
-  Fft_base(Domain<D> const& dom, int A, int flags)
+  Fft_base(Domain<D> const& dom, int A, int flags, bool aligned = false)
     VSIP_THROW((std::bad_alloc))
     : in_buffer_(32, dom.size()),
-      out_buffer_(dom.size())
+      out_buffer_(dom.size()),
+      aligned_(aligned)
   { 
     for (vsip::dimension_type i = 0; i < D; ++i) size_[i] = dom[i].size();  
     // FFTW3 assumes A == D - 1.
@@ -104,15 +108,17 @@
   Cmplx_buffer<dense_complex_type, SCALAR_TYPE> out_buffer_;
   FFTW(plan) plan_by_reference_;
   int size_[D];
+  bool aligned_;
 };
 
 template <vsip::dimension_type D>
 struct Fft_base<D, std::complex<SCALAR_TYPE>, SCALAR_TYPE>
 {
-  Fft_base(Domain<D> const& dom, int A, int flags)
+  Fft_base(Domain<D> const& dom, int A, int flags, bool aligned = false)
     VSIP_THROW((std::bad_alloc))
     : in_buffer_(dom.size()),
-      out_buffer_(32, dom.size())
+      out_buffer_(32, dom.size()),
+      aligned_(aligned)
   {
     for (vsip::dimension_type i = 0; i < D; ++i) size_[i] = dom[i].size();
     // FFTW3 assumes A == D - 1.
@@ -133,6 +139,7 @@
   aligned_array<SCALAR_TYPE>              out_buffer_;
   FFTW(plan) plan_by_reference_;
   int size_[D];
+  bool aligned_;
 };
 
 // 1D complex -> complex FFT
@@ -150,14 +157,13 @@
 
 public:
   Fft_impl(Domain<1> const &dom, unsigned number)
-    : Fft_base<1, ctype, ctype>(dom, E, convert_NoT(number))
+    : Fft_base<1, ctype, ctype>(dom, E, convert_NoT(number),
+                                !(dom.length() % VSIP_IMPL_ALLOC_ALIGNMENT))
   {}
   virtual char const* name() { return "fft-fftw3-1D-complex"; }
-<<<<<<< .mine
   virtual void query_layout(Rt_layout<1> &rtl_inout)
   {
-    // By default use unit_stride, tuple<0, 1, 2>
-    rtl_inout.pack = stride_unit_dense;
+    rtl_inout.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
     rtl_inout.align = VSIP_IMPL_ALLOC_ALIGNMENT;
     rtl_inout.order = tuple<0, 1, 2>();
     // make default based on library
@@ -165,14 +171,12 @@
   }
   virtual void query_layout(Rt_layout<1> &rtl_in, Rt_layout<1> &rtl_out)
   {
-    // By default use unit_stride, tuple<0, 1, 2>
-    rtl_in.pack = rtl_out.pack = stride_unit_dense;
-    rtl_inout.align = VSIP_IMPL_ALLOC_ALIGNMENT;
-    rtl_in.order = rtl_out.order = tuple<0, 1, 2>();
-    // make default based on library
-    rtl_in.complex = rtl_out.complex = Create_plan<dense_complex_type>::format;
+    rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
+    rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
+    rtl_in.order = tuple<0, 1, 2>();
+    rtl_in.complex = Create_plan<dense_complex_type>::format;
+    rtl_out = rtl_in;
   }
-
   virtual void in_place(ctype *inout, stride_type s, length_type l)
   {
     assert(s == 1 && static_cast<int>(l) == this->size_[0]);
@@ -222,15 +226,16 @@
 
 public:
   Fft_impl(Domain<1> const &dom, unsigned number)
-    : Fft_base<1, rtype, ctype>(dom, A, convert_NoT(number))
+    : Fft_base<1, rtype, ctype>(dom, A, convert_NoT(number),
+                                !(dom.length() % VSIP_IMPL_ALLOC_ALIGNMENT))
   {}
   virtual char const* name() { return "fft-fftw3-1D-real-forward"; }
   virtual void query_layout(Rt_layout<1> &rtl_in, Rt_layout<1> &rtl_out)
   {
-    rtl_in.pack = stride_unit_align;
+    rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
     rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
     rtl_in.order = tuple<0, 1, 2>();
-    rtl_in.complex = cmplx_inter_fmt;
+    rtl_in.complex = Create_plan<dense_complex_type>::format;
     rtl_out = rtl_in;
   }
   virtual void by_reference(rtype *in, stride_type,
@@ -247,23 +252,6 @@
     FFTW(execute_split_dft_r2c)(plan_by_reference_, 
 			  in, out.first, out.second);
   }
-  virtual void query_layout(Rt_layout<1> &rtl_inout)
-  {
-    // By default use unit_stride, tuple<0, 1, 2>
-    rtl_inout.pack = stride_unit_dense;
-    rtl_inout.order = tuple<0, 1, 2>();
-    // make default based on library
-    rtl_inout.complex = Create_plan<dense_complex_type>::format;
-  }
-  virtual void query_layout(Rt_layout<1> &rtl_in, Rt_layout<1> &rtl_out)
-  {
-    // By default use unit_stride, tuple<0, 1, 2>
-    rtl_in.pack = rtl_out.pack = stride_unit_dense;
-    rtl_in.order = rtl_out.order = tuple<0, 1, 2>();
-    // make default based on library
-    rtl_in.complex = rtl_out.complex = Create_plan<dense_complex_type>::format;
-  }
-
 };
 
 // 1D complex -> real FFT
@@ -279,16 +267,17 @@
 
 public:
   Fft_impl(Domain<1> const &dom, unsigned number)
-    : Fft_base<1, ctype, rtype>(dom, A, convert_NoT(number))
+    : Fft_base<1, ctype, rtype>(dom, A, convert_NoT(number),
+                                !(dom.length() % VSIP_IMPL_ALLOC_ALIGNMENT))
   {}
 
   virtual char const* name() { return "fft-fftw3-1D-real-inverse"; }
   virtual void query_layout(Rt_layout<1> &rtl_in, Rt_layout<1> &rtl_out)
   {
-    rtl_in.pack = stride_unit_align;
+    rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
     rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
     rtl_in.order = tuple<0, 1, 2>();
-    rtl_in.complex = cmplx_inter_fmt;
+    rtl_in.complex = Create_plan<dense_complex_type>::format;
     rtl_out = rtl_in;
   }
 
@@ -308,23 +297,6 @@
     FFTW(execute_split_dft_c2r)(plan_by_reference_,
 			  in.first, in.second, out);
   }
-  virtual void query_layout(Rt_layout<1> &rtl_inout)
-  {
-    // By default use unit_stride, tuple<0, 1, 2>
-    rtl_inout.pack = stride_unit_dense;
-    rtl_inout.order = tuple<0, 1, 2>();
-    // make default based on library
-    rtl_inout.complex = Create_plan<dense_complex_type>::format;
-  }
-  virtual void query_layout(Rt_layout<1> &rtl_in, Rt_layout<1> &rtl_out)
-  {
-    // By default use unit_stride, tuple<0, 1, 2>, cmplx_inter_fmt
-    rtl_in.pack = rtl_out.pack = stride_unit_dense;
-    rtl_in.order = rtl_out.order = tuple<0, 1, 2>();
-    // make default based on library
-    rtl_in.complex = rtl_out.complex = Create_plan<dense_complex_type>::format;
-  }
-
 };
 
 // 2D complex -> complex FFT
@@ -781,14 +753,16 @@
 public:
   Fftm_impl(Domain<2> const &dom, unsigned number)
     : Fft_base<1, SCALAR_TYPE, std::complex<SCALAR_TYPE> >
-      (dom[A], 0, convert_NoT(number) | FFTW_UNALIGNED),
-      mult_(dom[1-A].size()) 
+      (dom[A], 0, convert_NoT(number),
+       !(dom[A].length() % VSIP_IMPL_ALLOC_ALIGNMENT)),
+      mult_(dom[1-A].size())
   {
   }
   virtual char const* name() { return "fftm-fftw3-real-forward"; }
   virtual void query_layout(Rt_layout<2> &rtl_in, Rt_layout<2> &rtl_out)
   {
-    rtl_in.pack = stride_unit_dense;
+    rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
+    rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
     if (A == 0) rtl_in.order = tuple<1, 0, 2>();
     else  rtl_in.order = tuple<0, 1, 2>();
     rtl_in.complex = cmplx_inter_fmt;
@@ -837,8 +811,9 @@
 public:
   Fftm_impl(Domain<2> const &dom, unsigned number)
     : Fft_base<1, std::complex<SCALAR_TYPE>, SCALAR_TYPE>
-      (dom[A], 0, convert_NoT(number) | FFTW_UNALIGNED),
-      mult_(dom[1-A].size()) 
+      (dom[A], 0, convert_NoT(number),
+       !(dom[A].length() % VSIP_IMPL_ALLOC_ALIGNMENT)),
+      mult_(dom[1-A].size())
   {
   }
 
@@ -846,7 +821,8 @@
 
   virtual void query_layout(Rt_layout<2> &rtl_in, Rt_layout<2> &rtl_out)
   {
-    rtl_in.pack = stride_unit_dense;
+    rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
+    rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
     if (A == 0) rtl_in.order = tuple<1, 0, 2>();
     else  rtl_in.order = tuple<0, 1, 2>();
     rtl_in.complex = cmplx_inter_fmt;
@@ -897,14 +873,18 @@
 public:
   Fftm_impl(Domain<2> const &dom, int number)
     : Fft_base<1, ctype, ctype>
-  (dom[A], E, convert_NoT(number) | FFTW_UNALIGNED),
-      mult_(dom[1-A].size()) {}
+      (dom[A], E, convert_NoT(number),
+       !(dom[A].length() % VSIP_IMPL_ALLOC_ALIGNMENT)),
+      mult_(dom[1-A].size())
+  {
+  }
 
   virtual char const* name() { return "fftm-fftw3-complex"; }
 
   virtual void query_layout(Rt_layout<2> &rtl_in, Rt_layout<2> &rtl_out)
   {
-    rtl_in.pack = stride_unit_dense;
+    rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
+    rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
     if (A == 0) rtl_in.order = tuple<1, 0, 2>();
     else  rtl_in.order = tuple<0, 1, 2>();
     rtl_in.complex = cmplx_inter_fmt;
Index: src/vsip/opt/fftw3/create_plan.hpp
===================================================================
--- src/vsip/opt/fftw3/create_plan.hpp	(revision 173873)
+++ src/vsip/opt/fftw3/create_plan.hpp	(working copy)
@@ -146,7 +146,8 @@
     IodimT iodims[Dim];
     int i;
     Applied_layout<Layout<Dim, typename Row_major<Dim>::type,
-                   Stride_unit_dense, Cmplx_split_fmt> >
+                          Stride_unit_align<VSIP_IMPL_ALLOC_ALIGNMENT>,
+                          Cmplx_split_fmt> >
     app_layout(size);
 
     for(i=0;i<Dim;i++) 
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 173873)
+++ ChangeLog	(working copy)
@@ -1,3 +1,9 @@
+2007-06-12  Stefan Seefeld  <stefan@xxxxxxxxxxxxxxxx>
+
+	* src/vsip/opt/fftw3/fft_impl.cpp: Resolve various conflicts and relax 
+	alignment requirements.
+	* src/vsip/opt/fftw3/create_plan.hpp: Require Stride_unit_align everywhere.
+	
 2007-06-12  Don McCoy  <don@xxxxxxxxxxxxxxxx>
 
 	* benchmarks/dot.cpp: Adds a compile time check to