[
Date Prev][
Date Next][
Thread Prev][
Thread Next][
Date Index][
Thread Index]
Re: [vsipl++] patch: fix merge conflicts
- To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
- Subject: Re: [vsipl++] patch: fix merge conflicts
- From: Stefan Seefeld <stefan@xxxxxxxxxxxxxxxx>
- Date: Tue, 12 Jun 2007 17:20:48 -0400
Jules Bergmann wrote:
>
>>
>> Indeed. Should I add my suggested change above to the patch before
>> checking
>> it in ?
>
> Yes, that sounds good. I suspect we'll have to do something different
> if people ever start using multi-dim FFTs, but for now let's avoid the
> copy. -- Jules
Here is a new patch, incorporating the changes we discussed. 1D FFT as
well as FFTM now use (and require) aligned blocks if the block size is a
multiple of the alignment size (and thus individual row operations can be
vectorized).
(Since the patch is slightly more involved than I originally assumed, I'd
prefer another round of review.)
Thanks,
Stefan
--
Stefan Seefeld
CodeSourcery
stefan@xxxxxxxxxxxxxxxx
(650) 331-3385 x718
Index: src/vsip/opt/fftw3/fft_impl.cpp
===================================================================
--- src/vsip/opt/fftw3/fft_impl.cpp (revision 173873)
+++ src/vsip/opt/fftw3/fft_impl.cpp (working copy)
@@ -38,11 +38,13 @@
template <dimension_type D>
struct Fft_base<D, std::complex<SCALAR_TYPE>, std::complex<SCALAR_TYPE> >
{
- Fft_base(Domain<D> const& dom, int exp, int flags)
+ Fft_base(Domain<D> const& dom, int exp, int flags, bool aligned = false)
VSIP_THROW((std::bad_alloc))
: in_buffer_(dom.size()),
- out_buffer_(dom.size())
+ out_buffer_(dom.size()),
+ aligned_(aligned)
{
+ if (!aligned) flags |= FFTW_UNALIGNED;
// For multi-dimensional transforms, these plans assume both
// input and output data is dense, row-major, interleave-complex
// format.
@@ -76,15 +78,17 @@
FFTW(plan) plan_in_place_;
FFTW(plan) plan_by_reference_;
int size_[D];
+ bool aligned_;
};
template <vsip::dimension_type D>
struct Fft_base<D, SCALAR_TYPE, std::complex<SCALAR_TYPE> >
{
- Fft_base(Domain<D> const& dom, int A, int flags)
+ Fft_base(Domain<D> const& dom, int A, int flags, bool aligned = false)
VSIP_THROW((std::bad_alloc))
: in_buffer_(32, dom.size()),
- out_buffer_(dom.size())
+ out_buffer_(dom.size()),
+ aligned_(aligned)
{
for (vsip::dimension_type i = 0; i < D; ++i) size_[i] = dom[i].size();
// FFTW3 assumes A == D - 1.
@@ -104,15 +108,17 @@
Cmplx_buffer<dense_complex_type, SCALAR_TYPE> out_buffer_;
FFTW(plan) plan_by_reference_;
int size_[D];
+ bool aligned_;
};
template <vsip::dimension_type D>
struct Fft_base<D, std::complex<SCALAR_TYPE>, SCALAR_TYPE>
{
- Fft_base(Domain<D> const& dom, int A, int flags)
+ Fft_base(Domain<D> const& dom, int A, int flags, bool aligned = false)
VSIP_THROW((std::bad_alloc))
: in_buffer_(dom.size()),
- out_buffer_(32, dom.size())
+ out_buffer_(32, dom.size()),
+ aligned_(aligned)
{
for (vsip::dimension_type i = 0; i < D; ++i) size_[i] = dom[i].size();
// FFTW3 assumes A == D - 1.
@@ -133,6 +139,7 @@
aligned_array<SCALAR_TYPE> out_buffer_;
FFTW(plan) plan_by_reference_;
int size_[D];
+ bool aligned_;
};
// 1D complex -> complex FFT
@@ -150,14 +157,13 @@
public:
Fft_impl(Domain<1> const &dom, unsigned number)
- : Fft_base<1, ctype, ctype>(dom, E, convert_NoT(number))
+ : Fft_base<1, ctype, ctype>(dom, E, convert_NoT(number),
+ !(dom.length() % VSIP_IMPL_ALLOC_ALIGNMENT))
{}
virtual char const* name() { return "fft-fftw3-1D-complex"; }
-<<<<<<< .mine
virtual void query_layout(Rt_layout<1> &rtl_inout)
{
- // By default use unit_stride, tuple<0, 1, 2>
- rtl_inout.pack = stride_unit_dense;
+ rtl_inout.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
rtl_inout.align = VSIP_IMPL_ALLOC_ALIGNMENT;
rtl_inout.order = tuple<0, 1, 2>();
// make default based on library
@@ -165,14 +171,12 @@
}
virtual void query_layout(Rt_layout<1> &rtl_in, Rt_layout<1> &rtl_out)
{
- // By default use unit_stride, tuple<0, 1, 2>
- rtl_in.pack = rtl_out.pack = stride_unit_dense;
- rtl_inout.align = VSIP_IMPL_ALLOC_ALIGNMENT;
- rtl_in.order = rtl_out.order = tuple<0, 1, 2>();
- // make default based on library
- rtl_in.complex = rtl_out.complex = Create_plan<dense_complex_type>::format;
+ rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
+ rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
+ rtl_in.order = tuple<0, 1, 2>();
+ rtl_in.complex = Create_plan<dense_complex_type>::format;
+ rtl_out = rtl_in;
}
-
virtual void in_place(ctype *inout, stride_type s, length_type l)
{
assert(s == 1 && static_cast<int>(l) == this->size_[0]);
@@ -222,15 +226,16 @@
public:
Fft_impl(Domain<1> const &dom, unsigned number)
- : Fft_base<1, rtype, ctype>(dom, A, convert_NoT(number))
+ : Fft_base<1, rtype, ctype>(dom, A, convert_NoT(number),
+ !(dom.length() % VSIP_IMPL_ALLOC_ALIGNMENT))
{}
virtual char const* name() { return "fft-fftw3-1D-real-forward"; }
virtual void query_layout(Rt_layout<1> &rtl_in, Rt_layout<1> &rtl_out)
{
- rtl_in.pack = stride_unit_align;
+ rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
rtl_in.order = tuple<0, 1, 2>();
- rtl_in.complex = cmplx_inter_fmt;
+ rtl_in.complex = Create_plan<dense_complex_type>::format;
rtl_out = rtl_in;
}
virtual void by_reference(rtype *in, stride_type,
@@ -247,23 +252,6 @@
FFTW(execute_split_dft_r2c)(plan_by_reference_,
in, out.first, out.second);
}
- virtual void query_layout(Rt_layout<1> &rtl_inout)
- {
- // By default use unit_stride, tuple<0, 1, 2>
- rtl_inout.pack = stride_unit_dense;
- rtl_inout.order = tuple<0, 1, 2>();
- // make default based on library
- rtl_inout.complex = Create_plan<dense_complex_type>::format;
- }
- virtual void query_layout(Rt_layout<1> &rtl_in, Rt_layout<1> &rtl_out)
- {
- // By default use unit_stride, tuple<0, 1, 2>
- rtl_in.pack = rtl_out.pack = stride_unit_dense;
- rtl_in.order = rtl_out.order = tuple<0, 1, 2>();
- // make default based on library
- rtl_in.complex = rtl_out.complex = Create_plan<dense_complex_type>::format;
- }
-
};
// 1D complex -> real FFT
@@ -279,16 +267,17 @@
public:
Fft_impl(Domain<1> const &dom, unsigned number)
- : Fft_base<1, ctype, rtype>(dom, A, convert_NoT(number))
+ : Fft_base<1, ctype, rtype>(dom, A, convert_NoT(number),
+ !(dom.length() % VSIP_IMPL_ALLOC_ALIGNMENT))
{}
virtual char const* name() { return "fft-fftw3-1D-real-inverse"; }
virtual void query_layout(Rt_layout<1> &rtl_in, Rt_layout<1> &rtl_out)
{
- rtl_in.pack = stride_unit_align;
+ rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
rtl_in.order = tuple<0, 1, 2>();
- rtl_in.complex = cmplx_inter_fmt;
+ rtl_in.complex = Create_plan<dense_complex_type>::format;
rtl_out = rtl_in;
}
@@ -308,23 +297,6 @@
FFTW(execute_split_dft_c2r)(plan_by_reference_,
in.first, in.second, out);
}
- virtual void query_layout(Rt_layout<1> &rtl_inout)
- {
- // By default use unit_stride, tuple<0, 1, 2>
- rtl_inout.pack = stride_unit_dense;
- rtl_inout.order = tuple<0, 1, 2>();
- // make default based on library
- rtl_inout.complex = Create_plan<dense_complex_type>::format;
- }
- virtual void query_layout(Rt_layout<1> &rtl_in, Rt_layout<1> &rtl_out)
- {
- // By default use unit_stride, tuple<0, 1, 2>, cmplx_inter_fmt
- rtl_in.pack = rtl_out.pack = stride_unit_dense;
- rtl_in.order = rtl_out.order = tuple<0, 1, 2>();
- // make default based on library
- rtl_in.complex = rtl_out.complex = Create_plan<dense_complex_type>::format;
- }
-
};
// 2D complex -> complex FFT
@@ -781,14 +753,16 @@
public:
Fftm_impl(Domain<2> const &dom, unsigned number)
: Fft_base<1, SCALAR_TYPE, std::complex<SCALAR_TYPE> >
- (dom[A], 0, convert_NoT(number) | FFTW_UNALIGNED),
- mult_(dom[1-A].size())
+ (dom[A], 0, convert_NoT(number),
+ !(dom[A].length() % VSIP_IMPL_ALLOC_ALIGNMENT)),
+ mult_(dom[1-A].size())
{
}
virtual char const* name() { return "fftm-fftw3-real-forward"; }
virtual void query_layout(Rt_layout<2> &rtl_in, Rt_layout<2> &rtl_out)
{
- rtl_in.pack = stride_unit_dense;
+ rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
+ rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
if (A == 0) rtl_in.order = tuple<1, 0, 2>();
else rtl_in.order = tuple<0, 1, 2>();
rtl_in.complex = cmplx_inter_fmt;
@@ -837,8 +811,9 @@
public:
Fftm_impl(Domain<2> const &dom, unsigned number)
: Fft_base<1, std::complex<SCALAR_TYPE>, SCALAR_TYPE>
- (dom[A], 0, convert_NoT(number) | FFTW_UNALIGNED),
- mult_(dom[1-A].size())
+ (dom[A], 0, convert_NoT(number),
+ !(dom[A].length() % VSIP_IMPL_ALLOC_ALIGNMENT)),
+ mult_(dom[1-A].size())
{
}
@@ -846,7 +821,8 @@
virtual void query_layout(Rt_layout<2> &rtl_in, Rt_layout<2> &rtl_out)
{
- rtl_in.pack = stride_unit_dense;
+ rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
+ rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
if (A == 0) rtl_in.order = tuple<1, 0, 2>();
else rtl_in.order = tuple<0, 1, 2>();
rtl_in.complex = cmplx_inter_fmt;
@@ -897,14 +873,18 @@
public:
Fftm_impl(Domain<2> const &dom, int number)
: Fft_base<1, ctype, ctype>
- (dom[A], E, convert_NoT(number) | FFTW_UNALIGNED),
- mult_(dom[1-A].size()) {}
+ (dom[A], E, convert_NoT(number),
+ !(dom[A].length() % VSIP_IMPL_ALLOC_ALIGNMENT)),
+ mult_(dom[1-A].size())
+ {
+ }
virtual char const* name() { return "fftm-fftw3-complex"; }
virtual void query_layout(Rt_layout<2> &rtl_in, Rt_layout<2> &rtl_out)
{
- rtl_in.pack = stride_unit_dense;
+ rtl_in.pack = this->aligned_ ? stride_unit_align : stride_unit_dense;
+ rtl_in.align = VSIP_IMPL_ALLOC_ALIGNMENT;
if (A == 0) rtl_in.order = tuple<1, 0, 2>();
else rtl_in.order = tuple<0, 1, 2>();
rtl_in.complex = cmplx_inter_fmt;
Index: src/vsip/opt/fftw3/create_plan.hpp
===================================================================
--- src/vsip/opt/fftw3/create_plan.hpp (revision 173873)
+++ src/vsip/opt/fftw3/create_plan.hpp (working copy)
@@ -146,7 +146,8 @@
IodimT iodims[Dim];
int i;
Applied_layout<Layout<Dim, typename Row_major<Dim>::type,
- Stride_unit_dense, Cmplx_split_fmt> >
+ Stride_unit_align<VSIP_IMPL_ALLOC_ALIGNMENT>,
+ Cmplx_split_fmt> >
app_layout(size);
for(i=0;i<Dim;i++)
Index: ChangeLog
===================================================================
--- ChangeLog (revision 173873)
+++ ChangeLog (working copy)
@@ -1,3 +1,9 @@
+2007-06-12 Stefan Seefeld <stefan@xxxxxxxxxxxxxxxx>
+
+ * src/vsip/opt/fftw3/fft_impl.cpp: Resolve various conflicts and relax
+ alignment requirements.
+ * src/vsip/opt/fftw3/create_plan.hpp: Require Stride_unit_align everywhere.
+
2007-06-12 Don McCoy <don@xxxxxxxxxxxxxxxx>
* benchmarks/dot.cpp: Adds a compile time check to