[
Date Prev][
Date Next][
Thread Prev][
Thread Next][
Date Index][
Thread Index]
[patch] ATLAS/FFTW config updates
- To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
- Subject: [patch] ATLAS/FFTW config updates
- From: Jules Bergmann <jules@xxxxxxxxxxxxxxxx>
- Date: Sun, 22 Jan 2006 03:50:27 -0500
Patch applied.
Index: ChangeLog
===================================================================
RCS file: /home/cvs/Repository/vpp/ChangeLog,v
retrieving revision 1.384
diff -u -r1.384 ChangeLog
--- ChangeLog 20 Jan 2006 21:49:09 -0000 1.384
+++ ChangeLog 22 Jan 2006 08:50:48 -0000
@@ -1,3 +1,19 @@
+2006-01-22 Jules Bergmann <jules@xxxxxxxxxxxxxxxx>
+
+ * configure.ac (with-fftw3-cflags): New option to pass CFLAGS
+ for builtin FFTW3. Avoid passing CFLAGS to FFTW3 unless
+ asked.
+ (with-atlas-tarball): New option to use ATLAS tarball for
+ builtin LAPACK.
+ * scripts/config: Enable profile timer. Provide explicit
+ cflags for i686 builtin FFTW (probing while cross-compiling
+ doesn't find right flags).
+ * tests/matvec-prod.cpp: Add large matrix-multiply test to
+ cover ATLAS' mainline case (small matrix-multiplies are
+ handled by cleanup code).
+ * vendor/atlas/tune/blas/gemv/mvsearch.c (compiler): Fix missing
+ pointer dereference.
+
2006-01-20 Jules Bergmann <jules@xxxxxxxxxxxxxxxx>
* VERSIONS: Document V_1_0 tag for 1.0 release.
Index: configure.ac
===================================================================
RCS file: /home/cvs/Repository/vpp/configure.ac,v
retrieving revision 1.77
diff -u -r1.77 configure.ac
--- configure.ac 20 Jan 2006 03:58:54 -0000 1.77
+++ configure.ac 22 Jan 2006 08:50:48 -0000
@@ -143,6 +143,11 @@
[Omit support for FFT applied to long double elements]),,
[enable_fft_long_double=yes])
+AC_ARG_WITH(fftw3_cflags,
+ AS_HELP_STRING([--with-fftw3-cflags=CFLAGS],
+ [Specify CFLAGS to use when building built-in FFTW3.
+ Only used if --with-fft=builtin.]))
+
# LAPACK and related libraries (Intel MKL)
@@ -176,6 +181,11 @@
AS_HELP_STRING([--with-atlas-cfg-opts=OPTS],
[specify additional options for ATLAS configure.]))
+AC_ARG_WITH(atlas_tarball,
+ AS_HELP_STRING([--with-atlas-tarball=PATH],
+ [specify an existing ATLAS tarball to be used as basis
+ for builtin LAPACK library. (Enables LAPACK).]))
+
AC_ARG_WITH(mkl_prefix,
AS_HELP_STRING([--with-mkl-prefix=PATH],
[specify the installation prefix of the MKL library. Headers
@@ -525,20 +535,39 @@
esac
AC_MSG_NOTICE([fftw3 config options: $fftw3_opts $fftw3_simd.])
+ # We don't export CFLAGS to FFTW configure because this overrides its
+ # choice of optimization flags (unless the --with-fftw3-cflags option
+ # is given). Because of this, we need to pass -m32/-m64 as part of CC.
+ if expr "$CFLAGS" : ".*-m32" > /dev/null; then
+ fftw_CC="$CC -m32"
+ elif expr "$CFLAGS" : ".*-m64" > /dev/null; then
+ fftw_CC="$CC -m64"
+ else
+ fftw_CC="$CC"
+ fi
+
+ keep_CFLAGS="$CFLAGS"
+
+ if test "x$with_fftw3_cflags" != "x"; then
+ CFLAGS="$with_fftw3_cflags"
+ else
+ unset CFLAGS
+ fi
+
echo "==============================================================="
if test "$enable_fft_float" = yes; then
mkdir -p vendor/fftw3f
AC_MSG_NOTICE([Configuring fftw3f (float).])
AC_MSG_NOTICE([extra config options: '$fftw3_f_simd'.])
- (cd vendor/fftw3f; $fftw3_configure CC=$CC CFLAGS="$CFLAGS" $fftw3_f_simd $fftw3_opts --enable-float)
+ (cd vendor/fftw3f; $fftw3_configure CC="$fftw_CC" $fftw3_f_simd $fftw3_opts --enable-float)
libs="$libs -lfftw3f"
fi
if test "$enable_fft_double" = yes; then
mkdir -p vendor/fftw3
AC_MSG_NOTICE([Configuring fftw3 (double).])
AC_MSG_NOTICE([extra config options: '$fftw3_d_simd'.])
- (cd vendor/fftw3; $fftw3_configure CC=$CC CFLAGS="$CFLAGS" $fftw3_d_simd $fftw3_opts )
+ (cd vendor/fftw3; $fftw3_configure CC="$fftw_CC" $fftw3_d_simd $fftw3_opts )
libs="$libs -lfftw3"
fi
if test "$enable_fft_long_double" = yes; then
@@ -546,12 +575,14 @@
mkdir -p vendor/fftw3l
AC_MSG_NOTICE([Configuring fftw3l (long double).])
AC_MSG_NOTICE([extra config options: '$fftw3_l_simd'.])
- (cd vendor/fftw3l; $fftw3_configure CC=$CC CFLAGS="$CFLAGS" $fftw3_l_simd $fftw3_opts --enable-long-double)
+ (cd vendor/fftw3l; $fftw3_configure CC="$fftw_CC" $fftw3_l_simd $fftw3_opts --enable-long-double)
libs="$libs -lfftw3l"
fi
echo "==============================================================="
+ export CFLAGS="$keep_CFLAGS"
+
rm -rf vendor/fftw/include
mkdir -p vendor/fftw/include
fftw3_src_prefix="`(cd $srcdir/vendor/fftw; echo \"$PWD\")`"
@@ -1150,6 +1181,15 @@
fi
AC_MSG_RESULT([found])
+ if test "x$with_atlas_tarball" != "x"; then
+ AC_MSG_RESULT([using ATLAS tarball])
+ mkdir atlas_untar
+ mkdir -p vendor/atlas/lib
+ tar xfz $with_atlas_tarball -C atlas_untar
+ mv `find atlas_untar -name "*.a"` vendor/atlas/lib
+ mv vendor/atlas/lib/liblapack.a vendor/atlas/lib/libprelapack.a
+ rm -rf atlas_untar
+ else
# assert(NOT CROSS-COMPILING)
echo "==============================================================="
@@ -1212,6 +1252,7 @@
else
AC_MSG_ERROR([built-in ATLAS configure FAILED.])
fi
+ fi
AC_SUBST(USE_BUILTIN_ATLAS, 1)
Index: scripts/config
===================================================================
RCS file: /home/cvs/Repository/vpp/scripts/config,v
retrieving revision 1.9
diff -u -r1.9 config
--- scripts/config 20 Jan 2006 21:49:59 -0000 1.9
+++ scripts/config 22 Jan 2006 08:50:49 -0000
@@ -1,5 +1,14 @@
-# config -- Options for building Sourcery VSIPL++ Packages.
+########################################################################
#
+# File: scripts/config
+# Author: Stefan Seefeld
+# Date: 2006-1-11
+#
+# Contents:
+# Options for building Sourcery VSIPL++ Packages.
+#
+########################################################################
+
# This file contains the configuration options used to build Sourcery
# VSIPL++ binary packages.
#
@@ -29,6 +38,11 @@
# Parallel Builtin amd64 64_amd64
# Parallel Intel 64 64_generic
+
+########################################################################
+# Compiler flags
+########################################################################
+
release = ['-O2', '-DNDEBUG',
'-funswitch-loops',
'-fgcse-after-reload',
@@ -48,11 +62,26 @@
flags_64_amd64 = ['-m64', '-mtune=opteron', '-mmmx', '-msse', '-msse2',
'-m3dnow']
+
+########################################################################
+# Configure flags
+########################################################################
+
+common_32 = ['--enable-profile-timer=pentiumtsc']
+common_64 = ['--enable-profile-timer=x86_64_tsc']
+
cross = ['--host=i686-pc-linux-gnu',
'--build=x86_64-unknown-linux-gnu',
'--target=i686-pc-linux-gnu']
+builtin_fft_32 = ['--with-fft=builtin',
+ '--with-fftw3-cflags="-O3 -fomit-frame-pointer -fno-schedule-insns -malign-double -fstrict-aliasing -mpreferred-stack-boundary=4 -mcpu=pentiumpro"',
+ 'CODELET_OPTIM=-O']
+builtin_fft_em64t = ['--with-fft=builtin']
+builtin_fft_amd64 = ['--with-fft=builtin']
+
builtin_lapack_32 = ['--with-lapack=builtin',
+ '--with-atlas-tarball=/home/jules/csl/atlas/atlas3.6.0_Linux_P4SSE2.tar.gz',
'--with-atlas-cfg-opts="--with-mach=P4 --with-isa=SSE2 --with-int-type=int --with-string-convention=sun"']
builtin_lapack_em64t = ['--with-lapack=builtin',
@@ -83,8 +112,8 @@
'CFLAGS="%s"'%' '.join(flags_32_p4sse2),
'FFLAGS="%s"'%' '.join(flags_32_p4sse2),
'LDFLAGS="%s"'%' '.join(flags_32_p4sse2),
- '--with-g2c-copy=%s'%g2c32,
- '--with-fft=builtin'] + builtin_lapack_32 + nompi
+ '--with-g2c-copy=%s'%g2c32
+ ] + builtin_fft_32 + builtin_lapack_32 + nompi + common_32
class Debug(Configuration):
suffix = '-debug'
@@ -93,7 +122,7 @@
'FFLAGS="%s"'%' '.join(m32),
'LDFLAGS="%s"'%' '.join(m32),
'--with-g2c-copy=%s'%g2c32,
- '--with-fft=builtin'] + builtin_lapack_32 + nompi
+ ] + builtin_fft_32 + builtin_lapack_32 + nompi + common_32
suffix = '-serial-builtin'
host = 'i686'
@@ -106,13 +135,13 @@
suffix = ''
options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_em64t),
'--with-g2c-copy=%s'%g2c64,
- '--with-fft=builtin'] + builtin_lapack_em64t + nompi
+ ] + builtin_fft_em64t + builtin_lapack_em64t + nompi + common_64
class Debug(Configuration):
suffix = '-debug'
options = ['CXXFLAGS="%s"'%' '.join(debug),
'--with-g2c-copy=%s'%g2c64,
- '--with-fft=builtin'] + builtin_lapack_em64t + nompi
+ ] + builtin_fft_em64t + builtin_lapack_em64t + nompi + common_64
suffix = '-serial-builtin'
host = 'em64t'
@@ -125,13 +154,13 @@
suffix = ''
options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_amd64),
'--with-g2c-copy=%s'%g2c64,
- '--with-fft=builtin'] + builtin_lapack_amd64 + nompi
+ ] + builtin_fft_amd64 + builtin_lapack_amd64 + nompi + common_64
class Debug(Configuration):
suffix = '-debug'
options = ['CXXFLAGS="%s"'%' '.join(debug),
'--with-g2c-copy=%s'%g2c64,
- '--with-fft=builtin'] + builtin_lapack_amd64 + nompi
+ ] + builtin_fft_amd64 + builtin_lapack_amd64 + nompi + common_64
suffix = '-serial-builtin'
host = 'amd64'
@@ -148,7 +177,7 @@
'LDFLAGS="%s"'%' '.join(flags_32_generic),
'--with-g2c-copy=%s'%g2c32,
'--with-ipp-prefix=%s/ia32_itanium'%ipp_dir, '--with-fft=ipp'
- ] + mkl_32 + nompi
+ ] + mkl_32 + nompi + common_32
class Debug(Configuration):
suffix = '-debug'
@@ -158,7 +187,7 @@
'LDFLAGS="%s"'%' '.join(m32),
'--with-g2c-copy=%s'%g2c32,
'--with-ipp-prefix=%s/ia32_itanium'%ipp_dir, '--with-fft=ipp'
- ] + mkl_32 + nompi
+ ] + mkl_32 + nompi + common_32
suffix = '-serial-intel'
host = 'i686'
@@ -172,14 +201,14 @@
options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_generic),
'--with-g2c-copy=%s'%g2c64,
'--with-ipp-prefix=%s/em64t'%ipp_dir, '--with-fft=ipp'
- ] + mkl_64 + nompi
+ ] + mkl_64 + nompi + common_64
class Debug(Configuration):
suffix = '-debug'
options = ['CXXFLAGS="%s"'%' '.join(debug),
'--with-g2c-copy=%s'%g2c64,
'--with-ipp-prefix=%s/em64t'%ipp_dir, '--with-fft=ipp'
- ] + mkl_64 + nompi
+ ] + mkl_64 + nompi + common_64
suffix = '-serial-intel'
host = 'em64t'
@@ -195,7 +224,7 @@
'FFLAGS="%s"'%' '.join(flags_32_p4sse2),
'LDFLAGS="%s"'%' '.join(flags_32_p4sse2),
'--with-g2c-copy=%s'%g2c32,
- '--with-fft=builtin'] + builtin_lapack_32 + mpi
+ ] + builtin_fft_32 + builtin_lapack_32 + mpi + common_32
class Debug(Configuration):
suffix = '-debug'
@@ -204,7 +233,7 @@
'FFLAGS="%s"'%' '.join(m32),
'LDFLAGS="%s"'%' '.join(m32),
'--with-g2c-copy=%s'%g2c32,
- '--with-fft=builtin'] + builtin_lapack_32 + mpi
+ ] + builtin_fft_32 + builtin_lapack_32 + mpi + common_32
suffix = '-parallel-builtin'
host = 'i686'
@@ -217,13 +246,13 @@
suffix = ''
options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_em64t),
'--with-g2c-copy=%s'%g2c64,
- '--with-fft=builtin'] + builtin_lapack_em64t + mpi
+ ] + builtin_fft_em64t + builtin_lapack_em64t + mpi + common_64
class Debug(Configuration):
suffix = '-debug'
options = ['CXXFLAGS="%s"'%' '.join(debug),
'--with-g2c-copy=%s'%g2c64,
- '--with-fft=builtin'] + builtin_lapack_em64t + mpi
+ ] + builtin_fft_em64t + builtin_lapack_em64t + mpi + common_64
suffix = '-parallel-builtin'
host = 'em64t'
@@ -236,13 +265,13 @@
suffix = ''
options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_amd64),
'--with-g2c-copy=%s'%g2c64,
- '--with-fft=builtin'] + builtin_lapack_amd64 + mpi
+ ] + builtin_fft_amd64 + builtin_lapack_amd64 + mpi + common_64
class Debug(Configuration):
suffix = '-debug'
options = ['CXXFLAGS="%s"'%' '.join(debug),
'--with-g2c-copy=%s'%g2c64,
- '--with-fft=builtin'] + builtin_lapack_amd64 + mpi
+ ] + builtin_fft_amd64 + builtin_lapack_amd64 + mpi + common_64
suffix = '-parallel-builtin'
host = 'amd64'
@@ -259,7 +288,7 @@
'LDFLAGS="%s"'%' '.join(flags_32_generic),
'--with-g2c-copy=%s'%g2c32,
'--with-ipp-prefix=%s/ia32_itanium'%ipp_dir, '--with-fft=ipp'
- ] + mkl_32 + mpi
+ ] + mkl_32 + mpi + common_32
class Debug(Configuration):
suffix = '-debug'
@@ -269,7 +298,7 @@
'LDFLAGS="%s"'%' '.join(m32),
'--with-g2c-copy=%s'%g2c32,
'--with-ipp-prefix=%s/ia32_itanium'%ipp_dir, '--with-fft=ipp'
- ] + mkl_32 + mpi
+ ] + mkl_32 + mpi + common_32
suffix = '-parallel-intel'
host = 'i686'
@@ -283,14 +312,14 @@
options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_generic),
'--with-g2c-copy=%s'%g2c64,
'--with-ipp-prefix=%s/em64t'%ipp_dir, '--with-fft=ipp'
- ] + mkl_64 + mpi
+ ] + mkl_64 + mpi + common_64
class Debug(Configuration):
suffix = '-debug'
options = ['CXXFLAGS="%s"'%' '.join(debug),
'--with-g2c-copy=%s'%g2c64,
'--with-ipp-prefix=%s/em64t'%ipp_dir, '--with-fft=ipp'
- ] + mkl_64 + mpi
+ ] + mkl_64 + mpi + common_64
suffix = '-parallel-intel'
host = 'em64t'
Index: tests/matvec-prod.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/matvec-prod.cpp,v
retrieving revision 1.7
diff -u -r1.7 matvec-prod.cpp
--- tests/matvec-prod.cpp 22 Dec 2005 01:29:25 -0000 1.7
+++ tests/matvec-prod.cpp 22 Jan 2006 08:50:49 -0000
@@ -696,4 +696,9 @@
prod_cases_complex_only<complex<float>, complex<float> >();
prod_special_cases();
+
+ // Test a large matrix-matrix product (order > 80) to trigger
+ // ATLAS blocking code. If order < NB, only the cleanup code
+ // gets exercised.
+ test_prod_rand<float, float, row2_type, row2_type, row2_type>(256, 256, 256);
}
Index: vendor/atlas/tune/blas/gemv/mvsearch.c
===================================================================
RCS file: /home/cvs/Repository/atlas/tune/blas/gemv/mvsearch.c,v
retrieving revision 1.3
diff -u -r1.3 mvsearch.c
--- vendor/atlas/tune/blas/gemv/mvsearch.c 19 Jan 2006 00:49:25 -0000 1.3
+++ vendor/atlas/tune/blas/gemv/mvsearch.c 22 Jan 2006 08:50:52 -0000
@@ -42,7 +42,7 @@
char* compiler(char* cc)
{
- while (isspace(cc) && *cc)
+ while (isspace(*cc) && *cc != '\0')
++cc;
return (strcmp(cc, "gcc") == 0) ? "$(GOODGCC)" : cc;
}