Actions

icon Post
text/html Subscribe
text/html Unsubscribe

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[patch] ATLAS/FFTW config updates


  • To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
  • Subject: [patch] ATLAS/FFTW config updates
  • From: Jules Bergmann <jules@xxxxxxxxxxxxxxxx>
  • Date: Sun, 22 Jan 2006 03:50:27 -0500

Patch applied.
Index: ChangeLog
===================================================================
RCS file: /home/cvs/Repository/vpp/ChangeLog,v
retrieving revision 1.384
diff -u -r1.384 ChangeLog
--- ChangeLog	20 Jan 2006 21:49:09 -0000	1.384
+++ ChangeLog	22 Jan 2006 08:50:48 -0000
@@ -1,3 +1,19 @@
+2006-01-22  Jules Bergmann  <jules@xxxxxxxxxxxxxxxx>
+
+	* configure.ac (with-fftw3-cflags): New option to pass CFLAGS
+	  for builtin FFTW3.  Avoid passing CFLAGS to FFTW3 unless
+	  asked.
+	  (with-atlas-tarball): New option to use ATLAS tarball for
+	  builtin LAPACK.
+	* scripts/config: Enable profile timer.  Provide explicit
+	  cflags for i686 builtin FFTW (probing while cross-compiling
+	  doesn't find right flags).
+	* tests/matvec-prod.cpp: Add large matrix-multiply test to
+	  cover ATLAS' mainline case (small matrix-multiplies are
+	  handled by cleanup code).
+	* vendor/atlas/tune/blas/gemv/mvsearch.c (compiler): Fix missing
+	  pointer dereference.
+
 2006-01-20  Jules Bergmann  <jules@xxxxxxxxxxxxxxxx>
 
 	* VERSIONS: Document V_1_0 tag for 1.0 release.
Index: configure.ac
===================================================================
RCS file: /home/cvs/Repository/vpp/configure.ac,v
retrieving revision 1.77
diff -u -r1.77 configure.ac
--- configure.ac	20 Jan 2006 03:58:54 -0000	1.77
+++ configure.ac	22 Jan 2006 08:50:48 -0000
@@ -143,6 +143,11 @@
                  [Omit support for FFT applied to long double elements]),,
   [enable_fft_long_double=yes])
 
+AC_ARG_WITH(fftw3_cflags,
+  AS_HELP_STRING([--with-fftw3-cflags=CFLAGS],
+                 [Specify CFLAGS to use when building built-in FFTW3.
+		  Only used if --with-fft=builtin.]))
+
 
 
 # LAPACK and related libraries (Intel MKL)
@@ -176,6 +181,11 @@
   AS_HELP_STRING([--with-atlas-cfg-opts=OPTS],
                  [specify additional options for ATLAS configure.]))
 
+AC_ARG_WITH(atlas_tarball,
+  AS_HELP_STRING([--with-atlas-tarball=PATH],
+                 [specify an existing ATLAS tarball to be used as basis
+	          for builtin LAPACK library. (Enables LAPACK).]))
+
 AC_ARG_WITH(mkl_prefix,
   AS_HELP_STRING([--with-mkl-prefix=PATH],
                  [specify the installation prefix of the MKL library.  Headers
@@ -525,20 +535,39 @@
     esac
     AC_MSG_NOTICE([fftw3 config options: $fftw3_opts $fftw3_simd.])
 
+    # We don't export CFLAGS to FFTW configure because this overrides its
+    # choice of optimization flags (unless the --with-fftw3-cflags option
+    # is given).  Because of this, we need to pass -m32/-m64 as part of CC.
+    if expr "$CFLAGS" : ".*-m32" > /dev/null; then
+      fftw_CC="$CC -m32"
+    elif expr "$CFLAGS" : ".*-m64" > /dev/null; then
+      fftw_CC="$CC -m64"
+    else
+      fftw_CC="$CC"
+    fi
+
+    keep_CFLAGS="$CFLAGS"
+
+    if test "x$with_fftw3_cflags" != "x"; then
+      CFLAGS="$with_fftw3_cflags"
+    else
+      unset CFLAGS
+    fi
+
     echo "==============================================================="
 
     if test "$enable_fft_float" = yes; then
       mkdir -p vendor/fftw3f
       AC_MSG_NOTICE([Configuring fftw3f (float).])
       AC_MSG_NOTICE([extra config options: '$fftw3_f_simd'.])
-      (cd vendor/fftw3f; $fftw3_configure CC=$CC CFLAGS="$CFLAGS" $fftw3_f_simd $fftw3_opts --enable-float)
+      (cd vendor/fftw3f; $fftw3_configure CC="$fftw_CC" $fftw3_f_simd $fftw3_opts --enable-float)
       libs="$libs -lfftw3f"
     fi
     if test "$enable_fft_double" = yes; then
       mkdir -p vendor/fftw3
       AC_MSG_NOTICE([Configuring fftw3 (double).])
       AC_MSG_NOTICE([extra config options: '$fftw3_d_simd'.])
-      (cd vendor/fftw3; $fftw3_configure CC=$CC CFLAGS="$CFLAGS" $fftw3_d_simd $fftw3_opts )
+      (cd vendor/fftw3; $fftw3_configure CC="$fftw_CC" $fftw3_d_simd $fftw3_opts )
       libs="$libs -lfftw3"
     fi
     if test "$enable_fft_long_double" = yes; then
@@ -546,12 +575,14 @@
       mkdir -p vendor/fftw3l
       AC_MSG_NOTICE([Configuring fftw3l (long double).])
       AC_MSG_NOTICE([extra config options: '$fftw3_l_simd'.])
-      (cd vendor/fftw3l; $fftw3_configure CC=$CC CFLAGS="$CFLAGS" $fftw3_l_simd $fftw3_opts --enable-long-double)
+      (cd vendor/fftw3l; $fftw3_configure CC="$fftw_CC" $fftw3_l_simd $fftw3_opts --enable-long-double)
       libs="$libs -lfftw3l"
     fi
 
     echo "==============================================================="
 
+    export CFLAGS="$keep_CFLAGS"
+
     rm -rf vendor/fftw/include
     mkdir -p vendor/fftw/include
     fftw3_src_prefix="`(cd $srcdir/vendor/fftw; echo \"$PWD\")`"
@@ -1150,6 +1181,15 @@
         fi
         AC_MSG_RESULT([found])
 
+        if test "x$with_atlas_tarball" != "x"; then
+          AC_MSG_RESULT([using ATLAS tarball])
+	  mkdir atlas_untar
+	  mkdir -p vendor/atlas/lib
+          tar xfz $with_atlas_tarball -C atlas_untar
+	  mv `find atlas_untar -name "*.a"` vendor/atlas/lib
+	  mv vendor/atlas/lib/liblapack.a vendor/atlas/lib/libprelapack.a
+	  rm -rf atlas_untar
+        else
         # assert(NOT CROSS-COMPILING)
 
         echo "==============================================================="
@@ -1212,6 +1252,7 @@
 	else
           AC_MSG_ERROR([built-in ATLAS configure FAILED.])
 	fi
+        fi
 
 
         AC_SUBST(USE_BUILTIN_ATLAS, 1)
Index: scripts/config
===================================================================
RCS file: /home/cvs/Repository/vpp/scripts/config,v
retrieving revision 1.9
diff -u -r1.9 config
--- scripts/config	20 Jan 2006 21:49:59 -0000	1.9
+++ scripts/config	22 Jan 2006 08:50:49 -0000
@@ -1,5 +1,14 @@
-# config -- Options for building Sourcery VSIPL++ Packages.
+########################################################################
 #
+# File:	  scripts/config 
+# Author: Stefan Seefeld
+# Date:   2006-1-11
+#
+# Contents:
+#   Options for building Sourcery VSIPL++ Packages.
+#
+########################################################################
+
 # This file contains the configuration options used to build Sourcery
 # VSIPL++ binary packages.
 #
@@ -29,6 +38,11 @@
 # Parallel Builtin amd64  64_amd64
 # Parallel Intel   64     64_generic
 
+
+########################################################################
+# Compiler flags
+########################################################################
+
 release = ['-O2', '-DNDEBUG',
            '-funswitch-loops',
            '-fgcse-after-reload',
@@ -48,11 +62,26 @@
 flags_64_amd64   = ['-m64', '-mtune=opteron',  '-mmmx', '-msse', '-msse2',
                                                '-m3dnow']
 
+
+########################################################################
+# Configure flags
+########################################################################
+
+common_32 = ['--enable-profile-timer=pentiumtsc']
+common_64 = ['--enable-profile-timer=x86_64_tsc']
+
 cross = ['--host=i686-pc-linux-gnu',
          '--build=x86_64-unknown-linux-gnu',
          '--target=i686-pc-linux-gnu']
 
+builtin_fft_32    = ['--with-fft=builtin',
+	             '--with-fftw3-cflags="-O3 -fomit-frame-pointer -fno-schedule-insns -malign-double -fstrict-aliasing -mpreferred-stack-boundary=4 -mcpu=pentiumpro"',
+		     'CODELET_OPTIM=-O']
+builtin_fft_em64t = ['--with-fft=builtin']
+builtin_fft_amd64 = ['--with-fft=builtin']
+
 builtin_lapack_32 = ['--with-lapack=builtin',
+		     '--with-atlas-tarball=/home/jules/csl/atlas/atlas3.6.0_Linux_P4SSE2.tar.gz',
 	             '--with-atlas-cfg-opts="--with-mach=P4 --with-isa=SSE2 --with-int-type=int --with-string-convention=sun"']
 
 builtin_lapack_em64t = ['--with-lapack=builtin',
@@ -83,8 +112,8 @@
                    'CFLAGS="%s"'%' '.join(flags_32_p4sse2),
                    'FFLAGS="%s"'%' '.join(flags_32_p4sse2),
                    'LDFLAGS="%s"'%' '.join(flags_32_p4sse2),
-                   '--with-g2c-copy=%s'%g2c32,
-                   '--with-fft=builtin'] + builtin_lapack_32 + nompi
+                   '--with-g2c-copy=%s'%g2c32
+                  ] + builtin_fft_32 + builtin_lapack_32 + nompi + common_32
 
     class Debug(Configuration):
         suffix = '-debug'
@@ -93,7 +122,7 @@
                    'FFLAGS="%s"'%' '.join(m32),
                    'LDFLAGS="%s"'%' '.join(m32),
                    '--with-g2c-copy=%s'%g2c32,
-                   '--with-fft=builtin'] + builtin_lapack_32 + nompi
+                  ] + builtin_fft_32 + builtin_lapack_32 + nompi + common_32
 
     suffix = '-serial-builtin'
     host = 'i686'
@@ -106,13 +135,13 @@
         suffix = ''
         options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_em64t),
                    '--with-g2c-copy=%s'%g2c64,
-                   '--with-fft=builtin'] + builtin_lapack_em64t + nompi
+                  ] + builtin_fft_em64t + builtin_lapack_em64t + nompi + common_64
 
     class Debug(Configuration):
         suffix = '-debug'
         options = ['CXXFLAGS="%s"'%' '.join(debug),
                    '--with-g2c-copy=%s'%g2c64,
-                   '--with-fft=builtin'] + builtin_lapack_em64t + nompi
+                  ] + builtin_fft_em64t + builtin_lapack_em64t + nompi + common_64
 
     suffix = '-serial-builtin'
     host = 'em64t'
@@ -125,13 +154,13 @@
         suffix = ''
         options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_amd64),
                    '--with-g2c-copy=%s'%g2c64,
-                   '--with-fft=builtin'] + builtin_lapack_amd64 + nompi
+                  ] + builtin_fft_amd64 + builtin_lapack_amd64 + nompi + common_64
 
     class Debug(Configuration):
         suffix = '-debug'
         options = ['CXXFLAGS="%s"'%' '.join(debug),
                    '--with-g2c-copy=%s'%g2c64,
-                   '--with-fft=builtin'] + builtin_lapack_amd64 + nompi
+                  ] + builtin_fft_amd64 + builtin_lapack_amd64 + nompi + common_64
 
     suffix = '-serial-builtin'
     host = 'amd64'
@@ -148,7 +177,7 @@
                    'LDFLAGS="%s"'%' '.join(flags_32_generic),
                    '--with-g2c-copy=%s'%g2c32,
                    '--with-ipp-prefix=%s/ia32_itanium'%ipp_dir, '--with-fft=ipp'
-                  ] + mkl_32 + nompi
+                  ] + mkl_32 + nompi + common_32
 
     class Debug(Configuration):
         suffix = '-debug'
@@ -158,7 +187,7 @@
                    'LDFLAGS="%s"'%' '.join(m32),
                    '--with-g2c-copy=%s'%g2c32,
                    '--with-ipp-prefix=%s/ia32_itanium'%ipp_dir, '--with-fft=ipp'
-		  ] + mkl_32 + nompi
+		  ] + mkl_32 + nompi + common_32
 
     suffix = '-serial-intel'
     host = 'i686'
@@ -172,14 +201,14 @@
         options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_generic),
                    '--with-g2c-copy=%s'%g2c64,
                    '--with-ipp-prefix=%s/em64t'%ipp_dir, '--with-fft=ipp'
-		  ] + mkl_64 + nompi
+		  ] + mkl_64 + nompi + common_64
 
     class Debug(Configuration):
         suffix = '-debug'
         options = ['CXXFLAGS="%s"'%' '.join(debug),
                    '--with-g2c-copy=%s'%g2c64,
                    '--with-ipp-prefix=%s/em64t'%ipp_dir, '--with-fft=ipp'
-		  ] + mkl_64 + nompi
+		  ] + mkl_64 + nompi + common_64
 
     suffix = '-serial-intel'
     host = 'em64t'
@@ -195,7 +224,7 @@
                    'FFLAGS="%s"'%' '.join(flags_32_p4sse2),
                    'LDFLAGS="%s"'%' '.join(flags_32_p4sse2),
                    '--with-g2c-copy=%s'%g2c32,
-                   '--with-fft=builtin'] + builtin_lapack_32 + mpi
+                  ] + builtin_fft_32 + builtin_lapack_32 + mpi + common_32
 
     class Debug(Configuration):
         suffix = '-debug'
@@ -204,7 +233,7 @@
                    'FFLAGS="%s"'%' '.join(m32),
                    'LDFLAGS="%s"'%' '.join(m32),
                    '--with-g2c-copy=%s'%g2c32,
-                   '--with-fft=builtin'] + builtin_lapack_32 + mpi
+                  ] + builtin_fft_32 + builtin_lapack_32 + mpi + common_32
 
     suffix = '-parallel-builtin'
     host = 'i686'
@@ -217,13 +246,13 @@
         suffix = ''
         options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_em64t),
                    '--with-g2c-copy=%s'%g2c64,
-                   '--with-fft=builtin'] + builtin_lapack_em64t + mpi
+                  ] + builtin_fft_em64t + builtin_lapack_em64t + mpi + common_64
 
     class Debug(Configuration):
         suffix = '-debug'
         options = ['CXXFLAGS="%s"'%' '.join(debug),
                    '--with-g2c-copy=%s'%g2c64,
-                   '--with-fft=builtin'] + builtin_lapack_em64t + mpi
+                  ] + builtin_fft_em64t + builtin_lapack_em64t + mpi + common_64
 
     suffix = '-parallel-builtin'
     host = 'em64t'
@@ -236,13 +265,13 @@
         suffix = ''
         options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_amd64),
                    '--with-g2c-copy=%s'%g2c64,
-                   '--with-fft=builtin'] + builtin_lapack_amd64 + mpi
+                  ] + builtin_fft_amd64 + builtin_lapack_amd64 + mpi + common_64
 
     class Debug(Configuration):
         suffix = '-debug'
         options = ['CXXFLAGS="%s"'%' '.join(debug),
                    '--with-g2c-copy=%s'%g2c64,
-                   '--with-fft=builtin'] + builtin_lapack_amd64 + mpi
+                  ] + builtin_fft_amd64 + builtin_lapack_amd64 + mpi + common_64
 
     suffix = '-parallel-builtin'
     host = 'amd64'
@@ -259,7 +288,7 @@
                    'LDFLAGS="%s"'%' '.join(flags_32_generic),
                    '--with-g2c-copy=%s'%g2c32,
                    '--with-ipp-prefix=%s/ia32_itanium'%ipp_dir, '--with-fft=ipp'
-		  ] + mkl_32 + mpi
+		  ] + mkl_32 + mpi + common_32
 
     class Debug(Configuration):
         suffix = '-debug'
@@ -269,7 +298,7 @@
                    'LDFLAGS="%s"'%' '.join(m32),
                    '--with-g2c-copy=%s'%g2c32,
                    '--with-ipp-prefix=%s/ia32_itanium'%ipp_dir, '--with-fft=ipp'
-		  ] + mkl_32 + mpi
+		  ] + mkl_32 + mpi + common_32
 
     suffix = '-parallel-intel'
     host = 'i686'
@@ -283,14 +312,14 @@
         options = ['CXXFLAGS="%s"'%' '.join(release + flags_64_generic),
                    '--with-g2c-copy=%s'%g2c64,
                    '--with-ipp-prefix=%s/em64t'%ipp_dir, '--with-fft=ipp'
-		  ] + mkl_64 + mpi
+		  ] + mkl_64 + mpi + common_64
 
     class Debug(Configuration):
         suffix = '-debug'
         options = ['CXXFLAGS="%s"'%' '.join(debug),
                    '--with-g2c-copy=%s'%g2c64,
                    '--with-ipp-prefix=%s/em64t'%ipp_dir, '--with-fft=ipp'
-		  ] + mkl_64 + mpi
+		  ] + mkl_64 + mpi + common_64
 
     suffix = '-parallel-intel'
     host = 'em64t'
Index: tests/matvec-prod.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/tests/matvec-prod.cpp,v
retrieving revision 1.7
diff -u -r1.7 matvec-prod.cpp
--- tests/matvec-prod.cpp	22 Dec 2005 01:29:25 -0000	1.7
+++ tests/matvec-prod.cpp	22 Jan 2006 08:50:49 -0000
@@ -696,4 +696,9 @@
   prod_cases_complex_only<complex<float>, complex<float> >();
 
   prod_special_cases();
+
+  // Test a large matrix-matrix product (order > 80) to trigger
+  // ATLAS blocking code.  If order < NB, only the cleanup code
+  // gets exercised.
+  test_prod_rand<float, float, row2_type, row2_type, row2_type>(256, 256, 256);
 }
Index: vendor/atlas/tune/blas/gemv/mvsearch.c
===================================================================
RCS file: /home/cvs/Repository/atlas/tune/blas/gemv/mvsearch.c,v
retrieving revision 1.3
diff -u -r1.3 mvsearch.c
--- vendor/atlas/tune/blas/gemv/mvsearch.c	19 Jan 2006 00:49:25 -0000	1.3
+++ vendor/atlas/tune/blas/gemv/mvsearch.c	22 Jan 2006 08:50:52 -0000
@@ -42,7 +42,7 @@
 
 char* compiler(char* cc)
 {
-  while (isspace(cc) && *cc)
+  while (isspace(*cc) && *cc != '\0')
     ++cc;
   return (strcmp(cc, "gcc") == 0) ? "$(GOODGCC)" : cc;
 }