[
Date Prev][
Date Next][
Thread Prev][
Thread Next][
Date Index][
Thread Index]
[patch] vmul benchmark reorganization
- To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
- Subject: [patch] vmul benchmark reorganization
- From: Don McCoy <don@xxxxxxxxxxxxxxxx>
- Date: Sun, 19 Mar 2006 16:01:39 -0700
The primary purpose of this patch is to separate out some
implementation-specific functionality that was making it difficult to
compile/run tests against the reference implementation of VSIPL++.
This reorganization includes several ideas discussed recently, such as
the new macro VSIP_IMPL_SOURCERY_VPP to allow applications to check if
they are running CodeSourcery's version of the library.
This macro is also used to choose between the parallel version of
benchmarks/loop.hpp and a new serial-only version (loop_ser.hpp). I
have some reservations about doing it this way, but it seemed the best
option because it keeps the code in loop.hpp readable.
This patch does not yet include the splitting of this benchmark into
serial, parallel and impl-specific parts.
Regards,
--
Don McCoy
don (at) CodeSourcery
(888) 776-0262 / (650) 331-3385, x712
2006-03-19 Don McCoy <don@xxxxxxxxxxxxxxxx>
* configure.ac: added #define for VSIP_IMPL_SOURCERY_VPP.
* benchmarks/benchmarks.hpp: new file. encapsulates resources
needed to run benchmarks. provides some resources for
linking against the reference implementation.
* benchmarks/loop_ser.hpp: new file with parallel-specific
functionality removed (based on loop.hpp). also used for
linking against the reference implementation.
* benchmarks/main.cpp: change to use benchmarks.hpp instead
of several separate includes.
* benchmarks/make.standalone: Fixed a bug where it would
not recognize that PREFIX was set on the command line.
Fixed include paths and build targets.
* benchmarks/vmul.cpp: change to use benchmarks.hpp instead
of several separate includes. removed implementation-
specific functionality where possible and used the new
SOURCERY_VPP macro where not.
Index: configure.ac
===================================================================
RCS file: /home/cvs/Repository/vpp/configure.ac,v
retrieving revision 1.87
diff -c -p -r1.87 configure.ac
*** configure.ac 9 Mar 2006 05:44:58 -0000 1.87
--- configure.ac 19 Mar 2006 22:40:25 -0000
*************** mkdir -p src/vsip/impl/simd
*** 1637,1642 ****
--- 1637,1648 ----
#
+ # set to allow apps to test which VSIPL++ they are using
+ #
+ AC_DEFINE([VSIP_IMPL_SOURCERY_VPP], [],
+ [Define to indicate this is CodeSourcery's VSIPL++.])
+
+ #
# library
#
ARFLAGS="r"
Index: benchmarks/benchmarks.hpp
===================================================================
RCS file: benchmarks/benchmarks.hpp
diff -N benchmarks/benchmarks.hpp
*** /dev/null 1 Jan 1970 00:00:00 -0000
--- benchmarks/benchmarks.hpp 19 Mar 2006 22:40:25 -0000
***************
*** 0 ****
--- 1,276 ----
+ /* Copyright (c) 2006 by CodeSourcery. All rights reserved. */
+
+ /** @file benchmarks/benchmarks.hpp
+ @author Don McCoy
+ @date 2006-03-16
+ @brief VSIPL++ Library: Benchmark common definitions
+
+ */
+
+ #ifndef VSIP_IMPL_BENCHMARKS_HPP
+ #define VSIP_IMPL_BENCHMARKS_HPP
+
+ #ifdef VSIP_IMPL_SOURCERY_VPP
+
+ // Sourcery VSIPL++ provides certain resources such as system
+ // timers that are needed for running the benchmarks.
+
+ #include <vsip/impl/profile.hpp>
+ #include <../tests/test.hpp>
+ #include "loop.hpp"
+ #include "ops_info.hpp"
+
+ #else
+
+ // when linking with non-sourcery versions of the lib, the
+ // definitions below provide a minimal set of these resources.
+
+ #include <time.h>
+ #include <cassert>
+ #include <cstdio>
+ #include <cstdlib>
+
+ #include <vsip/support.hpp>
+ #include <vsip/complex.hpp>
+ #include <vsip/math.hpp>
+
+ #include "loop_ser.hpp"
+ #include "ops_info.hpp"
+
+
+ namespace vsip
+ {
+ namespace impl
+ {
+ namespace profile
+ {
+
+ /// Timer policy backed by clock(); time stamps are clock_t tick counts.
+ struct Posix_time
+ {
+   static bool const valid = true;
+   static char const* name() { return "Posix_time"; } // literal is const
+   static void init() { clocks_per_sec = CLOCKS_PER_SEC; }
+
+   typedef clock_t stamp_type;
+   static void sample(stamp_type& time) { time = clock(); }
+   static stamp_type zero() { return stamp_type(); }
+   static stamp_type f_clocks_per_sec() { return CLOCKS_PER_SEC; }
+   static stamp_type add(stamp_type A, stamp_type B) { return A + B; }
+   static stamp_type sub(stamp_type A, stamp_type B) { return A - B; }
+   static float seconds(stamp_type time) { return static_cast<float>(time) / CLOCKS_PER_SEC; }
+   static unsigned long ticks(stamp_type time) { return static_cast<unsigned long>(time); }
+
+   static stamp_type clocks_per_sec; // NOTE(review): no definition visible here
+ };
+
+ /// Timer class that keeps start/stop times.
+ ///
+ /// Requires:
+ /// TP is a timer policy.
+
+ template <typename TP>
+ class P_timer {
+ private:
+   typedef typename TP::stamp_type stamp_type;
+   stamp_type start_;   // stamp captured by the most recent start()
+   stamp_type stop_;    // stamp captured by the most recent stop()
+
+ public:
+   // Value-initialize both stamps so delta() is defined before any timing.
+   P_timer() : start_(), stop_() {}
+   void start() { TP::sample(start_); }
+   void stop()  { TP::sample(stop_); }
+
+   // Read-only accessors; const so they work on a const timer.
+   stamp_type raw_delta() const { return TP::sub(stop_, start_); }
+   float delta() const { return TP::seconds(TP::sub(stop_, start_)); }
+ };
+
+
+
+ /// Timer class that accumulates across multiple start/stop times.
+ ///
+ /// Requires:
+ /// TP is a timer policy.
+
+ template <typename TP>
+ class P_acc_timer {
+   typedef typename TP::stamp_type stamp_type;
+   stamp_type total_;   // sum of every completed start()/stop() interval
+   stamp_type start_;   // stamp taken by the latest start()
+   stamp_type stop_;    // stamp taken by the latest stop()
+   unsigned   count_;   // number of stop() calls so far
+
+ public:
+   // Begin with an empty accumulation and a zero interval count.
+   P_acc_timer() : total_(stamp_type()), count_(0) {}
+   void start() { TP::sample(start_); }
+   // Close the current interval and fold it into the running total.
+   void stop()
+   {
+     TP::sample(stop_);
+     stamp_type const elapsed = TP::sub(stop_, start_);
+     total_ = TP::add(total_, elapsed);
+     ++count_;
+   }
+
+   stamp_type raw_delta() const { return TP::sub(stop_, start_); }
+   float delta() const { return TP::seconds(this->raw_delta()); }
+   float total() const { return TP::seconds(total_); }
+   int count() const { return count_; }
+ };
+
+ typedef Posix_time DefaultTime; // timer policy used by the two aliases below
+
+ typedef P_timer<DefaultTime> Timer; // single start/stop interval timer
+ typedef P_acc_timer<DefaultTime> Acc_timer; // accumulating timer
+
+
+ } // namespace vsip::impl::profile
+ } // namespace vsip::impl
+ } // namespace vsip
+
+
+
+
+ /// Compare two floating-point values for equality.
+ ///
+ /// Algorithm from:
+ /// www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
+
+ template <typename T>
+ bool
+ almost_equal(
+   T A,
+   T B,
+   T rel_epsilon = 1e-4,
+   T abs_epsilon = 1e-6)
+ {
+   // Values within the absolute tolerance compare equal outright.
+   if (vsip::mag(A - B) < abs_epsilon)
+     return true;
+
+   // Otherwise divide the difference by whichever operand is larger in
+   // magnitude and test the result against the relative tolerance.
+   T const relative_error = (vsip::mag(B) > vsip::mag(A))
+     ? vsip::mag((A - B) / B)
+     : vsip::mag((B - A) / A);
+
+   return relative_error <= rel_epsilon;
+ }
+
+
+
+ template <typename T>
+ bool
+ almost_equal(
+   std::complex<T> A,
+   std::complex<T> B,
+   T rel_epsilon = 1e-4,
+   T abs_epsilon = 1e-6)
+ {
+   // Close enough in absolute terms: equal regardless of scale.
+   if (vsip::mag(A - B) < abs_epsilon)
+     return true;
+   // Else take the error relative to the operand of larger magnitude.
+   if (vsip::mag(B) > vsip::mag(A))
+   {
+     T const err_vs_b = vsip::mag((A - B) / B);
+     return err_vs_b <= rel_epsilon;
+   }
+   T const err_vs_a = vsip::mag((B - A) / A);
+   return err_vs_a <= rel_epsilon;
+ }
+
+
+
+ /// Compare two values of the same type for equality using operator==.
+ template <typename T>
+ inline bool
+ equal(T val1, T val2)
+ {
+ return val1 == val2;
+ }
+
+
+ /// Compare two float values for equality via almost_equal<float>, using
+ /// that function's default relative and absolute tolerances.
+ /// Note: A fixed epsilon is not adequate for comparing the results
+ /// of all floating point computations. Epsilon should be chosen
+ /// based on the dynamic range of the computation.
+ template <>
+ inline bool
+ equal(float val1, float val2)
+ {
+ return almost_equal<float>(val1, val2);
+ }
+
+
+
+ /// Compare two double values via almost_equal<double> and its default tolerances.
+ template <>
+ inline bool
+ equal(double val1, double val2)
+ {
+ return almost_equal<double>(val1, val2);
+ }
+
+
+
+ /// Compare two complex values for equality within epsilon: the real and
+ /// imaginary parts must each compare equal under equal().
+ template <typename T>
+ inline bool
+ equal(vsip::complex<T> val1, vsip::complex<T> val2)
+ {
+   if (!equal(val1.real(), val2.real())) return false;
+   return equal(val1.imag(), val2.imag());
+ }
+
+
+
+
+ inline void  // report a failed test_assert() on stderr, then abort
+ test_assert_fail(
+   const char*  assertion,   // text of the expression that was false
+   const char*  file,        // source file of the assertion
+   unsigned int line,        // source line of the assertion
+   const char*  function)    // enclosing function name; may be null
+ {
+   // %u matches the unsigned line; a null name must not reach %s directly.
+   std::fprintf(stderr, "TEST ASSERT FAIL: %s %s %u %s\n",
+                assertion, file, line, function ? function : "(null)");
+   std::abort();
+ }
+ // Name of the enclosing function, for test_assert() diagnostics.
+ // __GNUC__ is the macro GCC-compatible compilers predefine; they provide
+ // __PRETTY_FUNCTION__ in C++, so the glibc-only __GNUC_PREREQ is not used.
+ #if defined(__GNUC__)
+ # define TEST_ASSERT_FUNCTION __PRETTY_FUNCTION__
+ #else
+ # if (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) || (defined __cplusplus && __cplusplus >= 201103L)
+ #  define TEST_ASSERT_FUNCTION __func__
+ # else
+ // No known spelling: pass a null name (plain const; __const is glibc-only).
+ #  define TEST_ASSERT_FUNCTION (static_cast<const char *>(0))
+ # endif
+ #endif
+
+ // Stringize the asserted expression.  The # operator is part of every
+ // standard C++ preprocessor, so no __STDC__-dependent fallback is kept.
+ #define __TEST_STRING(e) #e
+
+ // Evaluate expr; if it is false, pass its text and source location to
+ // test_assert_fail().  The expansion is a single void expression.
+ #define test_assert(expr) \
+ (static_cast<void>((expr) ? 0 : \
+ (test_assert_fail(__TEST_STRING(expr), __FILE__, __LINE__, \
+ TEST_ASSERT_FUNCTION), 0)))
+
+
+
+ #endif // not VSIP_IMPL_SOURCERY_VPP
+
+
+ #endif // VSIP_IMPL_BENCHMARKS_HPP
Index: benchmarks/loop_ser.hpp
===================================================================
RCS file: benchmarks/loop_ser.hpp
diff -N benchmarks/loop_ser.hpp
*** /dev/null 1 Jan 1970 00:00:00 -0000
--- benchmarks/loop_ser.hpp 19 Mar 2006 22:40:25 -0000
***************
*** 0 ****
--- 1,340 ----
+ /* Copyright (c) 2006 by CodeSourcery. All rights reserved. */
+
+ /** @file loop_ser.hpp
+ @author Don McCoy
+ @date 2006-03-19
+ @brief VSIPL++ Library: Benchmark outer loop (serial version).
+
+ */
+
+ #ifndef CSL_LOOP_SER_HPP
+ #define CSL_LOOP_SER_HPP
+
+ /***********************************************************************
+ Included Files
+ ***********************************************************************/
+
+ #include <algorithm>
+ #include <vector>
+
+
+ #include <vsip/vector.hpp>
+ #include <vsip/math.hpp>
+
+ #include "benchmarks.hpp"
+
+
+
+ /***********************************************************************
+ Declarations
+ ***********************************************************************/
+
+ enum output_metric // quantity Loop1P::metric() computes for one timing
+ {
+ pts_per_sec, // points (M * loop) divided by time * 1e6
+ ops_per_sec, // points * ops_per_point(M), divided by time * 1e6
+ iob_per_sec, // points * (riob_per_point + wiob_per_point), same scaling
+ wiob_per_sec, // points * wiob_per_point(M), same scaling
+ all_per_sec // print the pts, ops and iob values together on each line
+ };
+
+
+ enum lhs_metric // what Loop1P::sweep() prints in the first column
+ {
+ lhs_pts, // the point count M
+ lhs_mem // M * mem_per_point(M)
+ };
+
+ enum bench_mode // selects the driver used by Loop1P::operator()
+ {
+ steady_mode, // run Loop1P::steady()
+ sweep_mode // run Loop1P::sweep()
+ };
+
+
+ // 1 Parameter Loop: times a benchmark functor and prints the results.
+ class Loop1P
+ {
+ public:
+   // typedef void (TimingFunc)(int M, int loop, float* time, int calib);
+   // typedef boost::function<void(unsigned, unsigned, float*)> TimingFunctor;
+   Loop1P() :
+     start_      (2),
+     stop_       (21),
+     cal_        (4),
+     loop_start_ (10),
+     samples_    (1),
+     goal_sec_   (1.0),
+     metric_     (pts_per_sec),
+     lhs_        (lhs_pts),
+     note_       (0),
+     user_param_ (0),            // give the user parameter a defined default
+     do_prof_    (false),
+     what_       (0),
+     show_loop_  (false),
+     show_time_  (false),
+     mode_       (sweep_mode)
+   {}
+
+   // Time func at every size 2^start_ .. 2^stop_ (used for sweep_mode).
+   template <typename Functor>
+   void sweep(Functor func);
+   // Time func repeatedly at the single size 2^start_ (used for steady_mode).
+   template <typename Functor>
+   void steady(Functor func);
+   // Run steady() or sweep() according to mode_.
+   template <typename Functor>
+   void operator()(Functor func);
+   // Convert one timing of `loop` runs at size M into the metric m.
+   template <typename Functor>
+   float metric(Functor& fcn, size_t M, size_t loop, float time,
+                output_metric m);
+
+   // Member data.
+ public:
+   unsigned      start_;       // loop start power-of-two
+   unsigned      stop_;        // loop stop power-of-two
+   int           cal_;         // calibration power-of-two
+   int           loop_start_;  // initial loop count is 2^loop_start_
+   unsigned      samples_;     // timings taken per size in sweep()
+   double        goal_sec_;    // measurement goal (in seconds)
+   output_metric metric_;      // metric printed for each timing
+   lhs_metric    lhs_;         // first output column: points or memory
+   char*         note_;        // optional note printed in the header
+   int           user_param_;  // not read anywhere in this header
+   bool          do_prof_;     // not read anywhere in this header
+   int           what_;        // printed next to fcn.what() in the header
+   bool          show_loop_;   // also print the loop count per line
+   bool          show_time_;   // also print the measured time per line
+   bench_mode    mode_;        // steady or sweep; see operator()
+ };
+
+
+
+ /***********************************************************************
+ Definitions
+ ***********************************************************************/
+
+ template <typename Functor>
+ inline float
+ Loop1P::metric(
+   Functor&      fcn,
+   size_t        M,
+   size_t        loop,
+   float         time,
+   output_metric m)
+ {
+   // Every metric is an amount of work divided by (time * 1e6).
+   double const scale = time * 1e6;
+   double       work  = 0.0;
+   switch (m)
+   {
+   case pts_per_sec:
+     work = static_cast<double>(M) * loop;
+     break;
+   case ops_per_sec:
+     work = static_cast<double>(M) * fcn.ops_per_point(M) * loop;
+     break;
+   case iob_per_sec:
+     work = static_cast<double>(M) * (fcn.riob_per_point(M) + fcn.wiob_per_point(M))
+          * loop;
+     break;
+   case wiob_per_sec:
+     work = static_cast<double>(M) * fcn.wiob_per_point(M) * loop;
+     break;
+   default:
+     // all_per_sec (or any other value) has no single result here.
+     return 0.f;
+   }
+   return work / scale;
+ }
+
+
+
+ template <typename Functor>
+ inline void
+ Loop1P::sweep(Functor fcn)
+ {
+   // Sweep mode: calibrate the loop count at size 2^cal_, then time fcn at
+   // every size 2^start_ .. 2^stop_ and print one result line per size.
+   size_t loop, M;
+   float  time;
+   double growth;
+   // Guarantee one sample so the median index below is always valid.
+   unsigned const n_time = samples_ ? samples_ : 1;
+
+   std::vector<float> mtime(n_time);
+
+   loop = (size_t(1) << loop_start_);
+   M    = (size_t(1) << cal_);
+
+   // calibrate --------------------------------------------------------
+   // Rescale loop until one call of fcn takes roughly goal_sec_.
+   while (1)
+   {
+     fcn(M, loop, time);
+
+     if (time <= 0.01) time = 0.01;
+
+     float factor = goal_sec_ / time;
+     if (factor < 1.0) factor += 0.1 * (1.0 - factor);
+     loop = static_cast<size_t>(factor * loop);
+
+     // Once loop truncates to zero it can never grow again (factor * 0),
+     // so fall back to a single iteration and stop calibrating.
+     if (loop < 1) { loop = 1; break; }
+
+     if (factor >= 0.75 && factor <= 1.25)
+       break;
+   }
+
+   {
+     printf("# what : %s (%d)\n", fcn.what(), what_);
+     printf("# ops_per_point(1) : %d\n", (int)fcn.ops_per_point(1));
+     printf("# riob_per_point(1): %d\n", (int)fcn.riob_per_point(1));
+     printf("# wiob_per_point(1): %d\n", (int)fcn.wiob_per_point(1));
+     printf("# metric : %s\n",
+            metric_ == pts_per_sec  ? "pts_per_sec" :
+            metric_ == ops_per_sec  ? "ops_per_sec" :
+            metric_ == iob_per_sec  ? "iob_per_sec" :
+            metric_ == wiob_per_sec ? "wiob_per_sec" :
+                                      "*unknown*");
+     if (this->note_)
+       printf("# note: %s\n", this->note_);
+     printf("# start_loop : %lu\n", (unsigned long) loop);
+   }
+
+   // for real ---------------------------------------------------------
+   for (unsigned i=start_; i<=stop_; i++)
+   {
+     M = (size_t(1) << i);
+
+     // Take n_time timings at this size; the median one is reported.
+     for (unsigned j=0; j<n_time; ++j)
+     {
+       fcn(M, loop, time);
+       mtime[j] = time;
+     }
+
+     std::sort(mtime.begin(), mtime.end());
+     float const median = mtime[(n_time-1)/2];
+
+     {
+       // First column: memory footprint or plain point count.
+       size_t const L = (this->lhs_ == lhs_mem)
+         ? M * fcn.mem_per_point(M) : M;
+
+       if (this->metric_ == all_per_sec)
+         printf("%7lu %f %f %f", (unsigned long) L,
+                this->metric(fcn, M, loop, median, pts_per_sec),
+                this->metric(fcn, M, loop, median, ops_per_sec),
+                this->metric(fcn, M, loop, median, iob_per_sec));
+       else if (n_time > 1)
+         // Note: max time is min op/s, and min time is max op/s
+         printf("%7lu %f %f %f", (unsigned long) L,
+                this->metric(fcn, M, loop, median, metric_),
+                this->metric(fcn, M, loop, mtime[n_time-1], metric_),
+                this->metric(fcn, M, loop, mtime[0], metric_));
+       else
+         printf("%7lu %f", (unsigned long) L,
+                this->metric(fcn, M, loop, mtime[0], metric_));
+       if (this->show_loop_)
+         printf(" %8lu", (unsigned long)loop);
+       if (this->show_time_)
+         printf(" %f", median);
+       printf("\n");
+       fflush(stdout);
+     }
+
+     // Predict the time at the next size (2*M) from the growth in ops and
+     // rescale loop toward goal_sec_; skip when no usable time was measured.
+     growth = 2.0 * fcn.ops_per_point(2*M) / fcn.ops_per_point(M);
+     time   = median * growth;
+     if (time > 0)
+     {
+       float factor = goal_sec_ / time;
+       if (factor < 1.0) factor += 0.1 * (1.0 - factor);
+       loop = static_cast<size_t>(factor * loop);
+     }
+     if (loop < 1) loop = 1;
+   }
+ }
+
+
+
+ template <typename Functor>
+ void
+ Loop1P::steady(Functor fcn)
+ {
+   // Steady mode: time fcn at the single size 2^start_ over and over,
+   // printing one line per run.  The loop below has no exit condition.
+   size_t       loop = (size_t(1) << loop_start_);
+   size_t const M    = (size_t(1) << start_);
+   float        time;
+
+   // Header lines describing the functor and the selected metric.
+   {
+     printf("# what : %s (%d)\n", fcn.what(), what_);
+     printf("# ops_per_point(1) : %d\n", (int)fcn.ops_per_point(1));
+     printf("# riob_per_point(1): %d\n", (int)fcn.riob_per_point(1));
+     printf("# wiob_per_point(1): %d\n", (int)fcn.wiob_per_point(1));
+     printf("# metric : %s\n",
+            metric_ == pts_per_sec  ? "pts_per_sec" :
+            metric_ == ops_per_sec  ? "ops_per_sec" :
+            metric_ == iob_per_sec  ? "iob_per_sec" :
+            metric_ == wiob_per_sec ? "wiob_per_sec" :
+                                      "*unknown*");
+     if (this->note_)
+       printf("# note: %s\n", this->note_);
+     printf("# start_loop : %lu\n", (unsigned long) loop);
+   }
+
+
+   // for real ---------------------------------------------------------
+   while (1)
+   {
+     fcn(M, loop, time);
+
+     {
+       if (this->metric_ == all_per_sec)
+         printf("%7lu %f %f %f", (unsigned long) M,
+                this->metric(fcn, M, loop, time, pts_per_sec),
+                this->metric(fcn, M, loop, time, ops_per_sec),
+                this->metric(fcn, M, loop, time, iob_per_sec));
+       else
+         printf("%7lu %f", (unsigned long) M,
+                this->metric(fcn, M, loop, time, metric_));
+       if (this->show_loop_)
+         printf(" %8lu", (unsigned long)loop);
+       if (this->show_time_)
+         printf(" %f", time);
+       printf("\n");
+       fflush(stdout);
+     }
+
+     // Rescale loop toward goal_sec_; a non-positive time gives no basis.
+     if (time > 0)
+     {
+       float factor = goal_sec_ / time;
+       if (factor < 1.0) factor += 0.1 * (1.0 - factor);
+       loop = static_cast<size_t>(factor * loop);
+     }
+
+     if (loop < 1) loop = 1;
+   }
+ }
+
+
+
+ template <typename Functor>
+ inline void
+ Loop1P::operator()(Functor fcn)
+ {
+   // Anything other than steady_mode is treated as a sweep.
+   if (mode_ != steady_mode)
+     this->sweep(fcn);
+   else
+     this->steady(fcn);
+ }
+
+ #endif // CSL_LOOP_SER_HPP
Index: benchmarks/main.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/benchmarks/main.cpp,v
retrieving revision 1.7
diff -c -p -r1.7 main.cpp
*** benchmarks/main.cpp 3 Mar 2006 14:30:53 -0000 1.7
--- benchmarks/main.cpp 19 Mar 2006 22:40:25 -0000
***************
*** 14,23 ****
#include <iostream>
#include <vsip/initfin.hpp>
- #include <vsip/impl/profile.hpp>
! #include "test.hpp"
! #include "loop.hpp"
using namespace vsip;
--- 14,21 ----
#include <iostream>
#include <vsip/initfin.hpp>
! #include "benchmarks.hpp"
using namespace vsip;
Index: benchmarks/make.standalone
===================================================================
RCS file: /home/cvs/Repository/vpp/benchmarks/make.standalone,v
retrieving revision 1.2
diff -c -p -r1.2 make.standalone
*** benchmarks/make.standalone 27 Jan 2006 13:13:23 -0000 1.2
--- benchmarks/make.standalone 19 Mar 2006 22:40:25 -0000
*************** EXEEXT =
*** 56,62 ****
# Variables in this section should not be modified.
# Logic to call pkg-config with PREFIX, if specified.
! ifdef $PREFIX
PC = env PKG_CONFIG_PATH=$(PREFIX)/lib/pkgconfig \
pkg-config --define-variable=prefix=$(PREFIX) $(PKG)
else
--- 56,62 ----
# Variables in this section should not be modified.
# Logic to call pkg-config with PREFIX, if specified.
! ifdef PREFIX
PC = env PKG_CONFIG_PATH=$(PREFIX)/lib/pkgconfig \
pkg-config --define-variable=prefix=$(PREFIX) $(PKG)
else
*************** CXXFLAGS := $(shell $(PC) --cflags
*** 69,81 ****
$(shell $(PC) --variable=cxxflags )
LIBS := $(shell $(PC) --libs )
- CXXFLAGS := $(CXXFLAGS) -I../tests
-
sources := $(wildcard *.cpp)
objects := $(patsubst %.cpp, %.$(OBJEXT), $(sources))
exes := $(patsubst %.cpp, %$(EXEEXT), $(sources))
- tests := $(patsubst %.cpp, %.test, $(sources))
statics := $(patsubst %.cpp, %.static$(EXEEXT), $(sources))
--- 69,78 ----
*************** exes_def_build := $(filter-out $(exes_sp
*** 88,104 ****
# Targets
########################################################################
! all: $(tests)
! check: $(tests)
vars:
@echo "CXX : " $(CXX)
@echo "CXXFLAGS: " $(CXXFLAGS)
@echo "LIBS : " $(LIBS)
clean:
! rm -rf *.exe *.o
--- 85,102 ----
# Targets
########################################################################
! all: $(exes_def_build)
! check: $(exes_def_build)
vars:
+ @echo "PKG-CFG : " $(PC)
@echo "CXX : " $(CXX)
@echo "CXXFLAGS: " $(CXXFLAGS)
@echo "LIBS : " $(LIBS)
clean:
! rm -rf $(exes_def_build) $(objects)
Index: benchmarks/vmul.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/benchmarks/vmul.cpp,v
retrieving revision 1.7
diff -c -p -r1.7 vmul.cpp
*** benchmarks/vmul.cpp 3 Mar 2006 14:30:53 -0000 1.7
--- benchmarks/vmul.cpp 19 Mar 2006 22:40:25 -0000
***************
*** 17,31 ****
#include <vsip/support.hpp>
#include <vsip/math.hpp>
#include <vsip/random.hpp>
! #include <vsip/impl/profile.hpp>
!
! #include "test.hpp"
! #include "loop.hpp"
! #include "ops_info.hpp"
using namespace vsip;
/***********************************************************************
Definitions - vector element-wise multiply
--- 17,31 ----
#include <vsip/support.hpp>
#include <vsip/math.hpp>
#include <vsip/random.hpp>
! #include "benchmarks.hpp"
using namespace vsip;
+ #ifndef VSIP_IMPL_SOURCERY_VPP
+ #undef VSIP_IMPL_NOINLINE
+ #define VSIP_IMPL_NOINLINE
+ #endif
/***********************************************************************
Definitions - vector element-wise multiply
*************** struct t_vmul1
*** 51,59 ****
A = gen.randu(size);
B = gen.randu(size);
! A(0) = T(3);
! B(0) = T(4);
!
vsip::impl::profile::Timer t1;
t1.start();
--- 51,59 ----
A = gen.randu(size);
B = gen.randu(size);
! A.put(0, T(3));
! B.put(0, T(4));
!
vsip::impl::profile::Timer t1;
t1.start();
*************** struct t_vmul1
*** 61,74 ****
C = A * B;
t1.stop();
! if (!equal(C(0), T(12)))
{
std::cout << "t_vmul1: ERROR" << std::endl;
abort();
}
for (index_type i=0; i<size; ++i)
! test_assert(equal(C(i), A(i) * B(i)));
time = t1.delta();
}
--- 61,74 ----
C = A * B;
t1.stop();
! if (!equal(C.get(0), T(12)))
{
std::cout << "t_vmul1: ERROR" << std::endl;
abort();
}
for (index_type i=0; i<size; ++i)
! test_assert(equal(C.get(i), A.get(i) * B.get(i)));
time = t1.delta();
}
*************** struct t_vmul_ip1
*** 104,110 ****
t1.stop();
for (index_type i=0; i<size; ++i)
! test_assert(equal(chk(i), C(i)));
time = t1.delta();
}
--- 104,110 ----
t1.stop();
for (index_type i=0; i<size; ++i)
! test_assert(equal(chk.get(i), C.get(i)));
time = t1.delta();
}
*************** struct t_vmul_dom1
*** 132,139 ****
A = gen.randu(size);
B = gen.randu(size);
! A(0) = T(3);
! B(0) = T(4);
Domain<1> dom(size);
--- 132,139 ----
A = gen.randu(size);
B = gen.randu(size);
! A.put(0, T(3));
! B.put(0, T(4));
Domain<1> dom(size);
*************** struct t_vmul_dom1
*** 144,164 ****
C(dom) = A(dom) * B(dom);
t1.stop();
! if (!equal(C(0), T(12)))
{
! std::cout << "t_vmul1: ERROR" << std::endl;
abort();
}
for (index_type i=0; i<size; ++i)
! test_assert(equal(C(i), A(i) * B(i)));
time = t1.delta();
}
};
!
template <typename T, typename ComplexFmt>
struct t_vmul2
{
--- 144,164 ----
C(dom) = A(dom) * B(dom);
t1.stop();
! if (!equal(C.get(0), T(12)))
{
! std::cout << "t_vmul_dom1: ERROR" << std::endl;
abort();
}
for (index_type i=0; i<size; ++i)
! test_assert(equal(C.get(i), A.get(i) * B.get(i)));
time = t1.delta();
}
};
! #ifdef VSIP_IMPL_SOURCERY_VPP
template <typename T, typename ComplexFmt>
struct t_vmul2
{
*************** struct t_vmul2
*** 179,186 ****
Vector<T, block_type> B(size, T());
Vector<T, block_type> C(size);
! A(0) = T(3);
! B(0) = T(4);
vsip::impl::profile::Timer t1;
--- 179,186 ----
Vector<T, block_type> B(size, T());
Vector<T, block_type> C(size);
! A.put(0, T(3));
! B.put(0, T(4));
vsip::impl::profile::Timer t1;
*************** struct t_vmul2
*** 189,204 ****
C = A * B;
t1.stop();
! if (!equal(C(0), T(12)))
{
! std::cout << "t_vmul1: ERROR" << std::endl;
abort();
}
time = t1.delta();
}
};
!
/***********************************************************************
--- 189,204 ----
C = A * B;
t1.stop();
! if (!equal(C.get(0), T(12)))
{
! std::cout << "t_vmul2: ERROR" << std::endl;
abort();
}
time = t1.delta();
}
};
! #endif // VSIP_IMPL_SOURCERY_VPP
/***********************************************************************
*************** struct t_rcvmul1
*** 231,241 ****
t1.start();
for (index_type l=0; l<loop; ++l)
! C = A * B;
t1.stop();
for (index_type i=0; i<size; ++i)
! test_assert(equal(C(i), A(i) * B(i)));
time = t1.delta();
}
--- 231,241 ----
t1.start();
for (index_type l=0; l<loop; ++l)
! C = B * A;
t1.stop();
for (index_type i=0; i<size; ++i)
! test_assert(equal(C.get(i), A.get(i) * B.get(i)));
time = t1.delta();
}
*************** struct t_svmul1
*** 269,276 ****
Rand<T> gen(0, 0);
A = gen.randu(size);
! A(0) = T(4);
!
vsip::impl::profile::Timer t1;
t1.start();
--- 269,276 ----
Rand<T> gen(0, 0);
A = gen.randu(size);
! A.put(0, T(4));
!
vsip::impl::profile::Timer t1;
t1.start();
*************** struct t_svmul1
*** 279,285 ****
t1.stop();
for (index_type i=0; i<size; ++i)
! test_assert(equal(C(i), alpha * A(i)));
time = t1.delta();
}
--- 279,285 ----
t1.stop();
for (index_type i=0; i<size; ++i)
! test_assert(equal(C.get(i), alpha * A.get(i)));
time = t1.delta();
}
*************** struct t_svmul2
*** 305,311 ****
T alpha = T(3);
! A(0) = T(4);
vsip::impl::profile::Timer t1;
--- 305,311 ----
T alpha = T(3);
! A.put(0, T(4));
vsip::impl::profile::Timer t1;
*************** struct t_svmul2
*** 314,320 ****
C = A * alpha;
t1.stop();
! test_assert(equal(C(0), T(12)));
time = t1.delta();
}
--- 314,320 ----
C = A * alpha;
t1.stop();
! test_assert(equal(C.get(0), T(12)));
time = t1.delta();
}
*************** test(Loop1P& loop, int what)
*** 336,343 ****
--- 336,345 ----
{
case 1: loop(t_vmul1<float>()); break;
case 2: loop(t_vmul1<complex<float> >()); break;
+ #ifdef VSIP_IMPL_SOURCERY_VPP
case 3: loop(t_vmul2<complex<float>, impl::Cmplx_inter_fmt>()); break;
case 4: loop(t_vmul2<complex<float>, impl::Cmplx_split_fmt>()); break;
+ #endif
case 5: loop(t_rcvmul1<float>()); break;
case 11: loop(t_svmul1<float, float>()); break;