Actions

icon Post
text/html Subscribe
text/html Unsubscribe

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[patch] Fastconv benchmark


  • To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
  • Subject: [patch] Fastconv benchmark
  • From: Don McCoy <don@xxxxxxxxxxxxxxxx>
  • Date: Fri, 31 Mar 2006 12:02:45 -0700

The attached patch updates the fast convolution benchmark by using the new macro VSIP_IMPL_SOURCERY_VPP to separate code dependent on parallel features of the library. This allows it to be compiled against the reference implementation for performance comparisons.

Note that the changes to benchmarks.hpp submitted with yesterday's firbank patch are required for this patch as well.

Regards,

--
Don McCoy
don (at) CodeSourcery
(888) 776-0262 / (650) 331-3385, x712
2006-03-31  Don McCoy  <don@xxxxxxxxxxxxxxxx>

	* benchmarks/fastconv.cpp: Updated to use benchmarks.hpp.  Separated
	  out parallel-dependent code to allow it to compile against the
	  reference implementation.
	* benchmarks/loop.hpp: Moved using-declarations for parallel-related
	  types (Map, Global_map) into the code blocks guarded by the
	  PARALLEL_LOOP define.
Index: benchmarks/fastconv.cpp
===================================================================
RCS file: /home/cvs/Repository/vpp/benchmarks/fastconv.cpp,v
retrieving revision 1.4
diff -c -p -r1.4 fastconv.cpp
*** benchmarks/fastconv.cpp	7 Mar 2006 20:09:35 -0000	1.4
--- benchmarks/fastconv.cpp	31 Mar 2006 18:38:33 -0000
***************
*** 17,42 ****
  #include <vsip/support.hpp>
  #include <vsip/math.hpp>
  #include <vsip/signal.hpp>
- #include <vsip/impl/profile.hpp>
- #include <vsip/impl/par-foreach.hpp>
  
! #include "test.hpp"
! #include "loop.hpp"
  
  using namespace vsip;
  
  
  
  /***********************************************************************
    Common definitions
  ***********************************************************************/
  
- int
- fft_ops(length_type len)
- {
-   return int(5 * std::log((float)len) / std::log(2.f));
- }
- 
  template <typename T,
  	  typename ImplTag>
  struct t_fastconv_base;
--- 17,39 ----
  #include <vsip/support.hpp>
  #include <vsip/math.hpp>
  #include <vsip/signal.hpp>
  
! #include "benchmarks.hpp"
  
  using namespace vsip;
  
  
+ #ifdef VSIP_IMPL_SOURCERY_VPP
+ #  define PARALLEL_FASTCONV 1
+ #else
+ #  define PARALLEL_FASTCONV 0
+ #endif
+ 
  
  /***********************************************************************
    Common definitions
  ***********************************************************************/
  
  template <typename T,
  	  typename ImplTag>
  struct t_fastconv_base;
*************** struct t_fastconv_base<T, Impl1op> : fas
*** 74,79 ****
--- 71,77 ----
    void fastconv(length_type npulse, length_type nrange,
  		length_type loop, float& time)
    {
+ #if PARALLEL_FASTCONV
      typedef Map<Block_dist, Whole_dist>      map_type;
      typedef Dense<2, T, row2_type, map_type> block_type;
      typedef Matrix<T, block_type>            view_type;
*************** struct t_fastconv_base<T, Impl1op> : fas
*** 87,92 ****
--- 85,97 ----
      // Create the data cube.
      view_type data(npulse, nrange, map);
      view_type tmp(npulse, nrange, map);
+ #else
+     typedef Matrix<T>  view_type;
+     typedef Vector<T>  replica_view_type;
+ 
+     view_type data(npulse, nrange);
+     view_type tmp(npulse, nrange);
+ #endif
      
      // Create the pulse replica
      replica_view_type replica(nrange);
*************** struct t_fastconv_base<T, Impl1pip2> : f
*** 258,263 ****
--- 263,269 ----
    void fastconv(length_type npulse, length_type nrange,
  		length_type loop, float& time)
    {
+ #if PARALLEL_FASTCONV
      typedef Map<Block_dist, Whole_dist>      map_type;
      typedef Dense<2, T, row2_type, map_type> block_type;
      typedef Matrix<T, block_type>            view_type;
*************** struct t_fastconv_base<T, Impl1pip2> : f
*** 270,276 ****
  
      // Create the data cube.
      view_type data(npulse, nrange, map);
!     
      // Create the pulse replica
      Vector<T> tmp(nrange);
      replica_view_type replica(nrange);
--- 276,288 ----
  
      // Create the data cube.
      view_type data(npulse, nrange, map);
! #else
!     typedef Matrix<T>  view_type;
!     typedef Vector<T>  replica_view_type;
! 
!     view_type data(npulse, nrange);
! #endif
! 
      // Create the pulse replica
      Vector<T> tmp(nrange);
      replica_view_type replica(nrange);
*************** struct t_fastconv_base<T, Impl1pip2> : f
*** 302,320 ****
      t1.start();
      for (index_type l=0; l<loop; ++l)
      {
!       typename view_type::local_type         l_data    = data.local();
!       typename replica_view_type::local_type l_replica = replica.local();
!       length_type                            l_npulse  = l_data.size(0);
        for (index_type p=0; p<l_npulse; ++p)
        {
! 	for_fft(l_data.row(p), tmp);
! 	l_data.row(p) = tmp;
        }
!       l_data = vmmul<0>(l_replica, l_data);
        for (index_type p=0; p<l_npulse; ++p)
        {
! 	inv_fft(l_data.row(p), tmp);
! 	l_data.row(p) = tmp;
        }
      }
      t1.stop();
--- 314,330 ----
      t1.start();
      for (index_type l=0; l<loop; ++l)
      {
!       length_type l_npulse  = LOCAL(data).size(0);
        for (index_type p=0; p<l_npulse; ++p)
        {
! 	for_fft(LOCAL(data).row(p), tmp);
! 	LOCAL(data).row(p) = tmp;
        }
!       LOCAL(data) = vmmul<0>(LOCAL(replica), LOCAL(data));
        for (index_type p=0; p<l_npulse; ++p)
        {
! 	inv_fft(LOCAL(data).row(p), tmp);
! 	LOCAL(data).row(p) = tmp;
        }
      }
      t1.stop();
*************** struct t_fastconv_base<T, Impl2op> : fas
*** 395,400 ****
--- 405,411 ----
    void fastconv(length_type npulse, length_type nrange,
  		length_type loop, float& time)
    {
+ #if PARALLEL_FASTCONV
      typedef Map<Block_dist, Whole_dist>      map_type;
      typedef Dense<2, T, row2_type, map_type> block_type;
      typedef Matrix<T, block_type>            view_type;
*************** struct t_fastconv_base<T, Impl2op> : fas
*** 407,412 ****
--- 418,429 ----
  
      // Create the data cube.
      view_type data(npulse, nrange, map);
+ #else
+     typedef Matrix<T>  view_type;
+     typedef Vector<T>  replica_view_type;
+ 
+     view_type data(npulse, nrange);
+ #endif
      Vector<T> tmp(nrange);
      
      // Create the pulse replica
*************** struct t_fastconv_base<T, Impl2op> : fas
*** 438,451 ****
      t1.start();
      for (index_type l=0; l<loop; ++l)
      {
!       typename view_type::local_type         l_data    = data.local();
!       typename replica_view_type::local_type l_replica = replica.local();
!       length_type                            l_npulse  = l_data.size(0);
        for (index_type p=0; p<l_npulse; ++p)
        {
! 	for_fft(l_data.row(p), tmp);
! 	tmp *= l_replica;
! 	inv_fft(tmp, l_data.row(p));
        }
      }
      t1.stop();
--- 455,466 ----
      t1.start();
      for (index_type l=0; l<loop; ++l)
      {
!       length_type l_npulse  = LOCAL(data).size(0);
        for (index_type p=0; p<l_npulse; ++p)
        {
! 	for_fft(LOCAL(data).row(p), tmp);
! 	tmp *= LOCAL(replica);
! 	inv_fft(tmp, LOCAL(data).row(p));
        }
      }
      t1.stop();
*************** struct t_fastconv_base<T, Impl2ip> : fas
*** 467,472 ****
--- 482,488 ----
    void fastconv(length_type npulse, length_type nrange,
  		length_type loop, float& time)
    {
+ #if PARALLEL_FASTCONV
      typedef Map<Block_dist, Whole_dist>      map_type;
      typedef Dense<2, T, row2_type, map_type> block_type;
      typedef Matrix<T, block_type>            view_type;
*************** struct t_fastconv_base<T, Impl2ip> : fas
*** 479,486 ****
  
      // Create the data cube.
      view_type data(npulse, nrange, map);
-     // Vector<T> tmp(nrange);
      
      // Create the pulse replica
      replica_view_type replica(nrange);
  
--- 495,508 ----
  
      // Create the data cube.
      view_type data(npulse, nrange, map);
      
+ #else
+     typedef Matrix<T>  view_type;
+     typedef Vector<T>  replica_view_type;
+ 
+     view_type data(npulse, nrange);
+ #endif
+ 
      // Create the pulse replica
      replica_view_type replica(nrange);
  
*************** struct t_fastconv_base<T, Impl2ip> : fas
*** 510,523 ****
      t1.start();
      for (index_type l=0; l<loop; ++l)
      {
!       typename view_type::local_type         l_data    = data.local();
!       typename replica_view_type::local_type l_replica = replica.local();
!       length_type                            l_npulse  = l_data.size(0);
        for (index_type p=0; p<l_npulse; ++p)
        {
! 	for_fft(l_data.row(p));
! 	l_data.row(p) *= l_replica;
! 	inv_fft(l_data.row(p));
        }
      }
      t1.stop();
--- 532,543 ----
      t1.start();
      for (index_type l=0; l<loop; ++l)
      {
!       length_type l_npulse  = LOCAL(data).size(0);
        for (index_type p=0; p<l_npulse; ++p)
        {
! 	for_fft(LOCAL(data).row(p));
! 	LOCAL(data).row(p) *= LOCAL(replica);
! 	inv_fft(LOCAL(data).row(p));
        }
      }
      t1.stop();
*************** struct t_fastconv_base<T, Impl2ip_tmp> :
*** 539,544 ****
--- 559,565 ----
    void fastconv(length_type npulse, length_type nrange,
  		length_type loop, float& time)
    {
+ #if PARALLEL_FASTCONV
      typedef Map<Block_dist, Whole_dist>      map_type;
      typedef Dense<2, T, row2_type, map_type> block_type;
      typedef Matrix<T, block_type>            view_type;
*************** struct t_fastconv_base<T, Impl2ip_tmp> :
*** 551,556 ****
--- 572,584 ----
  
      // Create the data cube.
      view_type data(npulse, nrange, map);
+ 
+ #else
+     typedef Matrix<T>  view_type;
+     typedef Vector<T>  replica_view_type;
+ 
+     view_type data(npulse, nrange);
+ #endif
      Vector<T> tmp(nrange);
      
      // Create the pulse replica
*************** struct t_fastconv_base<T, Impl2ip_tmp> :
*** 582,597 ****
      t1.start();
      for (index_type l=0; l<loop; ++l)
      {
!       typename view_type::local_type         l_data    = data.local();
!       typename replica_view_type::local_type l_replica = replica.local();
!       length_type                            l_npulse  = l_data.size(0);
        for (index_type p=0; p<l_npulse; ++p)
        {
! 	tmp = l_data.row(p);
  	for_fft(tmp);
! 	tmp *= l_replica;
  	inv_fft(tmp);
! 	l_data.row(p) = tmp;
        }
      }
      t1.stop();
--- 610,623 ----
      t1.start();
      for (index_type l=0; l<loop; ++l)
      {
!       length_type l_npulse  = LOCAL(data).size(0);
        for (index_type p=0; p<l_npulse; ++p)
        {
! 	tmp = LOCAL(data).row(p);
  	for_fft(tmp);
! 	tmp *= LOCAL(replica);
  	inv_fft(tmp);
! 	LOCAL(data).row(p) = tmp;
        }
      }
      t1.stop();
*************** struct t_fastconv_base<T, Impl2ip_tmp> :
*** 606,611 ****
--- 632,638 ----
  /***********************************************************************
    Impl2fv: foreach_vector, interleaved fast-convolution
  ***********************************************************************/
+ #if PARALLEL_FASTCONV
  
  template <typename T>
  class Fast_convolution
*************** private:
*** 652,658 ****
  };
  
  
- 
  template <typename T>
  struct t_fastconv_base<T, Impl2fv> : fastconv_ops
  {
--- 679,684 ----
*************** struct t_fastconv_base<T, Impl2fv> : fas
*** 681,696 ****
      
      t1.start();
      for (index_type l=0; l<loop; ++l)
-     {
        foreach_vector<tuple<0, 1> >(fconv, data);
-     }
      t1.stop();
  
      // CHECK RESULT
      time = t1.delta();
    }
  };
! 
  
  
  /***********************************************************************
--- 707,720 ----
      
      t1.start();
      for (index_type l=0; l<loop; ++l)
        foreach_vector<tuple<0, 1> >(fconv, data);
      t1.stop();
  
      // CHECK RESULT
      time = t1.delta();
    }
  };
! #endif // PARALLEL_FASTCONV
  
  
  /***********************************************************************
*************** test(Loop1P& loop, int what)
*** 772,778 ****
--- 796,804 ----
    case  5: loop(t_fastconv_pf<complex<float>, Impl2op>(param1)); break;
    case  6: loop(t_fastconv_pf<complex<float>, Impl2ip>(param1)); break;
    case  7: loop(t_fastconv_pf<complex<float>, Impl2ip_tmp>(param1)); break;
+ #if PARALLEL_FASTCONV
    case  8: loop(t_fastconv_pf<complex<float>, Impl2fv>(param1)); break;
+ #endif
  
    case  9: loop(t_fastconv_pf<complex<float>, Impl1pip2_nopar>(param1)); break;
  
*************** test(Loop1P& loop, int what)
*** 783,789 ****
--- 809,817 ----
    case 15: loop(t_fastconv_rf<complex<float>, Impl2op>(param1)); break;
    case 16: loop(t_fastconv_rf<complex<float>, Impl2ip>(param1)); break;
    case 17: loop(t_fastconv_rf<complex<float>, Impl2ip_tmp>(param1)); break;
+ #if PARALLEL_FASTCONV
    case 18: loop(t_fastconv_rf<complex<float>, Impl2fv>(param1)); break;
+ #endif
  
    default: return 0;
    }
Index: benchmarks/loop.hpp
===================================================================
RCS file: /home/cvs/Repository/vpp/benchmarks/loop.hpp,v
retrieving revision 1.13
diff -c -p -r1.13 loop.hpp
*** benchmarks/loop.hpp	24 Mar 2006 12:36:05 -0000	1.13
--- benchmarks/loop.hpp	31 Mar 2006 18:39:10 -0000
***************
*** 17,23 ****
  #include <algorithm>
  #include <vector>
  
- //#include <vsip/impl/profile.hpp>
  #include <vsip/vector.hpp>
  #include <vsip/math.hpp>
  
--- 17,22 ----
*************** Loop1P::sweep(Functor fcn)
*** 182,189 ****
    using vsip::Index;
    using vsip::Vector;
    using vsip::Dense;
-   using vsip::Map;
-   using vsip::Global_map;
    using vsip::row1_type;
  
    size_t   loop, M;
--- 181,186 ----
*************** Loop1P::sweep(Functor fcn)
*** 198,203 ****
--- 195,202 ----
    std::vector<float> mtime(n_time);
  
  #if PARALLEL_LOOP
+   using vsip::Map;
+   using vsip::Global_map;
    Vector<float, Dense<1, float, row1_type, Map<> > >
      dist_time(nproc, Map<>(nproc));
    Vector<float, Dense<1, float, row1_type, Global_map<1> > > glob_time(nproc);
*************** Loop1P::steady(Functor fcn)
*** 342,349 ****
    using vsip::Index;
    using vsip::Vector;
    using vsip::Dense;
-   using vsip::Map;
-   using vsip::Global_map;
    using vsip::row1_type;
  
    size_t   loop, M;
--- 341,346 ----
*************** Loop1P::steady(Functor fcn)
*** 354,359 ****
--- 351,358 ----
    PROCESSOR_TYPE    nproc = NUM_PROCESSORS();
  
  #if PARALLEL_LOOP
+   using vsip::Map;
+   using vsip::Global_map;
    Vector<float, Dense<1, float, row1_type, Map<> > >
      dist_time(nproc, Map<>(nproc));
    Vector<float, Dense<1, float, row1_type, Global_map<1> > > glob_time(nproc);