Actions

icon Post
text/html Subscribe
text/html Unsubscribe

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

pwarp - simd_spu fixes


  • To: VSIPL++ Developers List <vsipl++@xxxxxxxxxxxxxxxx>
  • Subject: pwarp - simd_spu fixes
  • From: Jules Bergmann <jules@xxxxxxxxxxxxxxxx>
  • Date: Thu, 13 Dec 2007 14:56:08 -0500

Somehow I managed to break lots of bits in simd_spu.hpp at the very last minute before committing -- and I thought I tested this!

I also appear to have missed checking in my changelog.

Patch applied.

				-- Jules
--
Jules Bergmann
CodeSourcery
jules@xxxxxxxxxxxxxxxx
(650) 331-3385 x705
Index: ChangeLog
===================================================================
--- ChangeLog	(revision 189309)
+++ ChangeLog	(working copy)
@@ -1,3 +1,48 @@
+2007-12-13  Jules Bergmann  <jules@xxxxxxxxxxxxxxxx>
+
+	* src/vsip/opt/simd/simd_spu.hpp: Fix all sorts of typos!
+	
+2007-12-13  Jules Bergmann  <jules@xxxxxxxxxxxxxxxx>
+
+	Fix error_db to work with unsigned and low-precision types.
+	* src/vsip/core/lvalue_proxy.hpp (Is_lvalue_proxy_type): New trait to
+	  identify an lvalue proxy type.
+	* src/vsip/core/fns_elementwise.hpp: Handle scalar lvalue proxy types.
+	* src/vsip/core/view_cast.hpp: Fix typo in expr block type.
+	* src/vsip/core/fns_scalar.hpp (magsq): avoid using abs (inefficient
+	  for complex, overload ambiguous for integers).
+	* tests/vsip_csl/error_db.cpp: New file, unit test for error_db.
+	
+	* src/vsip/opt/cbe/pwarp_params.h: New file, pwarp ALF kernel
+	  parameter block.
+	* src/vsip/opt/cbe/ppu/pwarp.hpp: New file, pwarp ALF kernel bridge.
+	* src/vsip/opt/cbe/ppu/pwarp.cpp: New file, pwarp ALF kernel bridge.
+	* src/vsip/opt/cbe/spu/alf_pwarp_ub.cpp: New file, pwarp ALF kernel.
+	* src/vsip/opt/cbe/ppu/task_manager.hpp: Add tag for uchar pwarp
+	  ALF task.
+	* src/vsip/opt/cbe/spu/GNUmakefile.inc.in: Add rules for C++ ALF
+	  kernels.
+	* configure.ac (CXX_SPU): Define it.
+	* src/vsip/opt/simd/simd.hpp: Add AltiVec unsigned short and
+	  unsigned int support.  Split common traits into ...
+	* src/vsip/opt/simd/simd_common.hpp: ... here, new file.
+	* src/vsip/opt/simd/simd_spu.hpp: New file, SPU SIMD traits.
+	* src/vsip/GNUmakefile.inc.in (src_vsip_cxx_sources): Add pwarp.cpp
+	* src/vsip_csl/error_db.hpp: Cast difference to double, allows
+	  error_db to be used for unsigned types.
+	* src/vsip_csl/img/impl/pwarp_common.hpp: New file, common bits
+	  for perspective warp.
+	* src/vsip_csl/img/impl/pwarp_cbe.hpp: New file, CBE pwarp BE.
+	* src/vsip_csl/img/impl/pwarp_gen.hpp: New file, generic pwarp BE.
+	* src/vsip_csl/img/impl/pwarp_simd.hpp: New file, SIMD pwarp BE.
+	* src/vsip_csl/img/perspective_warp.hpp: New file, API and functional
+	  pwarp impl.
+	* src/vsip_csl/ref_pwarp.hpp: New file, reference version of pwarp
+	  algorithm.
+	* tests/vsip_csl/pwarp.cpp: New file, unit test for pwarp.
+	* benchmarks/pwarp.cpp: New file, benchmark for pwarp.
+	* src/vsip_csl/GNUmakefile.inc.in: Install vsip_csl/img/impl/ headers.
+
 2007-12-13  Stefan Seefeld  <stefan@xxxxxxxxxxxxxxxx>
 
 	* src/vsip_csl/GNUmakefile.inc.in: Install vsip_csl/img/ headers.
Index: src/vsip/opt/simd/simd_spu.hpp
===================================================================
--- src/vsip/opt/simd/simd_spu.hpp	(revision 189310)
+++ src/vsip/opt/simd/simd_spu.hpp	(working copy)
@@ -90,7 +90,7 @@
     // Language Extentions for CBEA, section 1.8
     simd_type x0 = *((simd_type*)addr);
     simd_type x1 = *((simd_type*)addr + 1);
-    unsigned int shift = (unsigned int)(ptr) & 15;
+    unsigned int shift = (unsigned int)(addr) & 15;
     return spu_or(spu_slqwbyte(x0, shift),
 		  spu_rlmaskqwbyte(x1, (signed)(shift - 16)));
   }
@@ -105,7 +105,7 @@
   }
 
   static simd_type perm(simd_type x0, simd_type x1, perm_simd_type sh)
-  { return spu_shuffle(x0, x1, spu_and(c, 0x1F)); }
+  { return spu_shuffle(x0, x1, spu_and(sh, 0x1F)); }
 
   static simd_type load_scalar(value_type value)
   { return (simd_type)si_from_float(value); }
@@ -191,7 +191,7 @@
     // Language Extentions for CBEA, section 1.8
     simd_type x0 = *((simd_type*)addr);
     simd_type x1 = *((simd_type*)addr + 1);
-    unsigned int shift = (unsigned int)(ptr) & 15;
+    unsigned int shift = (unsigned int)(addr) & 15;
     return spu_or(spu_slqwbyte(x0, shift),
 		  spu_rlmaskqwbyte(x1, (signed)(shift - 16)));
   }
@@ -206,10 +206,10 @@
   }
 
   static simd_type perm(simd_type x0, simd_type x1, perm_simd_type sh)
-  { return spu_shuffle(x0, x1, spu_and(c, 0x1F)); }
+  { return spu_shuffle(x0, x1, spu_and(sh, 0x1F)); }
 
   static simd_type load_scalar(value_type value)
-  { return si_from_short(value); }
+  { return (simd_type)si_from_short(value); }
 
   static simd_type load_scalar_all(value_type value)
   { return spu_splats(value); }
@@ -331,7 +331,7 @@
     // Language Extentions for CBEA, section 1.8
     simd_type x0 = *((simd_type*)addr);
     simd_type x1 = *((simd_type*)addr + 1);
-    unsigned int shift = (unsigned int)(ptr) & 15;
+    unsigned int shift = (unsigned int)(addr) & 15;
     return spu_or(spu_slqwbyte(x0, shift),
 		  spu_rlmaskqwbyte(x1, (signed)(shift - 16)));
   }
@@ -346,10 +346,10 @@
   }
 
   static simd_type perm(simd_type x0, simd_type x1, perm_simd_type sh)
-  { return spu_shuffle(x0, x1, spu_and(c, 0x1F)); }
+  { return spu_shuffle(x0, x1, spu_and(sh, 0x1F)); }
 
   static simd_type load_scalar(value_type value)
-  { return si_from_ushort(value); }
+  { return (simd_type)si_from_ushort(value); }
 
   static simd_type load_scalar_all(value_type value)
   { return spu_splats(value); }
@@ -392,44 +392,56 @@
   }
 
   static simd_type add(simd_type const& v1, simd_type const& v2)
-  { return vec_add(v1, v2); }
+  { return spu_add(v1, v2); }
 
   static simd_type sub(simd_type const& v1, simd_type const& v2)
-  { return vec_sub(v1, v2); }
+  { return spu_sub(v1, v2); }
 
   static simd_type fma(simd_type const& v1, simd_type const& v2,
 		       simd_type const& v3)
-  { return vec_mladd(v1, v2, v3); }
+  { // return vec_mladd(v1, v2, v3);
+    typedef __vector signed short ss_simd_type;
+    typedef __vector signed int   si_simd_type;
+    return ((simd_type)
+	    (spu_shuffle(
+	       spu_madd(
+		 (ss_simd_type)(spu_rl((vec_uint4)(v1), -16)),
+		 (ss_simd_type)(spu_rl((vec_uint4)(v2), -16)),
+		 (si_simd_type)(spu_rl((vec_uint4)(v3), -16))),
+	       spu_madd((ss_simd_type)v1, (ss_simd_type)v2,
+			spu_extend((ss_simd_type)v3)),
+	       ((perm_simd_type){ 2,  3, 18, 19,  6,  7, 22, 23,
+		                 10, 11, 26, 27, 14, 15, 30, 31}))));
+  }
 
+
   static simd_type band(simd_type const& v1, simd_type const& v2)
-  { return vec_and(v1, v2); }
+  { return spu_and(v1, v2); }
 
-  static simd_type band(bool_simd_type const& v1, simd_type const& v2)
-  { return vec_and(v1, v2); }
-
   static simd_type bor(simd_type const& v1, simd_type const& v2)
-  { return vec_or(v1, v2); }
+  { return spu_or(v1, v2); }
 
   static simd_type bxor(simd_type const& v1, simd_type const& v2)
-  { return vec_xor(v1, v2); }
+  { return spu_xor(v1, v2); }
 
   static simd_type bnot(simd_type const& v1)
-  { return vec_nor(v1, v1); }
+  { return spu_nor(v1, v1); }
 
   static bool_simd_type gt(simd_type const& v1, simd_type const& v2)
-  { return vec_cmpgt(v1, v2); }
+  { return spu_cmpgt(v1, v2); }
 
   static bool_simd_type lt(simd_type const& v1, simd_type const& v2)
-  { return vec_cmplt(v1, v2); }
+  { return spu_cmpgt(v2, v1); }
 
   static bool_simd_type ge(simd_type const& v1, simd_type const& v2)
-  { return vec_cmplt(v2, v1); }
+  {
+    bool_simd_type is_lt = spu_cmpgt(v2, v1);
+    return spu_nand(is_lt, is_lt);
+  }
 
   static bool_simd_type le(simd_type const& v1, simd_type const& v2)
-  { return vec_cmpgt(v2, v1); }
+  { return ge(v2, v1); }
 
-#endif
-
   static pack_simd_type pack(simd_type const& v1, simd_type const& v2)
   { // return vec_pack(v1, v2);
     static __vector unsigned char shuf = 
@@ -482,7 +494,7 @@
     // Language Extentions for CBEA, section 1.8
     simd_type x0 = *((simd_type*)addr);
     simd_type x1 = *((simd_type*)addr + 1);
-    unsigned int shift = (unsigned int)(ptr) & 15;
+    unsigned int shift = (unsigned int)(addr) & 15;
     return spu_or(spu_slqwbyte(x0, shift),
 		  spu_rlmaskqwbyte(x1, (signed)(shift - 16)));
   }
@@ -497,10 +509,10 @@
   }
 
   static simd_type perm(simd_type x0, simd_type x1, perm_simd_type sh)
-  { return spu_shuffle(x0, x1, spu_and(c, 0x1F)); }
+  { return spu_shuffle(x0, x1, spu_and(sh, 0x1F)); }
 
   static simd_type load_scalar(value_type value)
-  { return si_from_int(value); }
+  { return (simd_type)si_from_int(value); }
 
   static simd_type load_scalar_all(value_type value)
   { return spu_splats(value); }
@@ -610,7 +622,7 @@
     // Language Extentions for CBEA, section 1.8
     simd_type x0 = *((simd_type*)addr);
     simd_type x1 = *((simd_type*)addr + 1);
-    unsigned int shift = (unsigned int)(ptr) & 15;
+    unsigned int shift = (unsigned int)(addr) & 15;
     return spu_or(spu_slqwbyte(x0, shift),
 		  spu_rlmaskqwbyte(x1, (signed)(shift - 16)));
   }
@@ -625,10 +637,10 @@
   }
 
   static simd_type perm(simd_type x0, simd_type x1, perm_simd_type sh)
-  { return spu_shuffle(x0, x1, spu_and(c, 0x1F)); }
+  { return spu_shuffle(x0, x1, spu_and(sh, 0x1F)); }
 
   static simd_type load_scalar(value_type value)
-  { return si_from_uint(value); }
+  { return (simd_type)si_from_uint(value); }
 
   static simd_type load_scalar_all(value_type value)
   { return spu_splats(value); }
@@ -748,7 +760,7 @@
     // Language Extentions for CBEA, section 1.8
     simd_type x0 = *((simd_type*)addr);
     simd_type x1 = *((simd_type*)addr + 1);
-    unsigned int shift = (unsigned int)(ptr) & 15;
+    unsigned int shift = (unsigned int)(addr) & 15;
     return spu_or(spu_slqwbyte(x0, shift),
 		  spu_rlmaskqwbyte(x1, (signed)(shift - 16)));
   }
@@ -763,7 +775,7 @@
   }
 
   static simd_type perm(simd_type x0, simd_type x1, perm_simd_type sh)
-  { return spu_shuffle(x0, x1, spu_and(c, 0x1F)); }
+  { return spu_shuffle(x0, x1, spu_and(sh, 0x1F)); }
 
   static simd_type load_scalar(value_type value)
   { return (simd_type)si_from_float(value); }
@@ -828,8 +840,7 @@
   }
 
   static simd_type mag(simd_type const& v1)
-  { return ((simd_type)(spu_rlmask(spu_sl((uint_simd_type)(a), 1), -1))); }
-  // { uint_simd_type mask = si_from_uint(0x7fff); return spu_and(mask, v1); }
+  { return ((simd_type)(spu_rlmask(spu_sl((uint_simd_type)(v1), 1), -1))); }
 
   static simd_type min(simd_type const& v1, simd_type const& v2)
   { return spu_sel(v1, v2, spu_cmpgt(v1, v2)); }