File: vsip/impl/simd/rscvmul.hpp
1|
2|
3|
4|
5|
6|
7|
8|
9|
10| #ifndef VSIP_IMPL_SIMD_RSCVMUL_HPP
11| #define VSIP_IMPL_SIMD_RSCVMUL_HPP
12|
13|
14|
15|
16|
17| #include <complex>
18|
19| #include <vsip/impl/simd/simd.hpp>
20| #include <vsip/impl/metaprogramming.hpp>
21|
// Selects how rscvmul() is provided at the end of this header: a non-zero
// value compiles the inline definitions, zero leaves only the declarations
// (the definitions are then expected elsewhere; they are not in this file).
// NOTE(review): defined unconditionally here -- confirm no other header
// defines the same macro with a different value.
#define VSIP_IMPL_INLINE_LIBSIMD 0
23|
24|
25|
26|
27|
28|
29|
30| namespace vsip
31| {
32| namespace impl
33| {
34| namespace simd
35| {
36|
37|
38|
39|
40|
41| template <typename T,
42| bool IsSplit>
43| struct Is_algorithm_supported<T, IsSplit, Alg_rscvmul>
44| {
45| static bool const value =
46| Simd_traits<T>::is_accel &&
47| (Type_equal<T, float>::value ||
48| Type_equal<T, double>::value);
49| };
50|
51|
52|
53|
54|
// Kernel computing R = alpha * B for a real scalar alpha and complex data.
//
// T             element type: std::complex<X> for interleaved storage or
//               std::pair<X, X> for split (separate real/imaginary) storage.
// Is_vectorized selects the SIMD implementation (true) or the plain
//               scalar loop (false).
template <typename T, bool Is_vectorized>
struct Simd_rscvmul;



// Generic (non-vectorized) kernel for interleaved complex data.
template <typename T>
struct Simd_rscvmul<std::complex<T>, false>
{
  // Writes alpha * B[i] into R[i] for n consecutive elements.
  // n == 0 touches nothing.
  static void exec(T alpha, std::complex<T>* B, std::complex<T>* R, int n)
  {
    for (; n != 0; --n, ++B, ++R)
      *R = alpha * *B;
  }
};
76|
77|
78|
79|
80|
81|
82| template <typename T>
83| struct Simd_rscvmul<std::complex<T>, true>
84| {
85| static void exec(
86| T alpha,
87| std::complex<T>* B,
88| std::complex<T>* R,
89| int n)
90| {
91| typedef Simd_traits<T> simd;
92| typedef typename simd::simd_type simd_type;
93|
94| // handle mis-aligned vectors
95| if (simd::alignment_of((T*)R) != simd::alignment_of((T*)B))
96| {
97| // PROFILE
98| while (n)
99| {
100| *R = alpha * *B;
101| R++; B++;
102| n--;
103| }
104| return;
105| }
106|
107| // clean up initial unaligned values
108| while (simd::alignment_of((T*)B) != 0)
109| {
110| *R = alpha * *B;
111| R++; B++;
112| n--;
113| }
114|
115| if (n == 0) return;
116|
117| simd::enter();
118|
119| simd_type regA = simd::load_scalar_all(alpha);
120|
121| while (n >= simd::vec_size)
122| {
123| n -= simd::vec_size;
124|
125| simd_type regB1 = simd::load((T*)B);
126| simd_type regB2 = simd::load((T*)B + simd::vec_size);
127|
128| simd_type regR1 = simd::mul(regA, regB1);
129| simd_type regR2 = simd::mul(regA, regB2);
130|
131| simd::store((T*)R, regR1);
132| simd::store((T*)R + simd::vec_size, regR2);
133|
134| B+=simd::vec_size; R+=simd::vec_size;
135| }
136|
137| simd::exit();
138|
139| while (n)
140| {
141| *R = alpha * *B;
142| R++; B++;
143| n--;
144| }
145| }
146| };
147|
148|
149|
150|
151|
152|
153| template <typename T>
154| struct Simd_rscvmul<std::pair<T, T>, false>
155| {
156| static void exec(
157| T alpha,
158| std::pair<T*, T*> const& B,
159| std::pair<T*, T*> const& R,
160| int n)
161| {
162| T const* pBr = B.first;
163| T const* pBi = B.second;
164|
165| T* pRr = R.first;
166| T* pRi = R.second;
167|
168| while (n)
169| {
170| *pRr = alpha * *pBr;
171| *pRi = alpha * *pBi;
172| pRr++; pRi++;
173| pBr++; pBi++;
174| n--;
175| }
176| }
177| };
178|
179|
180|
181|
182|
183|
184| template <typename T>
185| struct Simd_rscvmul<std::pair<T, T>, true>
186| {
187| static void exec(
188| T alpha,
189| std::pair<T*, T*> const& B,
190| std::pair<T*, T*> const& R,
191| int n)
192| {
193| typedef Simd_traits<T> simd;
194| typedef typename simd::simd_type simd_type;
195|
196| T const* pBr = B.first;
197| T const* pBi = B.second;
198|
199| T* pRr = R.first;
200| T* pRi = R.second;
201|
202| // handle mis-aligned vectors
203| if (simd::alignment_of(pRr) != simd::alignment_of(pRi) ||
204| simd::alignment_of(pRr) != simd::alignment_of(pBr) ||
205| simd::alignment_of(pRr) != simd::alignment_of(pBi))
206| {
207| // PROFILE
208| while (n)
209| {
210| *pRr = alpha * *pBr;
211| *pRi = alpha * *pBi;
212| pRr++; pRi++;
213| pBr++; pBi++;
214| n--;
215| }
216| return;
217| }
218|
219| // clean up initial unaligned values
220| while (simd::alignment_of(pRr) != 0)
221| {
222| *pRr = alpha * *pBr;
223| *pRi = alpha * *pBi;
224| pRr++; pRi++;
225| pBr++; pBi++;
226| n--;
227| }
228|
229| if (n == 0) return;
230|
231| simd::enter();
232|
233| simd_type regA = simd::load_scalar_all(alpha);
234|
235| while (n >= simd::vec_size)
236| {
237| n -= simd::vec_size;
238|
239| simd_type Br = simd::load((T*)pBr);
240| simd_type Bi = simd::load((T*)pBi);
241|
242| simd_type Rr = simd::mul(regA, Br);
243| simd_type Ri = simd::mul(regA, Bi);
244|
245| simd::store_stream(pRr, Rr);
246| simd::store_stream(pRi, Ri);
247|
248| pRr += simd::vec_size; pRi += simd::vec_size;
249| pBr += simd::vec_size; pBi += simd::vec_size;
250| }
251|
252| simd::exit();
253|
254| while (n)
255| {
256| *pRr = alpha * *pBr;
257| *pRi = alpha * *pBi;
258| pRr++; pRi++;
259| pBr++; pBi++;
260| n--;
261| }
262| }
263| };
264|
265|
266|
267|
268|
269|
270| #if VSIP_IMPL_INLINE_LIBSIMD
271|
272| template <typename T>
273| inline void
274| rscvmul(
275| T op1,
276| std::complex<T>* op2,
277| std::complex<T>* res,
278| int size)
279| {
280| static bool const Is_vectorized =
281| Is_algorithm_supported<T, false, Alg_rscvmul>::value;
282| Simd_rscvmul<T, Is_vectorized>::exec(op1, op2, res, size);
283| }
284|
285| template <typename T>
286| inline void
287| rscvmul(
288| T op1,
289| std::pair<T*,T*> op2,
290| std::pair<T*,T*> res,
291| int size)
292| {
293| static bool const Is_vectorized =
294| Is_algorithm_supported<T, true, Alg_rscvmul>::value;
295| Simd_rscvmul<std::pair<T,T>, Is_vectorized>::exec(op1, op2, res, size);
296| }
297|
298| #else
299|
// Declaration-only form of rscvmul() for interleaved complex data, used
// when VSIP_IMPL_INLINE_LIBSIMD is 0.  Same signature as the inline variant
// in the #if branch: op1 is the real scalar, op2 the input, res the output,
// size the element count.
// NOTE(review): the definition is not in this header -- presumably supplied
// by a separately compiled library; confirm it is instantiated for every T
// that callers use.
template <typename T>
void
rscvmul(
  T                op1,
  std::complex<T>* op2,
  std::complex<T>* res,
  int              size);
307|
// Declaration-only form of rscvmul() for split complex data (pairs of
// real/imaginary array pointers), used when VSIP_IMPL_INLINE_LIBSIMD is 0.
// Same signature as the inline variant in the #if branch.
// NOTE(review): the definition is not in this header -- presumably supplied
// by a separately compiled library; confirm against the build.
template <typename T>
void
rscvmul(
  T                op1,
  std::pair<T*,T*> op2,
  std::pair<T*,T*> res,
  int              size);
315|
316| #endif // VSIP_IMPL_INLINE_LIBSIMD
317|
318|
319| }
320| }
321| }
322|
#endif // VSIP_IMPL_SIMD_RSCVMUL_HPP