File: vsip/impl/lapack.hpp
1|
2|
3|
4|
5|
6|
7|
8|
9|
10|
11|
12|
13|
14|
15|
16|
17|
18|
19|
20|
21|
22| #ifndef VSIP_IMPL_LAPACK_HPP
23| #define VSIP_IMPL_LAPACK_HPP
24|
25|
26|
27|
28|
29| #include <complex>
30|
31| #include <vsip/support.hpp>
32| #include <vsip/impl/config.hpp>
33| #include <vsip/impl/metaprogramming.hpp>
34|
35| #define CVT_TRANSPOSE(c) \
36| (((c) == 'N' || (c) == 'n') ? CblasNoTrans : \
37| ((c) == 'T' || (c) == 't') ? CblasTrans : \
38| ((c) == 'C' || (c) == 'c') ? CblasConjTrans : \
39| CblasNoTrans)
40|
41| #define CVT_UPLO(c) \
42| (((c) == 'U' || (c) == 'u') ? CblasUpper : \
43| ((c) == 'L' || (c) == 'l') ? CblasLower : \
44| CblasLower)
45|
46| #define CVT_DIAG(c) \
47| (((c) == 'U' || (c) == 'u') ? CblasUnit : \
48| ((c) == 'N' || (c) == 'n') ? CblasNonUnit : \
49| CblasNonUnit)
50|
51| #define CVT_SIDE(c) \
52| (((c) == 'L' || (c) == 'l') ? CblasLeft : \
53| ((c) == 'R' || (c) == 'r') ? CblasRight : \
54| CblasRight)
55|
56|
57|
58|
59|
60| extern "C"
61| {
62|
63|
64|
65|
66|
67|
68|
69|
70|
71|
72|
73|
74|
75|
76|
77|
78| #if VSIP_IMPL_USE_CBLAS == 1
79| # include <cblas.h>
80| # define VSIP_IMPL_USE_CBLAS_DOT 1
81| # define VSIP_IMPL_USE_CBLAS_OTHERS 1
82| #elif VSIP_IMPL_USE_CBLAS == 2
83| # include <mkl_cblas.h>
84| # define VSIP_IMPL_USE_CBLAS_DOT 1
85| # define VSIP_IMPL_USE_CBLAS_OTHERS 1
86| #elif VSIP_IMPL_USE_CBLAS == 3
87| # include <vsip/impl/lapack/acml_cblas.hpp>
88| # define VSIP_IMPL_USE_CBLAS_DOT 1
89| # define VSIP_IMPL_USE_CBLAS_OTHERS 0
90| #else
91| # define VSIP_IMPL_USE_CBLAS_DOT 0
92| # define VSIP_IMPL_USE_CBLAS_OTHERS 0
93| #endif
94| }
95|
96|
97|
98|
99|
100|
101|
102| namespace vsip
103| {
104|
105| namespace impl
106| {
107|
108| namespace blas
109| {
110|
111| extern "C"
112| {
113| typedef int* I;
114| typedef float* S;
115| typedef double* D;
116| typedef std::complex<float>* C;
117| typedef std::complex<double>* Z;
118|
119| #if !VSIP_IMPL_USE_CBLAS_DOT
120| // dot
121| VSIP_IMPL_FORTRAN_FLOAT_RETURN sdot_ (I, S, I, S, I);
122| double ddot_ (I, D, I, D, I);
123|
124| void cdotu_(C, I, C, I, C, I);
125| void zdotu_(Z, I, Z, I, Z, I);
126|
127| void cdotc_(C, I, C, I, C, I);
128| void zdotc_(Z, I, Z, I, Z, I);
129| #endif
130|
131| #if !VSIP_IMPL_USE_CBLAS_OTHERS
132| // trsm
133| void strsm_ (char*, char*, char*, char*, I, I, S, S, I, S, I);
134| void dtrsm_ (char*, char*, char*, char*, I, I, D, D, I, D, I);
135| void ctrsm_ (char*, char*, char*, char*, I, I, C, C, I, C, I);
136| void ztrsm_ (char*, char*, char*, char*, I, I, Z, Z, I, Z, I);
137|
138| // gemv
139| void sgemv_(char*, I, I, S, S, I, S, I, S, S, I);
140| void dgemv_(char*, I, I, D, D, I, D, I, D, D, I);
141| void cgemv_(char*, I, I, C, C, I, C, I, C, C, I);
142| void zgemv_(char*, I, I, Z, Z, I, Z, I, Z, Z, I);
143|
144| // gemm
145| void sgemm_(char*, char*, I, I, I, S, S, I, S, I, S, S, I);
146| void dgemm_(char*, char*, I, I, I, D, D, I, D, I, D, D, I);
147| void cgemm_(char*, char*, I, I, I, C, C, I, C, I, C, C, I);
148| void zgemm_(char*, char*, I, I, I, Z, Z, I, Z, I, Z, Z, I);
149|
150| // ger
151| void sger_ ( I, I, S, S, I, S, I, S, I );
152| void dger_ ( I, I, D, D, I, D, I, D, I );
153| void cgerc_ ( I, I, C, C, I, C, I, C, I );
154| void zgerc_ ( I, I, Z, Z, I, Z, I, Z, I );
155| void cgeru_ ( I, I, C, C, I, C, I, C, I );
156| void zgeru_ ( I, I, Z, Z, I, Z, I, Z, I );
157| #endif
158| };
159|
160| #define VSIP_IMPL_CBLAS_DOT(T, VPPFCN, BLASFCN)
161| inline T
162| VPPFCN(int n,
163| T* x, int incx,
164| T* y, int incy)
165| {
166| return BLASFCN(n, x, incx, y, incy);
167| }
168|
169| #define VSIP_IMPL_BLAS_DOT(T, VPPFCN, BLASFCN)
170| inline T
171| VPPFCN(int n,
172| T* x, int incx,
173| T* y, int incy)
174| {
175| return BLASFCN(&n, x, &incx, y, &incy);
176| }
177|
178| #if VSIP_IMPL_USE_CBLAS_DOT
179| VSIP_IMPL_CBLAS_DOT(float, dot, cblas_sdot)
180| VSIP_IMPL_CBLAS_DOT(double, dot, cblas_ddot)
181| #else
182| VSIP_IMPL_BLAS_DOT(float, dot, sdot_)
183| VSIP_IMPL_BLAS_DOT(double, dot, ddot_)
184| #endif // VSIP_IMPL_USE_CBLAS_DOT
185|
186| #undef VSIP_IMPL_BLAS_DOT
187|
188|
189|
190| #define VSIP_IMPL_CBLAS_CDOT(T, VPPFCN, BLASFCN)
191| inline T
192| VPPFCN(int n,
193| T* x, int incx,
194| T* y, int incy)
195| {
196| T ret;
197| BLASFCN(n,
198| static_cast<const void*>(x), incx,
199| static_cast<const void*>(y), incy, &ret);
200| return ret;
201| }
202|
203| #define VSIP_IMPL_BLAS_CDOT(T, VPPFCN, BLASFCN)
204| inline T
205| VPPFCN(int n,
206| T* x, int incx,
207| T* y, int incy)
208| {
209| T ret;
210| BLASFCN(&ret, &n, x, &incx, y, &incy);
211| return ret;
212| }
213|
214|
215| #if VSIP_IMPL_USE_CBLAS_DOT
216| VSIP_IMPL_CBLAS_CDOT(std::complex<float>, dot, cblas_cdotu_sub)
217| VSIP_IMPL_CBLAS_CDOT(std::complex<double>, dot, cblas_zdotu_sub)
218|
219| VSIP_IMPL_CBLAS_CDOT(std::complex<float>, dotc, cblas_cdotc_sub)
220| VSIP_IMPL_CBLAS_CDOT(std::complex<double>, dotc, cblas_zdotc_sub)
221| #else
222| VSIP_IMPL_BLAS_CDOT(std::complex<float>, dot, cdotu_)
223| VSIP_IMPL_BLAS_CDOT(std::complex<double>, dot, zdotu_)
224|
225| VSIP_IMPL_BLAS_CDOT(std::complex<float>, dotc, cdotc_)
226| VSIP_IMPL_BLAS_CDOT(std::complex<double>, dotc, zdotc_)
227| #endif
228|
229| #undef VSIP_IMPL_BLAS_CDOT
230|
231|
232|
233| #define VSIP_IMPL_BLAS_TRSM(T, FCN)
234| inline void
235| trsm(char side, char uplo,char transa,char diag,
236| int m, int n,
237| T alpha,
238| T *a, int lda,
239| T *b, int ldb)
240| {
241| FCN(&side, &uplo, &transa, &diag,
242| &m, &n,
243| &alpha,
244| a, &lda,
245| b, &ldb);
246| }
247|
248|
249|
250|
251|
252|
253|
254| inline float cblas_scalar_cast(float arg) { return arg; }
255| inline double cblas_scalar_cast(double arg) { return arg; }
256| inline void* cblas_scalar_cast(std::complex<float> &arg)
257| { return static_cast<void*> (&arg); }
258| inline void* cblas_scalar_cast(std::complex<double> &arg)
259| { return static_cast<void*> (&arg); }
260|
261|
262| #define VSIP_IMPL_CBLAS_TRSM(T, FCN)
263| inline void
264| trsm(char side, char uplo,char transa,char diag,
265| int m, int n,
266| T alpha,
267| T *a, int lda,
268| T *b, int ldb)
269| {
270| FCN(CblasColMajor,
271| CVT_SIDE(side), CVT_UPLO(uplo),CVT_TRANSPOSE(transa),
272| CVT_DIAG(diag),
273| m, n,
274| cblas_scalar_cast(alpha),
275| a, lda,
276| b, ldb);
277| }
278|
279| #if VSIP_IMPL_USE_CBLAS_OTHERS
280| VSIP_IMPL_CBLAS_TRSM(float, cblas_strsm)
281| VSIP_IMPL_CBLAS_TRSM(double, cblas_dtrsm)
282| VSIP_IMPL_CBLAS_TRSM(std::complex<float>, cblas_ctrsm)
283| VSIP_IMPL_CBLAS_TRSM(std::complex<double>,cblas_ztrsm)
284| #else
285| VSIP_IMPL_BLAS_TRSM(float, strsm_)
286| VSIP_IMPL_BLAS_TRSM(double, dtrsm_)
287| VSIP_IMPL_BLAS_TRSM(std::complex<float>, ctrsm_)
288| VSIP_IMPL_BLAS_TRSM(std::complex<double>, ztrsm_)
289| #endif
290|
291| #undef VSIP_IMPL_CBLAS_TRSM
292| #undef VSIP_IMPL_BLAS_TRSM
293|
294|
295|
296| #define VSIP_IMPL_BLAS_GEMV(T, FCN)
297| inline void
298| gemv(char transa,
299| int m, int n,
300| T alpha,
301| T *a, int lda,
302| T *x, int incx,
303| T beta,
304| T *y, int incy)
305| {
306| FCN(&transa,
307| &m, &n,
308| &alpha,
309| a, &lda,
310| x, &incx,
311| &beta,
312| y, &incy);
313| }
314|
315| #define VSIP_IMPL_CBLAS_GEMV(T, FCN)
316| inline void
317| gemv(char transa,
318| int m, int n,
319| T alpha,
320| T *a, int lda,
321| T *x, int incx,
322| T beta,
323| T *y, int incy)
324| {
325| FCN(CblasColMajor,
326| CVT_TRANSPOSE(transa),
327| m, n,
328| cblas_scalar_cast(alpha),
329| a, lda,
330| x, incx,
331| cblas_scalar_cast(beta),
332| y, incy);
333| }
334|
335|
336| #if VSIP_IMPL_USE_CBLAS_OTHERS
337| VSIP_IMPL_CBLAS_GEMV(float, cblas_sgemv)
338| VSIP_IMPL_CBLAS_GEMV(double, cblas_dgemv)
339| VSIP_IMPL_CBLAS_GEMV(std::complex<float>, cblas_cgemv)
340| VSIP_IMPL_CBLAS_GEMV(std::complex<double>,cblas_zgemv)
341| #else
342| VSIP_IMPL_BLAS_GEMV(float, sgemv_)
343| VSIP_IMPL_BLAS_GEMV(double, dgemv_)
344| VSIP_IMPL_BLAS_GEMV(std::complex<float>, cgemv_)
345| VSIP_IMPL_BLAS_GEMV(std::complex<double>, zgemv_)
346| #endif
347|
348| #undef VSIP_IMPL_CBLAS_GEMV
349| #undef VSIP_IMPL_BLAS_GEMV
350|
351|
352|
353| #define VSIP_IMPL_BLAS_GEMM(T, FCN)
354| inline void
355| gemm(char transa, char transb,
356| int m, int n, int k,
357| T alpha,
358| T *a, int lda,
359| T *b, int ldb,
360| T beta,
361| T *c, int ldc)
362| {
363| FCN(&transa, &transb,
364| &m, &n, &k,
365| &alpha,
366| a, &lda,
367| b, &ldb,
368| &beta,
369| c, &ldc);
370| }
371|
372| #define VSIP_IMPL_CBLAS_GEMM(T, FCN)
373| inline void
374| gemm(char transa, char transb,
375| int m, int n, int k,
376| T alpha,
377| T *a, int lda,
378| T *b, int ldb,
379| T beta,
380| T *c, int ldc)
381| {
382| FCN(CblasColMajor,
383| CVT_TRANSPOSE(transa), CVT_TRANSPOSE(transb),
384| m, n, k,
385| cblas_scalar_cast(alpha),
386| a, lda,
387| b, ldb,
388| cblas_scalar_cast(beta),
389| c, ldc);
390| }
391|
392|
393| #if VSIP_IMPL_USE_CBLAS_OTHERS
394| VSIP_IMPL_CBLAS_GEMM(float, cblas_sgemm)
395| VSIP_IMPL_CBLAS_GEMM(double, cblas_dgemm)
396| VSIP_IMPL_CBLAS_GEMM(std::complex<float>, cblas_cgemm)
397| VSIP_IMPL_CBLAS_GEMM(std::complex<double>, cblas_zgemm)
398| #else
399| VSIP_IMPL_BLAS_GEMM(float, sgemm_)
400| VSIP_IMPL_BLAS_GEMM(double, dgemm_)
401| VSIP_IMPL_BLAS_GEMM(std::complex<float>, cgemm_)
402| VSIP_IMPL_BLAS_GEMM(std::complex<double>, zgemm_)
403| #endif
404|
405| #undef VSIP_IMPL_BLAS_GEMM
406| #undef VSIP_IMPL_CBLAS_GEMM
407|
408|
409|
410| #define VSIP_IMPL_BLAS_GER(T, VPPFCN, FCN)
411| inline void
412| VPPFCN( int m, int n,
413| T alpha,
414| T *x, int incx,
415| T *y, int incy,
416| T *a, int lda)
417| {
418| FCN(&m, &n,
419| &alpha,
420| x, &incx,
421| y, &incy,
422| a, &lda);
423| }
424|
425| #define VSIP_IMPL_CBLAS_GER(T, VPPFCN, FCN)
426| inline void
427| VPPFCN( int m, int n,
428| T alpha,
429| T *x, int incx,
430| T *y, int incy,
431| T *a, int lda)
432| {
433| FCN(CblasColMajor,
434| m, n,
435| cblas_scalar_cast(alpha),
436| x, incx,
437| y, incy,
438| a, lda);
439| }
440|
441| #if VSIP_IMPL_USE_CBLAS_OTHERS
442| VSIP_IMPL_CBLAS_GER(float, ger, cblas_sger)
443| VSIP_IMPL_CBLAS_GER(double, ger, cblas_dger)
444| VSIP_IMPL_CBLAS_GER(std::complex<float>, gerc, cblas_cgerc)
445| VSIP_IMPL_CBLAS_GER(std::complex<double>, gerc, cblas_zgerc)
446| VSIP_IMPL_CBLAS_GER(std::complex<float>, geru, cblas_cgeru)
447| VSIP_IMPL_CBLAS_GER(std::complex<double>, geru, cblas_zgeru)
448| #else
449| VSIP_IMPL_BLAS_GER(float, ger, sger_)
450| VSIP_IMPL_BLAS_GER(double, ger, dger_)
451| VSIP_IMPL_BLAS_GER(std::complex<float>, gerc, cgerc_)
452| VSIP_IMPL_BLAS_GER(std::complex<double>, gerc, zgerc_)
453| VSIP_IMPL_BLAS_GER(std::complex<float>, geru, cgeru_)
454| VSIP_IMPL_BLAS_GER(std::complex<double>, geru, zgeru_)
455| #endif
456|
457| #undef VSIP_IMPL_BLAS_GER
458| #undef VSIP_IMPL_CBLAS_GER
459|
460|
461|
462| template <typename T>
463| struct Blas_traits
464| {
465| static bool const valid = false;
466| };
467|
468| template <>
469| struct Blas_traits<float>
470| {
471| static bool const valid = true;
472| static char const trans = 't';
473| };
474|
475| template <>
476| struct Blas_traits<double>
477| {
478| static bool const valid = true;
479| static char const trans = 't';
480| };
481|
482| template <>
483| struct Blas_traits<std::complex<float> >
484| {
485| static bool const valid = true;
486| static char const trans = 'c';
487| };
488|
489| template <>
490| struct Blas_traits<std::complex<double> >
491| {
492| static bool const valid = true;
493| static char const trans = 'c';
494| };
495|
496| }
497|
498|
499|
500|
501|
502|
503|
504| namespace lapack
505| {
506|
507| extern "C"
508| {
509| typedef int* I;
510| typedef float* S;
511| typedef double* D;
512| typedef std::complex<float>* C;
513| typedef std::complex<double>* Z;
514|
515|
516| void sgeqrf_(I, I, S, I, S, S, I, I);
517| void dgeqrf_(I, I, D, I, D, D, I, I);
518| void cgeqrf_(I, I, C, I, C, C, I, I);
519| void zgeqrf_(I, I, Z, I, Z, Z, I, I);
520|
521| void sgeqr2_(I, I, S, I, S, S, I);
522| void dgeqr2_(I, I, D, I, D, D, I);
523| void cgeqr2_(I, I, C, I, C, C, I);
524| void zgeqr2_(I, I, Z, I, Z, Z, I);
525|
526| void sormqr_(char*, char*, I, I, I, S, I, S, S, I, S, I, I);
527| void dormqr_(char*, char*, I, I, I, D, I, D, D, I, D, I, I);
528| void cunmqr_(char*, char*, I, I, I, C, I, C, C, I, C, I, I);
529| void zunmqr_(char*, char*, I, I, I, Z, I, Z, Z, I, Z, I, I);
530|
531| void sgebrd_(I, I, S, I, S, S, S, S, S, I, I);
532| void dgebrd_(I, I, D, I, D, D, D, D, D, I, I);
533| void cgebrd_(I, I, C, I, S, S, C, C, C, I, I);
534| void zgebrd_(I, I, Z, I, D, D, Z, Z, Z, I, I);
535|
536| void sorgbr_(char*, I, I, I, S, I, S, S, I, I);
537| void dorgbr_(char*, I, I, I, D, I, D, D, I, I);
538| void cungbr_(char*, I, I, I, C, I, C, C, I, I);
539| void zungbr_(char*, I, I, I, Z, I, Z, Z, I, I);
540|
541| void sbdsqr_(char*, I, I, I, I, S, S, S, I, S, I, S, I, S, I);
542| void dbdsqr_(char*, I, I, I, I, D, D, D, I, D, I, D, I, D, I);
543| void cbdsqr_(char*, I, I, I, I, S, S, C, I, C, I, C, I, C, I);
544| void zbdsqr_(char*, I, I, I, I, D, D, Z, I, Z, I, Z, I, Z, I);
545|
546| void spotrf_(char*, I, S, I, I);
547| void dpotrf_(char*, I, D, I, I);
548| void cpotrf_(char*, I, C, I, I);
549| void zpotrf_(char*, I, Z, I, I);
550|
551| void spotrs_(char*, I, I, S, I, S, I, I);
552| void dpotrs_(char*, I, I, D, I, D, I, I);
553| void cpotrs_(char*, I, I, C, I, C, I, I);
554| void zpotrs_(char*, I, I, Z, I, Z, I, I);
555|
556| void sgetrf_(I, I, S, I, I, I);
557| void dgetrf_(I, I, D, I, I, I);
558| void cgetrf_(I, I, C, I, I, I);
559| void zgetrf_(I, I, Z, I, I, I);
560|
561| void sgetrs_(char*, I, I, S, I, I, S, I, I);
562| void dgetrs_(char*, I, I, D, I, I, D, I, I);
563| void cgetrs_(char*, I, I, C, I, I, C, I, I);
564| void zgetrs_(char*, I, I, Z, I, I, Z, I, I);
565|
566| #if VSIP_IMPL_USE_LAPACK_ILAENV
567| int ilaenv_(I, char*, char*, I, I, I, I);
568| #endif
569| }
570|
571| #if VSIP_IMPL_USE_LAPACK_ILAENV
572| inline int
573| ilaenv(int ispec, char* name, char* opts, int n1, int n2, int n3, int n4)
574| {
575| return ilaenv_(&ispec, name, opts, &n1, &n2, &n3, &n4);
576| }
577| #else
578| inline int
579| ilaenv(int, char*, char*, int , int , int , int)
580| {
581| return 80;
582| }
583| #endif
584|
585|
586|
587| #define VSIP_IMPL_LAPACK_GEQRF(T, FCN, NAME)
588| inline void geqrf(int m, int n, T* a, int lda, T* tau, T* work,
589| int& lwork)
590| {
591| int info;
592| FCN(&m, &n, a, &lda, tau, work, &lwork, &info);
593| if (info != 0)
594| {
595| char msg[256];
596| sprintf(msg, "lapack::geqrf -- illegal arg (info=%d)", info);
597| VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));
598| }
599| }
600| \
601| template <>
602| inline int
603| geqrf_blksize<T >(int m, int n) /* Note [1] */
604| {
605| return ilaenv(1, NAME, "", m, n, -1, -1);
606| }
607|
608| template <typename T>
609| inline int
610| geqrf_blksize(int m, int n);
611|
612|
613| VSIP_IMPL_LAPACK_GEQRF(float, sgeqrf_, "sgeqrf")
614| VSIP_IMPL_LAPACK_GEQRF(double, dgeqrf_, "dgeqrf")
615| VSIP_IMPL_LAPACK_GEQRF(std::complex<float>, cgeqrf_, "cgeqrf")
616| VSIP_IMPL_LAPACK_GEQRF(std::complex<double>, zgeqrf_, "zgeqrf")
617|
618| #undef VSIP_IMPL_LAPACK_GEQRF
619|
620|
621|
622| #define VSIP_IMPL_LAPACK_GEQR2(T, FCN, NAME)
623| inline void geqr2(int m, int n, T* a, int lda, T* tau, T* work,
624| int& /*lwork*/)
625| {
626| int info;
627| FCN(&m, &n, a, &lda, tau, work, &info);
628| if (info != 0)
629| {
630| char msg[256];
631| sprintf(msg, "lapack::geqr2 -- illegal arg (info=%d)", info);
632| VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));
633| }
634| }
635| \
636| template <>
637| inline int
638| geqr2_blksize<T >(int m, int n) /* Note [1] */
639| {
640| return ilaenv(1, NAME, "", m, n, -1, -1);
641| }
642|
643| template <typename T>
644| inline int
645| geqr2_blksize(int m, int n);
646|
647|
648| VSIP_IMPL_LAPACK_GEQR2(float, sgeqr2_, "sgeqr2")
649| VSIP_IMPL_LAPACK_GEQR2(double, dgeqr2_, "dgeqr2")
650| VSIP_IMPL_LAPACK_GEQR2(std::complex<float>, cgeqr2_, "cgeqr2")
651| VSIP_IMPL_LAPACK_GEQR2(std::complex<double>, zgeqr2_, "zgeqr2")
652|
653| #undef VSIP_IMPL_LAPACK_GEQR2
654|
655|
656|
657| #define VSIP_IMPL_LAPACK_MQR(T, FCN, NAME)
658| inline void mqr(char side, char trans,
659| int m, int n, int k,
660| T *a, int lda,
661| T *tau,
662| T *c, int ldc,
663| T *work, int& lwork)
664| {
665| int info;
666| FCN(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info); \
667| if (info != 0)
668| {
669| char msg[256];
670| sprintf(msg, "lapack::mqr -- illegal arg (info=%d)", info);
671| VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));
672| }
673| }
674| \
675| template <>
676| inline int
677| mqr_blksize<T >(char side, char trans, int m, int n, int k) /* Note [1] */ \
678| {
679| char arg[4]; arg[0] = side; arg[1] = trans; arg[2] = 0;
680| return ilaenv(1, NAME, arg, m, n, k, -1);
681| }
682|
683| template <typename T>
684| inline int
685| mqr_blksize(char side, char trans, int m, int n, int k);
686|
687| VSIP_IMPL_LAPACK_MQR(float, sormqr_, "sormqr")
688| VSIP_IMPL_LAPACK_MQR(double, dormqr_, "dormqr")
689| VSIP_IMPL_LAPACK_MQR(std::complex<float>, cunmqr_, "cunmqr")
690| VSIP_IMPL_LAPACK_MQR(std::complex<double>, zunmqr_, "zunmqr")
691|
692| #undef VSIP_IMPL_LAPACK_MQR
693|
694|
695|
696| #define VSIP_IMPL_LAPACK_GEBRD(T, FCN, NAME)
697| inline void gebrd(int m, int n, T* a, int lda,
698| vsip::impl::Scalar_of<T >::type* d,
699| vsip::impl::Scalar_of<T >::type* e,
700| T* tauq, T* taup, T* work, int& lwork)
701| {
702| int info;
703| FCN(&m, &n, a, &lda, d, e, tauq, taup, work, &lwork, &info);
704| if (info != 0)
705| {
706| char msg[256];
707| sprintf(msg, "lapack::gebrd -- illegal arg (info=%d)", info);
708| VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));
709| }
710| }
711| \
712| template <>
713| inline int
714| gebrd_blksize<T >(int m, int n) /* Note [1] */
715| {
716| return ilaenv(1, NAME, "", m, n, -1, -1);
717| }
718|
719| template <typename T>
720| inline int
721| gebrd_blksize(int m, int n);
722|
723|
724| VSIP_IMPL_LAPACK_GEBRD(float, sgebrd_, "sgebrd")
725| VSIP_IMPL_LAPACK_GEBRD(double, dgebrd_, "dgebrd")
726| VSIP_IMPL_LAPACK_GEBRD(std::complex<float>, cgebrd_, "cgebrd")
727| VSIP_IMPL_LAPACK_GEBRD(std::complex<double>, zgebrd_, "zgebrd")
728|
729| #undef VSIP_IMPL_LAPACK_GEBRD
730|
731|
732|
733| #define VSIP_IMPL_LAPACK_GBR(T, FCN, NAME)
734| inline void gbr(char vect,
735| int m, int n, int k,
736| T *a, int lda,
737| T *tau,
738| T *work, int& lwork)
739| {
740| int info;
741| FCN(&vect, &m, &n, &k, a, &lda, tau, work, &lwork, &info);
742| if (info != 0)
743| {
744| char msg[256];
745| sprintf(msg, "lapack::gbr -- illegal arg (info=%d)", info);
746| VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));
747| }
748| }
749| \
750| template <>
751| inline int
752| gbr_blksize<T >(char vect, int m, int n, int k) /* Note [1] */
753| {
754| char arg[2]; arg[0] = vect; arg[1] = 0;
755| return ilaenv(1, NAME, arg, m, n, k, -1);
756| }
757|
758| template <typename T>
759| inline int
760| gbr_blksize(char vect, int m, int n, int k);
761|
762| VSIP_IMPL_LAPACK_GBR(float, sorgbr_, "sorgbr")
763| VSIP_IMPL_LAPACK_GBR(double, dorgbr_, "dorgbr")
764| VSIP_IMPL_LAPACK_GBR(std::complex<float>, cungbr_, "cungbr")
765| VSIP_IMPL_LAPACK_GBR(std::complex<double>, zungbr_, "zungbr")
766|
767| #undef VSIP_IMPL_LAPACK_GBR
768|
769|
770|
771|
772|
773|
774| #define VSIP_IMPL_LAPACK_BDSQR(T, FCN)
775| inline void bdsqr(char uplo,
776| int n, int ncvt, int nru, int ncc,
777| vsip::impl::Scalar_of<T >::type* d,
778| vsip::impl::Scalar_of<T >::type* e,
779| T *vt, int ldvt,
780| T *u, int ldu,
781| T *c, int ldc,
782| T *work)
783| {
784| int info;
785| FCN(&uplo, &n, &ncvt, &nru, &ncc, d, e,
786| vt, &ldvt, u, &ldu, c, &ldc, work, &info);
787| if (info != 0)
788| {
789| char msg[256];
790| sprintf(msg, "lapack::bdsqr -- illegal arg (info=%d)", info);
791| VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));
792| }
793| }
794|
795| VSIP_IMPL_LAPACK_BDSQR(float, sbdsqr_)
796| VSIP_IMPL_LAPACK_BDSQR(double, dbdsqr_)
797| VSIP_IMPL_LAPACK_BDSQR(std::complex<float>, cbdsqr_)
798| VSIP_IMPL_LAPACK_BDSQR(std::complex<double>, zbdsqr_)
799|
800| #undef VSIP_IMPL_LAPACK_BDSQR
801|
802|
803|
804|
805|
806|
807|
808|
809|
810|
811|
812|
813|
814| #define VSIP_IMPL_LAPACK_POTRF(T, FCN)
815| inline bool
816| potrf(char uplo, int n, T* a, int lda)
817| {
818| int info;
819| FCN(&uplo, &n, a, &lda, &info);
820| if (info < 0)
821| {
822| char msg[256];
823| sprintf(msg, "lapack::potrf -- illegal arg (info=%d)", info);
824| VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));
825| }
826| return (info == 0);
827| }
828|
829| VSIP_IMPL_LAPACK_POTRF(float, spotrf_)
830| VSIP_IMPL_LAPACK_POTRF(double, dpotrf_)
831| VSIP_IMPL_LAPACK_POTRF(std::complex<float>, cpotrf_)
832| VSIP_IMPL_LAPACK_POTRF(std::complex<double>, zpotrf_)
833|
834| #undef VSIP_IMPL_LAPACK_POTRF
835|
836|
837|
838|
839|
840|
841|
842|
843|
844|
845|
846|
847|
848| #define VSIP_IMPL_LAPACK_POTRS(T, FCN)
849| inline void
850| potrs(char uplo, int n, int nhrs, T* a, int lda, T* b, int ldb)
851| {
852| int info;
853| FCN(&uplo, &n, &nhrs, a, &lda, b, &ldb, &info);
854| if (info != 0)
855| {
856| char msg[256];
857| sprintf(msg, "lapack::potrs -- illegal arg (info=%d)", info);
858| VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));
859| }
860| }
861|
862| VSIP_IMPL_LAPACK_POTRS(float, spotrs_)
863| VSIP_IMPL_LAPACK_POTRS(double, dpotrs_)
864| VSIP_IMPL_LAPACK_POTRS(std::complex<float>, cpotrs_)
865| VSIP_IMPL_LAPACK_POTRS(std::complex<double>, zpotrs_)
866|
867| #undef VSIP_IMPL_LAPACK_POTRS
868|
869|
870|
871|
872|
873|
874|
875|
876|
877|
878|
879|
880| #define VSIP_IMPL_LAPACK_GETRF(T, FCN)
881| inline bool
882| getrf(int m, int n, T* a, int lda, int* ipiv)
883| {
884| int info;
885| FCN(&m, &n, a, &lda, ipiv, &info);
886| if (info < 0)
887| {
888| char msg[256];
889| sprintf(msg, "lapack::getrf -- illegal arg (info=%d)", info);
890| VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));
891| }
892| return (info == 0);
893| }
894|
895| VSIP_IMPL_LAPACK_GETRF(float, sgetrf_)
896| VSIP_IMPL_LAPACK_GETRF(double, dgetrf_)
897| VSIP_IMPL_LAPACK_GETRF(std::complex<float>, cgetrf_)
898| VSIP_IMPL_LAPACK_GETRF(std::complex<double>, zgetrf_)
899|
900| #undef VSIP_IMPL_LAPACK_GETRF
901|
902|
903|
904|
905|
906|
907| #define VSIP_IMPL_LAPACK_GETRS(T, FCN)
908| inline void
909| getrs(char trans, int n, int nhrs, T* a, int lda, int* ipiv, T* b, int ldb) \
910| {
911| int info;
912| FCN(&trans, &n, &nhrs, a, &lda, ipiv, b, &ldb, &info);
913| if (info != 0)
914| {
915| char msg[256];
916| sprintf(msg, "lapack::getrs -- illegal arg (info=%d)", info);
917| VSIP_IMPL_THROW(vsip::impl::unimplemented(msg));
918| }
919| }
920|
921| VSIP_IMPL_LAPACK_GETRS(float, sgetrs_)
922| VSIP_IMPL_LAPACK_GETRS(double, dgetrs_)
923| VSIP_IMPL_LAPACK_GETRS(std::complex<float>, cgetrs_)
924| VSIP_IMPL_LAPACK_GETRS(std::complex<double>, zgetrs_)
925|
926| #undef VSIP_IMPL_LAPACK_GETRS
927|
928|
929|
930| }
931| }
932| }
933|
934|
935|
936| extern "C"
937| {
938|
939|
940|
941|
942| void xerbla_(char* name, int* info);
943|
944| }
945|
946| #endif // VSIP_IMPL_LAPACK_HPP