9namespace cuSolverConnector {
13void trtri (cusolverDnHandle_t& cusolver_handle,
const char& uplo,
const char& diag,
const int& n,
T* A,
const int& lda)
15 size_t d_lwork = 0, h_lwork = 0;
18 void* d_work =
nullptr, *h_work =
nullptr;
21 h_work = malloc(h_lwork);
22 if (h_work ==
nullptr) {
23 throw std::bad_alloc();
27 int* d_info =
nullptr;
30 cusolverErrcheck(cusolverDnXtrtri(cusolver_handle, cublas_fill_mode(uplo), cublas_diag_type(diag), n,
GetTypeCuda<T>::cuda_data_type,
reinterpret_cast<Type*
>(A), n, d_work, d_lwork, h_work, h_lwork, d_info));
31 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
33 throw std::runtime_error(
"trtri: failed to invert matrix");
41void potri (cusolverDnHandle_t& cusolver_handle,
const char& uplo,
const char& diag,
const int& n,
float * A,
const int& lda)
44 cusolverErrcheck(cusolverDnSpotri_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, n, &lwork));
46 cudaErrcheck(cudaMalloc((
void**)&work, lwork *
sizeof(
float)));
48 cusolverErrcheck(cusolverDnSpotri(cusolver_handle, cublas_fill_mode(uplo), n, A, n, work, lwork,
nullptr));
52void potri (cusolverDnHandle_t& cusolver_handle,
const char& uplo,
const char& diag,
const int& n,
double * A,
const int& lda)
55 cusolverErrcheck(cusolverDnDpotri_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, n, &lwork));
57 cudaErrcheck(cudaMalloc((
void**)&work, lwork *
sizeof(
double)));
59 cusolverErrcheck(cusolverDnDpotri(cusolver_handle, cublas_fill_mode(uplo), n, A, n, work, lwork,
nullptr));
63void potri (cusolverDnHandle_t& cusolver_handle,
const char& uplo,
const char& diag,
const int& n, std::complex<float> * A,
const int& lda)
66 cusolverErrcheck(cusolverDnCpotri_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n,
reinterpret_cast<cuComplex *
>(A), n, &lwork));
68 cudaErrcheck(cudaMalloc((
void**)&work, lwork *
sizeof(cuComplex)));
70 cusolverErrcheck(cusolverDnCpotri(cusolver_handle, cublas_fill_mode(uplo), n,
reinterpret_cast<cuComplex *
>(A), n, work, lwork,
nullptr));
74void potri (cusolverDnHandle_t& cusolver_handle,
const char& uplo,
const char& diag,
const int& n, std::complex<double> * A,
const int& lda)
77 cusolverErrcheck(cusolverDnZpotri_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n,
reinterpret_cast<cuDoubleComplex *
>(A), n, &lwork));
78 cuDoubleComplex* work;
79 cudaErrcheck(cudaMalloc((
void**)&work, lwork *
sizeof(cuDoubleComplex)));
81 cusolverErrcheck(cusolverDnZpotri(cusolver_handle, cublas_fill_mode(uplo), n,
reinterpret_cast<cuDoubleComplex *
>(A), n, work, lwork,
nullptr));
87void potrf (cusolverDnHandle_t& cusolver_handle,
const char& uplo,
const int& n,
float * A,
const int& lda)
91 cudaErrcheck(cudaMalloc((
void**)&info, 1 *
sizeof(
int)));
92 cusolverErrcheck(cusolverDnSpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, n, &lwork));
94 cudaErrcheck(cudaMalloc((
void**)&work, lwork *
sizeof(
float)));
96 cusolverErrcheck(cusolverDnSpotrf(cusolver_handle, cublas_fill_mode(uplo), n, A, n, work, lwork, info));
101void potrf (cusolverDnHandle_t& cusolver_handle,
const char& uplo,
const int& n,
double * A,
const int& lda)
105 cudaErrcheck(cudaMalloc((
void**)&info, 1 *
sizeof(
int)));
106 cusolverErrcheck(cusolverDnDpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n, A, n, &lwork));
108 cudaErrcheck(cudaMalloc((
void**)&work, lwork *
sizeof(
double)));
110 cusolverErrcheck(cusolverDnDpotrf(cusolver_handle, cublas_fill_mode(uplo), n, A, n, work, lwork, info));
115void potrf (cusolverDnHandle_t& cusolver_handle,
const char& uplo,
const int& n, std::complex<float> * A,
const int& lda)
119 cudaErrcheck(cudaMalloc((
void**)&info, 1 *
sizeof(
int)));
120 cusolverErrcheck(cusolverDnCpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n,
reinterpret_cast<cuComplex*
>(A), lda, &lwork));
122 cudaErrcheck(cudaMalloc((
void**)&work, lwork *
sizeof(cuComplex)));
124 cusolverErrcheck(cusolverDnCpotrf(cusolver_handle, cublas_fill_mode(uplo), n,
reinterpret_cast<cuComplex*
>(A), lda, work, lwork, info));
129void potrf (cusolverDnHandle_t& cusolver_handle,
const char& uplo,
const int& n, std::complex<double> * A,
const int& lda)
133 cudaErrcheck(cudaMalloc((
void**)&info, 1 *
sizeof(
int)));
134 cusolverErrcheck(cusolverDnZpotrf_bufferSize(cusolver_handle, cublas_fill_mode(uplo), n,
reinterpret_cast<cuDoubleComplex*
>(A), lda, &lwork));
135 cuDoubleComplex* work;
136 cudaErrcheck(cudaMalloc((
void**)&work, lwork *
sizeof(cuDoubleComplex)));
138 cusolverErrcheck(cusolverDnZpotrf(cusolver_handle, cublas_fill_mode(uplo), n,
reinterpret_cast<cuDoubleComplex*
>(A), lda, work, lwork, info));
145void dnevd (cusolverDnHandle_t& cusolver_handle,
const char& jobz,
const char& uplo,
const int& n,
float* A,
const int& lda,
float * W)
150 int* d_info =
nullptr;
151 float* d_work =
nullptr;
155 cusolverErrcheck(cusolverDnSsyevd_bufferSize(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo),
156 n, A, lda, W, &lwork));
158 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(
float) * lwork));
160 cusolverErrcheck(cusolverDnSsyevd(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo),
161 n, A, lda, W, d_work, lwork, d_info));
163 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
165 throw std::runtime_error(
"dnevd: failed to invert matrix");
171void dnevd (cusolverDnHandle_t& cusolver_handle,
const char& jobz,
const char& uplo,
const int& n,
double* A,
const int& lda,
double * W)
176 int* d_info =
nullptr;
177 double* d_work =
nullptr;
181 cusolverErrcheck(cusolverDnDsyevd_bufferSize(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo),
182 n, A, lda, W, &lwork));
184 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(
double) * lwork));
186 cusolverErrcheck(cusolverDnDsyevd(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo),
187 n, A, lda, W, d_work, lwork, d_info));
189 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
191 throw std::runtime_error(
"dnevd: failed to invert matrix");
197void dnevd (cusolverDnHandle_t& cusolver_handle,
const char& jobz,
const char& uplo,
const int& n, std::complex<float>* A,
const int& lda,
float * W)
202 int* d_info =
nullptr;
203 cuComplex* d_work =
nullptr;
207 cusolverErrcheck(cusolverDnCheevd_bufferSize(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo),
208 n,
reinterpret_cast<cuComplex*
>(A), lda, W, &lwork));
210 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(cuComplex) * lwork));
212 cusolverErrcheck(cusolverDnCheevd(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo),
213 n,
reinterpret_cast<cuComplex*
>(A), lda, W, d_work, lwork, d_info));
215 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
217 throw std::runtime_error(
"dnevd: failed to invert matrix");
223void dnevd (cusolverDnHandle_t& cusolver_handle,
const char& jobz,
const char& uplo,
const int& n, std::complex<double>* A,
const int& lda,
double* W)
228 int* d_info =
nullptr;
229 cuDoubleComplex* d_work =
nullptr;
233 cusolverErrcheck(cusolverDnZheevd_bufferSize(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo),
234 n,
reinterpret_cast<cuDoubleComplex*
>(A), lda, W, &lwork));
236 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(cuDoubleComplex) * lwork));
238 cusolverErrcheck(cusolverDnZheevd(cusolver_handle, cublas_eig_mode(jobz), cublas_fill_mode(uplo),
239 n,
reinterpret_cast<cuDoubleComplex*
>(A), lda, W, d_work, lwork, d_info));
241 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
243 throw std::runtime_error(
"dnevd: failed to invert matrix");
250void dngvd (cusolverDnHandle_t& cusolver_handle,
const int& itype,
const char& jobz,
const char& uplo,
const int& n,
float* A,
const int& lda,
float* B,
const int& ldb,
float * W)
255 int* d_info =
nullptr;
256 float* d_work =
nullptr;
260 cusolverErrcheck(cusolverDnSsygvd_bufferSize(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo),
261 n, A, lda, B, ldb, W, &lwork));
263 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(
float) * lwork));
265 cusolverErrcheck(cusolverDnSsygvd(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo),
266 n, A, lda, B, ldb, W, d_work, lwork, d_info));
268 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
270 throw std::runtime_error(
"dnevd: failed to invert matrix");
276void dngvd (cusolverDnHandle_t& cusolver_handle,
const int& itype,
const char& jobz,
const char& uplo,
const int& n,
double* A,
const int& lda,
double* B,
const int& ldb,
double * W)
281 int* d_info =
nullptr;
282 double* d_work =
nullptr;
286 cusolverErrcheck(cusolverDnDsygvd_bufferSize(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo),
287 n, A, lda, B, ldb, W, &lwork));
289 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(
double) * lwork));
291 cusolverErrcheck(cusolverDnDsygvd(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo),
292 n, A, lda, B, ldb, W, d_work, lwork, d_info));
294 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
296 throw std::runtime_error(
"dnevd: failed to invert matrix");
302void dngvd (cusolverDnHandle_t& cusolver_handle,
const int& itype,
const char& jobz,
const char& uplo,
const int& n, std::complex<float>* A,
const int& lda, std::complex<float>* B,
const int& ldb,
float* W)
307 int* d_info =
nullptr;
308 cuComplex* d_work =
nullptr;
312 cusolverErrcheck(cusolverDnChegvd_bufferSize(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo),
313 n,
reinterpret_cast<cuComplex*
>(A), lda,
reinterpret_cast<cuComplex*
>(B), ldb, W, &lwork));
315 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(cuComplex) * lwork));
317 cusolverErrcheck(cusolverDnChegvd(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo),
318 n,
reinterpret_cast<cuComplex*
>(A), lda,
reinterpret_cast<cuComplex*
>(B), ldb, W, d_work, lwork, d_info));
320 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
322 throw std::runtime_error(
"dnevd: failed to invert matrix");
328void dngvd (cusolverDnHandle_t& cusolver_handle,
const int& itype,
const char& jobz,
const char& uplo,
const int& n, std::complex<double>* A,
const int& lda, std::complex<double>* B,
const int& ldb,
double* W)
333 int* d_info =
nullptr;
334 cuDoubleComplex* d_work =
nullptr;
338 cusolverErrcheck(cusolverDnZhegvd_bufferSize(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo),
339 n,
reinterpret_cast<cuDoubleComplex*
>(A), lda,
reinterpret_cast<cuDoubleComplex*
>(B), ldb, W, &lwork));
341 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(cuDoubleComplex) * lwork));
343 cusolverErrcheck(cusolverDnZhegvd(cusolver_handle, cublas_eig_type(itype), cublas_eig_mode(jobz), cublas_fill_mode(uplo),
344 n,
reinterpret_cast<cuDoubleComplex*
>(A), lda,
reinterpret_cast<cuDoubleComplex*
>(B), ldb, W, d_work, lwork, d_info));
346 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
348 throw std::runtime_error(
"dnevd: failed to invert matrix");
355void getrf(cusolverDnHandle_t& cusolver_handle,
const int& m,
const int& n,
float* A,
const int& lda,
int* ipiv)
360 int* d_info =
nullptr;
361 float* d_work =
nullptr;
365 cusolverErrcheck(cusolverDnSgetrf_bufferSize(cusolver_handle, m, n, A, lda, &lwork));
368 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(
float) * lwork));
371 cusolverErrcheck(cusolverDnSgetrf(cusolver_handle, m, n, A, lda, d_work, ipiv, d_info));
373 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
375 throw std::runtime_error(
"getrf: failed to compute LU factorization");
382void getrf(cusolverDnHandle_t& cusolver_handle,
const int& m,
const int& n,
double* A,
const int& lda,
int* ipiv)
387 int* d_info =
nullptr;
388 double* d_work =
nullptr;
392 cusolverErrcheck(cusolverDnDgetrf_bufferSize(cusolver_handle, m, n, A, lda, &lwork));
395 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(
double) * lwork));
398 cusolverErrcheck(cusolverDnDgetrf(cusolver_handle, m, n, A, lda, d_work, ipiv, d_info));
400 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
402 throw std::runtime_error(
"getrf: failed to compute LU factorization");
409void getrf(cusolverDnHandle_t& cusolver_handle,
const int& m,
const int& n, std::complex<float>* A,
const int& lda,
int* ipiv)
414 int* d_info =
nullptr;
415 cuComplex* d_work =
nullptr;
419 cusolverErrcheck(cusolverDnCgetrf_bufferSize(cusolver_handle, m, n,
reinterpret_cast<cuComplex*
>(A), lda, &lwork));
422 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(cuComplex) * lwork));
425 cusolverErrcheck(cusolverDnCgetrf(cusolver_handle, m, n,
reinterpret_cast<cuComplex*
>(A), lda, d_work, ipiv, d_info));
427 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
429 throw std::runtime_error(
"getrf: failed to compute LU factorization");
436void getrf(cusolverDnHandle_t& cusolver_handle,
const int& m,
const int& n, std::complex<double>* A,
const int& lda,
int* ipiv)
441 int* d_info =
nullptr;
442 cuDoubleComplex* d_work =
nullptr;
446 cusolverErrcheck(cusolverDnZgetrf_bufferSize(cusolver_handle, m, n,
reinterpret_cast<cuDoubleComplex*
>(A), lda, &lwork));
449 cudaErrcheck(cudaMalloc((
void**)&d_work,
sizeof(cuDoubleComplex) * lwork));
452 cusolverErrcheck(cusolverDnZgetrf(cusolver_handle, m, n,
reinterpret_cast<cuDoubleComplex*
>(A), lda, d_work, ipiv, d_info));
454 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
456 throw std::runtime_error(
"getrf: failed to compute LU factorization");
464void getrs(cusolverDnHandle_t& cusolver_handle,
const char& trans,
const int& n,
const int& nrhs,
float* A,
const int& lda,
const int* ipiv,
float* B,
const int& ldb)
467 int* d_info =
nullptr;
470 cusolverErrcheck(cusolverDnSgetrs(cusolver_handle, GetCublasOperation(trans), n, nrhs, A, lda, ipiv, B, ldb, d_info));
472 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
474 throw std::runtime_error(
"getrs: failed to solve the linear system");
480void getrs(cusolverDnHandle_t& cusolver_handle,
const char& trans,
const int& n,
const int& nrhs,
double* A,
const int& lda,
const int* ipiv,
double* B,
const int& ldb)
483 int* d_info =
nullptr;
486 cusolverErrcheck(cusolverDnDgetrs(cusolver_handle, GetCublasOperation(trans), n, nrhs, A, lda, ipiv, B, ldb, d_info));
488 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
490 throw std::runtime_error(
"getrs: failed to solve the linear system");
496void getrs(cusolverDnHandle_t& cusolver_handle,
const char& trans,
const int& n,
const int& nrhs, std::complex<float>* A,
const int& lda,
const int* ipiv, std::complex<float>* B,
const int& ldb)
499 int* d_info =
nullptr;
502 cusolverErrcheck(cusolverDnCgetrs(cusolver_handle, GetCublasOperation(trans), n, nrhs,
reinterpret_cast<cuComplex*
>(A), lda, ipiv,
reinterpret_cast<cuComplex*
>(B), ldb, d_info));
504 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
506 throw std::runtime_error(
"getrs: failed to solve the linear system");
512void getrs(cusolverDnHandle_t& cusolver_handle,
const char& trans,
const int& n,
const int& nrhs, std::complex<double>* A,
const int& lda,
const int* ipiv, std::complex<double>* B,
const int& ldb)
515 int* d_info =
nullptr;
518 cusolverErrcheck(cusolverDnZgetrs(cusolver_handle, GetCublasOperation(trans), n, nrhs,
reinterpret_cast<cuDoubleComplex*
>(A), lda, ipiv,
reinterpret_cast<cuDoubleComplex*
>(B), ldb, d_info));
520 cudaErrcheck(cudaMemcpy(&h_info, d_info,
sizeof(
int), cudaMemcpyDeviceToHost));
522 throw std::runtime_error(
"getrs: failed to solve the linear system");