ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
device_check.h
Go to the documentation of this file.
1#ifndef DEVICE_CHECK_H
2#define DEVICE_CHECK_H
3
4#include <cstdlib>
5#include <cstdio>
6
7#ifdef __CUDA
8#include "cublas_v2.h"
9#include "cufft.h"
10#include "cusolverDn.h"
11#include <cuda.h>
12
13static const char* _cublasGetErrorString(cublasStatus_t error)
14{
15 switch (error)
16 {
17 case CUBLAS_STATUS_SUCCESS:
18 return "CUBLAS_STATUS_SUCCESS";
19 case CUBLAS_STATUS_NOT_INITIALIZED:
20 return "CUBLAS_STATUS_NOT_INITIALIZED";
21 case CUBLAS_STATUS_ALLOC_FAILED:
22 return "CUBLAS_STATUS_ALLOC_FAILED";
23 case CUBLAS_STATUS_INVALID_VALUE:
24 return "CUBLAS_STATUS_INVALID_VALUE";
25 case CUBLAS_STATUS_ARCH_MISMATCH:
26 return "CUBLAS_STATUS_ARCH_MISMATCH";
27 case CUBLAS_STATUS_MAPPING_ERROR:
28 return "CUBLAS_STATUS_MAPPING_ERROR";
29 case CUBLAS_STATUS_EXECUTION_FAILED:
30 return "CUBLAS_STATUS_EXECUTION_FAILED";
31 case CUBLAS_STATUS_INTERNAL_ERROR:
32 return "CUBLAS_STATUS_INTERNAL_ERROR";
33 case CUBLAS_STATUS_NOT_SUPPORTED:
34 return "CUBLAS_STATUS_NOT_SUPPORTED";
35 case CUBLAS_STATUS_LICENSE_ERROR:
36 return "CUBLAS_STATUS_LICENSE_ERROR";
37 default:
38 return "<unknown>";
39 }
40}
41
42static const char* _cusolverGetErrorString(cusolverStatus_t error)
43{
44 switch (error)
45 {
46 case CUSOLVER_STATUS_SUCCESS:
47 return "CUSOLVER_STATUS_SUCCESS";
48 case CUSOLVER_STATUS_NOT_INITIALIZED:
49 return "CUSOLVER_STATUS_NOT_INITIALIZED";
50 case CUSOLVER_STATUS_ALLOC_FAILED:
51 return "CUSOLVER_STATUS_ALLOC_FAILED";
52 case CUSOLVER_STATUS_INVALID_VALUE:
53 return "CUSOLVER_STATUS_INVALID_VALUE";
54 case CUSOLVER_STATUS_ARCH_MISMATCH:
55 return "CUSOLVER_STATUS_ARCH_MISMATCH";
56 case CUSOLVER_STATUS_MAPPING_ERROR:
57 return "CUSOLVER_STATUS_MAPPING_ERROR";
58 case CUSOLVER_STATUS_EXECUTION_FAILED:
59 return "CUSOLVER_STATUS_EXECUTION_FAILED";
60 case CUSOLVER_STATUS_INTERNAL_ERROR:
61 return "CUSOLVER_STATUS_INTERNAL_ERROR";
62 case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
63 return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
64 case CUSOLVER_STATUS_NOT_SUPPORTED:
65 return "CUSOLVER_STATUS_NOT_SUPPORTED";
66 case CUSOLVER_STATUS_ZERO_PIVOT:
67 return "CUSOLVER_STATUS_ZERO_PIVOT";
68 case CUSOLVER_STATUS_INVALID_LICENSE:
69 return "CUSOLVER_STATUS_INVALID_LICENSE";
70 case CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED:
71 return "CUSOLVER_STATUS_IRS_PARAMS_NOT_INITIALIZED";
72 case CUSOLVER_STATUS_IRS_PARAMS_INVALID:
73 return "CUSOLVER_STATUS_IRS_PARAMS_INVALID";
74 case CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC:
75 return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_PREC";
76 case CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE:
77 return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_REFINE";
78 case CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER:
79 return "CUSOLVER_STATUS_IRS_PARAMS_INVALID_MAXITER";
80 case CUSOLVER_STATUS_IRS_INTERNAL_ERROR:
81 return "CUSOLVER_STATUS_IRS_INTERNAL_ERROR";
82 case CUSOLVER_STATUS_IRS_NOT_SUPPORTED:
83 return "CUSOLVER_STATUS_IRS_NOT_SUPPORTED";
84 case CUSOLVER_STATUS_IRS_OUT_OF_RANGE:
85 return "CUSOLVER_STATUS_IRS_OUT_OF_RANGE";
86 case CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES:
87 return "CUSOLVER_STATUS_IRS_NRHS_NOT_SUPPORTED_FOR_REFINE_GMRES";
88 case CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED:
89 return "CUSOLVER_STATUS_IRS_INFOS_NOT_INITIALIZED";
90 case CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED:
91 return "CUSOLVER_STATUS_IRS_INFOS_NOT_DESTROYED";
92 case CUSOLVER_STATUS_IRS_MATRIX_SINGULAR:
93 return "CUSOLVER_STATUS_IRS_MATRIX_SINGULAR";
94 case CUSOLVER_STATUS_INVALID_WORKSPACE:
95 return "CUSOLVER_STATUS_INVALID_WORKSPACE";
96 default:
97 return "<unknown>";
98 }
99}
100
101static const char* _cufftGetErrorString(cufftResult_t error)
102{
103 switch (error)
104 {
105 case CUFFT_SUCCESS:
106 return "CUFFT_SUCCESS";
107 case CUFFT_INVALID_PLAN:
108 return "CUFFT_INVALID_PLAN";
109 case CUFFT_ALLOC_FAILED:
110 return "CUFFT_ALLOC_FAILED";
111 case CUFFT_INVALID_TYPE:
112 return "CUFFT_INVALID_TYPE";
113 case CUFFT_INVALID_VALUE:
114 return "CUFFT_INVALID_VALUE";
115 case CUFFT_INTERNAL_ERROR:
116 return "CUFFT_INTERNAL_ERROR";
117 case CUFFT_EXEC_FAILED:
118 return "CUFFT_EXEC_FAILED";
119 case CUFFT_SETUP_FAILED:
120 return "CUFFT_SETUP_FAILED";
121 case CUFFT_INVALID_SIZE:
122 return "CUFFT_INVALID_SIZE";
123 case CUFFT_UNALIGNED_DATA:
124 return "CUFFT_UNALIGNED_DATA";
125 case CUFFT_INVALID_DEVICE:
126 return "CUFFT_INVALID_DEVICE";
127 case CUFFT_NO_WORKSPACE:
128 return "CUFFT_NO_WORKSPACE";
129 case CUFFT_NOT_IMPLEMENTED:
130 return "CUFFT_NOT_IMPLEMENTED";
131 case CUFFT_NOT_SUPPORTED:
132 return "CUFFT_NOT_SUPPORTED";
133#if defined(CUDA_VERSION) && CUDA_VERSION < 13000
134 case CUFFT_INCOMPLETE_PARAMETER_LIST:
135 return "CUFFT_INCOMPLETE_PARAMETER_LIST";
136 case CUFFT_PARSE_ERROR:
137 return "CUFFT_PARSE_ERROR";
138 case CUFFT_LICENSE_ERROR:
139 return "CUFFT_LICENSE_ERROR";
140#endif
141 default:
142 return "<unknown>";
143 }
144}
145
// Evaluate a CUDA runtime call exactly once. If the result is not cudaSuccess,
// print the file, line, error string and numeric code to stderr and terminate
// the process with EXIT_FAILURE.
// The result is stored in a local with a header-specific name: a local called
// `status` would capture any `status` appearing in the caller's argument
// expression and make the initializer refer to itself.
#define CHECK_CUDA(func) \
    do \
    { \
        const cudaError_t device_check_status_ = (func); \
        if (device_check_status_ != cudaSuccess) \
        { \
            fprintf(stderr, "In File %s : CUDA API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    cudaGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
157
// Evaluate a cuBLAS call exactly once. If the result is not
// CUBLAS_STATUS_SUCCESS, print the file, line, status name and numeric code to
// stderr and terminate the process with EXIT_FAILURE.
// The result is stored in a local with a header-specific name: a local called
// `status` would capture any `status` appearing in the caller's argument
// expression and make the initializer refer to itself.
#define CHECK_CUBLAS(func) \
    do \
    { \
        const cublasStatus_t device_check_status_ = (func); \
        if (device_check_status_ != CUBLAS_STATUS_SUCCESS) \
        { \
            fprintf(stderr, "In File %s : CUBLAS API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    _cublasGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
169
// Evaluate a cuSOLVER call exactly once. If the result is not
// CUSOLVER_STATUS_SUCCESS, print the file, line, status name and numeric code
// to stderr and terminate the process with EXIT_FAILURE.
// The result is stored in a local with a header-specific name: a local called
// `status` would capture any `status` appearing in the caller's argument
// expression and make the initializer refer to itself.
#define CHECK_CUSOLVER(func) \
    do \
    { \
        const cusolverStatus_t device_check_status_ = (func); \
        if (device_check_status_ != CUSOLVER_STATUS_SUCCESS) \
        { \
            fprintf(stderr, "In File %s : CUSOLVER API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    _cusolverGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
181
// Evaluate a cuFFT call exactly once. If the result is not CUFFT_SUCCESS,
// print the file, line, result name and numeric code to stderr and terminate
// the process with EXIT_FAILURE.
// The result is stored in a local with a header-specific name: a local called
// `status` would capture any `status` appearing in the caller's argument
// expression and make the initializer refer to itself.
#define CHECK_CUFFT(func) \
    do \
    { \
        const cufftResult_t device_check_status_ = (func); \
        if (device_check_status_ != CUFFT_SUCCESS) \
        { \
            fprintf(stderr, "In File %s : CUFFT API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    _cufftGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
193
// Query cudaGetLastError(). If it does not return cudaSuccess, print the file,
// line, the caller-supplied message, the numeric code and the error string to
// stderr, then terminate the process with EXIT_FAILURE.
// `msg` is printed with %s, so it must be a C string; it is parenthesised so
// that any expression can be passed as a single argument.
// The result is stored in a local with a header-specific name so that a `msg`
// expression mentioning a caller variable called `status` is not captured.
#define CHECK_LAST_CUDA_ERROR(msg) \
    do \
    { \
        const cudaError_t device_check_status_ = cudaGetLastError(); \
        if (device_check_status_ != cudaSuccess) \
        { \
            fprintf(stderr, "%s(%d) : CUDA error : %s : (%d) %s.\n", __FILE__, __LINE__, (msg), \
                    static_cast<int>(device_check_status_), cudaGetErrorString(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
205
// CHECK_CUDA_SYNC(): when __DEBUG is defined, call cudaDeviceSynchronize() and,
// if it does not return cudaSuccess, print the file, line, error string and
// numeric code to stderr and terminate the process with EXIT_FAILURE.
// Without __DEBUG the macro expands to an empty statement.
#ifdef __DEBUG
#define CHECK_CUDA_SYNC() \
    do \
    { \
        const cudaError_t device_check_status_ = cudaDeviceSynchronize(); \
        if (device_check_status_ != cudaSuccess) \
        { \
            fprintf(stderr, "In File %s : CUDA sync failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    cudaGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
#else
#define CHECK_CUDA_SYNC() do {} while (0)
#endif
221
222// NCCL check macro: shared by cuSOLVER MP (non-CAL path) and parallel device
223#if (defined(__CUSOLVERMP) && !defined(__USE_CAL)) || defined(__NCCL_PARALLEL_DEVICE)
224#include <nccl.h>
225
// Evaluate an NCCL call exactly once. If the result is not ncclSuccess, print
// the file, line, error string and numeric code to stderr and terminate the
// process with EXIT_FAILURE.
// The result is stored in a local with a header-specific name: a local called
// `status` would capture any `status` appearing in the caller's argument
// expression and make the initializer refer to itself.
#define CHECK_NCCL(func) \
    do \
    { \
        const ncclResult_t device_check_status_ = (func); \
        if (device_check_status_ != ncclSuccess) \
        { \
            fprintf(stderr, "In File %s : NCCL API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    ncclGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
237#endif
238
239// cuSOLVER MP support
240#ifdef __CUSOLVERMP
241#include <cusolverMp.h>
242
243#ifdef __USE_CAL
244#include <cal.h>
245
246static const char* _calGetErrorString(calError_t error)
247{
248 switch (error)
249 {
250 case CAL_OK:
251 return "CAL_OK";
252 case CAL_ERROR:
253 return "CAL_ERROR";
254 case CAL_ERROR_INVALID_PARAMETER:
255 return "CAL_ERROR_INVALID_PARAMETER";
256 case CAL_ERROR_INTERNAL:
257 return "CAL_ERROR_INTERNAL";
258 case CAL_ERROR_CUDA:
259 return "CAL_ERROR_CUDA";
260 case CAL_ERROR_UCC:
261 return "CAL_ERROR_UCC";
262 case CAL_ERROR_NOT_SUPPORTED:
263 return "CAL_ERROR_NOT_SUPPORTED";
264 case CAL_ERROR_INPROGRESS:
265 return "CAL_ERROR_INPROGRESS";
266 default:
267 return "<unknown>";
268 }
269}
270
// Evaluate a CAL call exactly once. If the result is not CAL_OK, print the
// file, line, error name and numeric code to stderr and terminate the process
// with EXIT_FAILURE.
// The result is stored in a local with a header-specific name: a local called
// `status` would capture any `status` appearing in the caller's argument
// expression and make the initializer refer to itself.
#define CHECK_CAL(func) \
    do \
    { \
        const calError_t device_check_status_ = (func); \
        if (device_check_status_ != CAL_OK) \
        { \
            fprintf(stderr, "In File %s : CAL API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    _calGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
282#endif // __USE_CAL
283
284#endif // __CUSOLVERMP
285
286#endif // __CUDA
287
288#ifdef __ROCM
289#include <hip/hip_runtime.h>
290#include <hipblas/hipblas.h>
291#include <hipfft/hipfft.h>
292#include <hipsolver/hipsolver.h>
293
294static const char* _hipblasGetErrorString(hipblasStatus_t error)
295{
296 switch (error)
297 {
298 case HIPBLAS_STATUS_SUCCESS:
299 return "HIPBLAS_STATUS_SUCCESS";
300 case HIPBLAS_STATUS_NOT_INITIALIZED:
301 return "HIPBLAS_STATUS_NOT_INITIALIZED";
302 case HIPBLAS_STATUS_ALLOC_FAILED:
303 return "HIPBLAS_STATUS_ALLOC_FAILED";
304 case HIPBLAS_STATUS_INVALID_VALUE:
305 return "HIPBLAS_STATUS_INVALID_VALUE";
306 case HIPBLAS_STATUS_ARCH_MISMATCH:
307 return "HIPBLAS_STATUS_ARCH_MISMATCH";
308 case HIPBLAS_STATUS_MAPPING_ERROR:
309 return "HIPBLAS_STATUS_MAPPING_ERROR";
310 case HIPBLAS_STATUS_EXECUTION_FAILED:
311 return "HIPBLAS_STATUS_EXECUTION_FAILED";
312 case HIPBLAS_STATUS_INTERNAL_ERROR:
313 return "HIPBLAS_STATUS_INTERNAL_ERROR";
314 case HIPBLAS_STATUS_NOT_SUPPORTED:
315 return "HIPBLAS_STATUS_NOT_SUPPORTED";
316 case HIPBLAS_STATUS_HANDLE_IS_NULLPTR:
317 return "HIPBLAS_STATUS_HANDLE_IS_NULLPTR";
318 default:
319 return "<unknown>";
320 }
321}
322
323static const char* _hipfftGetErrorString(hipfftResult_t error)
324{
325 switch (error)
326 {
327 case HIPFFT_SUCCESS:
328 return "HIPFFT_SUCCESS";
329 case HIPFFT_INVALID_PLAN:
330 return "HIPFFT_INVALID_PLAN";
331 case HIPFFT_ALLOC_FAILED:
332 return "HIPFFT_ALLOC_FAILED";
333 case HIPFFT_INVALID_TYPE:
334 return "HIPFFT_INVALID_TYPE";
335 case HIPFFT_INVALID_VALUE:
336 return "HIPFFT_INVALID_VALUE";
337 case HIPFFT_INTERNAL_ERROR:
338 return "HIPFFT_INTERNAL_ERROR";
339 case HIPFFT_EXEC_FAILED:
340 return "HIPFFT_EXEC_FAILED";
341 case HIPFFT_SETUP_FAILED:
342 return "HIPFFT_SETUP_FAILED";
343 case HIPFFT_INVALID_SIZE:
344 return "HIPFFT_INVALID_SIZE";
345 case HIPFFT_UNALIGNED_DATA:
346 return "HIPFFT_UNALIGNED_DATA";
347 case HIPFFT_INCOMPLETE_PARAMETER_LIST:
348 return "HIPFFT_INCOMPLETE_PARAMETER_LIST";
349 case HIPFFT_INVALID_DEVICE:
350 return "HIPFFT_INVALID_DEVICE";
351 case HIPFFT_PARSE_ERROR:
352 return "HIPFFT_PARSE_ERROR";
353 case HIPFFT_NO_WORKSPACE:
354 return "HIPFFT_NO_WORKSPACE";
355 case HIPFFT_NOT_IMPLEMENTED:
356 return "HIPFFT_NOT_IMPLEMENTED";
357 case HIPFFT_NOT_SUPPORTED:
358 return "HIPFFT_NOT_SUPPORTED";
359 default:
360 return "<unknown>";
361 }
362}
363
364static const char* _hipsolverGetErrorString(hipsolverStatus_t error)
365{
366 switch (error)
367 {
368 case HIPSOLVER_STATUS_SUCCESS:
369 return "HIPSOLVER_STATUS_SUCCESS";
370 case HIPSOLVER_STATUS_NOT_INITIALIZED:
371 return "HIPSOLVER_STATUS_NOT_INITIALIZED";
372 case HIPSOLVER_STATUS_ALLOC_FAILED:
373 return "HIPSOLVER_STATUS_ALLOC_FAILED";
374 case HIPSOLVER_STATUS_INVALID_VALUE:
375 return "HIPSOLVER_STATUS_INVALID_VALUE";
376 case HIPSOLVER_STATUS_MAPPING_ERROR:
377 return "HIPSOLVER_STATUS_MAPPING_ERROR";
378 case HIPSOLVER_STATUS_EXECUTION_FAILED:
379 return "HIPSOLVER_STATUS_EXECUTION_FAILED";
380 case HIPSOLVER_STATUS_INTERNAL_ERROR:
381 return "HIPSOLVER_STATUS_INTERNAL_ERROR";
382 case HIPSOLVER_STATUS_NOT_SUPPORTED:
383 return "HIPSOLVER_STATUS_NOT_SUPPORTED";
384 case HIPSOLVER_STATUS_ARCH_MISMATCH:
385 return "HIPSOLVER_STATUS_ARCH_MISMATCH";
386 case HIPSOLVER_STATUS_HANDLE_IS_NULLPTR:
387 return "HIPSOLVER_STATUS_HANDLE_IS_NULLPTR";
388 case HIPSOLVER_STATUS_INVALID_ENUM:
389 return "HIPSOLVER_STATUS_INVALID_ENUM";
390 case HIPSOLVER_STATUS_UNKNOWN:
391 return "HIPSOLVER_STATUS_UNKNOWN";
392 default:
393 return "<unknown>";
394 }
395}
396
// ROCm variant; it keeps the CUDA-style macro name.
// Evaluate a HIP runtime call exactly once. If the result is not hipSuccess,
// print the file, line, error string and numeric code to stderr and terminate
// the process with EXIT_FAILURE.
// The result is stored in a local with a header-specific name: a local called
// `status` would capture any `status` appearing in the caller's argument
// expression and make the initializer refer to itself.
#define CHECK_CUDA(func) \
    do \
    { \
        const hipError_t device_check_status_ = (func); \
        if (device_check_status_ != hipSuccess) \
        { \
            fprintf(stderr, "In File %s : HIP API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    hipGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
408
// ROCm variant; it keeps the CUDA-style macro name.
// Evaluate a hipBLAS call exactly once. If the result is not
// HIPBLAS_STATUS_SUCCESS, print the file, line, status name and numeric code
// to stderr and terminate the process with EXIT_FAILURE.
// The result is stored in a local with a header-specific name: a local called
// `status` would capture any `status` appearing in the caller's argument
// expression and make the initializer refer to itself.
#define CHECK_CUBLAS(func) \
    do \
    { \
        const hipblasStatus_t device_check_status_ = (func); \
        if (device_check_status_ != HIPBLAS_STATUS_SUCCESS) \
        { \
            fprintf(stderr, "In File %s : HIPBLAS API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    _hipblasGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
420
// ROCm variant; it keeps the CUDA-style macro name.
// Evaluate a hipSOLVER call exactly once. If the result is not
// HIPSOLVER_STATUS_SUCCESS, print the file, line, status name and numeric code
// to stderr and terminate the process with EXIT_FAILURE.
// The result is stored in a local with a header-specific name: a local called
// `status` would capture any `status` appearing in the caller's argument
// expression and make the initializer refer to itself.
#define CHECK_CUSOLVER(func) \
    do \
    { \
        const hipsolverStatus_t device_check_status_ = (func); \
        if (device_check_status_ != HIPSOLVER_STATUS_SUCCESS) \
        { \
            fprintf(stderr, "In File %s : HIPSOLVER API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    _hipsolverGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
432
// ROCm variant; it keeps the CUDA-style macro name.
// Evaluate a hipFFT call exactly once. If the result is not HIPFFT_SUCCESS,
// print the file, line, result name and numeric code to stderr and terminate
// the process with EXIT_FAILURE.
// The result is stored in a local with a header-specific name: a local called
// `status` would capture any `status` appearing in the caller's argument
// expression and make the initializer refer to itself.
#define CHECK_CUFFT(func) \
    do \
    { \
        const hipfftResult_t device_check_status_ = (func); \
        if (device_check_status_ != HIPFFT_SUCCESS) \
        { \
            fprintf(stderr, "In File %s : HIPFFT API failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    _hipfftGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
444
// ROCm variant; it keeps the CUDA-style macro name.
// Query hipGetLastError(). If it does not return hipSuccess, print the file,
// line, the caller-supplied message, the numeric code and the error string to
// stderr, then terminate the process with EXIT_FAILURE.
// `msg` is printed with %s, so it must be a C string; it is parenthesised so
// that any expression can be passed as a single argument.
// The result is stored in a local with a header-specific name so that a `msg`
// expression mentioning a caller variable called `status` is not captured.
#define CHECK_LAST_CUDA_ERROR(msg) \
    do \
    { \
        const hipError_t device_check_status_ = hipGetLastError(); \
        if (device_check_status_ != hipSuccess) \
        { \
            fprintf(stderr, "%s(%d) : HIP error : %s : (%d) %s.\n", __FILE__, __LINE__, (msg), \
                    static_cast<int>(device_check_status_), hipGetErrorString(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
456
// ROCm variant; it keeps the CUDA-style macro name.
// CHECK_CUDA_SYNC(): when __DEBUG is defined, call hipDeviceSynchronize() and,
// if it does not return hipSuccess, print the file, line, error string and
// numeric code to stderr and terminate the process with EXIT_FAILURE.
// Without __DEBUG the macro expands to an empty statement.
#ifdef __DEBUG
#define CHECK_CUDA_SYNC() \
    do \
    { \
        const hipError_t device_check_status_ = hipDeviceSynchronize(); \
        if (device_check_status_ != hipSuccess) \
        { \
            fprintf(stderr, "In File %s : HIP sync failed at line %d with error: %s (%d)\n", __FILE__, __LINE__, \
                    hipGetErrorString(device_check_status_), static_cast<int>(device_check_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
#else
#define CHECK_CUDA_SYNC() do {} while (0)
#endif
472
473#endif // __ROCM
474
475#endif // DEVICE_CHECK_H