ABACUS develop
Atomic-orbital Based Ab-initio Computation at UStc
Loading...
Searching...
No Matches
helper_cuda.h
Go to the documentation of this file.
1/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
2 *
3 * Redistribution and use in source and binary forms, with or without
4 * modification, are permitted provided that the following conditions
5 * are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 * * Neither the name of NVIDIA CORPORATION nor the names of its
12 * contributors may be used to endorse or promote products derived
13 * from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
29// These are CUDA Helper functions for initialization and error checking
30
31#ifndef COMMON_HELPER_CUDA_H_
32#define COMMON_HELPER_CUDA_H_
33
34#pragma once
35
36#include <stdint.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40
41#include "helper_string.h"
42
#ifndef EXIT_WAIVED
// Exit status used by CUDA samples when a test is waived (skipped because
// the system lacks a required capability) rather than passed (0) or failed.
#define EXIT_WAIVED 2
#endif
46
47// Note, it is required that your SDK sample to include the proper header
48// files, please refer the CUDA examples for examples of the needed CUDA
49// headers, which may change depending on which CUDA functions are used.
50
51// CUDA Runtime error messages
#ifdef __DRIVER_TYPES_H__
// Maps a CUDA Runtime API status code to its symbolic enumerator name
// by delegating to the runtime's own cudaGetErrorName().
static const char *_cudaGetErrorEnum(cudaError_t error) {
  return cudaGetErrorName(error);
}
#endif
57
#ifdef CUDA_DRIVER_API
// CUDA Driver API errors: map a CUresult to its symbolic name via
// cuGetErrorName(), falling back to "<unknown>" for unrecognized codes.
static const char *_cudaGetErrorEnum(CUresult error) {
  static char fallback[] = "<unknown>";
  const char *name = NULL;
  cuGetErrorName(error, &name);
  if (name == NULL) {
    name = fallback;
  }
  return name;
}
#endif
67
#ifdef CUBLAS_API_H_
// cuBLAS API errors: map a cublasStatus_t to its symbolic enumerator name.
static const char *_cudaGetErrorEnum(cublasStatus_t error) {
  switch (error) {
    case CUBLAS_STATUS_SUCCESS:          return "CUBLAS_STATUS_SUCCESS";
    case CUBLAS_STATUS_NOT_INITIALIZED:  return "CUBLAS_STATUS_NOT_INITIALIZED";
    case CUBLAS_STATUS_ALLOC_FAILED:     return "CUBLAS_STATUS_ALLOC_FAILED";
    case CUBLAS_STATUS_INVALID_VALUE:    return "CUBLAS_STATUS_INVALID_VALUE";
    case CUBLAS_STATUS_ARCH_MISMATCH:    return "CUBLAS_STATUS_ARCH_MISMATCH";
    case CUBLAS_STATUS_MAPPING_ERROR:    return "CUBLAS_STATUS_MAPPING_ERROR";
    case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED";
    case CUBLAS_STATUS_INTERNAL_ERROR:   return "CUBLAS_STATUS_INTERNAL_ERROR";
    case CUBLAS_STATUS_NOT_SUPPORTED:    return "CUBLAS_STATUS_NOT_SUPPORTED";
    case CUBLAS_STATUS_LICENSE_ERROR:    return "CUBLAS_STATUS_LICENSE_ERROR";
  }

  return "<unknown>";
}
#endif
106
#ifdef _CUFFT_H_
// cuFFT API errors: map a cufftResult to its symbolic enumerator name.
static const char *_cudaGetErrorEnum(cufftResult error) {
  switch (error) {
    case CUFFT_SUCCESS:                   return "CUFFT_SUCCESS";
    case CUFFT_INVALID_PLAN:              return "CUFFT_INVALID_PLAN";
    case CUFFT_ALLOC_FAILED:              return "CUFFT_ALLOC_FAILED";
    case CUFFT_INVALID_TYPE:              return "CUFFT_INVALID_TYPE";
    case CUFFT_INVALID_VALUE:             return "CUFFT_INVALID_VALUE";
    case CUFFT_INTERNAL_ERROR:            return "CUFFT_INTERNAL_ERROR";
    case CUFFT_EXEC_FAILED:               return "CUFFT_EXEC_FAILED";
    case CUFFT_SETUP_FAILED:              return "CUFFT_SETUP_FAILED";
    case CUFFT_INVALID_SIZE:              return "CUFFT_INVALID_SIZE";
    case CUFFT_UNALIGNED_DATA:            return "CUFFT_UNALIGNED_DATA";
    case CUFFT_INCOMPLETE_PARAMETER_LIST: return "CUFFT_INCOMPLETE_PARAMETER_LIST";
    case CUFFT_INVALID_DEVICE:            return "CUFFT_INVALID_DEVICE";
    case CUFFT_PARSE_ERROR:               return "CUFFT_PARSE_ERROR";
    case CUFFT_NO_WORKSPACE:              return "CUFFT_NO_WORKSPACE";
    case CUFFT_NOT_IMPLEMENTED:           return "CUFFT_NOT_IMPLEMENTED";
    case CUFFT_LICENSE_ERROR:             return "CUFFT_LICENSE_ERROR";
    case CUFFT_NOT_SUPPORTED:             return "CUFFT_NOT_SUPPORTED";
  }

  return "<unknown>";
}
#endif
166
#ifdef CUSPARSEAPI
// cuSPARSE API errors: map a cusparseStatus_t to its symbolic name.
static const char *_cudaGetErrorEnum(cusparseStatus_t error) {
  switch (error) {
    case CUSPARSE_STATUS_SUCCESS:         return "CUSPARSE_STATUS_SUCCESS";
    case CUSPARSE_STATUS_NOT_INITIALIZED: return "CUSPARSE_STATUS_NOT_INITIALIZED";
    case CUSPARSE_STATUS_ALLOC_FAILED:    return "CUSPARSE_STATUS_ALLOC_FAILED";
    case CUSPARSE_STATUS_INVALID_VALUE:   return "CUSPARSE_STATUS_INVALID_VALUE";
    case CUSPARSE_STATUS_ARCH_MISMATCH:   return "CUSPARSE_STATUS_ARCH_MISMATCH";
    case CUSPARSE_STATUS_MAPPING_ERROR:   return "CUSPARSE_STATUS_MAPPING_ERROR";
    case CUSPARSE_STATUS_EXECUTION_FAILED:
      return "CUSPARSE_STATUS_EXECUTION_FAILED";
    case CUSPARSE_STATUS_INTERNAL_ERROR:  return "CUSPARSE_STATUS_INTERNAL_ERROR";
    case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
      return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
  }

  return "<unknown>";
}
#endif
202
#ifdef CUSOLVER_COMMON_H_
// cuSOLVER API errors: map a cusolverStatus_t to its symbolic name.
static const char *_cudaGetErrorEnum(cusolverStatus_t error) {
  switch (error) {
    case CUSOLVER_STATUS_SUCCESS:
      return "CUSOLVER_STATUS_SUCCESS";
    case CUSOLVER_STATUS_NOT_INITIALIZED:
      return "CUSOLVER_STATUS_NOT_INITIALIZED";
    case CUSOLVER_STATUS_ALLOC_FAILED:
      return "CUSOLVER_STATUS_ALLOC_FAILED";
    case CUSOLVER_STATUS_INVALID_VALUE:
      return "CUSOLVER_STATUS_INVALID_VALUE";
    case CUSOLVER_STATUS_ARCH_MISMATCH:
      return "CUSOLVER_STATUS_ARCH_MISMATCH";
    case CUSOLVER_STATUS_MAPPING_ERROR:
      return "CUSOLVER_STATUS_MAPPING_ERROR";
    case CUSOLVER_STATUS_EXECUTION_FAILED:
      return "CUSOLVER_STATUS_EXECUTION_FAILED";
    case CUSOLVER_STATUS_INTERNAL_ERROR:
      return "CUSOLVER_STATUS_INTERNAL_ERROR";
    case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
      return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
    case CUSOLVER_STATUS_NOT_SUPPORTED:
      // Fix: removed a stray trailing space so the returned text matches
      // the enumerator spelling exactly (it previously read
      // "CUSOLVER_STATUS_NOT_SUPPORTED ").
      return "CUSOLVER_STATUS_NOT_SUPPORTED";
    case CUSOLVER_STATUS_ZERO_PIVOT:
      return "CUSOLVER_STATUS_ZERO_PIVOT";
    case CUSOLVER_STATUS_INVALID_LICENSE:
      return "CUSOLVER_STATUS_INVALID_LICENSE";
  }

  return "<unknown>";
}
#endif
236
#ifdef CURAND_H_
// cuRAND API errors: map a curandStatus_t to its symbolic enumerator name.
static const char *_cudaGetErrorEnum(curandStatus_t error) {
  switch (error) {
    case CURAND_STATUS_SUCCESS:           return "CURAND_STATUS_SUCCESS";
    case CURAND_STATUS_VERSION_MISMATCH:  return "CURAND_STATUS_VERSION_MISMATCH";
    case CURAND_STATUS_NOT_INITIALIZED:   return "CURAND_STATUS_NOT_INITIALIZED";
    case CURAND_STATUS_ALLOCATION_FAILED: return "CURAND_STATUS_ALLOCATION_FAILED";
    case CURAND_STATUS_TYPE_ERROR:        return "CURAND_STATUS_TYPE_ERROR";
    case CURAND_STATUS_OUT_OF_RANGE:      return "CURAND_STATUS_OUT_OF_RANGE";
    case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
      return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
    case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
      return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
    case CURAND_STATUS_LAUNCH_FAILURE:    return "CURAND_STATUS_LAUNCH_FAILURE";
    case CURAND_STATUS_PREEXISTING_FAILURE:
      return "CURAND_STATUS_PREEXISTING_FAILURE";
    case CURAND_STATUS_INITIALIZATION_FAILED:
      return "CURAND_STATUS_INITIALIZATION_FAILED";
    case CURAND_STATUS_ARCH_MISMATCH:     return "CURAND_STATUS_ARCH_MISMATCH";
    case CURAND_STATUS_INTERNAL_ERROR:    return "CURAND_STATUS_INTERNAL_ERROR";
  }

  return "<unknown>";
}
#endif
284
#ifdef NVJPEGAPI
// nvJPEG API errors: map an nvjpegStatus_t to its symbolic enumerator name.
static const char *_cudaGetErrorEnum(nvjpegStatus_t error) {
  switch (error) {
    case NVJPEG_STATUS_SUCCESS:            return "NVJPEG_STATUS_SUCCESS";
    case NVJPEG_STATUS_NOT_INITIALIZED:    return "NVJPEG_STATUS_NOT_INITIALIZED";
    case NVJPEG_STATUS_INVALID_PARAMETER:  return "NVJPEG_STATUS_INVALID_PARAMETER";
    case NVJPEG_STATUS_BAD_JPEG:           return "NVJPEG_STATUS_BAD_JPEG";
    case NVJPEG_STATUS_JPEG_NOT_SUPPORTED:
      return "NVJPEG_STATUS_JPEG_NOT_SUPPORTED";
    case NVJPEG_STATUS_ALLOCATOR_FAILURE:  return "NVJPEG_STATUS_ALLOCATOR_FAILURE";
    case NVJPEG_STATUS_EXECUTION_FAILED:   return "NVJPEG_STATUS_EXECUTION_FAILED";
    case NVJPEG_STATUS_ARCH_MISMATCH:      return "NVJPEG_STATUS_ARCH_MISMATCH";
    case NVJPEG_STATUS_INTERNAL_ERROR:     return "NVJPEG_STATUS_INTERNAL_ERROR";
  }

  return "<unknown>";
}
#endif
320
#ifdef NV_NPPIDEFS_H
// NPP API errors: map an NppStatus to its symbolic enumerator name.
// The set of enumerators varies with the NPP version, hence the
// NPP_VERSION-gated sections below.
static const char *_cudaGetErrorEnum(NppStatus error) {
  switch (error) {
    case NPP_NOT_SUPPORTED_MODE_ERROR:   return "NPP_NOT_SUPPORTED_MODE_ERROR";
    case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
      return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
    case NPP_RESIZE_NO_OPERATION_ERROR:  return "NPP_RESIZE_NO_OPERATION_ERROR";
    case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
      return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
    // Pre-5.5 enumerator spellings (reported under their modern names).
    case NPP_BAD_ARG_ERROR:              return "NPP_BAD_ARGUMENT_ERROR";
    case NPP_COEFF_ERROR:                return "NPP_COEFFICIENT_ERROR";
    case NPP_RECT_ERROR:                 return "NPP_RECTANGLE_ERROR";
    case NPP_QUAD_ERROR:                 return "NPP_QUADRANGLE_ERROR";
    case NPP_MEM_ALLOC_ERR:              return "NPP_MEMORY_ALLOCATION_ERROR";
    case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
      return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
    case NPP_INVALID_INPUT:              return "NPP_INVALID_INPUT";
    case NPP_POINTER_ERROR:              return "NPP_POINTER_ERROR";
    case NPP_WARNING:                    return "NPP_WARNING";
    case NPP_ODD_ROI_WARNING:            return "NPP_ODD_ROI_WARNING";
#else
    // These are for CUDA 5.5 or higher
    case NPP_BAD_ARGUMENT_ERROR:         return "NPP_BAD_ARGUMENT_ERROR";
    case NPP_COEFFICIENT_ERROR:          return "NPP_COEFFICIENT_ERROR";
    case NPP_RECTANGLE_ERROR:            return "NPP_RECTANGLE_ERROR";
    case NPP_QUADRANGLE_ERROR:           return "NPP_QUADRANGLE_ERROR";
    case NPP_MEMORY_ALLOCATION_ERR:      return "NPP_MEMORY_ALLOCATION_ERROR";
    case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
      return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
    case NPP_INVALID_HOST_POINTER_ERROR:
      return "NPP_INVALID_HOST_POINTER_ERROR";
    case NPP_INVALID_DEVICE_POINTER_ERROR:
      return "NPP_INVALID_DEVICE_POINTER_ERROR";
#endif

    case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
      return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
    case NPP_TEXTURE_BIND_ERROR:         return "NPP_TEXTURE_BIND_ERROR";
    case NPP_WRONG_INTERSECTION_ROI_ERROR:
      return "NPP_WRONG_INTERSECTION_ROI_ERROR";
    case NPP_NOT_EVEN_STEP_ERROR:        return "NPP_NOT_EVEN_STEP_ERROR";
    case NPP_INTERPOLATION_ERROR:        return "NPP_INTERPOLATION_ERROR";
    case NPP_RESIZE_FACTOR_ERROR:        return "NPP_RESIZE_FACTOR_ERROR";
    case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
      return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
    case NPP_MEMFREE_ERR:                return "NPP_MEMFREE_ERR";
    case NPP_MEMSET_ERR:                 return "NPP_MEMSET_ERR";
    // NOTE(review): upstream maps NPP_MEMCPY_ERR to the text
    // "NPP_MEMCPY_ERROR" (unlike its siblings); preserved as-is.
    case NPP_MEMCPY_ERR:                 return "NPP_MEMCPY_ERROR";
    case NPP_MIRROR_FLIP_ERR:            return "NPP_MIRROR_FLIP_ERR";
#else
    case NPP_MEMFREE_ERROR:              return "NPP_MEMFREE_ERROR";
    case NPP_MEMSET_ERROR:               return "NPP_MEMSET_ERROR";
    case NPP_MEMCPY_ERROR:               return "NPP_MEMCPY_ERROR";
    case NPP_MIRROR_FLIP_ERROR:          return "NPP_MIRROR_FLIP_ERROR";
#endif

    case NPP_ALIGNMENT_ERROR:            return "NPP_ALIGNMENT_ERROR";
    case NPP_STEP_ERROR:                 return "NPP_STEP_ERROR";
    case NPP_SIZE_ERROR:                 return "NPP_SIZE_ERROR";
    case NPP_NULL_POINTER_ERROR:         return "NPP_NULL_POINTER_ERROR";
    case NPP_CUDA_KERNEL_EXECUTION_ERROR:
      return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
    case NPP_NOT_IMPLEMENTED_ERROR:      return "NPP_NOT_IMPLEMENTED_ERROR";
    case NPP_ERROR:                      return "NPP_ERROR";
    case NPP_SUCCESS:                    return "NPP_SUCCESS";
    case NPP_WRONG_INTERSECTION_QUAD_WARNING:
      return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
    case NPP_MISALIGNED_DST_ROI_WARNING:
      return "NPP_MISALIGNED_DST_ROI_WARNING";
    case NPP_AFFINE_QUAD_INCORRECT_WARNING:
      return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
    case NPP_DOUBLE_SIZE_WARNING:        return "NPP_DOUBLE_SIZE_WARNING";
    case NPP_WRONG_INTERSECTION_ROI_WARNING:
      return "NPP_WRONG_INTERSECTION_ROI_WARNING";

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
    /* These are 6.0 or higher */
    case NPP_LUT_PALETTE_BITSIZE_ERROR:  return "NPP_LUT_PALETTE_BITSIZE_ERROR";
    case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
      return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
    case NPP_QUALITY_INDEX_ERROR:        return "NPP_QUALITY_INDEX_ERROR";
    case NPP_CHANNEL_ORDER_ERROR:        return "NPP_CHANNEL_ORDER_ERROR";
    case NPP_ZERO_MASK_VALUE_ERROR:      return "NPP_ZERO_MASK_VALUE_ERROR";
    case NPP_NUMBER_OF_CHANNELS_ERROR:   return "NPP_NUMBER_OF_CHANNELS_ERROR";
    case NPP_COI_ERROR:                  return "NPP_COI_ERROR";
    case NPP_DIVISOR_ERROR:              return "NPP_DIVISOR_ERROR";
    case NPP_CHANNEL_ERROR:              return "NPP_CHANNEL_ERROR";
    case NPP_STRIDE_ERROR:               return "NPP_STRIDE_ERROR";
    case NPP_ANCHOR_ERROR:               return "NPP_ANCHOR_ERROR";
    case NPP_MASK_SIZE_ERROR:            return "NPP_MASK_SIZE_ERROR";
    case NPP_MOMENT_00_ZERO_ERROR:       return "NPP_MOMENT_00_ZERO_ERROR";
    case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
      return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
    case NPP_THRESHOLD_ERROR:            return "NPP_THRESHOLD_ERROR";
    case NPP_CONTEXT_MATCH_ERROR:        return "NPP_CONTEXT_MATCH_ERROR";
    case NPP_FFT_FLAG_ERROR:             return "NPP_FFT_FLAG_ERROR";
    case NPP_FFT_ORDER_ERROR:            return "NPP_FFT_ORDER_ERROR";
    case NPP_SCALE_RANGE_ERROR:          return "NPP_SCALE_RANGE_ERROR";
    case NPP_DATA_TYPE_ERROR:            return "NPP_DATA_TYPE_ERROR";
    case NPP_OUT_OFF_RANGE_ERROR:        return "NPP_OUT_OFF_RANGE_ERROR";
    case NPP_DIVIDE_BY_ZERO_ERROR:       return "NPP_DIVIDE_BY_ZERO_ERROR";
    case NPP_RANGE_ERROR:                return "NPP_RANGE_ERROR";
    case NPP_NO_MEMORY_ERROR:            return "NPP_NO_MEMORY_ERROR";
    case NPP_ERROR_RESERVED:             return "NPP_ERROR_RESERVED";
    case NPP_NO_OPERATION_WARNING:       return "NPP_NO_OPERATION_WARNING";
    case NPP_DIVIDE_BY_ZERO_WARNING:     return "NPP_DIVIDE_BY_ZERO_WARNING";
#endif

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
    /* These are 7.0 or higher */
    case NPP_OVERFLOW_ERROR:             return "NPP_OVERFLOW_ERROR";
    case NPP_CORRUPTED_DATA_ERROR:       return "NPP_CORRUPTED_DATA_ERROR";
#endif
  }

  return "<unknown>";
}
#endif
581
// Generic CUDA status check used by the checkCudaErrors() macro.
// If `result` is a nonzero status code, report the failing call (`func`)
// with its file/line location and the decoded error name, then terminate
// the process with EXIT_FAILURE.  A zero (success) status is a no-op.
template <typename T>
void check(T result, char const *const func, const char *const file,
           int const line) {
  if (!result) {
    return;  // success -- nothing to report
  }
  fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
          static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
  exit(EXIT_FAILURE);
}
591
#ifdef __DRIVER_TYPES_H__
// This will output the proper CUDA error strings in the event
// that a CUDA host call returns an error
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)

// This will output the proper error string when calling cudaGetLastError
#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)

// Checks for a pending CUDA runtime error (e.g. from a preceding kernel
// launch); if one is found, prints `errorMessage` together with the call
// site and the decoded error string, then terminates the process.
// Note: cudaGetLastError() also clears the pending error state.
inline void __getLastCudaError(const char *errorMessage, const char *file,
                               const int line) {
  cudaError_t err = cudaGetLastError();

  if (cudaSuccess != err) {
    fprintf(stderr,
            "%s(%i) : getLastCudaError() CUDA error :"
            " %s : (%d) %s.\n",
            file, line, errorMessage, static_cast<int>(err),
            cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }
}
613
// This will only print the proper error string when calling cudaGetLastError
// but will not exit the program in case an error is detected.
#define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__)

// Non-fatal variant of __getLastCudaError(): reports a pending CUDA runtime
// error (if any) to stderr but lets the program continue.  As with the
// fatal variant, cudaGetLastError() clears the pending error state.
inline void __printLastCudaError(const char *errorMessage, const char *file,
                                 const int line) {
  cudaError_t err = cudaGetLastError();

  if (cudaSuccess != err) {
    // Fix: the message previously named "getLastCudaError()" (copy-paste
    // from the fatal variant); report this helper's own name so the
    // diagnostic identifies the right entry point.
    fprintf(stderr,
            "%s(%i) : printLastCudaError() CUDA error :"
            " %s : (%d) %s.\n",
            file, line, errorMessage, static_cast<int>(err),
            cudaGetErrorString(err));
  }
}
#endif
631
#ifndef MAX
// Fix: parenthesize the macro arguments (and each use of them) so that
// operands containing lower-precedence operators expand correctly, e.g.
// MAX(a, b ? 1 : 2) previously mis-parsed as (a > b ? 1 : ...).
// NOTE: as with any function-like macro, an argument may be evaluated
// twice -- avoid side effects in the operands.
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
635
// Float To Int conversion, rounding half away from zero
// (1.5f -> 2, -1.5f -> -2).
// Fix: use 0.5f float literals so the arithmetic stays in single
// precision instead of silently promoting `value` to double.
inline int ftoi(float value) {
  return (value >= 0 ? static_cast<int>(value + 0.5f)
                     : static_cast<int>(value - 0.5f));
}
641
// Beginning of GPU Architecture definitions
// Returns the number of CUDA cores per streaming multiprocessor for the
// given compute capability (major.minor).  Unknown versions fall back to
// the newest entry in the table (with a console notice).
inline int _ConvertSMVer2Cores(int major, int minor) {
  // SM version encoded as 0xMm (hexadecimal): M = major, m = minor.
  struct SMToCores {
    int sm;
    int cores;
  };

  static const SMToCores kCoresPerSM[] = {
      {0x30, 192}, {0x32, 192}, {0x35, 192}, {0x37, 192},
      {0x50, 128}, {0x52, 128}, {0x53, 128},
      {0x60, 64},  {0x61, 128}, {0x62, 128},
      {0x70, 64},  {0x72, 64},  {0x75, 64},
      {0x80, 64},  {0x86, 128}, {0x87, 128},
      {-1, -1}};  // sentinel terminates the search

  const int sm = (major << 4) + minor;
  int i = 0;
  for (; kCoresPerSM[i].sm != -1; ++i) {
    if (kCoresPerSM[i].sm == sm) {
      return kCoresPerSM[i].cores;
    }
  }

  // Unknown SM version: default to the last (newest) known architecture
  // so the caller can still run, and tell the user about the guess.
  printf(
      "MapSMtoCores for SM %d.%d is undefined."
      " Default to use %d Cores/SM\n",
      major, minor, kCoresPerSM[i - 1].cores);
  return kCoresPerSM[i - 1].cores;
}
689
// Returns a human-readable GPU architecture name for the given compute
// capability (major.minor).  Unknown versions fall back to the last known
// entry in the table (with a console notice).
inline const char* _ConvertSMVer2ArchName(int major, int minor) {
  // SM version encoded as 0xMm (hexadecimal): M = major, m = minor.
  struct SMToName {
    int sm;
    const char* name;
  };

  static const SMToName kArchNameForSM[] = {
      {0x30, "Kepler"},  {0x32, "Kepler"},  {0x35, "Kepler"},
      {0x37, "Kepler"},  {0x50, "Maxwell"}, {0x52, "Maxwell"},
      {0x53, "Maxwell"}, {0x60, "Pascal"},  {0x61, "Pascal"},
      {0x62, "Pascal"},  {0x70, "Volta"},   {0x72, "Xavier"},
      {0x75, "Turing"},  {0x80, "Ampere"},  {0x86, "Ampere"},
      {-1, "Graphics Device"}};  // sentinel terminates the search

  const int sm = (major << 4) + minor;
  int i = 0;
  for (; kArchNameForSM[i].sm != -1; ++i) {
    if (kArchNameForSM[i].sm == sm) {
      return kArchNameForSM[i].name;
    }
  }

  // Unknown SM version: report it and default to the previous (newest
  // known) entry's name so the caller can still run.
  printf(
      "MapSMtoArchName for SM %d.%d is undefined."
      " Default to use %s\n",
      major, minor, kArchNameForSM[i - 1].name);
  return kArchNameForSM[i - 1].name;
}
735 // end of GPU Architecture definitions
736
#ifdef __CUDA_RUNTIME_H__
// General GPU Device CUDA Initialization.
// Validates `devID` (negative values are clamped to 0), rejects devices in
// compute-mode Prohibited or without CUDA support, then makes the device
// current via cudaSetDevice().
// Returns the selected device id on success, -devID if the id is out of
// range, or -1 if the device's compute mode is Prohibited.
inline int gpuDeviceInit(int devID) {
  int device_count;
  checkCudaErrors(cudaGetDeviceCount(&device_count));

  if (device_count == 0) {
    fprintf(stderr,
            "gpuDeviceInit() CUDA error: "
            "no devices supporting CUDA.\n");
    exit(EXIT_FAILURE);
  }

  if (devID < 0) {
    devID = 0;
  }

  if (devID > device_count - 1) {
    fprintf(stderr, "\n");
    fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n",
            device_count);
    fprintf(stderr,
            ">> gpuDeviceInit (-device=%d) is not a valid"
            " GPU device. <<\n",
            devID);
    fprintf(stderr, "\n");
    return -devID;
  }

  int computeMode = -1, major = 0, minor = 0;
  checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, devID));
  checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
  checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
  if (computeMode == cudaComputeModeProhibited) {
    fprintf(stderr,
            "Error: device is running in <Compute Mode "
            "Prohibited>, no threads can use cudaSetDevice().\n");
    return -1;
  }

  if (major < 1) {
    fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
    exit(EXIT_FAILURE);
  }

  checkCudaErrors(cudaSetDevice(devID));
  // Fix: the architecture name was printed with an opening quote but no
  // closing quote; balance the quotes in the output.
  printf("gpuDeviceInit() CUDA Device [%d]: \"%s\"\n", devID,
         _ConvertSMVer2ArchName(major, minor));

  return devID;
}
787
// This function returns the best GPU (with maximum GFLOPS)
// Scans every CUDA device, skips those whose compute mode is Prohibited,
// scores the rest as SMs * cores/SM * clock rate, and returns the id of
// the highest-scoring device.  Exits if no device exists or if all are
// prohibited.
inline int gpuGetMaxGflopsDeviceId() {
  int current_device = 0, sm_per_multiproc = 0;
  int max_perf_device = 0;
  int device_count = 0;
  int devices_prohibited = 0;

  uint64_t max_compute_perf = 0;
  checkCudaErrors(cudaGetDeviceCount(&device_count));

  if (device_count == 0) {
    fprintf(stderr,
            "gpuGetMaxGflopsDeviceId() CUDA error:"
            " no devices supporting CUDA.\n");
    exit(EXIT_FAILURE);
  }

  // Find the best CUDA capable GPU device
  current_device = 0;

  while (current_device < device_count) {
    int computeMode = -1, major = 0, minor = 0;
    checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
    checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
    checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));

    // If this GPU is not running on Compute Mode prohibited,
    // then we can add it to the list
    if (computeMode != cudaComputeModeProhibited) {
      if (major == 9999 && minor == 9999) {
        // 9999.9999 is the capability reported for non-hardware (emulated)
        // devices; score them minimally so real hardware always wins.
        sm_per_multiproc = 1;
      } else {
        sm_per_multiproc =
            _ConvertSMVer2Cores(major, minor);
      }
      int multiProcessorCount = 0, clockRate = 0;
      checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, current_device));
      cudaError_t result = cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, current_device);
      if (result != cudaSuccess) {
        // If cudaDevAttrClockRate attribute is not supported we
        // set clockRate as 1, to consider GPU with most SMs and CUDA Cores.
        if(result == cudaErrorInvalidValue) {
          clockRate = 1;
        }
        else {
          // Any other failure is treated as fatal and reported inline
          // (mirrors what checkCudaErrors would print).
          fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n", __FILE__, __LINE__,
                  static_cast<unsigned int>(result), _cudaGetErrorEnum(result));
          exit(EXIT_FAILURE);
        }
      }
      // Relative performance score; 64-bit to avoid overflow on large GPUs.
      uint64_t compute_perf = (uint64_t)multiProcessorCount * sm_per_multiproc * clockRate;

      if (compute_perf > max_compute_perf) {
        max_compute_perf = compute_perf;
        max_perf_device = current_device;
      }
    } else {
      devices_prohibited++;
    }

    ++current_device;
  }

  if (devices_prohibited == device_count) {
    fprintf(stderr,
            "gpuGetMaxGflopsDeviceId() CUDA error:"
            " all devices have compute mode prohibited.\n");
    exit(EXIT_FAILURE);
  }

  return max_perf_device;
}
860
// Initialization code to find the best CUDA Device
// If "-device=N" is present on the command line the requested device is
// initialized via gpuDeviceInit(); otherwise the device with the highest
// Gflops/s score is chosen.  Exits on an invalid device id.  Returns the
// id of the device that was made current.
inline int findCudaDevice(int argc, const char **argv) {
  int devID = 0;

  // If the command-line has a device number specified, use it
  if (checkCmdLineFlag(argc, argv, "device")) {
    devID = getCmdLineArgumentInt(argc, argv, "device=");

    if (devID < 0) {
      printf("Invalid command line parameter\n ");
      exit(EXIT_FAILURE);
    } else {
      devID = gpuDeviceInit(devID);

      // gpuDeviceInit() returns a negative value when the requested
      // device could not be selected (out of range or prohibited).
      if (devID < 0) {
        printf("exiting...\n");
        exit(EXIT_FAILURE);
      }
    }
  } else {
    // Otherwise pick the device with highest Gflops/s
    devID = gpuGetMaxGflopsDeviceId();
    checkCudaErrors(cudaSetDevice(devID));
    int major = 0, minor = 0;
    checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
    checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
    printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
           devID, _ConvertSMVer2ArchName(major, minor), major, minor);

  }

  return devID;
}
894
// Finds the first integrated (CPU-sharing-memory) GPU that is compute
// capable, makes it current via cudaSetDevice(), and returns its id.
// Exits if no CUDA devices exist or if every device is either discrete
// or in compute-mode Prohibited.
inline int findIntegratedGPU() {
  int current_device = 0;
  int device_count = 0;
  int devices_prohibited = 0;

  checkCudaErrors(cudaGetDeviceCount(&device_count));

  if (device_count == 0) {
    fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
    exit(EXIT_FAILURE);
  }

  // Find the integrated GPU which is compute capable
  while (current_device < device_count) {
    int computeMode = -1, integrated = -1;
    checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
    checkCudaErrors(cudaDeviceGetAttribute(&integrated, cudaDevAttrIntegrated, current_device));
    // If GPU is integrated and is not running on Compute Mode prohibited,
    // then cuda can map to GLES resource
    if (integrated && (computeMode != cudaComputeModeProhibited)) {
      checkCudaErrors(cudaSetDevice(current_device));

      int major = 0, minor = 0;
      checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
      checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
      printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
             current_device, _ConvertSMVer2ArchName(major, minor), major, minor);

      // First qualifying device wins.
      return current_device;
    } else {
      devices_prohibited++;
    }

    current_device++;
  }

  if (devices_prohibited == device_count) {
    fprintf(stderr,
            "CUDA error:"
            " No GLES-CUDA Interop capable GPU found.\n");
    exit(EXIT_FAILURE);
  }

  return -1;
}
940
// General check for CUDA GPU SM Capabilities
// Returns true when the *current* device's compute capability is at least
// major_version.minor_version; prints a diagnostic either way.
inline bool checkCudaCapabilities(int major_version, int minor_version) {
  int dev;
  int major = 0, minor = 0;

  checkCudaErrors(cudaGetDevice(&dev));
  checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, dev));
  checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, dev));

  // Lexicographic (major, minor) comparison against the requirement.
  if ((major > major_version) ||
      (major == major_version &&
       minor >= minor_version)) {
    printf("  Device %d: <%16s >, Compute SM %d.%d detected\n", dev,
           _ConvertSMVer2ArchName(major, minor), major, minor);
    return true;
  } else {
    printf(
        "  No GPU device was found that can support "
        "CUDA compute capability %d.%d.\n",
        major_version, minor_version);
    return false;
  }
}
964#endif
965
966 // end of CUDA Helper Functions
967
968#endif // COMMON_HELPER_CUDA_H_
#define T
Definition exp.cpp:237
int _ConvertSMVer2Cores(int major, int minor)
Definition helper_cuda.h:643
const char * _ConvertSMVer2ArchName(int major, int minor)
Definition helper_cuda.h:690
void check(T result, char const *const func, const char *const file, int const line)
Definition helper_cuda.h:583
int ftoi(float value)
Definition helper_cuda.h:637
bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
Definition helper_string.h:127
int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
Definition helper_string.h:180
file(GLOB ATen_CORE_SRCS "*.cpp") set(ATen_CPU_SRCS $
Definition CMakeLists.txt:1
double func(const Vec3 &r, const std::vector< Vec3 > &R, const std::vector< double > &a, const std::vector< double > &n)
Definition test_partition.cpp:50