ABACUS develop
Atomic-orbital Based Ab-initio Computation at USTC
Loading...
Searching...
No Matches
helper_cuda.h
Go to the documentation of this file.
1/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
2 *
3 * Redistribution and use in source and binary forms, with or without
4 * modification, are permitted provided that the following conditions
5 * are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 * * Neither the name of NVIDIA CORPORATION nor the names of its
12 * contributors may be used to endorse or promote products derived
13 * from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
29// These are CUDA Helper functions for initialization and error checking
30
31#ifndef COMMON_HELPER_CUDA_H_
32#define COMMON_HELPER_CUDA_H_
33
34#pragma once
35
36#include <stdint.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <string.h>
40
41#include "helper_string.h"
42
44
#ifndef EXIT_WAIVED
// Exit code used by CUDA samples to report a "waived" (skipped) test run,
// distinct from EXIT_SUCCESS (0) and EXIT_FAILURE (1).
#define EXIT_WAIVED 2
#endif
48
49// Note, it is required that your SDK sample to include the proper header
50// files, please refer the CUDA examples for examples of the needed CUDA
51// headers, which may change depending on which CUDA functions are used.
52
53// CUDA Runtime error messages
#ifdef __DRIVER_TYPES_H__
// CUDA Runtime API errors: map a cudaError_t to its enumerator name
// (delegates to the runtime's own cudaGetErrorName).
static const char *_cudaGetErrorEnum(cudaError_t error) {
  const char *name = cudaGetErrorName(error);
  return name;
}
#endif
59
#ifdef CUDA_DRIVER_API
// CUDA Driver API errors: map a CUresult to its enumerator name.
// cuGetErrorName leaves the output pointer NULL for unrecognized codes,
// in which case a static "<unknown>" placeholder is returned instead.
static const char *_cudaGetErrorEnum(CUresult error) {
  static char unknown[] = "<unknown>";
  const char *name = NULL;
  cuGetErrorName(error, &name);
  if (name == NULL) {
    return unknown;
  }
  return name;
}
#endif
69
#ifdef CUBLAS_API_H_
// cuBLAS API errors: map a cublasStatus_t to its enumerator name.
static const char *_cudaGetErrorEnum(cublasStatus_t error) {
  switch (error) {
    case CUBLAS_STATUS_SUCCESS:          return "CUBLAS_STATUS_SUCCESS";
    case CUBLAS_STATUS_NOT_INITIALIZED:  return "CUBLAS_STATUS_NOT_INITIALIZED";
    case CUBLAS_STATUS_ALLOC_FAILED:     return "CUBLAS_STATUS_ALLOC_FAILED";
    case CUBLAS_STATUS_INVALID_VALUE:    return "CUBLAS_STATUS_INVALID_VALUE";
    case CUBLAS_STATUS_ARCH_MISMATCH:    return "CUBLAS_STATUS_ARCH_MISMATCH";
    case CUBLAS_STATUS_MAPPING_ERROR:    return "CUBLAS_STATUS_MAPPING_ERROR";
    case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED";
    case CUBLAS_STATUS_INTERNAL_ERROR:   return "CUBLAS_STATUS_INTERNAL_ERROR";
    case CUBLAS_STATUS_NOT_SUPPORTED:    return "CUBLAS_STATUS_NOT_SUPPORTED";
    case CUBLAS_STATUS_LICENSE_ERROR:    return "CUBLAS_STATUS_LICENSE_ERROR";
  }

  // Status codes added by newer library versions fall through here.
  return "<unknown>";
}
#endif
108
#ifdef _CUFFT_H_
// cuFFT API errors: map a cufftResult to its enumerator name.
//
// Fix: the function body was missing — a non-void function fell off the
// end without returning (undefined behavior). Restored the standard
// cufftResult decoding switch.
static const char *_cudaGetErrorEnum(cufftResult error) {
  switch (error) {
    case CUFFT_SUCCESS:
      return "CUFFT_SUCCESS";
    case CUFFT_INVALID_PLAN:
      return "CUFFT_INVALID_PLAN";
    case CUFFT_ALLOC_FAILED:
      return "CUFFT_ALLOC_FAILED";
    case CUFFT_INVALID_TYPE:
      return "CUFFT_INVALID_TYPE";
    case CUFFT_INVALID_VALUE:
      return "CUFFT_INVALID_VALUE";
    case CUFFT_INTERNAL_ERROR:
      return "CUFFT_INTERNAL_ERROR";
    case CUFFT_EXEC_FAILED:
      return "CUFFT_EXEC_FAILED";
    case CUFFT_SETUP_FAILED:
      return "CUFFT_SETUP_FAILED";
    case CUFFT_INVALID_SIZE:
      return "CUFFT_INVALID_SIZE";
    case CUFFT_UNALIGNED_DATA:
      return "CUFFT_UNALIGNED_DATA";
    case CUFFT_INCOMPLETE_PARAMETER_LIST:
      return "CUFFT_INCOMPLETE_PARAMETER_LIST";
    case CUFFT_INVALID_DEVICE:
      return "CUFFT_INVALID_DEVICE";
    case CUFFT_PARSE_ERROR:
      return "CUFFT_PARSE_ERROR";
    case CUFFT_NO_WORKSPACE:
      return "CUFFT_NO_WORKSPACE";
    case CUFFT_NOT_IMPLEMENTED:
      return "CUFFT_NOT_IMPLEMENTED";
    case CUFFT_LICENSE_ERROR:
      return "CUFFT_LICENSE_ERROR";
    case CUFFT_NOT_SUPPORTED:
      return "CUFFT_NOT_SUPPORTED";
  }

  return "<unknown>";
}
#endif
115
#ifdef CUSPARSEAPI
// cuSPARSE API errors: map a cusparseStatus_t to its enumerator name.
static const char *_cudaGetErrorEnum(cusparseStatus_t error) {
  switch (error) {
    case CUSPARSE_STATUS_SUCCESS:
      return "CUSPARSE_STATUS_SUCCESS";
    case CUSPARSE_STATUS_NOT_INITIALIZED:
      return "CUSPARSE_STATUS_NOT_INITIALIZED";
    case CUSPARSE_STATUS_ALLOC_FAILED:
      return "CUSPARSE_STATUS_ALLOC_FAILED";
    case CUSPARSE_STATUS_INVALID_VALUE:
      return "CUSPARSE_STATUS_INVALID_VALUE";
    case CUSPARSE_STATUS_ARCH_MISMATCH:
      return "CUSPARSE_STATUS_ARCH_MISMATCH";
    case CUSPARSE_STATUS_MAPPING_ERROR:
      return "CUSPARSE_STATUS_MAPPING_ERROR";
    case CUSPARSE_STATUS_EXECUTION_FAILED:
      return "CUSPARSE_STATUS_EXECUTION_FAILED";
    case CUSPARSE_STATUS_INTERNAL_ERROR:
      return "CUSPARSE_STATUS_INTERNAL_ERROR";
    case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
      return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
  }

  // Status codes added by newer library versions fall through here.
  return "<unknown>";
}
#endif
151
#ifdef CUSOLVER_COMMON_H_
// cuSOLVER API errors: map a cusolverStatus_t to its enumerator name.
//
// Fix: the string for CUSOLVER_STATUS_NOT_SUPPORTED carried a stray
// trailing space ("...NOT_SUPPORTED "), which corrupted log output and
// broke string comparisons against the enumerator name.
static const char *_cudaGetErrorEnum(cusolverStatus_t error) {
  switch (error) {
    case CUSOLVER_STATUS_SUCCESS:
      return "CUSOLVER_STATUS_SUCCESS";
    case CUSOLVER_STATUS_NOT_INITIALIZED:
      return "CUSOLVER_STATUS_NOT_INITIALIZED";
    case CUSOLVER_STATUS_ALLOC_FAILED:
      return "CUSOLVER_STATUS_ALLOC_FAILED";
    case CUSOLVER_STATUS_INVALID_VALUE:
      return "CUSOLVER_STATUS_INVALID_VALUE";
    case CUSOLVER_STATUS_ARCH_MISMATCH:
      return "CUSOLVER_STATUS_ARCH_MISMATCH";
    case CUSOLVER_STATUS_MAPPING_ERROR:
      return "CUSOLVER_STATUS_MAPPING_ERROR";
    case CUSOLVER_STATUS_EXECUTION_FAILED:
      return "CUSOLVER_STATUS_EXECUTION_FAILED";
    case CUSOLVER_STATUS_INTERNAL_ERROR:
      return "CUSOLVER_STATUS_INTERNAL_ERROR";
    case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
      return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
    case CUSOLVER_STATUS_NOT_SUPPORTED:
      return "CUSOLVER_STATUS_NOT_SUPPORTED";
    case CUSOLVER_STATUS_ZERO_PIVOT:
      return "CUSOLVER_STATUS_ZERO_PIVOT";
    case CUSOLVER_STATUS_INVALID_LICENSE:
      return "CUSOLVER_STATUS_INVALID_LICENSE";
  }

  return "<unknown>";
}
#endif
185
#ifdef CURAND_H_
// cuRAND API errors: map a curandStatus_t to its enumerator name.
static const char *_cudaGetErrorEnum(curandStatus_t error) {
  switch (error) {
    case CURAND_STATUS_SUCCESS:                   return "CURAND_STATUS_SUCCESS";
    case CURAND_STATUS_VERSION_MISMATCH:          return "CURAND_STATUS_VERSION_MISMATCH";
    case CURAND_STATUS_NOT_INITIALIZED:           return "CURAND_STATUS_NOT_INITIALIZED";
    case CURAND_STATUS_ALLOCATION_FAILED:         return "CURAND_STATUS_ALLOCATION_FAILED";
    case CURAND_STATUS_TYPE_ERROR:                return "CURAND_STATUS_TYPE_ERROR";
    case CURAND_STATUS_OUT_OF_RANGE:              return "CURAND_STATUS_OUT_OF_RANGE";
    case CURAND_STATUS_LENGTH_NOT_MULTIPLE:       return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
    case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
    case CURAND_STATUS_LAUNCH_FAILURE:            return "CURAND_STATUS_LAUNCH_FAILURE";
    case CURAND_STATUS_PREEXISTING_FAILURE:       return "CURAND_STATUS_PREEXISTING_FAILURE";
    case CURAND_STATUS_INITIALIZATION_FAILED:     return "CURAND_STATUS_INITIALIZATION_FAILED";
    case CURAND_STATUS_ARCH_MISMATCH:             return "CURAND_STATUS_ARCH_MISMATCH";
    case CURAND_STATUS_INTERNAL_ERROR:            return "CURAND_STATUS_INTERNAL_ERROR";
  }

  // Status codes added by newer library versions fall through here.
  return "<unknown>";
}
#endif
233
#ifdef NVJPEGAPI
// nvJPEG API errors: map an nvjpegStatus_t to its enumerator name.
static const char *_cudaGetErrorEnum(nvjpegStatus_t error) {
  switch (error) {
    case NVJPEG_STATUS_SUCCESS:            return "NVJPEG_STATUS_SUCCESS";
    case NVJPEG_STATUS_NOT_INITIALIZED:    return "NVJPEG_STATUS_NOT_INITIALIZED";
    case NVJPEG_STATUS_INVALID_PARAMETER:  return "NVJPEG_STATUS_INVALID_PARAMETER";
    case NVJPEG_STATUS_BAD_JPEG:           return "NVJPEG_STATUS_BAD_JPEG";
    case NVJPEG_STATUS_JPEG_NOT_SUPPORTED: return "NVJPEG_STATUS_JPEG_NOT_SUPPORTED";
    case NVJPEG_STATUS_ALLOCATOR_FAILURE:  return "NVJPEG_STATUS_ALLOCATOR_FAILURE";
    case NVJPEG_STATUS_EXECUTION_FAILED:   return "NVJPEG_STATUS_EXECUTION_FAILED";
    case NVJPEG_STATUS_ARCH_MISMATCH:      return "NVJPEG_STATUS_ARCH_MISMATCH";
    case NVJPEG_STATUS_INTERNAL_ERROR:     return "NVJPEG_STATUS_INTERNAL_ERROR";
  }

  // Status codes added by newer library versions fall through here.
  return "<unknown>";
}
#endif
269
#ifdef NV_NPPIDEFS_H
// NPP API errors
//
// Translates an NppStatus code into the matching symbolic constant name.
// Several constants were renamed across NPP releases, so the set of cases
// compiled in depends on the NPP version macros (pre/post CUDA 5.5, and
// the 6.0 / 7.0 additions below).
static const char *_cudaGetErrorEnum(NppStatus error) {
  switch (error) {
    case NPP_NOT_SUPPORTED_MODE_ERROR:
      return "NPP_NOT_SUPPORTED_MODE_ERROR";

    case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
      return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";

    case NPP_RESIZE_NO_OPERATION_ERROR:
      return "NPP_RESIZE_NO_OPERATION_ERROR";

    case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
      return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";

// Old (pre-CUDA 5.5) constant names; note the strings intentionally use
// the modern spelled-out names for consistent log output.
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000

    case NPP_BAD_ARG_ERROR:
      return "NPP_BAD_ARGUMENT_ERROR";

    case NPP_COEFF_ERROR:
      return "NPP_COEFFICIENT_ERROR";

    case NPP_RECT_ERROR:
      return "NPP_RECTANGLE_ERROR";

    case NPP_QUAD_ERROR:
      return "NPP_QUADRANGLE_ERROR";

    case NPP_MEM_ALLOC_ERR:
      return "NPP_MEMORY_ALLOCATION_ERROR";

    case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
      return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";

    case NPP_INVALID_INPUT:
      return "NPP_INVALID_INPUT";

    case NPP_POINTER_ERROR:
      return "NPP_POINTER_ERROR";

    case NPP_WARNING:
      return "NPP_WARNING";

    case NPP_ODD_ROI_WARNING:
      return "NPP_ODD_ROI_WARNING";
#else

    // These are for CUDA 5.5 or higher
    case NPP_BAD_ARGUMENT_ERROR:
      return "NPP_BAD_ARGUMENT_ERROR";

    case NPP_COEFFICIENT_ERROR:
      return "NPP_COEFFICIENT_ERROR";

    case NPP_RECTANGLE_ERROR:
      return "NPP_RECTANGLE_ERROR";

    case NPP_QUADRANGLE_ERROR:
      return "NPP_QUADRANGLE_ERROR";

    case NPP_MEMORY_ALLOCATION_ERR:
      return "NPP_MEMORY_ALLOCATION_ERROR";

    case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
      return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";

    case NPP_INVALID_HOST_POINTER_ERROR:
      return "NPP_INVALID_HOST_POINTER_ERROR";

    case NPP_INVALID_DEVICE_POINTER_ERROR:
      return "NPP_INVALID_DEVICE_POINTER_ERROR";
#endif

    case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
      return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";

    case NPP_TEXTURE_BIND_ERROR:
      return "NPP_TEXTURE_BIND_ERROR";

    case NPP_WRONG_INTERSECTION_ROI_ERROR:
      return "NPP_WRONG_INTERSECTION_ROI_ERROR";

    case NPP_NOT_EVEN_STEP_ERROR:
      return "NPP_NOT_EVEN_STEP_ERROR";

    case NPP_INTERPOLATION_ERROR:
      return "NPP_INTERPOLATION_ERROR";

    case NPP_RESIZE_FACTOR_ERROR:
      return "NPP_RESIZE_FACTOR_ERROR";

    case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
      return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";

// Memory-operation error names also changed spelling at CUDA 5.5.
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000

    case NPP_MEMFREE_ERR:
      return "NPP_MEMFREE_ERR";

    case NPP_MEMSET_ERR:
      return "NPP_MEMSET_ERR";

    case NPP_MEMCPY_ERR:
      return "NPP_MEMCPY_ERROR";

    case NPP_MIRROR_FLIP_ERR:
      return "NPP_MIRROR_FLIP_ERR";
#else

    case NPP_MEMFREE_ERROR:
      return "NPP_MEMFREE_ERROR";

    case NPP_MEMSET_ERROR:
      return "NPP_MEMSET_ERROR";

    case NPP_MEMCPY_ERROR:
      return "NPP_MEMCPY_ERROR";

    case NPP_MIRROR_FLIP_ERROR:
      return "NPP_MIRROR_FLIP_ERROR";
#endif

    case NPP_ALIGNMENT_ERROR:
      return "NPP_ALIGNMENT_ERROR";

    case NPP_STEP_ERROR:
      return "NPP_STEP_ERROR";

    case NPP_SIZE_ERROR:
      return "NPP_SIZE_ERROR";

    case NPP_NULL_POINTER_ERROR:
      return "NPP_NULL_POINTER_ERROR";

    case NPP_CUDA_KERNEL_EXECUTION_ERROR:
      return "NPP_CUDA_KERNEL_EXECUTION_ERROR";

    case NPP_NOT_IMPLEMENTED_ERROR:
      return "NPP_NOT_IMPLEMENTED_ERROR";

    case NPP_ERROR:
      return "NPP_ERROR";

    case NPP_SUCCESS:
      return "NPP_SUCCESS";

    case NPP_WRONG_INTERSECTION_QUAD_WARNING:
      return "NPP_WRONG_INTERSECTION_QUAD_WARNING";

    case NPP_MISALIGNED_DST_ROI_WARNING:
      return "NPP_MISALIGNED_DST_ROI_WARNING";

    case NPP_AFFINE_QUAD_INCORRECT_WARNING:
      return "NPP_AFFINE_QUAD_INCORRECT_WARNING";

    case NPP_DOUBLE_SIZE_WARNING:
      return "NPP_DOUBLE_SIZE_WARNING";

    case NPP_WRONG_INTERSECTION_ROI_WARNING:
      return "NPP_WRONG_INTERSECTION_ROI_WARNING";

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
    /* These are 6.0 or higher */
    case NPP_LUT_PALETTE_BITSIZE_ERROR:
      return "NPP_LUT_PALETTE_BITSIZE_ERROR";

    case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
      return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";

    case NPP_QUALITY_INDEX_ERROR:
      return "NPP_QUALITY_INDEX_ERROR";

    case NPP_CHANNEL_ORDER_ERROR:
      return "NPP_CHANNEL_ORDER_ERROR";

    case NPP_ZERO_MASK_VALUE_ERROR:
      return "NPP_ZERO_MASK_VALUE_ERROR";

    case NPP_NUMBER_OF_CHANNELS_ERROR:
      return "NPP_NUMBER_OF_CHANNELS_ERROR";

    case NPP_COI_ERROR:
      return "NPP_COI_ERROR";

    case NPP_DIVISOR_ERROR:
      return "NPP_DIVISOR_ERROR";

    case NPP_CHANNEL_ERROR:
      return "NPP_CHANNEL_ERROR";

    case NPP_STRIDE_ERROR:
      return "NPP_STRIDE_ERROR";

    case NPP_ANCHOR_ERROR:
      return "NPP_ANCHOR_ERROR";

    case NPP_MASK_SIZE_ERROR:
      return "NPP_MASK_SIZE_ERROR";

    case NPP_MOMENT_00_ZERO_ERROR:
      return "NPP_MOMENT_00_ZERO_ERROR";

    case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
      return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";

    case NPP_THRESHOLD_ERROR:
      return "NPP_THRESHOLD_ERROR";

    case NPP_CONTEXT_MATCH_ERROR:
      return "NPP_CONTEXT_MATCH_ERROR";

    case NPP_FFT_FLAG_ERROR:
      return "NPP_FFT_FLAG_ERROR";

    case NPP_FFT_ORDER_ERROR:
      return "NPP_FFT_ORDER_ERROR";

    case NPP_SCALE_RANGE_ERROR:
      return "NPP_SCALE_RANGE_ERROR";

    case NPP_DATA_TYPE_ERROR:
      return "NPP_DATA_TYPE_ERROR";

    case NPP_OUT_OFF_RANGE_ERROR:
      return "NPP_OUT_OFF_RANGE_ERROR";

    case NPP_DIVIDE_BY_ZERO_ERROR:
      return "NPP_DIVIDE_BY_ZERO_ERROR";

    case NPP_RANGE_ERROR:
      return "NPP_RANGE_ERROR";

    case NPP_NO_MEMORY_ERROR:
      return "NPP_NO_MEMORY_ERROR";

    case NPP_ERROR_RESERVED:
      return "NPP_ERROR_RESERVED";

    case NPP_NO_OPERATION_WARNING:
      return "NPP_NO_OPERATION_WARNING";

    case NPP_DIVIDE_BY_ZERO_WARNING:
      return "NPP_DIVIDE_BY_ZERO_WARNING";
#endif

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
    /* These are 7.0 or higher */
    case NPP_OVERFLOW_ERROR:
      return "NPP_OVERFLOW_ERROR";

    case NPP_CORRUPTED_DATA_ERROR:
      return "NPP_CORRUPTED_DATA_ERROR";
#endif
  }

  // Status codes not known to this NPP version fall through here.
  return "<unknown>";
}
#endif
530
// Fatal status check used by the checkCudaErrors() macro: on any nonzero
// CUDA / CUDA-library status code, print the failing expression, source
// location and decoded error name to stderr, then terminate the process.
template <typename T>
void check(T result, char const *const func, const char *const file,
           int const line) {
  if (!result) {
    return;  // success — nothing to report
  }
  fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
          static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
  exit(EXIT_FAILURE);
}
540
541#ifdef __DRIVER_TYPES_H__
542// This will output the proper CUDA error strings in the event
543// that a CUDA host call returns an error
544#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
545
546// This will output the proper error string when calling cudaGetLastError
547#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
548
549inline void __getLastCudaError(const char *errorMessage, const char *file,
550 const int line) {
551 cudaError_t err = cudaGetLastError();
552
553 if (cudaSuccess != err) {
554 fprintf(stderr,
555 "%s(%i) : getLastCudaError() CUDA error :"
556 " %s : (%d) %s.\n",
557 file, line, errorMessage, static_cast<int>(err),
558 cudaGetErrorString(err));
559 exit(EXIT_FAILURE);
560 }
561}
562
563// This will only print the proper error string when calling cudaGetLastError
564// but not exit program incase error detected.
565#define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__)
566
567inline void __printLastCudaError(const char *errorMessage, const char *file,
568 const int line) {
569 cudaError_t err = cudaGetLastError();
570
571 if (cudaSuccess != err) {
572 fprintf(stderr,
573 "%s(%i) : getLastCudaError() CUDA error :"
574 " %s : (%d) %s.\n",
575 file, line, errorMessage, static_cast<int>(err),
576 cudaGetErrorString(err));
577 }
578}
579#endif
580
#ifndef MAX
// Larger of a and b. Fix: arguments are now fully parenthesized so
// expressions with lower-precedence operators (e.g. MAX(x & 3, y))
// expand correctly; as with any function-like macro, each argument may
// still be evaluated more than once.
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
584
// Float To Int conversion, rounding half away from zero.
inline int ftoi(float value) {
  if (value >= 0) {
    return static_cast<int>(value + 0.5);
  }
  return static_cast<int>(value - 0.5);
}
590
// Beginning of GPU Architecture definitions
// Returns the number of CUDA cores per streaming multiprocessor for
// compute capability <major>.<minor>. For an unknown capability the entry
// of the last known architecture is used (with a warning) so callers can
// still proceed.
inline int _ConvertSMVer2Cores(int major, int minor) {
  // Defines for GPU Architecture types (using the SM version to determine
  // the # of cores per SM
  typedef struct {
    int SM;  // 0xMm (hexidecimal notation), M = SM Major version,
             // and m = SM minor version
    int Cores;
  } sSMtoCores;

  sSMtoCores nGpuArchCoresPerSM[] = {
      {0x30, 192},
      {0x32, 192},
      {0x35, 192},
      {0x37, 192},
      {0x50, 128},
      {0x52, 128},
      {0x53, 128},
      {0x60, 64},
      {0x61, 128},
      {0x62, 128},
      {0x70, 64},
      {0x72, 64},
      {0x75, 64},
      {0x80, 64},
      {0x86, 128},
      {0x87, 128},
      {0x89, 128},  // Ada Lovelace
      {0x90, 128},  // Hopper
      {-1, -1}};

  int index = 0;

  while (nGpuArchCoresPerSM[index].SM != -1) {
    if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
      return nGpuArchCoresPerSM[index].Cores;
    }

    index++;
  }

  // If we don't find the values, we default use the previous one
  // to run properly
  printf(
      "MapSMtoCores for SM %d.%d is undefined."
      " Default to use %d Cores/SM\n",
      major, minor, nGpuArchCoresPerSM[index - 1].Cores);
  return nGpuArchCoresPerSM[index - 1].Cores;
}
638
// Returns the marketing architecture name for compute capability
// <major>.<minor>. For an unknown capability the entry of the last known
// architecture is used (with a warning) so callers can still proceed.
inline const char* _ConvertSMVer2ArchName(int major, int minor) {
  // Defines for GPU Architecture types (using the SM version to determine
  // the GPU Arch name)
  typedef struct {
    int SM;  // 0xMm (hexidecimal notation), M = SM Major version,
             // and m = SM minor version
    const char* name;
  } sSMtoArchName;

  sSMtoArchName nGpuArchNameSM[] = {
      {0x30, "Kepler"},
      {0x32, "Kepler"},
      {0x35, "Kepler"},
      {0x37, "Kepler"},
      {0x50, "Maxwell"},
      {0x52, "Maxwell"},
      {0x53, "Maxwell"},
      {0x60, "Pascal"},
      {0x61, "Pascal"},
      {0x62, "Pascal"},
      {0x70, "Volta"},
      {0x72, "Xavier"},
      {0x75, "Turing"},
      {0x80, "Ampere"},
      {0x86, "Ampere"},
      {0x89, "Ada"},     // added: SM 8.9 (Ada Lovelace)
      {0x90, "Hopper"},  // added: SM 9.0 (Hopper)
      {-1, "Graphics Device"}};

  int index = 0;

  while (nGpuArchNameSM[index].SM != -1) {
    if (nGpuArchNameSM[index].SM == ((major << 4) + minor)) {
      return nGpuArchNameSM[index].name;
    }

    index++;
  }

  // If we don't find the values, we default use the previous one
  // to run properly
  printf(
      "MapSMtoArchName for SM %d.%d is undefined."
      " Default to use %s\n",
      major, minor, nGpuArchNameSM[index - 1].name);
  return nGpuArchNameSM[index - 1].name;
}
684 // end of GPU Architecture definitions
685
686#ifdef __CUDA_RUNTIME_H__
687// General GPU Device CUDA Initialization
688inline int gpuDeviceInit(int devID) {
689 int device_count;
690 checkCudaErrors(cudaGetDeviceCount(&device_count));
691
692 if (device_count == 0) {
693 fprintf(stderr,
694 "gpuDeviceInit() CUDA error: "
695 "no devices supporting CUDA.\n");
696 exit(EXIT_FAILURE);
697 }
698
699 if (devID < 0) {
700 devID = 0;
701 }
702
703 if (devID > device_count - 1) {
704 fprintf(stderr, "\n");
705 fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n",
706 device_count);
707 fprintf(stderr,
708 ">> gpuDeviceInit (-device=%d) is not a valid"
709 " GPU device. <<\n",
710 devID);
711 fprintf(stderr, "\n");
712 return -devID;
713 }
714
715 int computeMode = -1, major = 0, minor = 0;
716 checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, devID));
717 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
718 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
719 if (computeMode == cudaComputeModeProhibited) {
720 fprintf(stderr,
721 "Error: device is running in <Compute Mode "
722 "Prohibited>, no threads can use cudaSetDevice().\n");
723 return -1;
724 }
725
726 if (major < 1) {
727 fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
728 exit(EXIT_FAILURE);
729 }
730
731 checkCudaErrors(cudaSetDevice(devID));
732 printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, _ConvertSMVer2ArchName(major, minor));
733
734 return devID;
735}
736
// This function returns the best GPU (with maximum GFLOPS)
//
// Scores every visible device as (#SMs x cores-per-SM x clock rate) and
// returns the id of the highest-scoring device that is not in
// cudaComputeModeProhibited. Exits the process when no device exists or
// when every device is prohibited.
inline int gpuGetMaxGflopsDeviceId() {
  int current_device = 0, sm_per_multiproc = 0;
  int max_perf_device = 0;
  int device_count = 0;
  int devices_prohibited = 0;

  uint64_t max_compute_perf = 0;
  checkCudaErrors(cudaGetDeviceCount(&device_count));

  if (device_count == 0) {
    fprintf(stderr,
            "gpuGetMaxGflopsDeviceId() CUDA error:"
            " no devices supporting CUDA.\n");
    exit(EXIT_FAILURE);
  }

  // Find the best CUDA capable GPU device
  current_device = 0;

  while (current_device < device_count) {
    int computeMode = -1, major = 0, minor = 0;
    checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
    checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
    checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));

    // If this GPU is not running on Compute Mode prohibited,
    // then we can add it to the list
    if (computeMode != cudaComputeModeProhibited) {
      if (major == 9999 && minor == 9999) {
        // 9999.9999 marks a device-emulation target; score it minimally.
        sm_per_multiproc = 1;
      } else {
        sm_per_multiproc =
            _ConvertSMVer2Cores(major, minor);
      }
      int multiProcessorCount = 0, clockRate = 0;
      checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, current_device));
      cudaError_t result = cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, current_device);
      if (result != cudaSuccess) {
        // If cudaDevAttrClockRate attribute is not supported we
        // set clockRate as 1, to consider GPU with most SMs and CUDA Cores.
        if(result == cudaErrorInvalidValue) {
          clockRate = 1;
        }
        else {
          // Any other failure is fatal: report and terminate.
          fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \n", __FILE__, __LINE__,
                  static_cast<unsigned int>(result), _cudaGetErrorEnum(result));
          exit(EXIT_FAILURE);
        }
      }
      // 64-bit accumulation avoids overflow of SMs x cores x kHz products.
      uint64_t compute_perf = (uint64_t)multiProcessorCount * sm_per_multiproc * clockRate;

      if (compute_perf > max_compute_perf) {
        max_compute_perf = compute_perf;
        max_perf_device = current_device;
      }
    } else {
      devices_prohibited++;
    }

    ++current_device;
  }

  if (devices_prohibited == device_count) {
    fprintf(stderr,
            "gpuGetMaxGflopsDeviceId() CUDA error:"
            " all devices have compute mode prohibited.\n");
    exit(EXIT_FAILURE);
  }

  return max_perf_device;
}
809
810// Initialization code to find the best CUDA Device
811inline int findCudaDevice(int argc, const char **argv) {
812 int devID = 0;
813
814 // If the command-line has a device number specified, use it
815 if (checkCmdLineFlag(argc, argv, "device")) {
816 devID = getCmdLineArgumentInt(argc, argv, "device=");
817
818 if (devID < 0) {
819 printf("Invalid command line parameter\n ");
820 exit(EXIT_FAILURE);
821 } else {
822 devID = gpuDeviceInit(devID);
823
824 if (devID < 0) {
825 printf("exiting...\n");
826 exit(EXIT_FAILURE);
827 }
828 }
829 } else {
830 // Otherwise pick the device with highest Gflops/s
831 devID = gpuGetMaxGflopsDeviceId();
832 checkCudaErrors(cudaSetDevice(devID));
833 int major = 0, minor = 0;
834 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
835 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
836 printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
837 devID, _ConvertSMVer2ArchName(major, minor), major, minor);
838
839 }
840
841 return devID;
842}
843
844inline int findIntegratedGPU() {
845 int current_device = 0;
846 int device_count = 0;
847 int devices_prohibited = 0;
848
849 checkCudaErrors(cudaGetDeviceCount(&device_count));
850
851 if (device_count == 0) {
852 fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
853 exit(EXIT_FAILURE);
854 }
855
856 // Find the integrated GPU which is compute capable
857 while (current_device < device_count) {
858 int computeMode = -1, integrated = -1;
859 checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
860 checkCudaErrors(cudaDeviceGetAttribute(&integrated, cudaDevAttrIntegrated, current_device));
861 // If GPU is integrated and is not running on Compute Mode prohibited,
862 // then cuda can map to GLES resource
863 if (integrated && (computeMode != cudaComputeModeProhibited)) {
864 checkCudaErrors(cudaSetDevice(current_device));
865
866 int major = 0, minor = 0;
867 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
868 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
869 printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
870 current_device, _ConvertSMVer2ArchName(major, minor), major, minor);
871
872 return current_device;
873 } else {
874 devices_prohibited++;
875 }
876
877 current_device++;
878 }
879
880 if (devices_prohibited == device_count) {
881 fprintf(stderr,
882 "CUDA error:"
883 " No GLES-CUDA Interop capable GPU found.\n");
884 exit(EXIT_FAILURE);
885 }
886
887 return -1;
888}
889
890// General check for CUDA GPU SM Capabilities
891inline bool checkCudaCapabilities(int major_version, int minor_version) {
892 int dev;
893 int major = 0, minor = 0;
894
895 checkCudaErrors(cudaGetDevice(&dev));
896 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, dev));
897 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, dev));
898
899 if ((major > major_version) ||
900 (major == major_version &&
901 minor >= minor_version)) {
902 printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev,
903 _ConvertSMVer2ArchName(major, minor), major, minor);
904 return true;
905 } else {
906 printf(
907 " No GPU device was found that can support "
908 "CUDA compute capability %d.%d.\n",
909 major_version, minor_version);
910 return false;
911 }
912}
913#endif
914
915 // end of CUDA Helper Functions
916
917#endif // COMMON_HELPER_CUDA_H_
Compatibility layer for CUDA and NVTX headers across different CUDA Toolkit versions.
#define T
Definition exp.cpp:237
int _ConvertSMVer2Cores(int major, int minor)
Definition helper_cuda.h:592
const char * _ConvertSMVer2ArchName(int major, int minor)
Definition helper_cuda.h:639
void check(T result, char const *const func, const char *const file, int const line)
Definition helper_cuda.h:532
int ftoi(float value)
Definition helper_cuda.h:586
bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
Definition helper_string.h:127
int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
Definition helper_string.h:180
const char * cufftGetErrorStringCompat(cufftResult_t error)
Provides a cross-CUDA-version string conversion for cuFFT error codes.
Definition cuda_compat.cpp:68
file(GLOB ATen_CORE_SRCS "*.cpp") set(ATen_CPU_SRCS $
Definition CMakeLists.txt:1
double func(const Vec3 &r, const std::vector< Vec3 > &R, const std::vector< double > &a, const std::vector< double > &n)
Definition test_partition.cpp:50