31#ifndef COMMON_HELPER_CUDA_H_
32#define COMMON_HELPER_CUDA_H_
52#ifdef __DRIVER_TYPES_H__
53static const char *_cudaGetErrorEnum(cudaError_t error) {
54 return cudaGetErrorName(error);
60static const char *_cudaGetErrorEnum(CUresult error) {
61 static char unknown[] =
"<unknown>";
62 const char *ret = NULL;
63 cuGetErrorName(error, &ret);
64 return ret ? ret : unknown;
70static const char *_cudaGetErrorEnum(cublasStatus_t error) {
72 case CUBLAS_STATUS_SUCCESS:
73 return "CUBLAS_STATUS_SUCCESS";
75 case CUBLAS_STATUS_NOT_INITIALIZED:
76 return "CUBLAS_STATUS_NOT_INITIALIZED";
78 case CUBLAS_STATUS_ALLOC_FAILED:
79 return "CUBLAS_STATUS_ALLOC_FAILED";
81 case CUBLAS_STATUS_INVALID_VALUE:
82 return "CUBLAS_STATUS_INVALID_VALUE";
84 case CUBLAS_STATUS_ARCH_MISMATCH:
85 return "CUBLAS_STATUS_ARCH_MISMATCH";
87 case CUBLAS_STATUS_MAPPING_ERROR:
88 return "CUBLAS_STATUS_MAPPING_ERROR";
90 case CUBLAS_STATUS_EXECUTION_FAILED:
91 return "CUBLAS_STATUS_EXECUTION_FAILED";
93 case CUBLAS_STATUS_INTERNAL_ERROR:
94 return "CUBLAS_STATUS_INTERNAL_ERROR";
96 case CUBLAS_STATUS_NOT_SUPPORTED:
97 return "CUBLAS_STATUS_NOT_SUPPORTED";
99 case CUBLAS_STATUS_LICENSE_ERROR:
100 return "CUBLAS_STATUS_LICENSE_ERROR";
109static const char *_cudaGetErrorEnum(cufftResult error) {
112 return "CUFFT_SUCCESS";
114 case CUFFT_INVALID_PLAN:
115 return "CUFFT_INVALID_PLAN";
117 case CUFFT_ALLOC_FAILED:
118 return "CUFFT_ALLOC_FAILED";
120 case CUFFT_INVALID_TYPE:
121 return "CUFFT_INVALID_TYPE";
123 case CUFFT_INVALID_VALUE:
124 return "CUFFT_INVALID_VALUE";
126 case CUFFT_INTERNAL_ERROR:
127 return "CUFFT_INTERNAL_ERROR";
129 case CUFFT_EXEC_FAILED:
130 return "CUFFT_EXEC_FAILED";
132 case CUFFT_SETUP_FAILED:
133 return "CUFFT_SETUP_FAILED";
135 case CUFFT_INVALID_SIZE:
136 return "CUFFT_INVALID_SIZE";
138 case CUFFT_UNALIGNED_DATA:
139 return "CUFFT_UNALIGNED_DATA";
141 case CUFFT_INCOMPLETE_PARAMETER_LIST:
142 return "CUFFT_INCOMPLETE_PARAMETER_LIST";
144 case CUFFT_INVALID_DEVICE:
145 return "CUFFT_INVALID_DEVICE";
147 case CUFFT_PARSE_ERROR:
148 return "CUFFT_PARSE_ERROR";
150 case CUFFT_NO_WORKSPACE:
151 return "CUFFT_NO_WORKSPACE";
153 case CUFFT_NOT_IMPLEMENTED:
154 return "CUFFT_NOT_IMPLEMENTED";
156 case CUFFT_LICENSE_ERROR:
157 return "CUFFT_LICENSE_ERROR";
159 case CUFFT_NOT_SUPPORTED:
160 return "CUFFT_NOT_SUPPORTED";
169static const char *_cudaGetErrorEnum(cusparseStatus_t error) {
171 case CUSPARSE_STATUS_SUCCESS:
172 return "CUSPARSE_STATUS_SUCCESS";
174 case CUSPARSE_STATUS_NOT_INITIALIZED:
175 return "CUSPARSE_STATUS_NOT_INITIALIZED";
177 case CUSPARSE_STATUS_ALLOC_FAILED:
178 return "CUSPARSE_STATUS_ALLOC_FAILED";
180 case CUSPARSE_STATUS_INVALID_VALUE:
181 return "CUSPARSE_STATUS_INVALID_VALUE";
183 case CUSPARSE_STATUS_ARCH_MISMATCH:
184 return "CUSPARSE_STATUS_ARCH_MISMATCH";
186 case CUSPARSE_STATUS_MAPPING_ERROR:
187 return "CUSPARSE_STATUS_MAPPING_ERROR";
189 case CUSPARSE_STATUS_EXECUTION_FAILED:
190 return "CUSPARSE_STATUS_EXECUTION_FAILED";
192 case CUSPARSE_STATUS_INTERNAL_ERROR:
193 return "CUSPARSE_STATUS_INTERNAL_ERROR";
195 case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
196 return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
203#ifdef CUSOLVER_COMMON_H_
205static const char *_cudaGetErrorEnum(cusolverStatus_t error) {
207 case CUSOLVER_STATUS_SUCCESS:
208 return "CUSOLVER_STATUS_SUCCESS";
209 case CUSOLVER_STATUS_NOT_INITIALIZED:
210 return "CUSOLVER_STATUS_NOT_INITIALIZED";
211 case CUSOLVER_STATUS_ALLOC_FAILED:
212 return "CUSOLVER_STATUS_ALLOC_FAILED";
213 case CUSOLVER_STATUS_INVALID_VALUE:
214 return "CUSOLVER_STATUS_INVALID_VALUE";
215 case CUSOLVER_STATUS_ARCH_MISMATCH:
216 return "CUSOLVER_STATUS_ARCH_MISMATCH";
217 case CUSOLVER_STATUS_MAPPING_ERROR:
218 return "CUSOLVER_STATUS_MAPPING_ERROR";
219 case CUSOLVER_STATUS_EXECUTION_FAILED:
220 return "CUSOLVER_STATUS_EXECUTION_FAILED";
221 case CUSOLVER_STATUS_INTERNAL_ERROR:
222 return "CUSOLVER_STATUS_INTERNAL_ERROR";
223 case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
224 return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
225 case CUSOLVER_STATUS_NOT_SUPPORTED:
226 return "CUSOLVER_STATUS_NOT_SUPPORTED ";
227 case CUSOLVER_STATUS_ZERO_PIVOT:
228 return "CUSOLVER_STATUS_ZERO_PIVOT";
229 case CUSOLVER_STATUS_INVALID_LICENSE:
230 return "CUSOLVER_STATUS_INVALID_LICENSE";
239static const char *_cudaGetErrorEnum(curandStatus_t error) {
241 case CURAND_STATUS_SUCCESS:
242 return "CURAND_STATUS_SUCCESS";
244 case CURAND_STATUS_VERSION_MISMATCH:
245 return "CURAND_STATUS_VERSION_MISMATCH";
247 case CURAND_STATUS_NOT_INITIALIZED:
248 return "CURAND_STATUS_NOT_INITIALIZED";
250 case CURAND_STATUS_ALLOCATION_FAILED:
251 return "CURAND_STATUS_ALLOCATION_FAILED";
253 case CURAND_STATUS_TYPE_ERROR:
254 return "CURAND_STATUS_TYPE_ERROR";
256 case CURAND_STATUS_OUT_OF_RANGE:
257 return "CURAND_STATUS_OUT_OF_RANGE";
259 case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
260 return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
262 case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
263 return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
265 case CURAND_STATUS_LAUNCH_FAILURE:
266 return "CURAND_STATUS_LAUNCH_FAILURE";
268 case CURAND_STATUS_PREEXISTING_FAILURE:
269 return "CURAND_STATUS_PREEXISTING_FAILURE";
271 case CURAND_STATUS_INITIALIZATION_FAILED:
272 return "CURAND_STATUS_INITIALIZATION_FAILED";
274 case CURAND_STATUS_ARCH_MISMATCH:
275 return "CURAND_STATUS_ARCH_MISMATCH";
277 case CURAND_STATUS_INTERNAL_ERROR:
278 return "CURAND_STATUS_INTERNAL_ERROR";
287static const char *_cudaGetErrorEnum(nvjpegStatus_t error) {
289 case NVJPEG_STATUS_SUCCESS:
290 return "NVJPEG_STATUS_SUCCESS";
292 case NVJPEG_STATUS_NOT_INITIALIZED:
293 return "NVJPEG_STATUS_NOT_INITIALIZED";
295 case NVJPEG_STATUS_INVALID_PARAMETER:
296 return "NVJPEG_STATUS_INVALID_PARAMETER";
298 case NVJPEG_STATUS_BAD_JPEG:
299 return "NVJPEG_STATUS_BAD_JPEG";
301 case NVJPEG_STATUS_JPEG_NOT_SUPPORTED:
302 return "NVJPEG_STATUS_JPEG_NOT_SUPPORTED";
304 case NVJPEG_STATUS_ALLOCATOR_FAILURE:
305 return "NVJPEG_STATUS_ALLOCATOR_FAILURE";
307 case NVJPEG_STATUS_EXECUTION_FAILED:
308 return "NVJPEG_STATUS_EXECUTION_FAILED";
310 case NVJPEG_STATUS_ARCH_MISMATCH:
311 return "NVJPEG_STATUS_ARCH_MISMATCH";
313 case NVJPEG_STATUS_INTERNAL_ERROR:
314 return "NVJPEG_STATUS_INTERNAL_ERROR";
323static const char *_cudaGetErrorEnum(NppStatus error) {
325 case NPP_NOT_SUPPORTED_MODE_ERROR:
326 return "NPP_NOT_SUPPORTED_MODE_ERROR";
328 case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
329 return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
331 case NPP_RESIZE_NO_OPERATION_ERROR:
332 return "NPP_RESIZE_NO_OPERATION_ERROR";
334 case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
335 return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
337#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
339 case NPP_BAD_ARG_ERROR:
340 return "NPP_BAD_ARGUMENT_ERROR";
342 case NPP_COEFF_ERROR:
343 return "NPP_COEFFICIENT_ERROR";
346 return "NPP_RECTANGLE_ERROR";
349 return "NPP_QUADRANGLE_ERROR";
351 case NPP_MEM_ALLOC_ERR:
352 return "NPP_MEMORY_ALLOCATION_ERROR";
354 case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
355 return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
357 case NPP_INVALID_INPUT:
358 return "NPP_INVALID_INPUT";
360 case NPP_POINTER_ERROR:
361 return "NPP_POINTER_ERROR";
364 return "NPP_WARNING";
366 case NPP_ODD_ROI_WARNING:
367 return "NPP_ODD_ROI_WARNING";
371 case NPP_BAD_ARGUMENT_ERROR:
372 return "NPP_BAD_ARGUMENT_ERROR";
374 case NPP_COEFFICIENT_ERROR:
375 return "NPP_COEFFICIENT_ERROR";
377 case NPP_RECTANGLE_ERROR:
378 return "NPP_RECTANGLE_ERROR";
380 case NPP_QUADRANGLE_ERROR:
381 return "NPP_QUADRANGLE_ERROR";
383 case NPP_MEMORY_ALLOCATION_ERR:
384 return "NPP_MEMORY_ALLOCATION_ERROR";
386 case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
387 return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
389 case NPP_INVALID_HOST_POINTER_ERROR:
390 return "NPP_INVALID_HOST_POINTER_ERROR";
392 case NPP_INVALID_DEVICE_POINTER_ERROR:
393 return "NPP_INVALID_DEVICE_POINTER_ERROR";
396 case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
397 return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
399 case NPP_TEXTURE_BIND_ERROR:
400 return "NPP_TEXTURE_BIND_ERROR";
402 case NPP_WRONG_INTERSECTION_ROI_ERROR:
403 return "NPP_WRONG_INTERSECTION_ROI_ERROR";
405 case NPP_NOT_EVEN_STEP_ERROR:
406 return "NPP_NOT_EVEN_STEP_ERROR";
408 case NPP_INTERPOLATION_ERROR:
409 return "NPP_INTERPOLATION_ERROR";
411 case NPP_RESIZE_FACTOR_ERROR:
412 return "NPP_RESIZE_FACTOR_ERROR";
414 case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
415 return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
417#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
419 case NPP_MEMFREE_ERR:
420 return "NPP_MEMFREE_ERR";
423 return "NPP_MEMSET_ERR";
426 return "NPP_MEMCPY_ERROR";
428 case NPP_MIRROR_FLIP_ERR:
429 return "NPP_MIRROR_FLIP_ERR";
432 case NPP_MEMFREE_ERROR:
433 return "NPP_MEMFREE_ERROR";
435 case NPP_MEMSET_ERROR:
436 return "NPP_MEMSET_ERROR";
438 case NPP_MEMCPY_ERROR:
439 return "NPP_MEMCPY_ERROR";
441 case NPP_MIRROR_FLIP_ERROR:
442 return "NPP_MIRROR_FLIP_ERROR";
445 case NPP_ALIGNMENT_ERROR:
446 return "NPP_ALIGNMENT_ERROR";
449 return "NPP_STEP_ERROR";
452 return "NPP_SIZE_ERROR";
454 case NPP_NULL_POINTER_ERROR:
455 return "NPP_NULL_POINTER_ERROR";
457 case NPP_CUDA_KERNEL_EXECUTION_ERROR:
458 return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
460 case NPP_NOT_IMPLEMENTED_ERROR:
461 return "NPP_NOT_IMPLEMENTED_ERROR";
467 return "NPP_SUCCESS";
469 case NPP_WRONG_INTERSECTION_QUAD_WARNING:
470 return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
472 case NPP_MISALIGNED_DST_ROI_WARNING:
473 return "NPP_MISALIGNED_DST_ROI_WARNING";
475 case NPP_AFFINE_QUAD_INCORRECT_WARNING:
476 return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
478 case NPP_DOUBLE_SIZE_WARNING:
479 return "NPP_DOUBLE_SIZE_WARNING";
481 case NPP_WRONG_INTERSECTION_ROI_WARNING:
482 return "NPP_WRONG_INTERSECTION_ROI_WARNING";
484#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
486 case NPP_LUT_PALETTE_BITSIZE_ERROR:
487 return "NPP_LUT_PALETTE_BITSIZE_ERROR";
489 case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
490 return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
492 case NPP_QUALITY_INDEX_ERROR:
493 return "NPP_QUALITY_INDEX_ERROR";
495 case NPP_CHANNEL_ORDER_ERROR:
496 return "NPP_CHANNEL_ORDER_ERROR";
498 case NPP_ZERO_MASK_VALUE_ERROR:
499 return "NPP_ZERO_MASK_VALUE_ERROR";
501 case NPP_NUMBER_OF_CHANNELS_ERROR:
502 return "NPP_NUMBER_OF_CHANNELS_ERROR";
505 return "NPP_COI_ERROR";
507 case NPP_DIVISOR_ERROR:
508 return "NPP_DIVISOR_ERROR";
510 case NPP_CHANNEL_ERROR:
511 return "NPP_CHANNEL_ERROR";
513 case NPP_STRIDE_ERROR:
514 return "NPP_STRIDE_ERROR";
516 case NPP_ANCHOR_ERROR:
517 return "NPP_ANCHOR_ERROR";
519 case NPP_MASK_SIZE_ERROR:
520 return "NPP_MASK_SIZE_ERROR";
522 case NPP_MOMENT_00_ZERO_ERROR:
523 return "NPP_MOMENT_00_ZERO_ERROR";
525 case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
526 return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
528 case NPP_THRESHOLD_ERROR:
529 return "NPP_THRESHOLD_ERROR";
531 case NPP_CONTEXT_MATCH_ERROR:
532 return "NPP_CONTEXT_MATCH_ERROR";
534 case NPP_FFT_FLAG_ERROR:
535 return "NPP_FFT_FLAG_ERROR";
537 case NPP_FFT_ORDER_ERROR:
538 return "NPP_FFT_ORDER_ERROR";
540 case NPP_SCALE_RANGE_ERROR:
541 return "NPP_SCALE_RANGE_ERROR";
543 case NPP_DATA_TYPE_ERROR:
544 return "NPP_DATA_TYPE_ERROR";
546 case NPP_OUT_OFF_RANGE_ERROR:
547 return "NPP_OUT_OFF_RANGE_ERROR";
549 case NPP_DIVIDE_BY_ZERO_ERROR:
550 return "NPP_DIVIDE_BY_ZERO_ERROR";
552 case NPP_RANGE_ERROR:
553 return "NPP_RANGE_ERROR";
555 case NPP_NO_MEMORY_ERROR:
556 return "NPP_NO_MEMORY_ERROR";
558 case NPP_ERROR_RESERVED:
559 return "NPP_ERROR_RESERVED";
561 case NPP_NO_OPERATION_WARNING:
562 return "NPP_NO_OPERATION_WARNING";
564 case NPP_DIVIDE_BY_ZERO_WARNING:
565 return "NPP_DIVIDE_BY_ZERO_WARNING";
568#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
570 case NPP_OVERFLOW_ERROR:
571 return "NPP_OVERFLOW_ERROR";
573 case NPP_CORRUPTED_DATA_ERROR:
574 return "NPP_CORRUPTED_DATA_ERROR";
586 fprintf(stderr,
"CUDA error at %s:%d code=%d(%s) \"%s\" \n",
file, line,
587 static_cast<unsigned int>(result), _cudaGetErrorEnum(result),
func);
592#ifdef __DRIVER_TYPES_H__
// Convenience wrapper around check(): forwards the expression `val`, its
// stringified source text (#val), and the call site's __FILE__ / __LINE__.
595#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
// Forwards `msg` together with the call site's __FILE__ / __LINE__ to
// __getLastCudaError().
598#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
600inline void __getLastCudaError(
const char *errorMessage,
const char *
file,
602 cudaError_t err = cudaGetLastError();
604 if (cudaSuccess != err) {
606 "%s(%i) : getLastCudaError() CUDA error :"
608 file, line, errorMessage,
static_cast<int>(err),
609 cudaGetErrorString(err));
// Forwards `msg` together with the call site's __FILE__ / __LINE__ to
// __printLastCudaError().
616#define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__)
618inline void __printLastCudaError(
const char *errorMessage,
const char *
file,
620 cudaError_t err = cudaGetLastError();
622 if (cudaSuccess != err) {
624 "%s(%i) : getLastCudaError() CUDA error :"
626 file, line, errorMessage,
static_cast<int>(err),
627 cudaGetErrorString(err));
// Evaluates to the larger of `a` and `b` (yields `b` when they compare equal).
// Every use of an argument is parenthesized so that operands containing
// lower-precedence operators (e.g. `x & y`, `c ? p : q`) expand as intended.
// NOTE: the selected argument is evaluated twice, so avoid operands with side
// effects such as `i++`.
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
638 return (value >= 0 ?
static_cast<int>(value + 0.5)
639 :
static_cast<int>(value - 0.5));
652 sSMtoCores nGpuArchCoresPerSM[] = {
673 while (nGpuArchCoresPerSM[index].SM != -1) {
674 if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
675 return nGpuArchCoresPerSM[index].Cores;
684 "MapSMtoCores for SM %d.%d is undefined."
685 " Default to use %d Cores/SM\n",
686 major, minor, nGpuArchCoresPerSM[index - 1].Cores);
687 return nGpuArchCoresPerSM[index - 1].Cores;
699 sSMtoArchName nGpuArchNameSM[] = {
715 {-1,
"Graphics Device"}};
719 while (nGpuArchNameSM[index].SM != -1) {
720 if (nGpuArchNameSM[index].SM == ((major << 4) + minor)) {
721 return nGpuArchNameSM[index].name;
730 "MapSMtoArchName for SM %d.%d is undefined."
731 " Default to use %s\n",
732 major, minor, nGpuArchNameSM[index - 1].name);
733 return nGpuArchNameSM[index - 1].name;
737#ifdef __CUDA_RUNTIME_H__
739inline int gpuDeviceInit(
int devID) {
741 checkCudaErrors(cudaGetDeviceCount(&device_count));
743 if (device_count == 0) {
745 "gpuDeviceInit() CUDA error: "
746 "no devices supporting CUDA.\n");
754 if (devID > device_count - 1) {
755 fprintf(stderr,
"\n");
756 fprintf(stderr,
">> %d CUDA capable GPU device(s) detected. <<\n",
759 ">> gpuDeviceInit (-device=%d) is not a valid"
762 fprintf(stderr,
"\n");
766 int computeMode = -1, major = 0, minor = 0;
767 checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, devID));
768 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
769 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
770 if (computeMode == cudaComputeModeProhibited) {
772 "Error: device is running in <Compute Mode "
773 "Prohibited>, no threads can use cudaSetDevice().\n");
778 fprintf(stderr,
"gpuDeviceInit(): GPU device does not support CUDA.\n");
782 checkCudaErrors(cudaSetDevice(devID));
789inline int gpuGetMaxGflopsDeviceId() {
790 int current_device = 0, sm_per_multiproc = 0;
791 int max_perf_device = 0;
792 int device_count = 0;
793 int devices_prohibited = 0;
795 uint64_t max_compute_perf = 0;
796 checkCudaErrors(cudaGetDeviceCount(&device_count));
798 if (device_count == 0) {
800 "gpuGetMaxGflopsDeviceId() CUDA error:"
801 " no devices supporting CUDA.\n");
808 while (current_device < device_count) {
809 int computeMode = -1, major = 0, minor = 0;
810 checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
811 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
812 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
816 if (computeMode != cudaComputeModeProhibited) {
817 if (major == 9999 && minor == 9999) {
818 sm_per_multiproc = 1;
823 int multiProcessorCount = 0, clockRate = 0;
824 checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, current_device));
825 cudaError_t result = cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, current_device);
826 if (result != cudaSuccess) {
829 if(result == cudaErrorInvalidValue) {
833 fprintf(stderr,
"CUDA error at %s:%d code=%d(%s) \n", __FILE__, __LINE__,
834 static_cast<unsigned int>(result), _cudaGetErrorEnum(result));
838 uint64_t compute_perf = (uint64_t)multiProcessorCount * sm_per_multiproc * clockRate;
840 if (compute_perf > max_compute_perf) {
841 max_compute_perf = compute_perf;
842 max_perf_device = current_device;
845 devices_prohibited++;
851 if (devices_prohibited == device_count) {
853 "gpuGetMaxGflopsDeviceId() CUDA error:"
854 " all devices have compute mode prohibited.\n");
858 return max_perf_device;
862inline int findCudaDevice(
int argc,
const char **argv) {
870 printf(
"Invalid command line parameter\n ");
873 devID = gpuDeviceInit(devID);
876 printf(
"exiting...\n");
882 devID = gpuGetMaxGflopsDeviceId();
883 checkCudaErrors(cudaSetDevice(devID));
884 int major = 0, minor = 0;
885 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
886 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
887 printf(
"GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
895inline int findIntegratedGPU() {
896 int current_device = 0;
897 int device_count = 0;
898 int devices_prohibited = 0;
900 checkCudaErrors(cudaGetDeviceCount(&device_count));
902 if (device_count == 0) {
903 fprintf(stderr,
"CUDA error: no devices supporting CUDA.\n");
908 while (current_device < device_count) {
909 int computeMode = -1, integrated = -1;
910 checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
911 checkCudaErrors(cudaDeviceGetAttribute(&integrated, cudaDevAttrIntegrated, current_device));
914 if (integrated && (computeMode != cudaComputeModeProhibited)) {
915 checkCudaErrors(cudaSetDevice(current_device));
917 int major = 0, minor = 0;
918 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
919 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
920 printf(
"GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
923 return current_device;
925 devices_prohibited++;
931 if (devices_prohibited == device_count) {
934 " No GLES-CUDA Interop capable GPU found.\n");
942inline bool checkCudaCapabilities(
int major_version,
int minor_version) {
944 int major = 0, minor = 0;
946 checkCudaErrors(cudaGetDevice(&dev));
947 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, dev));
948 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, dev));
950 if ((major > major_version) ||
951 (major == major_version &&
952 minor >= minor_version)) {
953 printf(
" Device %d: <%16s >, Compute SM %d.%d detected\n", dev,
958 " No GPU device was found that can support "
959 "CUDA compute capability %d.%d.\n",
960 major_version, minor_version);
#define T
Definition exp.cpp:237
int _ConvertSMVer2Cores(int major, int minor)
Definition helper_cuda.h:643
const char * _ConvertSMVer2ArchName(int major, int minor)
Definition helper_cuda.h:690
void check(T result, char const *const func, const char *const file, int const line)
Definition helper_cuda.h:583
int ftoi(float value)
Definition helper_cuda.h:637
bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
Definition helper_string.h:127
int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
Definition helper_string.h:180
file(GLOB ATen_CORE_SRCS "*.cpp") set(ATen_CPU_SRCS $
Definition CMakeLists.txt:1
double func(const Vec3 &r, const std::vector< Vec3 > &R, const std::vector< double > &a, const std::vector< double > &n)
Definition test_partition.cpp:50