31#ifndef COMMON_HELPER_CUDA_H_
32#define COMMON_HELPER_CUDA_H_
54#ifdef __DRIVER_TYPES_H__
55static const char *_cudaGetErrorEnum(cudaError_t error) {
56 return cudaGetErrorName(error);
62static const char *_cudaGetErrorEnum(CUresult error) {
63 static char unknown[] =
"<unknown>";
64 const char *ret = NULL;
65 cuGetErrorName(error, &ret);
66 return ret ? ret : unknown;
72static const char *_cudaGetErrorEnum(cublasStatus_t error) {
74 case CUBLAS_STATUS_SUCCESS:
75 return "CUBLAS_STATUS_SUCCESS";
77 case CUBLAS_STATUS_NOT_INITIALIZED:
78 return "CUBLAS_STATUS_NOT_INITIALIZED";
80 case CUBLAS_STATUS_ALLOC_FAILED:
81 return "CUBLAS_STATUS_ALLOC_FAILED";
83 case CUBLAS_STATUS_INVALID_VALUE:
84 return "CUBLAS_STATUS_INVALID_VALUE";
86 case CUBLAS_STATUS_ARCH_MISMATCH:
87 return "CUBLAS_STATUS_ARCH_MISMATCH";
89 case CUBLAS_STATUS_MAPPING_ERROR:
90 return "CUBLAS_STATUS_MAPPING_ERROR";
92 case CUBLAS_STATUS_EXECUTION_FAILED:
93 return "CUBLAS_STATUS_EXECUTION_FAILED";
95 case CUBLAS_STATUS_INTERNAL_ERROR:
96 return "CUBLAS_STATUS_INTERNAL_ERROR";
98 case CUBLAS_STATUS_NOT_SUPPORTED:
99 return "CUBLAS_STATUS_NOT_SUPPORTED";
101 case CUBLAS_STATUS_LICENSE_ERROR:
102 return "CUBLAS_STATUS_LICENSE_ERROR";
111static const char *_cudaGetErrorEnum(cufftResult error) {
118static const char *_cudaGetErrorEnum(cusparseStatus_t error) {
120 case CUSPARSE_STATUS_SUCCESS:
121 return "CUSPARSE_STATUS_SUCCESS";
123 case CUSPARSE_STATUS_NOT_INITIALIZED:
124 return "CUSPARSE_STATUS_NOT_INITIALIZED";
126 case CUSPARSE_STATUS_ALLOC_FAILED:
127 return "CUSPARSE_STATUS_ALLOC_FAILED";
129 case CUSPARSE_STATUS_INVALID_VALUE:
130 return "CUSPARSE_STATUS_INVALID_VALUE";
132 case CUSPARSE_STATUS_ARCH_MISMATCH:
133 return "CUSPARSE_STATUS_ARCH_MISMATCH";
135 case CUSPARSE_STATUS_MAPPING_ERROR:
136 return "CUSPARSE_STATUS_MAPPING_ERROR";
138 case CUSPARSE_STATUS_EXECUTION_FAILED:
139 return "CUSPARSE_STATUS_EXECUTION_FAILED";
141 case CUSPARSE_STATUS_INTERNAL_ERROR:
142 return "CUSPARSE_STATUS_INTERNAL_ERROR";
144 case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
145 return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
152#ifdef CUSOLVER_COMMON_H_
154static const char *_cudaGetErrorEnum(cusolverStatus_t error) {
156 case CUSOLVER_STATUS_SUCCESS:
157 return "CUSOLVER_STATUS_SUCCESS";
158 case CUSOLVER_STATUS_NOT_INITIALIZED:
159 return "CUSOLVER_STATUS_NOT_INITIALIZED";
160 case CUSOLVER_STATUS_ALLOC_FAILED:
161 return "CUSOLVER_STATUS_ALLOC_FAILED";
162 case CUSOLVER_STATUS_INVALID_VALUE:
163 return "CUSOLVER_STATUS_INVALID_VALUE";
164 case CUSOLVER_STATUS_ARCH_MISMATCH:
165 return "CUSOLVER_STATUS_ARCH_MISMATCH";
166 case CUSOLVER_STATUS_MAPPING_ERROR:
167 return "CUSOLVER_STATUS_MAPPING_ERROR";
168 case CUSOLVER_STATUS_EXECUTION_FAILED:
169 return "CUSOLVER_STATUS_EXECUTION_FAILED";
170 case CUSOLVER_STATUS_INTERNAL_ERROR:
171 return "CUSOLVER_STATUS_INTERNAL_ERROR";
172 case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
173 return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
174 case CUSOLVER_STATUS_NOT_SUPPORTED:
175 return "CUSOLVER_STATUS_NOT_SUPPORTED ";
176 case CUSOLVER_STATUS_ZERO_PIVOT:
177 return "CUSOLVER_STATUS_ZERO_PIVOT";
178 case CUSOLVER_STATUS_INVALID_LICENSE:
179 return "CUSOLVER_STATUS_INVALID_LICENSE";
188static const char *_cudaGetErrorEnum(curandStatus_t error) {
190 case CURAND_STATUS_SUCCESS:
191 return "CURAND_STATUS_SUCCESS";
193 case CURAND_STATUS_VERSION_MISMATCH:
194 return "CURAND_STATUS_VERSION_MISMATCH";
196 case CURAND_STATUS_NOT_INITIALIZED:
197 return "CURAND_STATUS_NOT_INITIALIZED";
199 case CURAND_STATUS_ALLOCATION_FAILED:
200 return "CURAND_STATUS_ALLOCATION_FAILED";
202 case CURAND_STATUS_TYPE_ERROR:
203 return "CURAND_STATUS_TYPE_ERROR";
205 case CURAND_STATUS_OUT_OF_RANGE:
206 return "CURAND_STATUS_OUT_OF_RANGE";
208 case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
209 return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
211 case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
212 return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
214 case CURAND_STATUS_LAUNCH_FAILURE:
215 return "CURAND_STATUS_LAUNCH_FAILURE";
217 case CURAND_STATUS_PREEXISTING_FAILURE:
218 return "CURAND_STATUS_PREEXISTING_FAILURE";
220 case CURAND_STATUS_INITIALIZATION_FAILED:
221 return "CURAND_STATUS_INITIALIZATION_FAILED";
223 case CURAND_STATUS_ARCH_MISMATCH:
224 return "CURAND_STATUS_ARCH_MISMATCH";
226 case CURAND_STATUS_INTERNAL_ERROR:
227 return "CURAND_STATUS_INTERNAL_ERROR";
236static const char *_cudaGetErrorEnum(nvjpegStatus_t error) {
238 case NVJPEG_STATUS_SUCCESS:
239 return "NVJPEG_STATUS_SUCCESS";
241 case NVJPEG_STATUS_NOT_INITIALIZED:
242 return "NVJPEG_STATUS_NOT_INITIALIZED";
244 case NVJPEG_STATUS_INVALID_PARAMETER:
245 return "NVJPEG_STATUS_INVALID_PARAMETER";
247 case NVJPEG_STATUS_BAD_JPEG:
248 return "NVJPEG_STATUS_BAD_JPEG";
250 case NVJPEG_STATUS_JPEG_NOT_SUPPORTED:
251 return "NVJPEG_STATUS_JPEG_NOT_SUPPORTED";
253 case NVJPEG_STATUS_ALLOCATOR_FAILURE:
254 return "NVJPEG_STATUS_ALLOCATOR_FAILURE";
256 case NVJPEG_STATUS_EXECUTION_FAILED:
257 return "NVJPEG_STATUS_EXECUTION_FAILED";
259 case NVJPEG_STATUS_ARCH_MISMATCH:
260 return "NVJPEG_STATUS_ARCH_MISMATCH";
262 case NVJPEG_STATUS_INTERNAL_ERROR:
263 return "NVJPEG_STATUS_INTERNAL_ERROR";
272static const char *_cudaGetErrorEnum(NppStatus error) {
274 case NPP_NOT_SUPPORTED_MODE_ERROR:
275 return "NPP_NOT_SUPPORTED_MODE_ERROR";
277 case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
278 return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
280 case NPP_RESIZE_NO_OPERATION_ERROR:
281 return "NPP_RESIZE_NO_OPERATION_ERROR";
283 case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
284 return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
286#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
288 case NPP_BAD_ARG_ERROR:
289 return "NPP_BAD_ARGUMENT_ERROR";
291 case NPP_COEFF_ERROR:
292 return "NPP_COEFFICIENT_ERROR";
295 return "NPP_RECTANGLE_ERROR";
298 return "NPP_QUADRANGLE_ERROR";
300 case NPP_MEM_ALLOC_ERR:
301 return "NPP_MEMORY_ALLOCATION_ERROR";
303 case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
304 return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
306 case NPP_INVALID_INPUT:
307 return "NPP_INVALID_INPUT";
309 case NPP_POINTER_ERROR:
310 return "NPP_POINTER_ERROR";
313 return "NPP_WARNING";
315 case NPP_ODD_ROI_WARNING:
316 return "NPP_ODD_ROI_WARNING";
320 case NPP_BAD_ARGUMENT_ERROR:
321 return "NPP_BAD_ARGUMENT_ERROR";
323 case NPP_COEFFICIENT_ERROR:
324 return "NPP_COEFFICIENT_ERROR";
326 case NPP_RECTANGLE_ERROR:
327 return "NPP_RECTANGLE_ERROR";
329 case NPP_QUADRANGLE_ERROR:
330 return "NPP_QUADRANGLE_ERROR";
332 case NPP_MEMORY_ALLOCATION_ERR:
333 return "NPP_MEMORY_ALLOCATION_ERROR";
335 case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
336 return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
338 case NPP_INVALID_HOST_POINTER_ERROR:
339 return "NPP_INVALID_HOST_POINTER_ERROR";
341 case NPP_INVALID_DEVICE_POINTER_ERROR:
342 return "NPP_INVALID_DEVICE_POINTER_ERROR";
345 case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
346 return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
348 case NPP_TEXTURE_BIND_ERROR:
349 return "NPP_TEXTURE_BIND_ERROR";
351 case NPP_WRONG_INTERSECTION_ROI_ERROR:
352 return "NPP_WRONG_INTERSECTION_ROI_ERROR";
354 case NPP_NOT_EVEN_STEP_ERROR:
355 return "NPP_NOT_EVEN_STEP_ERROR";
357 case NPP_INTERPOLATION_ERROR:
358 return "NPP_INTERPOLATION_ERROR";
360 case NPP_RESIZE_FACTOR_ERROR:
361 return "NPP_RESIZE_FACTOR_ERROR";
363 case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
364 return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
366#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
368 case NPP_MEMFREE_ERR:
369 return "NPP_MEMFREE_ERR";
372 return "NPP_MEMSET_ERR";
375 return "NPP_MEMCPY_ERROR";
377 case NPP_MIRROR_FLIP_ERR:
378 return "NPP_MIRROR_FLIP_ERR";
381 case NPP_MEMFREE_ERROR:
382 return "NPP_MEMFREE_ERROR";
384 case NPP_MEMSET_ERROR:
385 return "NPP_MEMSET_ERROR";
387 case NPP_MEMCPY_ERROR:
388 return "NPP_MEMCPY_ERROR";
390 case NPP_MIRROR_FLIP_ERROR:
391 return "NPP_MIRROR_FLIP_ERROR";
394 case NPP_ALIGNMENT_ERROR:
395 return "NPP_ALIGNMENT_ERROR";
398 return "NPP_STEP_ERROR";
401 return "NPP_SIZE_ERROR";
403 case NPP_NULL_POINTER_ERROR:
404 return "NPP_NULL_POINTER_ERROR";
406 case NPP_CUDA_KERNEL_EXECUTION_ERROR:
407 return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
409 case NPP_NOT_IMPLEMENTED_ERROR:
410 return "NPP_NOT_IMPLEMENTED_ERROR";
416 return "NPP_SUCCESS";
418 case NPP_WRONG_INTERSECTION_QUAD_WARNING:
419 return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
421 case NPP_MISALIGNED_DST_ROI_WARNING:
422 return "NPP_MISALIGNED_DST_ROI_WARNING";
424 case NPP_AFFINE_QUAD_INCORRECT_WARNING:
425 return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
427 case NPP_DOUBLE_SIZE_WARNING:
428 return "NPP_DOUBLE_SIZE_WARNING";
430 case NPP_WRONG_INTERSECTION_ROI_WARNING:
431 return "NPP_WRONG_INTERSECTION_ROI_WARNING";
433#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
435 case NPP_LUT_PALETTE_BITSIZE_ERROR:
436 return "NPP_LUT_PALETTE_BITSIZE_ERROR";
438 case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
439 return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
441 case NPP_QUALITY_INDEX_ERROR:
442 return "NPP_QUALITY_INDEX_ERROR";
444 case NPP_CHANNEL_ORDER_ERROR:
445 return "NPP_CHANNEL_ORDER_ERROR";
447 case NPP_ZERO_MASK_VALUE_ERROR:
448 return "NPP_ZERO_MASK_VALUE_ERROR";
450 case NPP_NUMBER_OF_CHANNELS_ERROR:
451 return "NPP_NUMBER_OF_CHANNELS_ERROR";
454 return "NPP_COI_ERROR";
456 case NPP_DIVISOR_ERROR:
457 return "NPP_DIVISOR_ERROR";
459 case NPP_CHANNEL_ERROR:
460 return "NPP_CHANNEL_ERROR";
462 case NPP_STRIDE_ERROR:
463 return "NPP_STRIDE_ERROR";
465 case NPP_ANCHOR_ERROR:
466 return "NPP_ANCHOR_ERROR";
468 case NPP_MASK_SIZE_ERROR:
469 return "NPP_MASK_SIZE_ERROR";
471 case NPP_MOMENT_00_ZERO_ERROR:
472 return "NPP_MOMENT_00_ZERO_ERROR";
474 case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
475 return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
477 case NPP_THRESHOLD_ERROR:
478 return "NPP_THRESHOLD_ERROR";
480 case NPP_CONTEXT_MATCH_ERROR:
481 return "NPP_CONTEXT_MATCH_ERROR";
483 case NPP_FFT_FLAG_ERROR:
484 return "NPP_FFT_FLAG_ERROR";
486 case NPP_FFT_ORDER_ERROR:
487 return "NPP_FFT_ORDER_ERROR";
489 case NPP_SCALE_RANGE_ERROR:
490 return "NPP_SCALE_RANGE_ERROR";
492 case NPP_DATA_TYPE_ERROR:
493 return "NPP_DATA_TYPE_ERROR";
495 case NPP_OUT_OFF_RANGE_ERROR:
496 return "NPP_OUT_OFF_RANGE_ERROR";
498 case NPP_DIVIDE_BY_ZERO_ERROR:
499 return "NPP_DIVIDE_BY_ZERO_ERROR";
501 case NPP_RANGE_ERROR:
502 return "NPP_RANGE_ERROR";
504 case NPP_NO_MEMORY_ERROR:
505 return "NPP_NO_MEMORY_ERROR";
507 case NPP_ERROR_RESERVED:
508 return "NPP_ERROR_RESERVED";
510 case NPP_NO_OPERATION_WARNING:
511 return "NPP_NO_OPERATION_WARNING";
513 case NPP_DIVIDE_BY_ZERO_WARNING:
514 return "NPP_DIVIDE_BY_ZERO_WARNING";
517#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
519 case NPP_OVERFLOW_ERROR:
520 return "NPP_OVERFLOW_ERROR";
522 case NPP_CORRUPTED_DATA_ERROR:
523 return "NPP_CORRUPTED_DATA_ERROR";
535 fprintf(stderr,
"CUDA error at %s:%d code=%d(%s) \"%s\" \n",
file, line,
536 static_cast<unsigned int>(result), _cudaGetErrorEnum(result),
func);
541#ifdef __DRIVER_TYPES_H__
544#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
547#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
549inline void __getLastCudaError(
const char *errorMessage,
const char *
file,
551 cudaError_t err = cudaGetLastError();
553 if (cudaSuccess != err) {
555 "%s(%i) : getLastCudaError() CUDA error :"
557 file, line, errorMessage,
static_cast<int>(err),
558 cudaGetErrorString(err));
565#define printLastCudaError(msg) __printLastCudaError(msg, __FILE__, __LINE__)
567inline void __printLastCudaError(
const char *errorMessage,
const char *
file,
569 cudaError_t err = cudaGetLastError();
571 if (cudaSuccess != err) {
573 "%s(%i) : getLastCudaError() CUDA error :"
575 file, line, errorMessage,
static_cast<int>(err),
576 cudaGetErrorString(err));
// Yields the larger of `a` and `b` (`b` when they compare equal).
// Each argument is wrapped in parentheses so that an argument containing a
// lower-precedence operator (e.g. `x & mask`, `c ? p : q`) is compared and
// returned as a whole rather than being re-associated with the `>` or `?:`.
// NOTE: the selected argument is still evaluated twice, so do not pass
// expressions with side effects.
#define MAX(a, b) ((a) > (b) ? (a) : (b))
587 return (value >= 0 ?
static_cast<int>(value + 0.5)
588 :
static_cast<int>(value - 0.5));
601 sSMtoCores nGpuArchCoresPerSM[] = {
622 while (nGpuArchCoresPerSM[index].SM != -1) {
623 if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
624 return nGpuArchCoresPerSM[index].Cores;
633 "MapSMtoCores for SM %d.%d is undefined."
634 " Default to use %d Cores/SM\n",
635 major, minor, nGpuArchCoresPerSM[index - 1].Cores);
636 return nGpuArchCoresPerSM[index - 1].Cores;
648 sSMtoArchName nGpuArchNameSM[] = {
664 {-1,
"Graphics Device"}};
668 while (nGpuArchNameSM[index].SM != -1) {
669 if (nGpuArchNameSM[index].SM == ((major << 4) + minor)) {
670 return nGpuArchNameSM[index].name;
679 "MapSMtoArchName for SM %d.%d is undefined."
680 " Default to use %s\n",
681 major, minor, nGpuArchNameSM[index - 1].name);
682 return nGpuArchNameSM[index - 1].name;
686#ifdef __CUDA_RUNTIME_H__
688inline int gpuDeviceInit(
int devID) {
690 checkCudaErrors(cudaGetDeviceCount(&device_count));
692 if (device_count == 0) {
694 "gpuDeviceInit() CUDA error: "
695 "no devices supporting CUDA.\n");
703 if (devID > device_count - 1) {
704 fprintf(stderr,
"\n");
705 fprintf(stderr,
">> %d CUDA capable GPU device(s) detected. <<\n",
708 ">> gpuDeviceInit (-device=%d) is not a valid"
711 fprintf(stderr,
"\n");
715 int computeMode = -1, major = 0, minor = 0;
716 checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, devID));
717 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
718 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
719 if (computeMode == cudaComputeModeProhibited) {
721 "Error: device is running in <Compute Mode "
722 "Prohibited>, no threads can use cudaSetDevice().\n");
727 fprintf(stderr,
"gpuDeviceInit(): GPU device does not support CUDA.\n");
731 checkCudaErrors(cudaSetDevice(devID));
738inline int gpuGetMaxGflopsDeviceId() {
739 int current_device = 0, sm_per_multiproc = 0;
740 int max_perf_device = 0;
741 int device_count = 0;
742 int devices_prohibited = 0;
744 uint64_t max_compute_perf = 0;
745 checkCudaErrors(cudaGetDeviceCount(&device_count));
747 if (device_count == 0) {
749 "gpuGetMaxGflopsDeviceId() CUDA error:"
750 " no devices supporting CUDA.\n");
757 while (current_device < device_count) {
758 int computeMode = -1, major = 0, minor = 0;
759 checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
760 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
761 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
765 if (computeMode != cudaComputeModeProhibited) {
766 if (major == 9999 && minor == 9999) {
767 sm_per_multiproc = 1;
772 int multiProcessorCount = 0, clockRate = 0;
773 checkCudaErrors(cudaDeviceGetAttribute(&multiProcessorCount, cudaDevAttrMultiProcessorCount, current_device));
774 cudaError_t result = cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, current_device);
775 if (result != cudaSuccess) {
778 if(result == cudaErrorInvalidValue) {
782 fprintf(stderr,
"CUDA error at %s:%d code=%d(%s) \n", __FILE__, __LINE__,
783 static_cast<unsigned int>(result), _cudaGetErrorEnum(result));
787 uint64_t compute_perf = (uint64_t)multiProcessorCount * sm_per_multiproc * clockRate;
789 if (compute_perf > max_compute_perf) {
790 max_compute_perf = compute_perf;
791 max_perf_device = current_device;
794 devices_prohibited++;
800 if (devices_prohibited == device_count) {
802 "gpuGetMaxGflopsDeviceId() CUDA error:"
803 " all devices have compute mode prohibited.\n");
807 return max_perf_device;
811inline int findCudaDevice(
int argc,
const char **argv) {
819 printf(
"Invalid command line parameter\n ");
822 devID = gpuDeviceInit(devID);
825 printf(
"exiting...\n");
831 devID = gpuGetMaxGflopsDeviceId();
832 checkCudaErrors(cudaSetDevice(devID));
833 int major = 0, minor = 0;
834 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, devID));
835 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, devID));
836 printf(
"GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
844inline int findIntegratedGPU() {
845 int current_device = 0;
846 int device_count = 0;
847 int devices_prohibited = 0;
849 checkCudaErrors(cudaGetDeviceCount(&device_count));
851 if (device_count == 0) {
852 fprintf(stderr,
"CUDA error: no devices supporting CUDA.\n");
857 while (current_device < device_count) {
858 int computeMode = -1, integrated = -1;
859 checkCudaErrors(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, current_device));
860 checkCudaErrors(cudaDeviceGetAttribute(&integrated, cudaDevAttrIntegrated, current_device));
863 if (integrated && (computeMode != cudaComputeModeProhibited)) {
864 checkCudaErrors(cudaSetDevice(current_device));
866 int major = 0, minor = 0;
867 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, current_device));
868 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, current_device));
869 printf(
"GPU Device %d: \"%s\" with compute capability %d.%d\n\n",
872 return current_device;
874 devices_prohibited++;
880 if (devices_prohibited == device_count) {
883 " No GLES-CUDA Interop capable GPU found.\n");
891inline bool checkCudaCapabilities(
int major_version,
int minor_version) {
893 int major = 0, minor = 0;
895 checkCudaErrors(cudaGetDevice(&dev));
896 checkCudaErrors(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, dev));
897 checkCudaErrors(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, dev));
899 if ((major > major_version) ||
900 (major == major_version &&
901 minor >= minor_version)) {
902 printf(
" Device %d: <%16s >, Compute SM %d.%d detected\n", dev,
907 " No GPU device was found that can support "
908 "CUDA compute capability %d.%d.\n",
909 major_version, minor_version);
Compatibility layer for CUDA and NVTX headers across different CUDA Toolkit versions.
#define T
Definition exp.cpp:237
int _ConvertSMVer2Cores(int major, int minor)
Definition helper_cuda.h:592
const char * _ConvertSMVer2ArchName(int major, int minor)
Definition helper_cuda.h:639
void check(T result, char const *const func, const char *const file, int const line)
Definition helper_cuda.h:532
int ftoi(float value)
Definition helper_cuda.h:586
bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
Definition helper_string.h:127
int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
Definition helper_string.h:180
const char * cufftGetErrorStringCompat(cufftResult_t error)
Provides a cross-CUDA-version string conversion for cuFFT error codes.
Definition cuda_compat.cpp:68
file(GLOB ATen_CORE_SRCS "*.cpp") set(ATen_CPU_SRCS $
Definition CMakeLists.txt:1
double func(const Vec3 &r, const std::vector< Vec3 > &R, const std::vector< double > &a, const std::vector< double > &n)
Definition test_partition.cpp:50