COVISE Core
helper_cuda.h
1/* This file is part of COVISE.
2
3 You can use it under the terms of the GNU Lesser General Public License
4 version 2.1 or later, see lgpl-2.1.txt.
5
6 * License: LGPL 2+ */
7
20// These are CUDA Helper functions for initialization and error checking
21
22#ifndef HELPER_CUDA_H
23#define HELPER_CUDA_H
24
25#pragma once
26
27#include <stdlib.h>
28#include <stdio.h>
29#include <string.h>
30
31#include "helper_string.h"
32
33/*
34inline void __ExitInTime(int seconds)
35{
36 fprintf(stdout, "> exiting in %d seconds: ", seconds);
37 fflush(stdout);
38 time_t t;
39 int count;
40
41 for (t=time(0)+seconds, count=seconds; time(0) < t; count--) {
42 fprintf(stdout, "%d...", count);
43#if defined(WIN32)
44 Sleep(1000);
45#else
46 sleep(1);
47#endif
48 }
49
50 fprintf(stdout,"done!\n\n");
51 fflush(stdout);
52}
53
54#define EXIT_TIME_DELAY 2
55
56inline void EXIT_DELAY(int return_code)
57{
58 __ExitInTime(EXIT_TIME_DELAY);
59 exit(return_code);
60}
61*/
62
63#ifndef EXIT_WAIVED
64#define EXIT_WAIVED 2
65#endif
66
67// Note: your SDK sample must include the proper header files. Please refer to the
68// CUDA examples for the CUDA headers that are needed, which may change depending
69// on which CUDA functions are used.
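/* For example (a minimal sketch, not part of this header): a sample built on the
   CUDA Runtime API would typically include the runtime header before this one, so
   that the __DRIVER_TYPES_H__ guard below enables the cudaError_t overloads:

       #include <cuda_runtime.h>   // defines __DRIVER_TYPES_H__
       #include "helper_cuda.h"

   A Driver API sample would instead include <cuda.h> (which defines __cuda_cuda_h__)
   before this header, enabling the CUresult overload. */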
70
71// CUDA Runtime error messages
72#ifdef __DRIVER_TYPES_H__
73static const char *_cudaGetErrorEnum(cudaError_t error)
74{
75 switch (error)
76 {
77 case cudaSuccess:
78 return "cudaSuccess";
79
80 case cudaErrorMissingConfiguration:
81 return "cudaErrorMissingConfiguration";
82
83 case cudaErrorMemoryAllocation:
84 return "cudaErrorMemoryAllocation";
85
86 case cudaErrorInitializationError:
87 return "cudaErrorInitializationError";
88
89 case cudaErrorLaunchFailure:
90 return "cudaErrorLaunchFailure";
91
92 case cudaErrorPriorLaunchFailure:
93 return "cudaErrorPriorLaunchFailure";
94
95 case cudaErrorLaunchTimeout:
96 return "cudaErrorLaunchTimeout";
97
98 case cudaErrorLaunchOutOfResources:
99 return "cudaErrorLaunchOutOfResources";
100
101 case cudaErrorInvalidDeviceFunction:
102 return "cudaErrorInvalidDeviceFunction";
103
104 case cudaErrorInvalidConfiguration:
105 return "cudaErrorInvalidConfiguration";
106
107 case cudaErrorInvalidDevice:
108 return "cudaErrorInvalidDevice";
109
110 case cudaErrorInvalidValue:
111 return "cudaErrorInvalidValue";
112
113 case cudaErrorInvalidPitchValue:
114 return "cudaErrorInvalidPitchValue";
115
116 case cudaErrorInvalidSymbol:
117 return "cudaErrorInvalidSymbol";
118
119 case cudaErrorMapBufferObjectFailed:
120 return "cudaErrorMapBufferObjectFailed";
121
122 case cudaErrorUnmapBufferObjectFailed:
123 return "cudaErrorUnmapBufferObjectFailed";
124
125 case cudaErrorInvalidHostPointer:
126 return "cudaErrorInvalidHostPointer";
127
128 case cudaErrorInvalidDevicePointer:
129 return "cudaErrorInvalidDevicePointer";
130
131 case cudaErrorInvalidTexture:
132 return "cudaErrorInvalidTexture";
133
134 case cudaErrorInvalidTextureBinding:
135 return "cudaErrorInvalidTextureBinding";
136
137 case cudaErrorInvalidChannelDescriptor:
138 return "cudaErrorInvalidChannelDescriptor";
139
140 case cudaErrorInvalidMemcpyDirection:
141 return "cudaErrorInvalidMemcpyDirection";
142
143 case cudaErrorAddressOfConstant:
144 return "cudaErrorAddressOfConstant";
145
146 case cudaErrorTextureFetchFailed:
147 return "cudaErrorTextureFetchFailed";
148
149 case cudaErrorTextureNotBound:
150 return "cudaErrorTextureNotBound";
151
152 case cudaErrorSynchronizationError:
153 return "cudaErrorSynchronizationError";
154
155 case cudaErrorInvalidFilterSetting:
156 return "cudaErrorInvalidFilterSetting";
157
158 case cudaErrorInvalidNormSetting:
159 return "cudaErrorInvalidNormSetting";
160
161 case cudaErrorMixedDeviceExecution:
162 return "cudaErrorMixedDeviceExecution";
163
164 case cudaErrorCudartUnloading:
165 return "cudaErrorCudartUnloading";
166
167 case cudaErrorUnknown:
168 return "cudaErrorUnknown";
169
170 case cudaErrorNotYetImplemented:
171 return "cudaErrorNotYetImplemented";
172
173 case cudaErrorMemoryValueTooLarge:
174 return "cudaErrorMemoryValueTooLarge";
175
176 case cudaErrorInvalidResourceHandle:
177 return "cudaErrorInvalidResourceHandle";
178
179 case cudaErrorNotReady:
180 return "cudaErrorNotReady";
181
182 case cudaErrorInsufficientDriver:
183 return "cudaErrorInsufficientDriver";
184
185 case cudaErrorSetOnActiveProcess:
186 return "cudaErrorSetOnActiveProcess";
187
188 case cudaErrorInvalidSurface:
189 return "cudaErrorInvalidSurface";
190
191 case cudaErrorNoDevice:
192 return "cudaErrorNoDevice";
193
194 case cudaErrorECCUncorrectable:
195 return "cudaErrorECCUncorrectable";
196
197 case cudaErrorSharedObjectSymbolNotFound:
198 return "cudaErrorSharedObjectSymbolNotFound";
199
200 case cudaErrorSharedObjectInitFailed:
201 return "cudaErrorSharedObjectInitFailed";
202
203 case cudaErrorUnsupportedLimit:
204 return "cudaErrorUnsupportedLimit";
205
206 case cudaErrorDuplicateVariableName:
207 return "cudaErrorDuplicateVariableName";
208
209 case cudaErrorDuplicateTextureName:
210 return "cudaErrorDuplicateTextureName";
211
212 case cudaErrorDuplicateSurfaceName:
213 return "cudaErrorDuplicateSurfaceName";
214
215 case cudaErrorDevicesUnavailable:
216 return "cudaErrorDevicesUnavailable";
217
218 case cudaErrorInvalidKernelImage:
219 return "cudaErrorInvalidKernelImage";
220
221 case cudaErrorNoKernelImageForDevice:
222 return "cudaErrorNoKernelImageForDevice";
223
224 case cudaErrorIncompatibleDriverContext:
225 return "cudaErrorIncompatibleDriverContext";
226
227 case cudaErrorPeerAccessAlreadyEnabled:
228 return "cudaErrorPeerAccessAlreadyEnabled";
229
230 case cudaErrorPeerAccessNotEnabled:
231 return "cudaErrorPeerAccessNotEnabled";
232
233 case cudaErrorDeviceAlreadyInUse:
234 return "cudaErrorDeviceAlreadyInUse";
235
236 case cudaErrorProfilerDisabled:
237 return "cudaErrorProfilerDisabled";
238
239 case cudaErrorProfilerNotInitialized:
240 return "cudaErrorProfilerNotInitialized";
241
242 case cudaErrorProfilerAlreadyStarted:
243 return "cudaErrorProfilerAlreadyStarted";
244
245 case cudaErrorProfilerAlreadyStopped:
246 return "cudaErrorProfilerAlreadyStopped";
247
248#if __CUDA_API_VERSION >= 0x4000
249
250 case cudaErrorAssert:
251 return "cudaErrorAssert";
252
253 case cudaErrorTooManyPeers:
254 return "cudaErrorTooManyPeers";
255
256 case cudaErrorHostMemoryAlreadyRegistered:
257 return "cudaErrorHostMemoryAlreadyRegistered";
258
259 case cudaErrorHostMemoryNotRegistered:
260 return "cudaErrorHostMemoryNotRegistered";
261#endif
262
263 case cudaErrorStartupFailure:
264 return "cudaErrorStartupFailure";
265
266 case cudaErrorApiFailureBase:
267 return "cudaErrorApiFailureBase";
268
269 default:
270 break;
271 }
272
273 return "<unknown>";
274}
275#endif
276
277#ifdef __cuda_cuda_h__
278// CUDA Driver API errors
279static const char *_cudaGetErrorEnum(CUresult error)
280{
281 switch (error)
282 {
283 case CUDA_SUCCESS:
284 return "CUDA_SUCCESS";
285
286 case CUDA_ERROR_INVALID_VALUE:
287 return "CUDA_ERROR_INVALID_VALUE";
288
289 case CUDA_ERROR_OUT_OF_MEMORY:
290 return "CUDA_ERROR_OUT_OF_MEMORY";
291
292 case CUDA_ERROR_NOT_INITIALIZED:
293 return "CUDA_ERROR_NOT_INITIALIZED";
294
295 case CUDA_ERROR_DEINITIALIZED:
296 return "CUDA_ERROR_DEINITIALIZED";
297
298 case CUDA_ERROR_PROFILER_DISABLED:
299 return "CUDA_ERROR_PROFILER_DISABLED";
300
301 case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
302 return "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
303
304 case CUDA_ERROR_PROFILER_ALREADY_STARTED:
305 return "CUDA_ERROR_PROFILER_ALREADY_STARTED";
306
307 case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
308 return "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
309
310 case CUDA_ERROR_NO_DEVICE:
311 return "CUDA_ERROR_NO_DEVICE";
312
313 case CUDA_ERROR_INVALID_DEVICE:
314 return "CUDA_ERROR_INVALID_DEVICE";
315
316 case CUDA_ERROR_INVALID_IMAGE:
317 return "CUDA_ERROR_INVALID_IMAGE";
318
319 case CUDA_ERROR_INVALID_CONTEXT:
320 return "CUDA_ERROR_INVALID_CONTEXT";
321
322 case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
323 return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
324
325 case CUDA_ERROR_MAP_FAILED:
326 return "CUDA_ERROR_MAP_FAILED";
327
328 case CUDA_ERROR_UNMAP_FAILED:
329 return "CUDA_ERROR_UNMAP_FAILED";
330
331 case CUDA_ERROR_ARRAY_IS_MAPPED:
332 return "CUDA_ERROR_ARRAY_IS_MAPPED";
333
334 case CUDA_ERROR_ALREADY_MAPPED:
335 return "CUDA_ERROR_ALREADY_MAPPED";
336
337 case CUDA_ERROR_NO_BINARY_FOR_GPU:
338 return "CUDA_ERROR_NO_BINARY_FOR_GPU";
339
340 case CUDA_ERROR_ALREADY_ACQUIRED:
341 return "CUDA_ERROR_ALREADY_ACQUIRED";
342
343 case CUDA_ERROR_NOT_MAPPED:
344 return "CUDA_ERROR_NOT_MAPPED";
345
346 case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
347 return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
348
349 case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
350 return "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
351
352 case CUDA_ERROR_ECC_UNCORRECTABLE:
353 return "CUDA_ERROR_ECC_UNCORRECTABLE";
354
355 case CUDA_ERROR_UNSUPPORTED_LIMIT:
356 return "CUDA_ERROR_UNSUPPORTED_LIMIT";
357
358 case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
359 return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
360
361 case CUDA_ERROR_INVALID_SOURCE:
362 return "CUDA_ERROR_INVALID_SOURCE";
363
364 case CUDA_ERROR_FILE_NOT_FOUND:
365 return "CUDA_ERROR_FILE_NOT_FOUND";
366
367 case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
368 return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
369
370 case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
371 return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
372
373 case CUDA_ERROR_OPERATING_SYSTEM:
374 return "CUDA_ERROR_OPERATING_SYSTEM";
375
376 case CUDA_ERROR_INVALID_HANDLE:
377 return "CUDA_ERROR_INVALID_HANDLE";
378
379 case CUDA_ERROR_NOT_FOUND:
380 return "CUDA_ERROR_NOT_FOUND";
381
382 case CUDA_ERROR_NOT_READY:
383 return "CUDA_ERROR_NOT_READY";
384
385 case CUDA_ERROR_LAUNCH_FAILED:
386 return "CUDA_ERROR_LAUNCH_FAILED";
387
388 case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
389 return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
390
391 case CUDA_ERROR_LAUNCH_TIMEOUT:
392 return "CUDA_ERROR_LAUNCH_TIMEOUT";
393
394 case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:
395 return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
396
397 case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
398 return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
399
400 case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
401 return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
402
403 case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
404 return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
405
406 case CUDA_ERROR_CONTEXT_IS_DESTROYED:
407 return "CUDA_ERROR_CONTEXT_IS_DESTROYED";
408
409 case CUDA_ERROR_ASSERT:
410 return "CUDA_ERROR_ASSERT";
411
412 case CUDA_ERROR_TOO_MANY_PEERS:
413 return "CUDA_ERROR_TOO_MANY_PEERS";
414
415 case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
416 return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
417
418 case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
419 return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
420
421 case CUDA_ERROR_UNKNOWN:
422 return "CUDA_ERROR_UNKNOWN";
423
424 case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
425 return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
426
427 case CUDA_ERROR_INVALID_PTX:
428 return "CUDA_ERROR_INVALID_PTX";
429
430#if __CUDA_API_VERSION >= 6500
431 case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
432 return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
433#endif
434
435 case CUDA_ERROR_ILLEGAL_ADDRESS:
436 return "CUDA_ERROR_ILLEGAL_ADDRESS";
437
438 case CUDA_ERROR_HARDWARE_STACK_ERROR:
439 return "CUDA_ERROR_HARDWARE_STACK_ERROR";
440
441 case CUDA_ERROR_ILLEGAL_INSTRUCTION:
442 return "CUDA_ERROR_ILLEGAL_INSTRUCTION";
443
444 case CUDA_ERROR_MISALIGNED_ADDRESS:
445 return "CUDA_ERROR_MISALIGNED_ADDRESS";
446
447 case CUDA_ERROR_INVALID_ADDRESS_SPACE:
448 return "CUDA_ERROR_INVALID_ADDRESS_SPACE";
449
450 case CUDA_ERROR_INVALID_PC:
451 return "CUDA_ERROR_INVALID_PC";
452
453 case CUDA_ERROR_NOT_PERMITTED:
454 return "CUDA_ERROR_NOT_PERMITTED";
455
456 case CUDA_ERROR_NOT_SUPPORTED:
457 return "CUDA_ERROR_NOT_SUPPORTED";
458 }
459
460 return "<unknown>";
461}
462#endif
463
464#ifdef CUBLAS_API_H_
465// cuBLAS API errors
466static const char *_cudaGetErrorEnum(cublasStatus_t error)
467{
468 switch (error)
469 {
470 case CUBLAS_STATUS_SUCCESS:
471 return "CUBLAS_STATUS_SUCCESS";
472
473 case CUBLAS_STATUS_NOT_INITIALIZED:
474 return "CUBLAS_STATUS_NOT_INITIALIZED";
475
476 case CUBLAS_STATUS_ALLOC_FAILED:
477 return "CUBLAS_STATUS_ALLOC_FAILED";
478
479 case CUBLAS_STATUS_INVALID_VALUE:
480 return "CUBLAS_STATUS_INVALID_VALUE";
481
482 case CUBLAS_STATUS_ARCH_MISMATCH:
483 return "CUBLAS_STATUS_ARCH_MISMATCH";
484
485 case CUBLAS_STATUS_MAPPING_ERROR:
486 return "CUBLAS_STATUS_MAPPING_ERROR";
487
488 case CUBLAS_STATUS_EXECUTION_FAILED:
489 return "CUBLAS_STATUS_EXECUTION_FAILED";
490
491 case CUBLAS_STATUS_INTERNAL_ERROR:
492 return "CUBLAS_STATUS_INTERNAL_ERROR";
493 }
494
495 return "<unknown>";
496}
497#endif
498
499#ifdef _CUFFT_H_
500// cuFFT API errors
501static const char *_cudaGetErrorEnum(cufftResult error)
502{
503 switch (error)
504 {
505 case CUFFT_SUCCESS:
506 return "CUFFT_SUCCESS";
507
508 case CUFFT_INVALID_PLAN:
509 return "CUFFT_INVALID_PLAN";
510
511 case CUFFT_ALLOC_FAILED:
512 return "CUFFT_ALLOC_FAILED";
513
514 case CUFFT_INVALID_TYPE:
515 return "CUFFT_INVALID_TYPE";
516
517 case CUFFT_INVALID_VALUE:
518 return "CUFFT_INVALID_VALUE";
519
520 case CUFFT_INTERNAL_ERROR:
521 return "CUFFT_INTERNAL_ERROR";
522
523 case CUFFT_EXEC_FAILED:
524 return "CUFFT_EXEC_FAILED";
525
526 case CUFFT_SETUP_FAILED:
527 return "CUFFT_SETUP_FAILED";
528
529 case CUFFT_INVALID_SIZE:
530 return "CUFFT_INVALID_SIZE";
531
532 case CUFFT_UNALIGNED_DATA:
533 return "CUFFT_UNALIGNED_DATA";
534 }
535
536 return "<unknown>";
537}
538#endif
539
540#ifdef CUSPARSEAPI
541// cuSPARSE API errors
542static const char *_cudaGetErrorEnum(cusparseStatus_t error)
543{
544 switch (error)
545 {
546 case CUSPARSE_STATUS_SUCCESS:
547 return "CUSPARSE_STATUS_SUCCESS";
548
549 case CUSPARSE_STATUS_NOT_INITIALIZED:
550 return "CUSPARSE_STATUS_NOT_INITIALIZED";
551
552 case CUSPARSE_STATUS_ALLOC_FAILED:
553 return "CUSPARSE_STATUS_ALLOC_FAILED";
554
555 case CUSPARSE_STATUS_INVALID_VALUE:
556 return "CUSPARSE_STATUS_INVALID_VALUE";
557
558 case CUSPARSE_STATUS_ARCH_MISMATCH:
559 return "CUSPARSE_STATUS_ARCH_MISMATCH";
560
561 case CUSPARSE_STATUS_MAPPING_ERROR:
562 return "CUSPARSE_STATUS_MAPPING_ERROR";
563
564 case CUSPARSE_STATUS_EXECUTION_FAILED:
565 return "CUSPARSE_STATUS_EXECUTION_FAILED";
566
567 case CUSPARSE_STATUS_INTERNAL_ERROR:
568 return "CUSPARSE_STATUS_INTERNAL_ERROR";
569
570 case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
571 return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
572 }
573
574 return "<unknown>";
575}
576#endif
577
578#ifdef CURAND_H_
579// cuRAND API errors
580static const char *_cudaGetErrorEnum(curandStatus_t error)
581{
582 switch (error)
583 {
584 case CURAND_STATUS_SUCCESS:
585 return "CURAND_STATUS_SUCCESS";
586
587 case CURAND_STATUS_VERSION_MISMATCH:
588 return "CURAND_STATUS_VERSION_MISMATCH";
589
590 case CURAND_STATUS_NOT_INITIALIZED:
591 return "CURAND_STATUS_NOT_INITIALIZED";
592
593 case CURAND_STATUS_ALLOCATION_FAILED:
594 return "CURAND_STATUS_ALLOCATION_FAILED";
595
596 case CURAND_STATUS_TYPE_ERROR:
597 return "CURAND_STATUS_TYPE_ERROR";
598
599 case CURAND_STATUS_OUT_OF_RANGE:
600 return "CURAND_STATUS_OUT_OF_RANGE";
601
602 case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
603 return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
604
605 case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
606 return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
607
608 case CURAND_STATUS_LAUNCH_FAILURE:
609 return "CURAND_STATUS_LAUNCH_FAILURE";
610
611 case CURAND_STATUS_PREEXISTING_FAILURE:
612 return "CURAND_STATUS_PREEXISTING_FAILURE";
613
614 case CURAND_STATUS_INITIALIZATION_FAILED:
615 return "CURAND_STATUS_INITIALIZATION_FAILED";
616
617 case CURAND_STATUS_ARCH_MISMATCH:
618 return "CURAND_STATUS_ARCH_MISMATCH";
619
620 case CURAND_STATUS_INTERNAL_ERROR:
621 return "CURAND_STATUS_INTERNAL_ERROR";
622 }
623
624 return "<unknown>";
625}
626#endif
627
628#ifdef NV_NPPIDEFS_H
629// NPP API errors
630static const char *_cudaGetErrorEnum(NppStatus error)
631{
632 switch (error)
633 {
634 case NPP_NOT_SUPPORTED_MODE_ERROR:
635 return "NPP_NOT_SUPPORTED_MODE_ERROR";
636
637 case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
638 return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
639
640 case NPP_RESIZE_NO_OPERATION_ERROR:
641 return "NPP_RESIZE_NO_OPERATION_ERROR";
642
643 case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
644 return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
645
646#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
647
648 case NPP_BAD_ARG_ERROR:
649 return "NPP_BAD_ARGUMENT_ERROR";
650
651 case NPP_COEFF_ERROR:
652 return "NPP_COEFFICIENT_ERROR";
653
654 case NPP_RECT_ERROR:
655 return "NPP_RECTANGLE_ERROR";
656
657 case NPP_QUAD_ERROR:
658 return "NPP_QUADRANGLE_ERROR";
659
660 case NPP_MEM_ALLOC_ERR:
661 return "NPP_MEMORY_ALLOCATION_ERROR";
662
663 case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
664 return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
665
666 case NPP_INVALID_INPUT:
667 return "NPP_INVALID_INPUT";
668
669 case NPP_POINTER_ERROR:
670 return "NPP_POINTER_ERROR";
671
672 case NPP_WARNING:
673 return "NPP_WARNING";
674
675 case NPP_ODD_ROI_WARNING:
676 return "NPP_ODD_ROI_WARNING";
677#else
678
679 // These are for CUDA 5.5 or higher
680 case NPP_BAD_ARGUMENT_ERROR:
681 return "NPP_BAD_ARGUMENT_ERROR";
682
683 case NPP_COEFFICIENT_ERROR:
684 return "NPP_COEFFICIENT_ERROR";
685
686 case NPP_RECTANGLE_ERROR:
687 return "NPP_RECTANGLE_ERROR";
688
689 case NPP_QUADRANGLE_ERROR:
690 return "NPP_QUADRANGLE_ERROR";
691
692 case NPP_MEMORY_ALLOCATION_ERR:
693 return "NPP_MEMORY_ALLOCATION_ERROR";
694
695 case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
696 return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
697
698 case NPP_INVALID_HOST_POINTER_ERROR:
699 return "NPP_INVALID_HOST_POINTER_ERROR";
700
701 case NPP_INVALID_DEVICE_POINTER_ERROR:
702 return "NPP_INVALID_DEVICE_POINTER_ERROR";
703#endif
704
705 case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
706 return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
707
708 case NPP_TEXTURE_BIND_ERROR:
709 return "NPP_TEXTURE_BIND_ERROR";
710
711 case NPP_WRONG_INTERSECTION_ROI_ERROR:
712 return "NPP_WRONG_INTERSECTION_ROI_ERROR";
713
714 case NPP_NOT_EVEN_STEP_ERROR:
715 return "NPP_NOT_EVEN_STEP_ERROR";
716
717 case NPP_INTERPOLATION_ERROR:
718 return "NPP_INTERPOLATION_ERROR";
719
720 case NPP_RESIZE_FACTOR_ERROR:
721 return "NPP_RESIZE_FACTOR_ERROR";
722
723 case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
724 return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
725
726#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
727
728 case NPP_MEMFREE_ERR:
729 return "NPP_MEMFREE_ERR";
730
731 case NPP_MEMSET_ERR:
732 return "NPP_MEMSET_ERR";
733
734 case NPP_MEMCPY_ERR:
735 return "NPP_MEMCPY_ERROR";
736
737 case NPP_MIRROR_FLIP_ERR:
738 return "NPP_MIRROR_FLIP_ERR";
739#else
740
741 case NPP_MEMFREE_ERROR:
742 return "NPP_MEMFREE_ERROR";
743
744 case NPP_MEMSET_ERROR:
745 return "NPP_MEMSET_ERROR";
746
747 case NPP_MEMCPY_ERROR:
748 return "NPP_MEMCPY_ERROR";
749
750 case NPP_MIRROR_FLIP_ERROR:
751 return "NPP_MIRROR_FLIP_ERROR";
752#endif
753
754 case NPP_ALIGNMENT_ERROR:
755 return "NPP_ALIGNMENT_ERROR";
756
757 case NPP_STEP_ERROR:
758 return "NPP_STEP_ERROR";
759
760 case NPP_SIZE_ERROR:
761 return "NPP_SIZE_ERROR";
762
763 case NPP_NULL_POINTER_ERROR:
764 return "NPP_NULL_POINTER_ERROR";
765
766 case NPP_CUDA_KERNEL_EXECUTION_ERROR:
767 return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
768
769 case NPP_NOT_IMPLEMENTED_ERROR:
770 return "NPP_NOT_IMPLEMENTED_ERROR";
771
772 case NPP_ERROR:
773 return "NPP_ERROR";
774
775 case NPP_SUCCESS:
776 return "NPP_SUCCESS";
777
778 case NPP_WRONG_INTERSECTION_QUAD_WARNING:
779 return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
780
781 case NPP_MISALIGNED_DST_ROI_WARNING:
782 return "NPP_MISALIGNED_DST_ROI_WARNING";
783
784 case NPP_AFFINE_QUAD_INCORRECT_WARNING:
785 return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
786
787 case NPP_DOUBLE_SIZE_WARNING:
788 return "NPP_DOUBLE_SIZE_WARNING";
789
790 case NPP_WRONG_INTERSECTION_ROI_WARNING:
791 return "NPP_WRONG_INTERSECTION_ROI_WARNING";
792 }
793
794 return "<unknown>";
795}
796#endif
797
798#ifdef __DRIVER_TYPES_H__
799#ifndef DEVICE_RESET
800#define DEVICE_RESET cudaDeviceReset();
801#endif
802#else
803#ifndef DEVICE_RESET
804#define DEVICE_RESET
805#endif
806#endif
807
808template <typename T>
809void check(T result, char const *const func, const char *const file, int const line)
810{
811 if (result)
812 {
813 fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
814 file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
815 DEVICE_RESET
816 // Make sure we call CUDA Device Reset before exiting
817 exit(EXIT_FAILURE);
818 }
819}
820
821#ifdef __DRIVER_TYPES_H__
822// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
823#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
824
825// This will output the proper error string when calling cudaGetLastError
826#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
827
828inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
829{
830 cudaError_t err = cudaGetLastError();
831
832 if (cudaSuccess != err)
833 {
834 fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
835 file, line, errorMessage, (int)err, cudaGetErrorString(err));
836 DEVICE_RESET
837 exit(EXIT_FAILURE);
838 }
839}
840#endif
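/* Usage sketch (illustrative only; d_buf and myKernel are hypothetical names, and a
   real program would define the kernel and launch configuration itself):

       float *d_buf = NULL;
       checkCudaErrors(cudaMalloc((void **)&d_buf, 1024 * sizeof(float)));

       dim3 grid(64), block(256);
       myKernel<<<grid, block>>>(d_buf);
       getLastCudaError("myKernel launch failed");

       checkCudaErrors(cudaFree(d_buf));

   Any result other than cudaSuccess prints the file, line, and decoded error string,
   then exits. Because _cudaGetErrorEnum is overloaded, the same checkCudaErrors macro
   also accepts cuBLAS, cuFFT, cuSPARSE, and cuRAND status codes when their headers
   are included before this one. */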
841
842#ifndef MAX
843#define MAX(a, b) (a > b ? a : b)
844#endif
845
846// Beginning of GPU Architecture definitions
847inline int _ConvertSMVer2Cores(int major, int minor)
848{
849 // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM)
850 typedef struct
851 {
852 int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
853 int Cores;
854 } sSMtoCores;
855
856 sSMtoCores nGpuArchCoresPerSM[] = {
857 { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class
858 { 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class
859 { 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class
860 { 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class
861 { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
862 { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
863 { 0x30, 192 }, // Kepler Generation (SM 3.0) GK10x class
864 { 0x32, 192 }, // Kepler Generation (SM 3.2) GK10x class
865 { 0x35, 192 }, // Kepler Generation (SM 3.5) GK11x class
866 { 0x50, 128 }, // Maxwell Generation (SM 5.0) GM10x class
867 { -1, -1 }
868 };
869
870 int index = 0;
871
872 while (nGpuArchCoresPerSM[index].SM != -1)
873 {
874 if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
875 {
876 return nGpuArchCoresPerSM[index].Cores;
877 }
878
879 index++;
880 }
881
882 // If we don't find the value, fall back to a known architecture's cores/SM so execution can continue
883 printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[7].Cores);
884 return nGpuArchCoresPerSM[7].Cores;
885}
886// end of GPU Architecture definitions
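/* Example (a minimal sketch; deviceProp is assumed to already hold the properties of
   the active device, e.g. via cudaGetDeviceProperties):

       int coresPerSM = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
       int totalCores = deviceProp.multiProcessorCount * coresPerSM;
       printf("SM %d.%d: %d multiprocessors, %d CUDA cores\n",
              deviceProp.major, deviceProp.minor,
              deviceProp.multiProcessorCount, totalCores);
*/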
887
888#ifdef __CUDA_RUNTIME_H__
889// General GPU Device CUDA Initialization
890inline int gpuDeviceInit(int devID)
891{
892 int device_count;
893 checkCudaErrors(cudaGetDeviceCount(&device_count));
894
895 if (device_count == 0)
896 {
897 fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
898 exit(EXIT_FAILURE);
899 }
900
901 if (devID < 0)
902 {
903 devID = 0;
904 }
905
906 if (devID > device_count - 1)
907 {
908 fprintf(stderr, "\n");
909 fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count);
910 fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID);
911 fprintf(stderr, "\n");
912 return -devID;
913 }
914
915 cudaDeviceProp deviceProp;
916 checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
917
918 if (deviceProp.computeMode == cudaComputeModeProhibited)
919 {
920 fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
921 return -1;
922 }
923
924 if (deviceProp.major < 1)
925 {
926 fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
927 exit(EXIT_FAILURE);
928 }
929
930 checkCudaErrors(cudaSetDevice(devID));
931 printf("gpuDeviceInit() CUDA Device [%d]: \"%s\"\n", devID, deviceProp.name);
932
933 return devID;
934}
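/* Usage sketch (hypothetical; selects device 1 explicitly and aborts if it cannot be
   used):

       int dev = gpuDeviceInit(1);
       if (dev < 0)
       {
           fprintf(stderr, "gpuDeviceInit failed, exiting...\n");
           exit(EXIT_FAILURE);
       }
*/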
935
936// This function returns the best GPU (with maximum GFLOPS)
937inline int gpuGetMaxGflopsDeviceId()
938{
939 int current_device = 0, sm_per_multiproc = 0;
940 int max_perf_device = 0;
941 int device_count = 0, best_SM_arch = 0;
942
943 unsigned long long max_compute_perf = 0;
944 cudaDeviceProp deviceProp;
946
947 checkCudaErrors(cudaGetDeviceCount(&device_count));
948
949 if (device_count == 0)
950 {
951 fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n");
952 exit(EXIT_FAILURE);
953 }
954
955 // Find the best major SM Architecture GPU device
956 while (current_device < device_count)
957 {
958 cudaGetDeviceProperties(&deviceProp, current_device);
959
961 // If this GPU is not running in Compute Mode Prohibited, then we can add it to the list
961 if (deviceProp.computeMode != cudaComputeModeProhibited)
962 {
963 if (deviceProp.major > 0 && deviceProp.major < 9999)
964 {
965 best_SM_arch = MAX(best_SM_arch, deviceProp.major);
966 }
967 }
968
969 current_device++;
970 }
971
972 // Find the best CUDA capable GPU device
973 current_device = 0;
974
975 while (current_device < device_count)
976 {
977 cudaGetDeviceProperties(&deviceProp, current_device);
978
980 // If this GPU is not running in Compute Mode Prohibited, then we can add it to the list
980 if (deviceProp.computeMode != cudaComputeModeProhibited)
981 {
982 if (deviceProp.major == 9999 && deviceProp.minor == 9999)
983 {
984 sm_per_multiproc = 1;
985 }
986 else
987 {
988 sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
989 }
990
991 unsigned long long compute_perf = (unsigned long long)deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
992
993 if (compute_perf > max_compute_perf)
994 {
995 // If we found a GPU with SM major > 2, search only among those devices
996 if (best_SM_arch > 2)
997 {
998 // If our device's SM major == best_SM_arch, choose it; otherwise skip it
999 if (deviceProp.major == best_SM_arch)
1000 {
1001 max_compute_perf = compute_perf;
1002 max_perf_device = current_device;
1003 }
1004 }
1005 else
1006 {
1007 max_compute_perf = compute_perf;
1008 max_perf_device = current_device;
1009 }
1010 }
1011 }
1012
1013 ++current_device;
1014 }
1015
1016 return max_perf_device;
1017}
1018
1019// Initialization code to find the best CUDA Device
1020inline int findCudaDevice(int argc, const char **argv)
1021{
1022 cudaDeviceProp deviceProp;
1023 int devID = 0;
1024
1025 // If the command-line has a device number specified, use it
1026 if (checkCmdLineFlag(argc, argv, "device"))
1027 {
1028 devID = getCmdLineArgumentInt(argc, argv, "device=");
1029
1030 if (devID < 0)
1031 {
1032 printf("Invalid command line parameter\n ");
1033 exit(EXIT_FAILURE);
1034 }
1035 else
1036 {
1037 devID = gpuDeviceInit(devID);
1038
1039 if (devID < 0)
1040 {
1041 printf("exiting...\n");
1042 exit(EXIT_FAILURE);
1043 }
1044 }
1045 }
1046 else
1047 {
1048 // Otherwise pick the device with highest Gflops/s
1049 devID = gpuGetMaxGflopsDeviceId();
1050 checkCudaErrors(cudaSetDevice(devID));
1051 checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
1052 printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor);
1053 }
1054
1055 return devID;
1056}
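/* Typical use at program startup (a minimal sketch; a real sample would go on to
   allocate memory and launch kernels on the selected device):

       int main(int argc, char **argv)
       {
           int devID = findCudaDevice(argc, (const char **)argv);
           // ... kernels, memory transfers, etc. ...
           return 0;
       }

   Passing a device flag on the command line (e.g. -device=1) selects that device via
   gpuDeviceInit(); otherwise the device with the highest estimated GFLOPS is chosen. */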
1057
1058// General check for CUDA GPU SM Capabilities
1059inline bool checkCudaCapabilities(int major_version, int minor_version)
1060{
1061 cudaDeviceProp deviceProp;
1062 deviceProp.major = 0;
1063 deviceProp.minor = 0;
1064 int dev;
1065
1066 checkCudaErrors(cudaGetDevice(&dev));
1067 checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
1068
1069 if ((deviceProp.major > major_version) || (deviceProp.major == major_version && deviceProp.minor >= minor_version))
1070 {
1071 printf(" GPU Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor);
1072 return true;
1073 }
1074 else
1075 {
1076 printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
1077 return false;
1078 }
1079}
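/* Example gate before using features that require a minimum compute capability
   (a minimal sketch; SM 3.0 is chosen purely for illustration):

       if (!checkCudaCapabilities(3, 0))
       {
           fprintf(stderr, "This sample requires SM 3.0 or higher, waiving the run.\n");
           exit(EXIT_WAIVED);
       }
*/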
1080#endif
1081
1082// end of CUDA Helper Functions
1083
1084#endif