COVISE Core
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros
helper_cuda.h
Go to the documentation of this file.
1 /* This file is part of COVISE.
2 
3  You can use it under the terms of the GNU Lesser General Public License
4  version 2.1 or later, see lgpl-2.1.txt.
5 
6  * License: LGPL 2+ */
7 
19 // These are CUDA Helper functions for initialization and error checking
21 
22 #ifndef HELPER_CUDA_H
23 #define HELPER_CUDA_H
24 
25 #pragma once
26 
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <string.h>
30 
31 #include "helper_string.h"
32 
33 /*
34 inline void __ExitInTime(int seconds)
35 {
36  fprintf(stdout, "> exiting in %d seconds: ", seconds);
37  fflush(stdout);
38  time_t t;
39  int count;
40 
41  for (t=time(0)+seconds, count=seconds; time(0) < t; count--) {
42  fprintf(stdout, "%d...", count);
43 #if defined(WIN32)
44  Sleep(1000);
45 #else
46  sleep(1);
47 #endif
48  }
49 
50  fprintf(stdout,"done!\n\n");
51  fflush(stdout);
52 }
53 
54 #define EXIT_TIME_DELAY 2
55 
56 inline void EXIT_DELAY(int return_code)
57 {
58  __ExitInTime(EXIT_TIME_DELAY);
59  exit(return_code);
60 }
61 */
62 
63 #ifndef EXIT_WAIVED
64 #define EXIT_WAIVED 2
65 #endif
66 
67 // Note, it is required that your SDK sample to include the proper header files, please
68 // refer the CUDA examples for examples of the needed CUDA headers, which may change depending
69 // on which CUDA functions are used.
70 
71 // CUDA Runtime error messages
#ifdef __DRIVER_TYPES_H__
// Map a CUDA Runtime status code to its symbolic enumerator name.
// Unrecognized codes yield "<unknown>".
static const char *_cudaGetErrorEnum(cudaError_t error)
{
// Stringize each enumerator so the case label and the returned text cannot
// drift apart.
#define HELPER_CUDA_RT_CASE(status) \
    case status:                    \
        return #status;

    switch (error)
    {
        HELPER_CUDA_RT_CASE(cudaSuccess)
        HELPER_CUDA_RT_CASE(cudaErrorMissingConfiguration)
        HELPER_CUDA_RT_CASE(cudaErrorMemoryAllocation)
        HELPER_CUDA_RT_CASE(cudaErrorInitializationError)
        HELPER_CUDA_RT_CASE(cudaErrorLaunchFailure)
        HELPER_CUDA_RT_CASE(cudaErrorPriorLaunchFailure)
        HELPER_CUDA_RT_CASE(cudaErrorLaunchTimeout)
        HELPER_CUDA_RT_CASE(cudaErrorLaunchOutOfResources)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidDeviceFunction)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidConfiguration)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidDevice)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidValue)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidPitchValue)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidSymbol)
        HELPER_CUDA_RT_CASE(cudaErrorMapBufferObjectFailed)
        HELPER_CUDA_RT_CASE(cudaErrorUnmapBufferObjectFailed)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidHostPointer)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidDevicePointer)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidTexture)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidTextureBinding)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidChannelDescriptor)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidMemcpyDirection)
        HELPER_CUDA_RT_CASE(cudaErrorAddressOfConstant)
        HELPER_CUDA_RT_CASE(cudaErrorTextureFetchFailed)
        HELPER_CUDA_RT_CASE(cudaErrorTextureNotBound)
        HELPER_CUDA_RT_CASE(cudaErrorSynchronizationError)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidFilterSetting)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidNormSetting)
        HELPER_CUDA_RT_CASE(cudaErrorMixedDeviceExecution)
        HELPER_CUDA_RT_CASE(cudaErrorCudartUnloading)
        HELPER_CUDA_RT_CASE(cudaErrorUnknown)
        HELPER_CUDA_RT_CASE(cudaErrorNotYetImplemented)
        HELPER_CUDA_RT_CASE(cudaErrorMemoryValueTooLarge)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidResourceHandle)
        HELPER_CUDA_RT_CASE(cudaErrorNotReady)
        HELPER_CUDA_RT_CASE(cudaErrorInsufficientDriver)
        HELPER_CUDA_RT_CASE(cudaErrorSetOnActiveProcess)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidSurface)
        HELPER_CUDA_RT_CASE(cudaErrorNoDevice)
        HELPER_CUDA_RT_CASE(cudaErrorECCUncorrectable)
        HELPER_CUDA_RT_CASE(cudaErrorSharedObjectSymbolNotFound)
        HELPER_CUDA_RT_CASE(cudaErrorSharedObjectInitFailed)
        HELPER_CUDA_RT_CASE(cudaErrorUnsupportedLimit)
        HELPER_CUDA_RT_CASE(cudaErrorDuplicateVariableName)
        HELPER_CUDA_RT_CASE(cudaErrorDuplicateTextureName)
        HELPER_CUDA_RT_CASE(cudaErrorDuplicateSurfaceName)
        HELPER_CUDA_RT_CASE(cudaErrorDevicesUnavailable)
        HELPER_CUDA_RT_CASE(cudaErrorInvalidKernelImage)
        HELPER_CUDA_RT_CASE(cudaErrorNoKernelImageForDevice)
        HELPER_CUDA_RT_CASE(cudaErrorIncompatibleDriverContext)
        HELPER_CUDA_RT_CASE(cudaErrorPeerAccessAlreadyEnabled)
        HELPER_CUDA_RT_CASE(cudaErrorPeerAccessNotEnabled)
        HELPER_CUDA_RT_CASE(cudaErrorDeviceAlreadyInUse)
        HELPER_CUDA_RT_CASE(cudaErrorProfilerDisabled)
        HELPER_CUDA_RT_CASE(cudaErrorProfilerNotInitialized)
        HELPER_CUDA_RT_CASE(cudaErrorProfilerAlreadyStarted)
        HELPER_CUDA_RT_CASE(cudaErrorProfilerAlreadyStopped)

#if __CUDA_API_VERSION >= 0x4000
        // Enumerators introduced with the CUDA 4.0 API.
        HELPER_CUDA_RT_CASE(cudaErrorAssert)
        HELPER_CUDA_RT_CASE(cudaErrorTooManyPeers)
        HELPER_CUDA_RT_CASE(cudaErrorHostMemoryAlreadyRegistered)
        HELPER_CUDA_RT_CASE(cudaErrorHostMemoryNotRegistered)
#endif

        HELPER_CUDA_RT_CASE(cudaErrorStartupFailure)
        HELPER_CUDA_RT_CASE(cudaErrorApiFailureBase)

    default:
        break;
    }
#undef HELPER_CUDA_RT_CASE

    return "<unknown>";
}
#endif
276 
#ifdef __cuda_cuda_h__
// CUDA Driver API errors: map a CUresult status to its symbolic name.
// Unrecognized codes yield "<unknown>".
static const char *_cudaGetErrorEnum(CUresult error)
{
// Stringize each enumerator so the case label and the returned text cannot
// drift apart.
#define HELPER_CUDA_DRV_CASE(status) \
    case status:                     \
        return #status;

    switch (error)
    {
        HELPER_CUDA_DRV_CASE(CUDA_SUCCESS)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_VALUE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_OUT_OF_MEMORY)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_INITIALIZED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_DEINITIALIZED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PROFILER_DISABLED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PROFILER_NOT_INITIALIZED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PROFILER_ALREADY_STARTED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PROFILER_ALREADY_STOPPED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NO_DEVICE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_DEVICE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_IMAGE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_CONTEXT)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_CONTEXT_ALREADY_CURRENT)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_MAP_FAILED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_UNMAP_FAILED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ARRAY_IS_MAPPED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ALREADY_MAPPED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NO_BINARY_FOR_GPU)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ALREADY_ACQUIRED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_MAPPED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_MAPPED_AS_ARRAY)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_MAPPED_AS_POINTER)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ECC_UNCORRECTABLE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_UNSUPPORTED_LIMIT)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_CONTEXT_ALREADY_IN_USE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_SOURCE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_FILE_NOT_FOUND)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_SHARED_OBJECT_INIT_FAILED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_OPERATING_SYSTEM)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_HANDLE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_FOUND)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_READY)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_LAUNCH_FAILED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_LAUNCH_TIMEOUT)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PEER_ACCESS_NOT_ENABLED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_CONTEXT_IS_DESTROYED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ASSERT)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_TOO_MANY_PEERS)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_UNKNOWN)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_PEER_ACCESS_UNSUPPORTED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_PTX)

#if __CUDA_API_VERSION >= 6500
        // Introduced with the CUDA 6.5 API.
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_GRAPHICS_CONTEXT)
#endif

        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ILLEGAL_ADDRESS)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_HARDWARE_STACK_ERROR)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_ILLEGAL_INSTRUCTION)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_MISALIGNED_ADDRESS)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_ADDRESS_SPACE)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_INVALID_PC)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_PERMITTED)
        HELPER_CUDA_DRV_CASE(CUDA_ERROR_NOT_SUPPORTED)
    }
#undef HELPER_CUDA_DRV_CASE

    return "<unknown>";
}
#endif
463 
#ifdef CUBLAS_API_H_
// cuBLAS API errors: map a cublasStatus_t to its symbolic name.
// Unrecognized codes yield "<unknown>".
static const char *_cudaGetErrorEnum(cublasStatus_t error)
{
#define HELPER_CUBLAS_CASE(status) \
    case status:                   \
        return #status;

    switch (error)
    {
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_SUCCESS)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_NOT_INITIALIZED)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_ALLOC_FAILED)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_INVALID_VALUE)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_ARCH_MISMATCH)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_MAPPING_ERROR)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_EXECUTION_FAILED)
        HELPER_CUBLAS_CASE(CUBLAS_STATUS_INTERNAL_ERROR)
    }
#undef HELPER_CUBLAS_CASE

    return "<unknown>";
}
#endif
498 
#ifdef _CUFFT_H_
// cuFFT API errors: map a cufftResult to its symbolic name.
// Unrecognized codes yield "<unknown>".
static const char *_cudaGetErrorEnum(cufftResult error)
{
#define HELPER_CUFFT_CASE(status) \
    case status:                  \
        return #status;

    switch (error)
    {
        HELPER_CUFFT_CASE(CUFFT_SUCCESS)
        HELPER_CUFFT_CASE(CUFFT_INVALID_PLAN)
        HELPER_CUFFT_CASE(CUFFT_ALLOC_FAILED)
        HELPER_CUFFT_CASE(CUFFT_INVALID_TYPE)
        HELPER_CUFFT_CASE(CUFFT_INVALID_VALUE)
        HELPER_CUFFT_CASE(CUFFT_INTERNAL_ERROR)
        HELPER_CUFFT_CASE(CUFFT_EXEC_FAILED)
        HELPER_CUFFT_CASE(CUFFT_SETUP_FAILED)
        HELPER_CUFFT_CASE(CUFFT_INVALID_SIZE)
        HELPER_CUFFT_CASE(CUFFT_UNALIGNED_DATA)
    }
#undef HELPER_CUFFT_CASE

    return "<unknown>";
}
#endif
539 
#ifdef CUSPARSEAPI
// cuSPARSE API errors: map a cusparseStatus_t to its symbolic name.
// Unrecognized codes yield "<unknown>".
static const char *_cudaGetErrorEnum(cusparseStatus_t error)
{
#define HELPER_CUSPARSE_CASE(status) \
    case status:                     \
        return #status;

    switch (error)
    {
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_SUCCESS)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_NOT_INITIALIZED)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_ALLOC_FAILED)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_INVALID_VALUE)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_ARCH_MISMATCH)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_MAPPING_ERROR)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_EXECUTION_FAILED)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_INTERNAL_ERROR)
        HELPER_CUSPARSE_CASE(CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED)
    }
#undef HELPER_CUSPARSE_CASE

    return "<unknown>";
}
#endif
577 
#ifdef CURAND_H_
// cuRAND API errors: map a curandStatus_t to its symbolic name.
// Unrecognized codes yield "<unknown>".
static const char *_cudaGetErrorEnum(curandStatus_t error)
{
#define HELPER_CURAND_CASE(status) \
    case status:                   \
        return #status;

    switch (error)
    {
        HELPER_CURAND_CASE(CURAND_STATUS_SUCCESS)
        HELPER_CURAND_CASE(CURAND_STATUS_VERSION_MISMATCH)
        HELPER_CURAND_CASE(CURAND_STATUS_NOT_INITIALIZED)
        HELPER_CURAND_CASE(CURAND_STATUS_ALLOCATION_FAILED)
        HELPER_CURAND_CASE(CURAND_STATUS_TYPE_ERROR)
        HELPER_CURAND_CASE(CURAND_STATUS_OUT_OF_RANGE)
        HELPER_CURAND_CASE(CURAND_STATUS_LENGTH_NOT_MULTIPLE)
        HELPER_CURAND_CASE(CURAND_STATUS_DOUBLE_PRECISION_REQUIRED)
        HELPER_CURAND_CASE(CURAND_STATUS_LAUNCH_FAILURE)
        HELPER_CURAND_CASE(CURAND_STATUS_PREEXISTING_FAILURE)
        HELPER_CURAND_CASE(CURAND_STATUS_INITIALIZATION_FAILED)
        HELPER_CURAND_CASE(CURAND_STATUS_ARCH_MISMATCH)
        HELPER_CURAND_CASE(CURAND_STATUS_INTERNAL_ERROR)
    }
#undef HELPER_CURAND_CASE

    return "<unknown>";
}
#endif
627 
#ifdef NV_NPPIDEFS_H
// NPP API errors: map an NppStatus to a human-readable name. Several
// enumerators were renamed in the NPP that ships with CUDA 5.5, so the
// matching cases are version-gated, and a few legacy identifiers are
// reported under the newer spelling of the name.
static const char *_cudaGetErrorEnum(NppStatus error)
{
// Stringize an enumerator whose reported name equals its identifier.
#define HELPER_NPP_CASE(status) \
    case status:                \
        return #status;

    switch (error)
    {
        HELPER_NPP_CASE(NPP_NOT_SUPPORTED_MODE_ERROR)
        HELPER_NPP_CASE(NPP_ROUND_MODE_NOT_SUPPORTED_ERROR)
        HELPER_NPP_CASE(NPP_RESIZE_NO_OPERATION_ERROR)
        HELPER_NPP_CASE(NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY)

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
    // Pre-5.5 identifiers, reported under their modern names.
    case NPP_BAD_ARG_ERROR:
        return "NPP_BAD_ARGUMENT_ERROR";

    case NPP_COEFF_ERROR:
        return "NPP_COEFFICIENT_ERROR";

    case NPP_RECT_ERROR:
        return "NPP_RECTANGLE_ERROR";

    case NPP_QUAD_ERROR:
        return "NPP_QUADRANGLE_ERROR";

    case NPP_MEM_ALLOC_ERR:
        return "NPP_MEMORY_ALLOCATION_ERROR";

    case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
        return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";

        HELPER_NPP_CASE(NPP_INVALID_INPUT)
        HELPER_NPP_CASE(NPP_POINTER_ERROR)
        HELPER_NPP_CASE(NPP_WARNING)
        HELPER_NPP_CASE(NPP_ODD_ROI_WARNING)
#else

        // These are for CUDA 5.5 or higher.
        HELPER_NPP_CASE(NPP_BAD_ARGUMENT_ERROR)
        HELPER_NPP_CASE(NPP_COEFFICIENT_ERROR)
        HELPER_NPP_CASE(NPP_RECTANGLE_ERROR)
        HELPER_NPP_CASE(NPP_QUADRANGLE_ERROR)

    case NPP_MEMORY_ALLOCATION_ERR:
        return "NPP_MEMORY_ALLOCATION_ERROR";

        HELPER_NPP_CASE(NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR)
        HELPER_NPP_CASE(NPP_INVALID_HOST_POINTER_ERROR)
        HELPER_NPP_CASE(NPP_INVALID_DEVICE_POINTER_ERROR)
#endif

        HELPER_NPP_CASE(NPP_LUT_NUMBER_OF_LEVELS_ERROR)
        HELPER_NPP_CASE(NPP_TEXTURE_BIND_ERROR)
        HELPER_NPP_CASE(NPP_WRONG_INTERSECTION_ROI_ERROR)
        HELPER_NPP_CASE(NPP_NOT_EVEN_STEP_ERROR)
        HELPER_NPP_CASE(NPP_INTERPOLATION_ERROR)
        HELPER_NPP_CASE(NPP_RESIZE_FACTOR_ERROR)
        HELPER_NPP_CASE(NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR)

#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
        HELPER_NPP_CASE(NPP_MEMFREE_ERR)
        HELPER_NPP_CASE(NPP_MEMSET_ERR)

    case NPP_MEMCPY_ERR:
        return "NPP_MEMCPY_ERROR";

        HELPER_NPP_CASE(NPP_MIRROR_FLIP_ERR)
#else
        HELPER_NPP_CASE(NPP_MEMFREE_ERROR)
        HELPER_NPP_CASE(NPP_MEMSET_ERROR)
        HELPER_NPP_CASE(NPP_MEMCPY_ERROR)
        HELPER_NPP_CASE(NPP_MIRROR_FLIP_ERROR)
#endif

        HELPER_NPP_CASE(NPP_ALIGNMENT_ERROR)
        HELPER_NPP_CASE(NPP_STEP_ERROR)
        HELPER_NPP_CASE(NPP_SIZE_ERROR)
        HELPER_NPP_CASE(NPP_NULL_POINTER_ERROR)
        HELPER_NPP_CASE(NPP_CUDA_KERNEL_EXECUTION_ERROR)
        HELPER_NPP_CASE(NPP_NOT_IMPLEMENTED_ERROR)
        HELPER_NPP_CASE(NPP_ERROR)
        HELPER_NPP_CASE(NPP_SUCCESS)
        HELPER_NPP_CASE(NPP_WRONG_INTERSECTION_QUAD_WARNING)
        HELPER_NPP_CASE(NPP_MISALIGNED_DST_ROI_WARNING)
        HELPER_NPP_CASE(NPP_AFFINE_QUAD_INCORRECT_WARNING)
        HELPER_NPP_CASE(NPP_DOUBLE_SIZE_WARNING)
        HELPER_NPP_CASE(NPP_WRONG_INTERSECTION_ROI_WARNING)
    }
#undef HELPER_NPP_CASE

    return "<unknown>";
}
#endif
797 
798 #ifdef __DRIVER_TYPES_H__
799 #ifndef DEVICE_RESET
800 #define DEVICE_RESET cudaDeviceReset();
801 #endif
802 #else
803 #ifndef DEVICE_RESET
804 #define DEVICE_RESET
805 #endif
806 #endif
807 
// Terminate the process when a CUDA API call returned a non-success status.
// `result` is the status value, `func` the stringified call expression, and
// `file`/`line` locate the call site (see the checkCudaErrors macro below).
template <typename T>
void check(T result, char const *const func, const char *const file, int const line)
{
    if (result)
    {
        // %u matches the unsigned cast (was %d, a signed/unsigned mismatch).
        fprintf(stderr, "CUDA error at %s:%d code=%u(%s) \"%s\" \n",
                file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
#ifdef DEVICE_RESET
        // Make sure we call CUDA Device Reset before exiting, so profiling
        // and trace buffers are flushed. The original comment promised this
        // but DEVICE_RESET was never invoked.
        DEVICE_RESET
#endif
        exit(EXIT_FAILURE);
    }
}
820 
821 #ifdef __DRIVER_TYPES_H__
822 // This will output the proper CUDA error strings in the event that a CUDA host call returns an error
823 #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
824 
825 // This will output the proper error string when calling cudaGetLastError
826 #define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
827 
// Check the sticky CUDA error state (set by a prior kernel launch or runtime
// call); on failure print the caller-supplied context plus the CUDA error
// text and abort. Used via the getLastCudaError(msg) macro above.
inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
{
    cudaError_t err = cudaGetLastError();

    if (cudaSuccess != err)
    {
        fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
                file, line, errorMessage, (int)err, cudaGetErrorString(err));
#ifdef DEVICE_RESET
        // Reset the device before exiting (consistent with check() above) so
        // profiling data is flushed.
        DEVICE_RESET
#endif
        exit(EXIT_FAILURE);
    }
}
840 #endif
841 
#ifndef MAX
// Larger of two values. Arguments are parenthesized so expressions with
// lower precedence than '>' (ternaries, bitwise ops, ...) expand correctly;
// the unparenthesized original mis-evaluated e.g. MAX(1, 0 ? 9 : 2).
// NOTE: each argument is evaluated twice -- avoid side effects in a/b.
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
845 
846 // Beginning of GPU Architecture definitions
// Return the number of CUDA cores per streaming multiprocessor for the given
// compute capability (major.minor). Unknown SM versions fall back to the
// newest architecture in the table (with a diagnostic message).
inline int _ConvertSMVer2Cores(int major, int minor)
{
    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM)
    typedef struct
    {
        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
        int Cores;
    } sSMtoCores;

    sSMtoCores nGpuArchCoresPerSM[] = {
        { 0x10, 8 }, // Tesla Generation (SM 1.0) G80 class
        { 0x11, 8 }, // Tesla Generation (SM 1.1) G8x class
        { 0x12, 8 }, // Tesla Generation (SM 1.2) G9x class
        { 0x13, 8 }, // Tesla Generation (SM 1.3) GT200 class
        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
        { 0x30, 192 }, // Kepler Generation (SM 3.0) GK10x class
        { 0x32, 192 }, // Kepler Generation (SM 3.2) GK10x class
        { 0x35, 192 }, // Kepler Generation (SM 3.5) GK11x class
        { 0x50, 128 }, // Maxwell Generation (SM 5.0) GM10x class
        { -1, -1 } // sentinel
    };

    int index = 0;

    while (nGpuArchCoresPerSM[index].SM != -1)
    {
        if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
        {
            return nGpuArchCoresPerSM[index].Cores;
        }

        index++;
    }

    // If we don't find the values, default to the last (newest) known entry
    // so future architectures still run. The previous code hard-coded a
    // mid-table Kepler entry (index 7), which goes stale as rows are added.
    printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index - 1].Cores);
    return nGpuArchCoresPerSM[index - 1].Cores;
}
886 // end of GPU Architecture definitions
887 
888 #ifdef __CUDA_RUNTIME_H__
889 // General GPU Device CUDA Initialization
// General GPU Device CUDA Initialization.
// Validates devID against the visible CUDA devices, rejects devices in
// prohibited compute mode or without CUDA support, then selects the device.
// Returns the selected device id, or a negative value on a bad device id.
inline int gpuDeviceInit(int devID)
{
    int device_count;
    checkCudaErrors(cudaGetDeviceCount(&device_count));

    if (device_count == 0)
    {
        fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
        exit(EXIT_FAILURE);
    }

    // Negative requests are clamped to the first device.
    if (devID < 0)
    {
        devID = 0;
    }

    if (devID > device_count - 1)
    {
        fprintf(stderr, "\n");
        fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count);
        fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID);
        fprintf(stderr, "\n");
        return -devID;
    }

    cudaDeviceProp deviceProp;
    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));

    if (deviceProp.computeMode == cudaComputeModeProhibited)
    {
        fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
        return -1;
    }

    if (deviceProp.major < 1)
    {
        fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
        exit(EXIT_FAILURE);
    }

    checkCudaErrors(cudaSetDevice(devID));
    // Bug fix: the format string lacked the closing escaped quote around the
    // device name, producing unbalanced output.
    printf("gpuDeviceInit() CUDA Device [%d]: \"%s\"\n", devID, deviceProp.name);

    return devID;
}
935 
936 // This function returns the best GPU (with maximum GFLOPS)
// This function returns the best GPU (with maximum GFLOPS).
// Two passes: first find the highest SM major architecture among usable
// devices; then pick the device with the largest cores*clock product,
// restricted (for SM major > 2) to devices of that best architecture.
inline int gpuGetMaxGflopsDeviceId()
{
    int current_device = 0, sm_per_multiproc = 0;
    int max_perf_device = 0;
    int device_count = 0, best_SM_arch = 0;

    unsigned long long max_compute_perf = 0;
    cudaDeviceProp deviceProp;
    // Bug fix: cudaGetDeviceCount was previously called twice, the first
    // time with its return status ignored; a single checked call suffices.
    checkCudaErrors(cudaGetDeviceCount(&device_count));

    if (device_count == 0)
    {
        fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n");
        exit(EXIT_FAILURE);
    }

    // Find the best major SM Architecture GPU device
    while (current_device < device_count)
    {
        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));

        // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
        if (deviceProp.computeMode != cudaComputeModeProhibited)
        {
            // major == 9999 marks a device emulation / no-CUDA device.
            if (deviceProp.major > 0 && deviceProp.major < 9999)
            {
                best_SM_arch = MAX(best_SM_arch, deviceProp.major);
            }
        }

        current_device++;
    }

    // Find the best CUDA capable GPU device
    current_device = 0;

    while (current_device < device_count)
    {
        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, current_device));

        // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
        if (deviceProp.computeMode != cudaComputeModeProhibited)
        {
            if (deviceProp.major == 9999 && deviceProp.minor == 9999)
            {
                sm_per_multiproc = 1;
            }
            else
            {
                sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
            }

            unsigned long long compute_perf = (unsigned long long)deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;

            if (compute_perf > max_compute_perf)
            {
                // If we find GPU with SM major > 2, search only these
                if (best_SM_arch > 2)
                {
                    // If our device==dest_SM_arch, choose this, or else pass
                    if (deviceProp.major == best_SM_arch)
                    {
                        max_compute_perf = compute_perf;
                        max_perf_device = current_device;
                    }
                }
                else
                {
                    max_compute_perf = compute_perf;
                    max_perf_device = current_device;
                }
            }
        }

        ++current_device;
    }

    return max_perf_device;
}
1018 
1019 // Initialization code to find the best CUDA Device
// Initialization code to find the best CUDA Device.
// Honors an explicit "-device=N" command-line request (exiting on an invalid
// id); otherwise selects the device with the highest Gflops/s estimate.
// Returns the id of the selected device.
inline int findCudaDevice(int argc, const char **argv)
{
    int devID = 0;

    if (checkCmdLineFlag(argc, argv, "device"))
    {
        // Explicit device requested on the command line.
        devID = getCmdLineArgumentInt(argc, argv, "device=");

        if (devID < 0)
        {
            printf("Invalid command line parameter\n ");
            exit(EXIT_FAILURE);
        }

        devID = gpuDeviceInit(devID);

        if (devID < 0)
        {
            printf("exiting...\n");
            exit(EXIT_FAILURE);
        }
    }
    else
    {
        // Otherwise pick the device with highest Gflops/s.
        devID = gpuGetMaxGflopsDeviceId();
        checkCudaErrors(cudaSetDevice(devID));

        cudaDeviceProp deviceProp;
        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
        printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor);
    }

    return devID;
}
1057 
1058 // General check for CUDA GPU SM Capabilities
// General check for CUDA GPU SM Capabilities.
// Returns true when the current device's compute capability is at least
// major_version.minor_version, printing a diagnostic either way.
inline bool checkCudaCapabilities(int major_version, int minor_version)
{
    cudaDeviceProp deviceProp;
    deviceProp.major = 0;
    deviceProp.minor = 0;
    int dev;

    checkCudaErrors(cudaGetDevice(&dev));
    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));

    const bool capable = (deviceProp.major > major_version)
                         || (deviceProp.major == major_version && deviceProp.minor >= minor_version);

    if (capable)
    {
        printf(" GPU Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor);
    }
    else
    {
        printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
    }

    return capable;
}
1080 #endif
1081 
1082 // end of CUDA Helper Functions
1083 
1084 #endif
void check(T result, char const *const func, const char *const file, int const line)
Definition: helper_cuda.h:809
#define DEVICE_RESET
Definition: helper_cuda.h:804
int _ConvertSMVer2Cores(int major, int minor)
Definition: helper_cuda.h:847
GLenum func
Definition: khronos-glext.h:6719
int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
Definition: helper_string.h:183
bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
Definition: helper_string.h:125
GLuint index
Definition: khronos-glext.h:6722
GLuint64EXT * result
Definition: khronos-glext.h:12573
#define MAX(a, b)
Definition: helper_cuda.h:843