comparison src/gpu/ptx/vm/gpu_ptx.hpp @ 12653:1a7e7011a341

* PTX kernel argument buffer now has naturally aligned arguments as required by the PTX JIT compiler. * Change dynamic loading of CUDA driver API functions to load 32-bit or 64-bit versions depending on the host architecture. * Add ability to generate PTX kernels to be launched on both 32-bit and 64-bit hosts. * Use Unified Virtual Memory APIs to perform array argument marshalling. * PTX array storage test runs on the device and returns correct results. * More integer test failures on GPU fixed.
author S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com>
date Fri, 01 Nov 2013 18:34:03 -0400
parents c7abc8411011
children 220ed109bf77
comparison
equal deleted inserted replaced
12652:0dd597c6c9c7 12653:1a7e7011a341
44 #define GRAAL_CU_JIT_THREADS_PER_BLOCK 1 44 #define GRAAL_CU_JIT_THREADS_PER_BLOCK 1
45 #define GRAAL_CU_JIT_INFO_LOG_BUFFER 3 45 #define GRAAL_CU_JIT_INFO_LOG_BUFFER 3
46 #define GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES 4 46 #define GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES 4
47 #define GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU 209 47 #define GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU 209
48 48
49 /*
50 * Flags for cuMemHostRegister
51 */
52
53 #define GRAAL_CU_MEMHOSTREGISTER_PORTABLE 1
54 #define GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP 2
55
49 /** 56 /**
50 * End of array terminator for the extra parameter to 57 * End of array terminator for the extra parameter to
51 * ::cuLaunchKernel 58 * ::cuLaunchKernel
52 */ 59 */
53 #define GRAAL_CU_LAUNCH_PARAM_END ((void*) 0x00) 60 #define GRAAL_CU_LAUNCH_PARAM_END ((void*) 0x00)
71 * in the extra array if the value associated with 78 * in the extra array if the value associated with
72 * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. 79 * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero.
73 */ 80 */
74 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE ((void*) 0x02) 81 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE ((void*) 0x02)
75 82
83 /*
84 * Context creation flags
85 */
86
87 #define GRAAL_CU_CTX_MAP_HOST 0x08
88
76 class Ptx { 89 class Ptx {
77 friend class gpu; 90 friend class gpu;
78 91
79 protected: 92 protected:
80 static bool probe_linkage(); 93 static bool probe_linkage();
88 typedef unsigned long long CUdeviceptr; 101 typedef unsigned long long CUdeviceptr;
89 #else 102 #else
90 typedef unsigned int CUdeviceptr; 103 typedef unsigned int CUdeviceptr;
91 #endif 104 #endif
92 105
106 typedef int CUdevice; /**< CUDA device */
107
93 private: 108 private:
94 typedef int (*cuda_cu_init_func_t)(unsigned int); 109 typedef int (*cuda_cu_init_func_t)(unsigned int);
95 typedef int (*cuda_cu_ctx_create_func_t)(void*, int, int); 110 typedef int (*cuda_cu_ctx_create_func_t)(void*, unsigned int, CUdevice);
96 typedef int (*cuda_cu_ctx_destroy_func_t)(void*); 111 typedef int (*cuda_cu_ctx_destroy_func_t)(void*);
97 typedef int (*cuda_cu_ctx_synchronize_func_t)(void); 112 typedef int (*cuda_cu_ctx_synchronize_func_t)(void);
98 typedef int (*cuda_cu_ctx_set_current_func_t)(void*); 113 typedef int (*cuda_cu_ctx_set_current_func_t)(void*);
99 typedef int (*cuda_cu_device_get_count_func_t)(int*); 114 typedef int (*cuda_cu_device_get_count_func_t)(int*);
100 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int); 115 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int);
105 unsigned int, unsigned int, unsigned int, 120 unsigned int, unsigned int, unsigned int,
106 unsigned int, unsigned int, unsigned int, 121 unsigned int, unsigned int, unsigned int,
107 unsigned int, void*, void**, void**); 122 unsigned int, void*, void**, void**);
108 typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*); 123 typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*);
109 typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**); 124 typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**);
110 typedef int (*cuda_cu_memalloc_func_t)(void*, size_t); 125 typedef int (*cuda_cu_memalloc_func_t)(gpu::Ptx::CUdeviceptr*, size_t);
111 typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr); 126 typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr);
112 typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int); 127 typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int);
113 typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr, unsigned int); 128 typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr, unsigned int);
129 typedef int (*cuda_cu_mem_host_register_func_t)(void*, size_t, unsigned int);
130 typedef int (*cuda_cu_mem_host_get_device_pointer_func_t)(gpu::Ptx::CUdeviceptr*, void*, unsigned int);
131 typedef int (*cuda_cu_mem_host_unregister_func_t)(void*);
114 132
115 public: 133 public:
116 static cuda_cu_init_func_t _cuda_cu_init; 134 static cuda_cu_init_func_t _cuda_cu_init;
117 static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create; 135 static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create;
118 static cuda_cu_ctx_destroy_func_t _cuda_cu_ctx_destroy; 136 static cuda_cu_ctx_destroy_func_t _cuda_cu_ctx_destroy;
128 static cuda_cu_memalloc_func_t _cuda_cu_memalloc; 146 static cuda_cu_memalloc_func_t _cuda_cu_memalloc;
129 static cuda_cu_memfree_func_t _cuda_cu_memfree; 147 static cuda_cu_memfree_func_t _cuda_cu_memfree;
130 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod; 148 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod;
131 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh; 149 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh;
132 static cuda_cu_ctx_set_current_func_t _cuda_cu_ctx_set_current; 150 static cuda_cu_ctx_set_current_func_t _cuda_cu_ctx_set_current;
151 static cuda_cu_mem_host_register_func_t _cuda_cu_mem_host_register;
152 static cuda_cu_mem_host_get_device_pointer_func_t _cuda_cu_mem_host_get_device_pointer;
153 static cuda_cu_mem_host_unregister_func_t _cuda_cu_mem_host_unregister;
133 154
134 protected: 155 protected:
135 static void* _device_context; 156 static void* _device_context;
136 static int _cu_device; 157 static int _cu_device;
137 }; 158 };