Mercurial > hg > graal-jvmci-8
comparison src/gpu/ptx/vm/gpu_ptx.hpp @ 12653:1a7e7011a341
* PTX kernel argument buffer now has naturally aligned arguments as required by PTX JIT compiler.
* Change dynamic loading of CUDA driver API functions to load 32-bit or 64-bit versions depending on the host architecture.
* Add ability to generate PTX kernels to be launched both on 32-bit and 64-bit hosts.
* Use Unified Virtual Memory APIs to perform array argument marshalling.
* PTX array storage test runs on the device and returns correct results.
* More integer test failures on GPU fixed.
author | S.Bharadwaj Yadavalli <bharadwaj.yadavalli@oracle.com> |
---|---|
date | Fri, 01 Nov 2013 18:34:03 -0400 |
parents | c7abc8411011 |
children | 220ed109bf77 |
comparison
equal
deleted
inserted
replaced
12652:0dd597c6c9c7 | 12653:1a7e7011a341 |
---|---|
44 #define GRAAL_CU_JIT_THREADS_PER_BLOCK 1 | 44 #define GRAAL_CU_JIT_THREADS_PER_BLOCK 1 |
45 #define GRAAL_CU_JIT_INFO_LOG_BUFFER 3 | 45 #define GRAAL_CU_JIT_INFO_LOG_BUFFER 3 |
46 #define GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES 4 | 46 #define GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES 4 |
47 #define GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU 209 | 47 #define GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU 209 |
48 | 48 |
49 /* | |
50 * Flags for cuMemHostRegister | |
51 */ | |
52 | |
53 #define GRAAL_CU_MEMHOSTREGISTER_PORTABLE 1 | |
54 #define GRAAL_CU_MEMHOSTREGISTER_DEVICEMAP 2 | |
55 | |
49 /** | 56 /** |
50 * End of array terminator for the extra parameter to | 57 * End of array terminator for the extra parameter to |
51 * ::cuLaunchKernel | 58 * ::cuLaunchKernel |
52 */ | 59 */ |
53 #define GRAAL_CU_LAUNCH_PARAM_END ((void*) 0x00) | 60 #define GRAAL_CU_LAUNCH_PARAM_END ((void*) 0x00) |
71 * in the extra array if the value associated with | 78 * in the extra array if the value associated with |
72 * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. | 79 * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. |
73 */ | 80 */ |
74 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE ((void*) 0x02) | 81 #define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE ((void*) 0x02) |
75 | 82 |
83 /* | |
84 * Context creation flags | |
85 */ | |
86 | |
87 #define GRAAL_CU_CTX_MAP_HOST 0x08 | |
88 | |
76 class Ptx { | 89 class Ptx { |
77 friend class gpu; | 90 friend class gpu; |
78 | 91 |
79 protected: | 92 protected: |
80 static bool probe_linkage(); | 93 static bool probe_linkage(); |
88 typedef unsigned long long CUdeviceptr; | 101 typedef unsigned long long CUdeviceptr; |
89 #else | 102 #else |
90 typedef unsigned int CUdeviceptr; | 103 typedef unsigned int CUdeviceptr; |
91 #endif | 104 #endif |
92 | 105 |
106 typedef int CUdevice; /**< CUDA device */ | |
107 | |
93 private: | 108 private: |
94 typedef int (*cuda_cu_init_func_t)(unsigned int); | 109 typedef int (*cuda_cu_init_func_t)(unsigned int); |
95 typedef int (*cuda_cu_ctx_create_func_t)(void*, int, int); | 110 typedef int (*cuda_cu_ctx_create_func_t)(void*, unsigned int, CUdevice); |
96 typedef int (*cuda_cu_ctx_destroy_func_t)(void*); | 111 typedef int (*cuda_cu_ctx_destroy_func_t)(void*); |
97 typedef int (*cuda_cu_ctx_synchronize_func_t)(void); | 112 typedef int (*cuda_cu_ctx_synchronize_func_t)(void); |
98 typedef int (*cuda_cu_ctx_set_current_func_t)(void*); | 113 typedef int (*cuda_cu_ctx_set_current_func_t)(void*); |
99 typedef int (*cuda_cu_device_get_count_func_t)(int*); | 114 typedef int (*cuda_cu_device_get_count_func_t)(int*); |
100 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int); | 115 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int); |
105 unsigned int, unsigned int, unsigned int, | 120 unsigned int, unsigned int, unsigned int, |
106 unsigned int, unsigned int, unsigned int, | 121 unsigned int, unsigned int, unsigned int, |
107 unsigned int, void*, void**, void**); | 122 unsigned int, void*, void**, void**); |
108 typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*); | 123 typedef int (*cuda_cu_module_get_function_func_t)(void*, void*, const char*); |
109 typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**); | 124 typedef int (*cuda_cu_module_load_data_ex_func_t)(void*, void*, unsigned int, void*, void**); |
110 typedef int (*cuda_cu_memalloc_func_t)(void*, size_t); | 125 typedef int (*cuda_cu_memalloc_func_t)(gpu::Ptx::CUdeviceptr*, size_t); |
111 typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr); | 126 typedef int (*cuda_cu_memfree_func_t)(gpu::Ptx::CUdeviceptr); |
112 typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int); | 127 typedef int (*cuda_cu_memcpy_htod_func_t)(gpu::Ptx::CUdeviceptr, const void*, unsigned int); |
113 typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr, unsigned int); | 128 typedef int (*cuda_cu_memcpy_dtoh_func_t)(const void*, gpu::Ptx::CUdeviceptr, unsigned int); |
129 typedef int (*cuda_cu_mem_host_register_func_t)(void*, size_t, unsigned int); | |
130 typedef int (*cuda_cu_mem_host_get_device_pointer_func_t)(gpu::Ptx::CUdeviceptr*, void*, unsigned int); | |
131 typedef int (*cuda_cu_mem_host_unregister_func_t)(void*); | |
114 | 132 |
115 public: | 133 public: |
116 static cuda_cu_init_func_t _cuda_cu_init; | 134 static cuda_cu_init_func_t _cuda_cu_init; |
117 static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create; | 135 static cuda_cu_ctx_create_func_t _cuda_cu_ctx_create; |
118 static cuda_cu_ctx_destroy_func_t _cuda_cu_ctx_destroy; | 136 static cuda_cu_ctx_destroy_func_t _cuda_cu_ctx_destroy; |
128 static cuda_cu_memalloc_func_t _cuda_cu_memalloc; | 146 static cuda_cu_memalloc_func_t _cuda_cu_memalloc; |
129 static cuda_cu_memfree_func_t _cuda_cu_memfree; | 147 static cuda_cu_memfree_func_t _cuda_cu_memfree; |
130 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod; | 148 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod; |
131 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh; | 149 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh; |
132 static cuda_cu_ctx_set_current_func_t _cuda_cu_ctx_set_current; | 150 static cuda_cu_ctx_set_current_func_t _cuda_cu_ctx_set_current; |
151 static cuda_cu_mem_host_register_func_t _cuda_cu_mem_host_register; | |
152 static cuda_cu_mem_host_get_device_pointer_func_t _cuda_cu_mem_host_get_device_pointer; | |
153 static cuda_cu_mem_host_unregister_func_t _cuda_cu_mem_host_unregister; | |
133 | 154 |
134 protected: | 155 protected: |
135 static void* _device_context; | 156 static void* _device_context; |
136 static int _cu_device; | 157 static int _cu_device; |
137 }; | 158 }; |