Mercurial > hg > truffle
comparison src/gpu/ptx/vm/gpu_ptx.hpp @ 13753:80cd5c3b8827
partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
author | Doug Simon <doug.simon@oracle.com> |
---|---|
date | Fri, 24 Jan 2014 18:01:36 +0100 |
parents | 27acedac70b7 |
children | 49db2c1e3bee |
comparison
equal
deleted
inserted
replaced
13752:34ab58984118 | 13753:80cd5c3b8827 |
---|---|
82 | 82 |
83 /* | 83 /* |
84 * Context creation flags | 84 * Context creation flags |
85 */ | 85 */ |
86 | 86 |
87 #define GRAAL_CU_CTX_MAP_HOST 0x08 | 87 #define GRAAL_CU_CTX_MAP_HOST 0x08 |
 | 88 #define GRAAL_CU_CTX_SCHED_BLOCKING_SYNC 0x04 |
88 | 89 |
89 class Ptx { | 90 class Ptx { |
90 friend class gpu; | 91 friend class gpu; |
 | 92 friend class PtxCall; |
91 | 93 |
92 protected: | 94 protected: |
93 static bool probe_linkage(); | 95 static bool probe_linkage(); |
94 static bool initialize_gpu(); | 96 static bool initialize_gpu(); |
95 static unsigned int total_cores(); | 97 static unsigned int total_cores(); |
96 static void * generate_kernel(unsigned char *code, int code_len, const char *name); | 98 static void* get_context(); |
 | 99 static void* generate_kernel(unsigned char *code, int code_len, const char *name); |
97 static bool execute_warp(int dimX, int dimY, int dimZ, address kernel, PTXKernelArguments & ka, JavaValue &ret); | 100 static bool execute_warp(int dimX, int dimY, int dimZ, address kernel, PTXKernelArguments & ka, JavaValue &ret); |
98 static bool execute_kernel(address kernel, PTXKernelArguments & ka, JavaValue &ret); | 101 static bool execute_kernel(address kernel, PTXKernelArguments & ka, JavaValue &ret); |
99 public: | 102 public: |
100 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) | 103 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) |
101 typedef unsigned long long CUdeviceptr; | 104 typedef unsigned long long CUdeviceptr; |
104 #endif | 107 #endif |
105 | 108 |
106 typedef int CUdevice; /* CUDA device */ | 109 typedef int CUdevice; /* CUDA device */ |
107 | 110 |
108 static jlong execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ, | 111 static jlong execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ, |
109 jlong parametersAndReturnValueBuffer, | 112 jlong buffer, |
110 jint parametersAndReturnValueBufferSize, | 113 jint bufferSize, |
 | 114 jint objectParametersCount, |
 | 115 jlong objectParametersOffsets, |
 | 116 jlong pinnedObjects, |
111 int encodedReturnTypeSize); | 117 int encodedReturnTypeSize); |
112 | 118 |
113 private: | 119 private: |
114 typedef int (*cuda_cu_init_func_t)(unsigned int); | 120 typedef int (*cuda_cu_init_func_t)(unsigned int); |
115 typedef int (*cuda_cu_ctx_create_func_t)(void*, unsigned int, CUdevice); | 121 typedef int (*cuda_cu_ctx_create_func_t)(void*, unsigned int, CUdevice); |
116 typedef int (*cuda_cu_ctx_destroy_func_t)(void*); | 122 typedef int (*cuda_cu_ctx_destroy_func_t)(void*); |
117 typedef int (*cuda_cu_ctx_synchronize_func_t)(void); | 123 typedef int (*cuda_cu_ctx_synchronize_func_t)(void); |
 | 124 typedef int (*cuda_cu_ctx_get_current_func_t)(void*); |
118 typedef int (*cuda_cu_ctx_set_current_func_t)(void*); | 125 typedef int (*cuda_cu_ctx_set_current_func_t)(void*); |
119 typedef int (*cuda_cu_device_get_count_func_t)(int*); | 126 typedef int (*cuda_cu_device_get_count_func_t)(int*); |
120 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int); | 127 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int); |
121 typedef int (*cuda_cu_device_get_func_t)(int*, int); | 128 typedef int (*cuda_cu_device_get_func_t)(int*, int); |
122 typedef int (*cuda_cu_device_compute_capability_func_t)(int*, int*, int); | 129 typedef int (*cuda_cu_device_compute_capability_func_t)(int*, int*, int); |
150 static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex; | 157 static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex; |
151 static cuda_cu_memalloc_func_t _cuda_cu_memalloc; | 158 static cuda_cu_memalloc_func_t _cuda_cu_memalloc; |
152 static cuda_cu_memfree_func_t _cuda_cu_memfree; | 159 static cuda_cu_memfree_func_t _cuda_cu_memfree; |
153 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod; | 160 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod; |
154 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh; | 161 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh; |
 | 162 static cuda_cu_ctx_get_current_func_t _cuda_cu_ctx_get_current; |
155 static cuda_cu_ctx_set_current_func_t _cuda_cu_ctx_set_current; | 163 static cuda_cu_ctx_set_current_func_t _cuda_cu_ctx_set_current; |
156 static cuda_cu_mem_host_register_func_t _cuda_cu_mem_host_register; | 164 static cuda_cu_mem_host_register_func_t _cuda_cu_mem_host_register; |
157 static cuda_cu_mem_host_get_device_pointer_func_t _cuda_cu_mem_host_get_device_pointer; | 165 static cuda_cu_mem_host_get_device_pointer_func_t _cuda_cu_mem_host_get_device_pointer; |
158 static cuda_cu_mem_host_unregister_func_t _cuda_cu_mem_host_unregister; | 166 static cuda_cu_mem_host_unregister_func_t _cuda_cu_mem_host_unregister; |
159 | 167 |