comparison src/gpu/ptx/vm/gpu_ptx.hpp @ 13753:80cd5c3b8827

partially fixed passing of object parameters to PTX kernels; use a C++ object for managing resource allocation and cleanup around a PTX kernel execution
author Doug Simon <doug.simon@oracle.com>
date Fri, 24 Jan 2014 18:01:36 +0100
parents 27acedac70b7
children 49db2c1e3bee
comparison
equal deleted inserted replaced
13752:34ab58984118 13753:80cd5c3b8827
82 82
83 /* 83 /*
84 * Context creation flags 84 * Context creation flags
85 */ 85 */
86 86
87 #define GRAAL_CU_CTX_MAP_HOST 0x08 87 #define GRAAL_CU_CTX_MAP_HOST 0x08
88 #define GRAAL_CU_CTX_SCHED_BLOCKING_SYNC 0x04
88 89
89 class Ptx { 90 class Ptx {
90 friend class gpu; 91 friend class gpu;
92 friend class PtxCall;
91 93
92 protected: 94 protected:
93 static bool probe_linkage(); 95 static bool probe_linkage();
94 static bool initialize_gpu(); 96 static bool initialize_gpu();
95 static unsigned int total_cores(); 97 static unsigned int total_cores();
96 static void * generate_kernel(unsigned char *code, int code_len, const char *name); 98 static void* get_context();
99 static void* generate_kernel(unsigned char *code, int code_len, const char *name);
97 static bool execute_warp(int dimX, int dimY, int dimZ, address kernel, PTXKernelArguments & ka, JavaValue &ret); 100 static bool execute_warp(int dimX, int dimY, int dimZ, address kernel, PTXKernelArguments & ka, JavaValue &ret);
98 static bool execute_kernel(address kernel, PTXKernelArguments & ka, JavaValue &ret); 101 static bool execute_kernel(address kernel, PTXKernelArguments & ka, JavaValue &ret);
99 public: 102 public:
100 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) 103 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
101 typedef unsigned long long CUdeviceptr; 104 typedef unsigned long long CUdeviceptr;
104 #endif 107 #endif
105 108
106 typedef int CUdevice; /* CUDA device */ 109 typedef int CUdevice; /* CUDA device */
107 110
108 static jlong execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ, 111 static jlong execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ,
109 jlong parametersAndReturnValueBuffer, 112 jlong buffer,
110 jint parametersAndReturnValueBufferSize, 113 jint bufferSize,
114 jint objectParametersCount,
115 jlong objectParametersOffsets,
116 jlong pinnedObjects,
111 int encodedReturnTypeSize); 117 int encodedReturnTypeSize);
112 118
113 private: 119 private:
114 typedef int (*cuda_cu_init_func_t)(unsigned int); 120 typedef int (*cuda_cu_init_func_t)(unsigned int);
115 typedef int (*cuda_cu_ctx_create_func_t)(void*, unsigned int, CUdevice); 121 typedef int (*cuda_cu_ctx_create_func_t)(void*, unsigned int, CUdevice);
116 typedef int (*cuda_cu_ctx_destroy_func_t)(void*); 122 typedef int (*cuda_cu_ctx_destroy_func_t)(void*);
117 typedef int (*cuda_cu_ctx_synchronize_func_t)(void); 123 typedef int (*cuda_cu_ctx_synchronize_func_t)(void);
124 typedef int (*cuda_cu_ctx_get_current_func_t)(void*);
118 typedef int (*cuda_cu_ctx_set_current_func_t)(void*); 125 typedef int (*cuda_cu_ctx_set_current_func_t)(void*);
119 typedef int (*cuda_cu_device_get_count_func_t)(int*); 126 typedef int (*cuda_cu_device_get_count_func_t)(int*);
120 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int); 127 typedef int (*cuda_cu_device_get_name_func_t)(char*, int, int);
121 typedef int (*cuda_cu_device_get_func_t)(int*, int); 128 typedef int (*cuda_cu_device_get_func_t)(int*, int);
122 typedef int (*cuda_cu_device_compute_capability_func_t)(int*, int*, int); 129 typedef int (*cuda_cu_device_compute_capability_func_t)(int*, int*, int);
150 static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex; 157 static cuda_cu_module_load_data_ex_func_t _cuda_cu_module_load_data_ex;
151 static cuda_cu_memalloc_func_t _cuda_cu_memalloc; 158 static cuda_cu_memalloc_func_t _cuda_cu_memalloc;
152 static cuda_cu_memfree_func_t _cuda_cu_memfree; 159 static cuda_cu_memfree_func_t _cuda_cu_memfree;
153 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod; 160 static cuda_cu_memcpy_htod_func_t _cuda_cu_memcpy_htod;
154 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh; 161 static cuda_cu_memcpy_dtoh_func_t _cuda_cu_memcpy_dtoh;
162 static cuda_cu_ctx_get_current_func_t _cuda_cu_ctx_get_current;
155 static cuda_cu_ctx_set_current_func_t _cuda_cu_ctx_set_current; 163 static cuda_cu_ctx_set_current_func_t _cuda_cu_ctx_set_current;
156 static cuda_cu_mem_host_register_func_t _cuda_cu_mem_host_register; 164 static cuda_cu_mem_host_register_func_t _cuda_cu_mem_host_register;
157 static cuda_cu_mem_host_get_device_pointer_func_t _cuda_cu_mem_host_get_device_pointer; 165 static cuda_cu_mem_host_get_device_pointer_func_t _cuda_cu_mem_host_get_device_pointer;
158 static cuda_cu_mem_host_unregister_func_t _cuda_cu_mem_host_unregister; 166 static cuda_cu_mem_host_unregister_func_t _cuda_cu_mem_host_unregister;
159 167