comparison src/gpu/ptx/vm/gpu_ptx.cpp @ 11527:c99e65785936

Improvements to PTX codegen; allows more PTX tests that run on the device to pass.
author bharadwaj
date Wed, 04 Sep 2013 10:47:37 -0400
parents 49bb1bc983c6
children 91e5f927af63
comparison legend: equal | deleted | inserted | replaced
left column: 11526:db297343d44e | right column: 11527:c99e65785936
36 36
37 gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init; 37 gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init;
38 gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create; 38 gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create;
39 gpu::Ptx::cuda_cu_ctx_destroy_func_t gpu::Ptx::_cuda_cu_ctx_destroy; 39 gpu::Ptx::cuda_cu_ctx_destroy_func_t gpu::Ptx::_cuda_cu_ctx_destroy;
40 gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize; 40 gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize;
41 gpu::Ptx::cuda_cu_ctx_set_current_func_t gpu::Ptx::_cuda_cu_ctx_set_current;
41 gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count; 42 gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count;
42 gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name; 43 gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name;
43 gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get; 44 gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get;
44 gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability; 45 gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability;
45 gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute; 46 gpu::Ptx::cuda_cu_device_get_attribute_func_t gpu::Ptx::_cuda_cu_device_get_attribute;
85 int status = _cuda_cu_init(0); 86 int status = _cuda_cu_init(0);
86 if (status != GRAAL_CUDA_SUCCESS) { 87 if (status != GRAAL_CUDA_SUCCESS) {
87 tty->print_cr("Failed to initialize CUDA device"); 88 tty->print_cr("Failed to initialize CUDA device");
88 return false; 89 return false;
89 } 90 }
90 91
91 if (TraceGPUInteraction) { 92 if (TraceGPUInteraction) {
92 tty->print_cr("CUDA driver initialization: Success"); 93 tty->print_cr("CUDA driver initialization: Success");
93 } 94 }
94 95
95 /* Get the number of compute-capable device count */ 96 /* Get the number of compute-capable device count */
106 } 107 }
107 108
108 if (TraceGPUInteraction) { 109 if (TraceGPUInteraction) {
109 tty->print_cr("[CUDA] Number of compute-capable devices found: %d", device_count); 110 tty->print_cr("[CUDA] Number of compute-capable devices found: %d", device_count);
110 } 111 }
111 112
112 /* Get the handle to the first compute device */ 113 /* Get the handle to the first compute device */
113 int device_id = 0; 114 int device_id = 0;
114 /* Compute-capable device handle */ 115 /* Compute-capable device handle */
115 status = _cuda_cu_device_get(&_cu_device, device_id); 116 status = _cuda_cu_device_get(&_cu_device, device_id);
116 117
193 // 3. set pointer to set the Maximum # of registers (32) for the kernel 194 // 3. set pointer to set the Maximum # of registers (32) for the kernel
194 int jit_register_count = 32; 195 int jit_register_count = 32;
195 jit_options[2] = GRAAL_CU_JIT_MAX_REGISTERS; 196 jit_options[2] = GRAAL_CU_JIT_MAX_REGISTERS;
196 jit_option_values[2] = (void *)(size_t)jit_register_count; 197 jit_option_values[2] = (void *)(size_t)jit_register_count;
197 198
199 /* Create CUDA context to compile and execute the kernel */
200 int status = _cuda_cu_ctx_create(&_device_context, 0, _cu_device);
201
202 if (status != GRAAL_CUDA_SUCCESS) {
203 tty->print_cr("[CUDA] Failed to create CUDA context for device: %d", _cu_device);
204 return NULL;
205 }
206
207 if (TraceGPUInteraction) {
208 tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device);
209 }
210
211 status = _cuda_cu_ctx_set_current(_device_context);
212
213 if (status != GRAAL_CUDA_SUCCESS) {
214 tty->print_cr("[CUDA] Failed to set current context for device: %d", _cu_device);
215 return NULL;
216 }
217
218 if (TraceGPUInteraction) {
219 tty->print_cr("[CUDA] Success: Set current context for device: %d", _cu_device);
220 }
221
198 if (TraceGPUInteraction) { 222 if (TraceGPUInteraction) {
199 tty->print_cr("[CUDA] PTX Kernel\n%s", code); 223 tty->print_cr("[CUDA] PTX Kernel\n%s", code);
200 tty->print_cr("[CUDA] Function name : %s", name); 224 tty->print_cr("[CUDA] Function name : %s", name);
201 225
202 }
203
204 /* Create CUDA context to compile and execute the kernel */
205 int status = _cuda_cu_ctx_create(&_device_context, 0, _cu_device);
206
207 if (status != GRAAL_CUDA_SUCCESS) {
208 tty->print_cr("[CUDA] Failed to create CUDA context for device: %d", _cu_device);
209 return NULL;
210 }
211
212 if (TraceGPUInteraction) {
213 tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device);
214 } 226 }
215 227
216 /* Load module's data with compiler options */ 228 /* Load module's data with compiler options */
217 status = _cuda_cu_module_load_data_ex(&cu_module, (void*) code, jit_num_options, 229 status = _cuda_cu_module_load_data_ex(&cu_module, (void*) code, jit_num_options,
218 jit_options, (void **)jit_option_values); 230 jit_options, (void **)jit_option_values);
219 if (status != GRAAL_CUDA_SUCCESS) { 231 if (status != GRAAL_CUDA_SUCCESS) {
220 if (status == GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU) { 232 if (status == GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU) {
221 tty->print_cr("[CUDA] Check for malformed PTX kernel or incorrect PTX compilation options"); 233 tty->print_cr("[CUDA] Check for malformed PTX kernel or incorrect PTX compilation options");
222 } 234 }
223 tty->print_cr("[CUDA] *** Error (%d) Failed to load module data with online compiler options for method %s", 235 tty->print_cr("[CUDA] *** Error (%d) Failed to load module data with online compiler options for method %s",
224 status, name); 236 status, name);
225 return NULL; 237 return NULL;
226 } 238 }
227 239
228 if (TraceGPUInteraction) { 240 if (TraceGPUInteraction) {
253 265
254 // thread dimensionality 266 // thread dimensionality
255 unsigned int blockX = 1; 267 unsigned int blockX = 1;
256 unsigned int blockY = 1; 268 unsigned int blockY = 1;
257 unsigned int blockZ = 1; 269 unsigned int blockZ = 1;
258 270
259 struct CUfunc_st* cu_function = (struct CUfunc_st*) kernel; 271 struct CUfunc_st* cu_function = (struct CUfunc_st*) kernel;
260 272
261 void * config[5] = { 273 void * config[5] = {
262 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, ptxka._kernelArgBuffer, 274 GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, ptxka._kernelArgBuffer,
263 GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &(ptxka._bufferOffset), 275 GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &(ptxka._bufferOffset),
364 376
365 bool gpu::Ptx::probe_linkage() { 377 bool gpu::Ptx::probe_linkage() {
366 if (cuda_library_name != NULL) { 378 if (cuda_library_name != NULL) {
367 char *buffer = (char*)malloc(STD_BUFFER_SIZE); 379 char *buffer = (char*)malloc(STD_BUFFER_SIZE);
368 void *handle = os::dll_load(cuda_library_name, buffer, STD_BUFFER_SIZE); 380 void *handle = os::dll_load(cuda_library_name, buffer, STD_BUFFER_SIZE);
369 free(buffer); 381 free(buffer);
370 if (handle != NULL) { 382 if (handle != NULL) {
371 _cuda_cu_init = 383 _cuda_cu_init =
372 CAST_TO_FN_PTR(cuda_cu_init_func_t, os::dll_lookup(handle, "cuInit")); 384 CAST_TO_FN_PTR(cuda_cu_init_func_t, os::dll_lookup(handle, "cuInit"));
373 _cuda_cu_ctx_create = 385 _cuda_cu_ctx_create =
374 CAST_TO_FN_PTR(cuda_cu_ctx_create_func_t, os::dll_lookup(handle, "cuCtxCreate")); 386 CAST_TO_FN_PTR(cuda_cu_ctx_create_func_t, os::dll_lookup(handle, "cuCtxCreate"));
375 _cuda_cu_ctx_destroy = 387 _cuda_cu_ctx_destroy =
376 CAST_TO_FN_PTR(cuda_cu_ctx_destroy_func_t, os::dll_lookup(handle, "cuCtxDestroy")); 388 CAST_TO_FN_PTR(cuda_cu_ctx_destroy_func_t, os::dll_lookup(handle, "cuCtxDestroy"));
377 _cuda_cu_ctx_synchronize = 389 _cuda_cu_ctx_synchronize =
378 CAST_TO_FN_PTR(cuda_cu_ctx_synchronize_func_t, os::dll_lookup(handle, "cuCtxSynchronize")); 390 CAST_TO_FN_PTR(cuda_cu_ctx_synchronize_func_t, os::dll_lookup(handle, "cuCtxSynchronize"));
391 _cuda_cu_ctx_set_current =
392 CAST_TO_FN_PTR(cuda_cu_ctx_set_current_func_t, os::dll_lookup(handle, "cuCtxSetCurrent"));
379 _cuda_cu_device_get_count = 393 _cuda_cu_device_get_count =
380 CAST_TO_FN_PTR(cuda_cu_device_get_count_func_t, os::dll_lookup(handle, "cuDeviceGetCount")); 394 CAST_TO_FN_PTR(cuda_cu_device_get_count_func_t, os::dll_lookup(handle, "cuDeviceGetCount"));
381 _cuda_cu_device_get_name = 395 _cuda_cu_device_get_name =
382 CAST_TO_FN_PTR(cuda_cu_device_get_name_func_t, os::dll_lookup(handle, "cuDeviceGetName")); 396 CAST_TO_FN_PTR(cuda_cu_device_get_name_func_t, os::dll_lookup(handle, "cuDeviceGetName"));
383 _cuda_cu_device_get = 397 _cuda_cu_device_get =
414 return false; 428 return false;
415 } 429 }
416 tty->print_cr("Failed to find CUDA linkage"); 430 tty->print_cr("Failed to find CUDA linkage");
417 return false; 431 return false;
418 } 432 }
419