Mercurial > hg > graal-compiler
comparison src/gpu/ptx/gpu_ptx.cpp @ 10577:9c7d9e2c8326
PTX kernel execution - no args or return value
author | Morris Meyer <morris.meyer@oracle.com> |
---|---|
date | Sat, 29 Jun 2013 21:29:34 -0400 |
parents | a6632ef9c84d |
children | b0b368d38b40 |
comparison
equal
deleted
inserted
replaced
10576:aee899c96b0b | 10577:9c7d9e2c8326 |
---|---|
61 } else { | 61 } else { |
62 return NULL; | 62 return NULL; |
63 } | 63 } |
64 } | 64 } |
65 | 65 |
66 bool gpu::execute_kernel(address kernel) { | |
67 if (gpu::has_gpu_linkage()) { | |
68 return (gpu::Ptx::execute_kernel(kernel)); | |
69 } else { | |
70 return false; | |
71 } | |
72 } | |
73 | |
66 #define __CUDA_API_VERSION 5000 | 74 #define __CUDA_API_VERSION 5000 |
67 | 75 |
68 bool gpu::Ptx::initialize_gpu() { | 76 bool gpu::Ptx::initialize_gpu() { |
69 int status = _cuda_cu_init(0, __CUDA_API_VERSION); | 77 int status = _cuda_cu_init(0, __CUDA_API_VERSION); |
70 if (TraceWarpLoading) { | 78 if (TraceWarpLoading) { |
137 tty->print_cr("gpu_ptx::_cuda_cu_module_get_function(%s):%x %d", name, cu_function, status); | 145 tty->print_cr("gpu_ptx::_cuda_cu_module_get_function(%s):%x %d", name, cu_function, status); |
138 } | 146 } |
139 return cu_function; | 147 return cu_function; |
140 } | 148 } |
141 | 149 |
150 bool gpu::Ptx::execute_kernel(address kernel) { | |
151 // grid dimensionality | |
152 unsigned int gridX = 1; | |
153 unsigned int gridY = 1; | |
154 unsigned int gridZ = 1; | |
155 | |
156 // thread dimensionality | |
157 unsigned int blockX = 1; | |
158 unsigned int blockY = 1; | |
159 unsigned int blockZ = 1; | |
160 | |
161 int *cu_function = (int *)kernel; | |
162 | |
163 int status = _cuda_cu_launch_kernel(cu_function, | |
164 gridX, gridY, gridZ, | |
165 blockX, blockY, blockZ, | |
166 0, NULL, NULL, NULL); | |
167 tty->print_cr("gpu_ptx::_cuda_cu_launch_kernel(%x): %d", kernel, status); | |
168 return status == 0; // CUDA_SUCCESS | |
169 } | |
142 | 170 |
143 #ifdef __APPLE__ | 171 #ifdef __APPLE__ |
144 bool gpu::Ptx::probe_linkage_apple() { | 172 bool gpu::Ptx::probe_linkage_apple() { |
145 void *handle = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_LAZY); | 173 void *handle = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_LAZY); |
146 if (handle != NULL) { | 174 if (handle != NULL) { |
162 CAST_TO_FN_PTR(cuda_cu_device_compute_capability_func_t, dlsym(handle, "cuDeviceComputeCapability")); | 190 CAST_TO_FN_PTR(cuda_cu_device_compute_capability_func_t, dlsym(handle, "cuDeviceComputeCapability")); |
163 _cuda_cu_module_get_function = | 191 _cuda_cu_module_get_function = |
164 CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, dlsym(handle, "cuModuleGetFunction")); | 192 CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, dlsym(handle, "cuModuleGetFunction")); |
165 _cuda_cu_module_load_data_ex = | 193 _cuda_cu_module_load_data_ex = |
166 CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, dlsym(handle, "cuModuleLoadDataEx")); | 194 CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, dlsym(handle, "cuModuleLoadDataEx")); |
195 _cuda_cu_launch_kernel = | |
196 CAST_TO_FN_PTR(cuda_cu_launch_kernel_func_t, dlsym(handle, "cuLaunchKernel")); | |
167 return true; | 197 return true; |
168 } | 198 } |
169 return false; | 199 return false; |
170 } | 200 } |
171 #endif | 201 #endif |