comparison src/gpu/ptx/gpu_ptx.cpp @ 10577:9c7d9e2c8326

PTX kernel execution - no args or return value
author Morris Meyer <morris.meyer@oracle.com>
date Sat, 29 Jun 2013 21:29:34 -0400
parents a6632ef9c84d
children b0b368d38b40
comparison
equal deleted inserted replaced
10576:aee899c96b0b 10577:9c7d9e2c8326
61 } else { 61 } else {
62 return NULL; 62 return NULL;
63 } 63 }
64 } 64 }
65 65
66 bool gpu::execute_kernel(address kernel) {
67 if (gpu::has_gpu_linkage()) {
68 return (gpu::Ptx::execute_kernel(kernel));
69 } else {
70 return false;
71 }
72 }
73
66 #define __CUDA_API_VERSION 5000 74 #define __CUDA_API_VERSION 5000
67 75
68 bool gpu::Ptx::initialize_gpu() { 76 bool gpu::Ptx::initialize_gpu() {
69 int status = _cuda_cu_init(0, __CUDA_API_VERSION); 77 int status = _cuda_cu_init(0, __CUDA_API_VERSION);
70 if (TraceWarpLoading) { 78 if (TraceWarpLoading) {
137 tty->print_cr("gpu_ptx::_cuda_cu_module_get_function(%s):%x %d", name, cu_function, status); 145 tty->print_cr("gpu_ptx::_cuda_cu_module_get_function(%s):%x %d", name, cu_function, status);
138 } 146 }
139 return cu_function; 147 return cu_function;
140 } 148 }
141 149
150 bool gpu::Ptx::execute_kernel(address kernel) {
151 // grid dimensionality
152 unsigned int gridX = 1;
153 unsigned int gridY = 1;
154 unsigned int gridZ = 1;
155
156 // thread dimensionality
157 unsigned int blockX = 1;
158 unsigned int blockY = 1;
159 unsigned int blockZ = 1;
160
161 int *cu_function = (int *)kernel;
162
163 int status = _cuda_cu_launch_kernel(cu_function,
164 gridX, gridY, gridZ,
165 blockX, blockY, blockZ,
166 0, NULL, NULL, NULL);
167 tty->print_cr("gpu_ptx::_cuda_cu_launch_kernel(%x): %d", kernel, status);
168 return status == 0; // CUDA_SUCCESS
169 }
142 170
143 #ifdef __APPLE__ 171 #ifdef __APPLE__
144 bool gpu::Ptx::probe_linkage_apple() { 172 bool gpu::Ptx::probe_linkage_apple() {
145 void *handle = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_LAZY); 173 void *handle = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_LAZY);
146 if (handle != NULL) { 174 if (handle != NULL) {
162 CAST_TO_FN_PTR(cuda_cu_device_compute_capability_func_t, dlsym(handle, "cuDeviceComputeCapability")); 190 CAST_TO_FN_PTR(cuda_cu_device_compute_capability_func_t, dlsym(handle, "cuDeviceComputeCapability"));
163 _cuda_cu_module_get_function = 191 _cuda_cu_module_get_function =
164 CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, dlsym(handle, "cuModuleGetFunction")); 192 CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, dlsym(handle, "cuModuleGetFunction"));
165 _cuda_cu_module_load_data_ex = 193 _cuda_cu_module_load_data_ex =
166 CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, dlsym(handle, "cuModuleLoadDataEx")); 194 CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, dlsym(handle, "cuModuleLoadDataEx"));
195 _cuda_cu_launch_kernel =
196 CAST_TO_FN_PTR(cuda_cu_launch_kernel_func_t, dlsym(handle, "cuLaunchKernel"));
167 return true; 197 return true;
168 } 198 }
169 return false; 199 return false;
170 } 200 }
171 #endif 201 #endif