diff src/gpu/ptx/gpu_ptx.cpp @ 10577:9c7d9e2c8326

PTX kernel execution - no args or return value
author Morris Meyer <morris.meyer@oracle.com>
date Sat, 29 Jun 2013 21:29:34 -0400
parents a6632ef9c84d
children b0b368d38b40
line wrap: on
line diff
--- a/src/gpu/ptx/gpu_ptx.cpp	Sat Jun 29 11:40:52 2013 +0200
+++ b/src/gpu/ptx/gpu_ptx.cpp	Sat Jun 29 21:29:34 2013 -0400
@@ -63,6 +63,14 @@
   }
 }
 
+bool gpu::execute_kernel(address kernel) {
+  // Forward to the PTX backend only when has_gpu_linkage() holds.
+  if (!gpu::has_gpu_linkage()) {
+    return false;
+  }
+  return gpu::Ptx::execute_kernel(kernel);
+}
+
 #define __CUDA_API_VERSION 5000
 
 bool gpu::Ptx::initialize_gpu() {
@@ -139,6 +147,26 @@
   return cu_function;
 }
 
+bool gpu::Ptx::execute_kernel(address kernel) {
+  // Launches 'kernel' once through the resolved cuLaunchKernel entry point
+  // and returns true when the driver reports status 0 (CUDA_SUCCESS).
+  // grid dimensionality
+  unsigned int gridX = 1;
+  unsigned int gridY = 1;
+  unsigned int gridZ = 1;
+  // thread dimensionality
+  unsigned int blockX = 1;
+  unsigned int blockY = 1;
+  unsigned int blockZ = 1;
+  int *cu_function = (int *)kernel;
+  int status = _cuda_cu_launch_kernel(cu_function,
+                                      gridX, gridY, gridZ,
+                                      blockX, blockY, blockZ,
+                                      0, NULL, NULL, NULL);
+  // Print the handle with %p: %x expects an unsigned int, not a pointer.
+  tty->print_cr("gpu_ptx::_cuda_cu_launch_kernel(%p): %d", (void *)kernel, status);
+  return status == 0;  // CUDA_SUCCESS
+}
 
 #ifdef __APPLE__
 bool gpu::Ptx::probe_linkage_apple() {
@@ -164,6 +192,8 @@
         CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, dlsym(handle, "cuModuleGetFunction"));
     _cuda_cu_module_load_data_ex =
         CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, dlsym(handle, "cuModuleLoadDataEx"));
+    _cuda_cu_launch_kernel =
+        CAST_TO_FN_PTR(cuda_cu_launch_kernel_func_t, dlsym(handle, "cuLaunchKernel"));
     return true;
   }
   return false;