changeset 11283:1cd1f8ff70a1

CR-20: PTX kernel invocation with arguments - from Bharadwaj
author Morris Meyer <morris.meyer@oracle.com>
date Sat, 10 Aug 2013 14:38:30 -0400
parents 12661a449226
children d876002b98e6
files graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java src/gpu/ptx/vm/gpu_ptx.cpp src/gpu/ptx/vm/gpu_ptx.hpp src/share/vm/graal/graalCompilerToGPU.cpp src/share/vm/runtime/gpu.hpp
diffstat 6 files changed, 101 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java	Sat Aug 10 10:08:56 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/IntegerPTXTest.java	Sat Aug 10 14:38:30 2013 -0400
@@ -31,11 +31,11 @@
 
     @Test
     public void testAdd() {
-        compile("testAdd2I");
-        compile("testAdd2L");
-        compile("testAdd2B");
-        compile("testAddIConst");
-        compile("testAddConstI");
+        invoke(compile("testAdd2I"), 8, 4);
+        invoke(compile("testAdd2L"), 12, 6);
+        invoke(compile("testAdd2B"), 6, 4);
+        invoke(compile("testAddIConst"), 5);
+        invoke(compile("testAddConstI"), 7);
     }
 
     public static int testAdd2I(int a, int b) {
@@ -60,10 +60,10 @@
 
     @Test
     public void testSub() {
-        compile("testSub2I");
-        compile("testSub2L");
-        compile("testSubIConst");
-        compile("testSubConstI");
+        invoke(compile("testSub2I"), 8, 4);
+        invoke(compile("testSub2L"), 12, 6);
+        invoke(compile("testSubIConst"), 35);
+        invoke(compile("testSubConstI"), 12);
     }
 
     public static int testSub2I(int a, int b) {
@@ -84,10 +84,10 @@
 
     @Test
     public void testMul() {
-        compile("testMul2I");
-        compile("testMul2L");
-        compile("testMulIConst");
-        compile("testMulConstI");
+        invoke(compile("testMul2I"), 8, 4);
+        invoke(compile("testMul2L"), 12, 6);
+        invoke(compile("testMulIConst"), 4);
+        invoke(compile("testMulConstI"), 5);
     }
 
     public static int testMul2I(int a, int b) {
@@ -108,10 +108,10 @@
 
     @Test
     public void testDiv() {
-        compile("testDiv2I");
-        compile("testDiv2L");
-        compile("testDivIConst");
-        compile("testDivConstI");
+        invoke(compile("testDiv2I"), 8, 4);
+        invoke(compile("testDiv2L"), 12, 6);
+        invoke(compile("testDivIConst"), 64);
+        invoke(compile("testDivConstI"), 8);
     }
 
     public static int testDiv2I(int a, int b) {
@@ -132,8 +132,8 @@
 
     @Test
     public void testRem() {
-        compile("testRem2I");
-        compile("testRem2L");
+        invoke(compile("testRem2I"), 8, 4);
+        invoke(compile("testRem2L"), 12, 6);
     }
 
     public static int testRem2I(int a, int b) {
@@ -146,12 +146,12 @@
 
     @Test
     public void testIntConversion() {
-        compile("testI2L");
-        compile("testL2I");
-        compile("testI2C");
-        compile("testI2B");
-        compile("testI2F");
-        compile("testI2D");
+        invoke(compile("testI2L"), 8);
+        invoke(compile("testL2I"), 12);
+        invoke(compile("testI2C"), 65);
+        invoke(compile("testI2B"), 9);
+        invoke(compile("testI2F"), 17);
+        invoke(compile("testI2D"), 22);
     }
 
     public static long testI2L(int a) {
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java	Sat Aug 10 10:08:56 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java	Sat Aug 10 14:38:30 2013 -0400
@@ -33,6 +33,7 @@
 import com.oracle.graal.compiler.test.GraalCompilerTest;
 import com.oracle.graal.debug.Debug;
 import com.oracle.graal.hotspot.meta.HotSpotRuntime;
+import com.oracle.graal.hotspot.meta.HotSpotResolvedJavaMethod;
 import com.oracle.graal.java.GraphBuilderConfiguration;
 import com.oracle.graal.java.GraphBuilderPhase;
 import com.oracle.graal.nodes.StructuredGraph;
@@ -42,6 +43,7 @@
 import com.oracle.graal.phases.PhasePlan.PhasePosition;
 import com.oracle.graal.phases.tiers.*;
 import com.oracle.graal.ptx.PTX;
+import java.lang.reflect.Modifier;
 
 public abstract class PTXTestBase extends GraalCompilerTest {
 
@@ -69,7 +71,8 @@
          * of ECC failure on kernel invocation.  
          */
         CompilationResult result = GraalCompiler.compileGraph(graph, cc, graph.method(), runtime,
-                                                              graalRuntime().getReplacements(), ptxBackend, target, null, phasePlan,
+                                                              graalRuntime().getReplacements(), ptxBackend, target,
+                                                              null, phasePlan,
                                                               OptimisticOptimizations.NONE, new SpeculationLog(),
                                                               Suites.createDefaultSuites(), new ExternalCompilationResult());
         return result;
@@ -81,7 +84,15 @@
 
     protected void invoke(CompilationResult result, Object... args) {
         try {
-            Object[] executeArgs = argsWithReceiver(this, args);
+            if (((ExternalCompilationResult) result).getEntryPoint() == 0) {
+                Debug.dump(result, "[CUDA] *** Null entry point - Not launching kernel");
+                return;
+            }
+
+            /* Check if the method compiled is static */
+            HotSpotResolvedJavaMethod compiledMethod = (HotSpotResolvedJavaMethod) sg.method();
+            boolean isStatic = Modifier.isStatic(compiledMethod.getModifiers());
+            Object[] executeArgs = argsWithReceiver((isStatic ? null : this), args);
             HotSpotRuntime hsr = (HotSpotRuntime) runtime;
             InstalledCode installedCode = hsr.addExternalMethod(sg.method(), result, sg);
             installedCode.executeVarargs(executeArgs);
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Sat Aug 10 10:08:56 2013 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Sat Aug 10 14:38:30 2013 -0400
@@ -66,9 +66,9 @@
   }
 }
 
-bool gpu::execute_kernel(address kernel) {
+bool gpu::execute_kernel(address kernel, JavaCallArguments * jca) {
   if (gpu::has_gpu_linkage()) {
-    return (gpu::Ptx::execute_kernel(kernel));
+    return (gpu::Ptx::execute_kernel(kernel, jca));
   } else {
     return false;
   }
@@ -120,7 +120,7 @@
   }
 
   /* Get device attributes */
-  int minor, major;
+  int minor, major, unified_addressing;
   status = _cuda_cu_device_get_attribute(&minor, GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cu_device);
 
   if (status != GRAAL_CUDA_SUCCESS) {
@@ -139,6 +139,18 @@
     tty->print_cr("[CUDA] Compatibility version of device %d: %d.%d", cu_device, major, minor);
   }
 
+  status = _cuda_cu_device_get_attribute(&unified_addressing, GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING, cu_device);
+
+  if (status != GRAAL_CUDA_SUCCESS) {
+    tty->print_cr("[CUDA] Failed to query unified addressing mode of device: %d", cu_device);
+    return false;
+  }
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Unified addressing support on device %d: %d", cu_device, unified_addressing);
+  }
+
+
   /* Get device name */
   char device_name[256];
   status = _cuda_cu_device_get_name(device_name, 256, cu_device);
@@ -218,7 +230,7 @@
   status = _cuda_cu_module_get_function(&cu_function, cu_module, name);
 
   if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] Failed to get function %s", name);
+    tty->print_cr("[CUDA] *** Error: Failed to get function %s", name);
     return NULL;
   }
 
@@ -228,7 +240,7 @@
   return cu_function;
 }
 
-bool gpu::Ptx::execute_kernel(address kernel) {
+bool gpu::Ptx::execute_kernel(address kernel, JavaCallArguments * jca) {
   // grid dimensionality
   unsigned int gridX = 1;
   unsigned int gridY = 1;
@@ -241,6 +253,15 @@
   
   int *cu_function = (int *)kernel;
 
+  char * paramBuffer = (char *) jca->parameters();
+  size_t paramBufferSz = (size_t) jca->size_of_parameters();
+
+  void * config[] = {
+    GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, paramBuffer,
+    GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &paramBufferSz,
+    GRAAL_CU_LAUNCH_PARAM_END
+  };
+
   if (kernel == NULL) {
     return false;
   }
@@ -251,7 +272,7 @@
   int status = _cuda_cu_launch_kernel(cu_function,
                                       gridX, gridY, gridZ,
                                       blockX, blockY, blockZ,
-                                      0, NULL, NULL, NULL);
+                                      0, NULL, NULL, config);
   if (status != GRAAL_CUDA_SUCCESS) {
     tty->print_cr("[CUDA] Failed to launch kernel");
     return false;
--- a/src/gpu/ptx/vm/gpu_ptx.hpp	Sat Aug 10 10:08:56 2013 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.hpp	Sat Aug 10 14:38:30 2013 -0400
@@ -30,6 +30,8 @@
  * These definitions are for convenience.
  */
 #define GRAAL_CUDA_SUCCESS                                   0
+/** Device shares a unified address space with the host */
+#define GRAAL_CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING        41
 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR  75
 #define GRAAL_CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR  76
 #define GRAAL_CU_JIT_MAX_REGISTERS                           0
@@ -38,6 +40,33 @@
 #define GRAAL_CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES              4
 #define GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU                 209
 
+/**
+ * End of array terminator for the extra parameter to
+ * ::cuLaunchKernel
+ */
+#define GRAAL_CU_LAUNCH_PARAM_END            ((void*)0x00)
+
+/**
+ * Indicator that the next value in the extra parameter to
+ * ::cuLaunchKernel will be a pointer to a buffer containing all kernel
+ * parameters used for launching kernel f.  This buffer needs to
+ * honor all alignment/padding requirements of the individual parameters.
+ * If ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the
+ * extra array, then ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER will have no
+ * effect.
+ */
+#define GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
+
+/**
+ * Indicator that the next value in the extra parameter to
+ * ::cuLaunchKernel will be a pointer to a size_t which contains the
+ * size of the buffer specified with ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER.
+ * It is required that ::GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER also be specified
+ * in the extra array if the value associated with
+ * ::GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE is not zero.
+ */
+#define GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE    ((void*)0x02)
+
 class Ptx {
   friend class gpu;
 
@@ -45,7 +74,7 @@
   static bool probe_linkage();
   static bool initialize_gpu();
   static void * generate_kernel(unsigned char *code, int code_len, const char *name);
-  static bool execute_kernel(address kernel);
+  static bool execute_kernel(address kernel, JavaCallArguments *);
   
 private:
   typedef int (*cuda_cu_init_func_t)(unsigned int);
--- a/src/share/vm/graal/graalCompilerToGPU.cpp	Sat Aug 10 10:08:56 2013 +0200
+++ b/src/share/vm/graal/graalCompilerToGPU.cpp	Sat Aug 10 14:38:30 2013 -0400
@@ -91,8 +91,11 @@
   jlong startValue = HotSpotInstalledCode::start(hotspotInstalledCode);
 
   // JavaCalls::call(&result, mh, &jca, CHECK_NULL);
-  tty->print_cr("executeExternalMethodVarargs: start: %x", (address)startValue);
-  gpu::execute_kernel((address)startValue);
+  // tty->print_cr("executeExternalMethodVarargs: start: %x", (address)startValue);
+
+  if (!gpu::execute_kernel((address)startValue, &jca)) {
+    return NULL;
+  }
 
   if (jap.get_ret_type() == T_VOID) {
     return NULL;
--- a/src/share/vm/runtime/gpu.hpp	Sat Aug 10 10:08:56 2013 +0200
+++ b/src/share/vm/runtime/gpu.hpp	Sat Aug 10 14:38:30 2013 -0400
@@ -43,7 +43,7 @@
   
   static void * generate_kernel(unsigned char *code, int code_len, const char *name);
 
-  static bool execute_kernel(address kernel);
+  static bool execute_kernel(address kernel, JavaCallArguments * jca);
 
   static void set_available(bool value) {
     _available = value;