changeset 11822:365d8f385fb5

PTX: one-dimensional parallel warp invocation, ParallelOver annotation
author Morris Meyer <morris.meyer@oracle.com>
date Sun, 29 Sep 2013 14:47:12 -0400
parents d8659ad83fcc
children 6440f50c1ea8
files graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/ArrayPTXTest.java graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/ParallelOver.java graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/ThreadDimension.java graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/Warp.java graal/com.oracle.graal.lir/src/com/oracle/graal/lir/Variable.java src/gpu/ptx/vm/gpu_ptx.cpp src/gpu/ptx/vm/gpu_ptx.hpp src/share/vm/graal/graalCompilerToGPU.cpp src/share/vm/runtime/gpu.cpp src/share/vm/runtime/gpu.hpp
diffstat 16 files changed, 268 insertions(+), 93 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Sat Sep 28 21:06:12 2013 -0400
+++ b/graal/com.oracle.graal.asm.ptx/src/com/oracle/graal/asm/ptx/PTXAssembler.java	Sun Sep 29 14:47:12 2013 -0400
@@ -288,7 +288,13 @@
         }
 
         public String emitVariable(Variable v) {
-            return (" %r" + v.index);
+            // Variables carrying an explicit name are emitted under that name;
+            // all others use the generated register name " %r<index>".
+            final String explicitName = v.getName();
+            if (explicitName != null) {
+                return explicitName;
+            }
+            return (" %r" + v.index);
         }
     }
 
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/ArrayPTXTest.java	Sat Sep 28 21:06:12 2013 -0400
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/ArrayPTXTest.java	Sun Sep 29 14:47:12 2013 -0400
@@ -22,9 +22,11 @@
  */
 package com.oracle.graal.compiler.ptx.test;
 
-import static com.oracle.graal.lir.ptx.Warp.ThreadDimension.*;
+import static com.oracle.graal.lir.ptx.ThreadDimension.*;
 
+import com.oracle.graal.lir.ptx.ParallelOver;
 import com.oracle.graal.lir.ptx.Warp;
+
 import java.lang.reflect.Method;
 import java.util.Arrays;
 import org.junit.Test;
@@ -33,81 +35,41 @@
 
     @Test
     public void testArray() {
-        int[] arrayI = {
-            1, 2, 3, 4, 5, 6, 7, 8, 9,
+        int[] array1 = {
+            1, 2, 3, 4, 5, 6, 7, 8, 9
+        };
+        int[] array2 = {
+            1, 2, 3, 4, 5, 6, 7, 8, 9
+        };
+        int[] array3 = {
+            1, 2, 3, 4, 5, 6, 7, 8, 9
         };
-        invoke(compile("testStoreArray1I"), arrayI, 2);
-        printReport("testStoreArray1I: " + Arrays.toString(arrayI));
-        // compile("testArray1J");
-        // compile("testArray1B");
-        // compile("testArray1S");
-        // compile("testArray1C");
-        // compile("testArray1F");
-        // compile("testArray1D");
-        // compile("testArray1L");
-        // compile("testStoreArray1I");
-        // compile("testStoreArray1J");
-        // compile("testStoreArray1B");
-        // compile("testStoreArray1S");
-        // compile("testStoreArray1F");
-        // compile("testStoreArray1D");
-    }
+        // Baseline: the index 2 is passed as an ordinary argument.
+        invoke(compile("testStoreArray1I"), array1, 2);
+        printReport("testStoreArray1I: " + Arrays.toString(array1));
 
-    public static int testArray1I(int[] array, int i) {
-        return array[i];
+        invoke(compile("testStoreArrayWarp0"), array2, 2); // kernel's index parameter is annotated @Warp
+        printReport("testStoreArrayWarp0: " + Arrays.toString(array2));
+        // array3 is @ParallelOver(X): PTXTestBase launches with its length as the X dimension.
+        invoke(compile("testStoreArrayWarp1I"), array3, 2);
+        printReport("testStoreArrayWarp1I: " + Arrays.toString(array3));
+        // The results are only printed; this test asserts nothing.
     }
 
-    public static long testArray1J(long[] array, int i) {
-        return array[i];
-    }
-
-    public static byte testArray1B(byte[] array, int i) {
-        return array[i];
-    }
-
-    public static short testArray1S(short[] array, int i) {
-        return array[i];
-    }
-
-    public static char testArray1C(char[] array, int i) {
-        return array[i];
-    }
-
-    public static float testArray1F(float[] array, int i) {
-        return array[i];
-    }
-
-    public static double testArray1D(double[] array, int i) {
-        return array[i];
-    }
-
-    public static Object testArray1L(Object[] array, int i) {
-        return array[i];
-    }
-
-    public static void testStoreArray1I(int[] array, @Warp(dimension = X) int i) {
+    public static void testStoreArray1I(int[] array, int i) { // baseline kernel: i carries no @Warp annotation
         array[i] = 42;
     }
 
-    public static void testStoreArray1B(byte[] array, int i, byte val) {
-        array[i] = val;
-    }
-
-    public static void testStoreArray1S(short[] array, int i, short val) {
-        array[i] = val;
+    public static void testStoreArrayWarp0(int[] array,
+                                           @Warp(dimension = X) int i) { // PTXLIRGenerator routes @Warp params through emitWarpParam
+        array[i] = 42;
     }
 
-    public static void testStoreArray1J(long[] array, int i, long val) {
-        array[i] = val;
+    public static void testStoreArrayWarp1I(@ParallelOver(dimension = X) int[] array, // PTXTestBase uses array.length as the X launch dimension
+                                            @Warp(dimension = X) int i) {
+        array[i] = 42;
     }
 
-    public static void testStoreArray1F(float[] array, int i, float val) {
-        array[i] = val;
-    }
-
-    public static void testStoreArray1D(double[] array, int i, double val) {
-        array[i] = val;
-    }
 
     public static void printReport(String message) {
         // CheckStyle: stop system..print check
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java	Sat Sep 28 21:06:12 2013 -0400
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java	Sun Sep 29 14:47:12 2013 -0400
@@ -32,11 +32,13 @@
 import com.oracle.graal.compiler.ptx.PTXBackend;
 import com.oracle.graal.compiler.test.GraalCompilerTest;
 import com.oracle.graal.debug.Debug;
+import com.oracle.graal.hotspot.meta.HotSpotNmethod;
 import com.oracle.graal.hotspot.meta.HotSpotRuntime;
 import com.oracle.graal.hotspot.meta.HotSpotResolvedJavaMethod;
 import com.oracle.graal.hotspot.ptx.PTXHotSpotRuntime;
 import com.oracle.graal.java.GraphBuilderConfiguration;
 import com.oracle.graal.java.GraphBuilderPhase;
+import com.oracle.graal.lir.ptx.ParallelOver;
 import com.oracle.graal.nodes.StructuredGraph;
 import com.oracle.graal.nodes.spi.GraalCodeCacheProvider;
 import com.oracle.graal.phases.OptimisticOptimizations;
@@ -44,6 +46,8 @@
 import com.oracle.graal.phases.PhasePlan.PhasePosition;
 import com.oracle.graal.phases.tiers.*;
 import com.oracle.graal.ptx.PTX;
+
+import java.lang.annotation.Annotation;
 import java.lang.reflect.Modifier;
 
 public abstract class PTXTestBase extends GraalCompilerTest {
@@ -102,8 +106,42 @@
             boolean isStatic = Modifier.isStatic(compiledMethod.getModifiers());
             Object[] executeArgs = argsWithReceiver((isStatic ? null : this), args);
             HotSpotRuntime hsr = (HotSpotRuntime) runtime;
-            InstalledCode installedCode = hsr.addExternalMethod(sg.method(), result, sg);
-            Object r = installedCode.executeVarargs(executeArgs);
+            InstalledCode installedCode = hsr.addExternalMethod(compiledMethod, result, sg);
+            Annotation[][] params = compiledMethod.getParameterAnnotations();
+
+            int dimensionX = 1;
+            int dimensionY = 1;
+            int dimensionZ = 1;
+
+            for (int p = 0; p < params.length; p++) {
+                Annotation[] annos = params[p];
+                if (annos != null) {
+                    for (int a = 0; a < annos.length; a++) {
+                        Annotation aa = annos[a];
+                        if (args[p] instanceof int[] && aa.annotationType().equals(ParallelOver.class)) {
+                            int[] iarray = (int[]) args[p];
+                            ParallelOver threadBlockDimension = (ParallelOver) aa;
+                            switch (threadBlockDimension.dimension()) {
+                                case X:
+                                    dimensionX = iarray.length;
+                                    break;
+                                case Y:
+                                    dimensionY = iarray.length;
+                                    break;
+                                case Z:
+                                    dimensionZ = iarray.length;
+                                    break;
+                            }
+                        }
+                    }
+                }
+            }
+            Object r;
+            if (dimensionX != 1 || dimensionY != 1 || dimensionZ != 1) {
+                r = ((HotSpotNmethod) installedCode).executeParallel(dimensionX, dimensionY, dimensionZ, executeArgs);
+            } else {
+                r = installedCode.executeVarargs(executeArgs);
+            }
             return r;
         } catch (Throwable th) {
             th.printStackTrace();
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java	Sat Sep 28 21:06:12 2013 -0400
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXLIRGenerator.java	Sun Sep 29 14:47:12 2013 -0400
@@ -123,7 +123,9 @@
             if (isRegister(value)) {
                 return asRegister(value).asValue(value.getKind().getStackKind());
             } else if (isStackSlot(value)) {
-                return StackSlot.get(value.getKind().getStackKind(), asStackSlot(value).getRawOffset(), asStackSlot(value).getRawAddFrameSize());
+                return StackSlot.get(value.getKind().getStackKind(),
+                                     asStackSlot(value).getRawOffset(),
+                                     asStackSlot(value).getRawAddFrameSize());
             } else {
                 throw GraalInternalError.shouldNotReachHere();
             }
@@ -164,14 +166,30 @@
                 }
             }
             if (warpAnnotation != null) {
-                // setResult(local, emitWarpParam(param.getKind(), warpAnnotation));
+                setResult(local, emitWarpParam(param.getKind(), warpAnnotation));
+            } else {
+                setResult(local, emitLoadParam(param.getKind(), param, null));
             }
-            setResult(local, emitLoadParam(param.getKind(), param, null));
         }
     }
 
-    public Variable emitWarpParam(Kind kind, @SuppressWarnings("unused") Warp annotation) {
+    /** Binds a {@link Warp} parameter to the PTX thread index (%tid) of the annotated dimension. */
+    public Variable emitWarpParam(Kind kind, Warp annotation) {
         Variable result = newVariable(kind);
+        // A named variable is emitted under its name (see PTXAssembler.emitVariable), so the move below sources %tid.*.
+        Variable tid = newVariable(Kind.Char);
+        switch (annotation.dimension()) {
+            case X:
+                tid.setName("%tid.x");
+                break;
+            case Y:
+                tid.setName("%tid.y");
+                break;
+            case Z:
+                tid.setName("%tid.z"); // previously "%tid.y", which made Z alias the Y thread index
+                break;
+        }
+        emitMove(result, tid);
         return result;
     }
 
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java	Sat Sep 28 21:06:12 2013 -0400
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java	Sun Sep 29 14:47:12 2013 -0400
@@ -55,4 +55,7 @@
     long generateKernel(byte[] code, String name) throws InvalidInstalledCodeException;
 
     Object executeExternalMethodVarargs(Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException;
+
+    Object executeParallelMethodVarargs(int dimX, int dimY, int dimZ, // launch dimensions, forwarded to the VM's gpu::execute_warp
+                                        Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException;
 }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java	Sat Sep 28 21:06:12 2013 -0400
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java	Sun Sep 29 14:47:12 2013 -0400
@@ -39,4 +39,7 @@
     public native boolean deviceDetach();
 
     public native Object executeExternalMethodVarargs(Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException;
+
+    public native Object executeParallelMethodVarargs(int dimX, int dimY, int dimZ, // registered in the VM's CompilerToGPU_methods table
+                                                      Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException;
 }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java	Sat Sep 28 21:06:12 2013 -0400
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java	Sun Sep 29 14:47:12 2013 -0400
@@ -123,6 +123,15 @@
         return true;
     }
 
+    public Object executeParallel(int dimX, int dimY, int dimZ, Object... args) throws InvalidInstalledCodeException {
+        assert checkArgs(args);
+        // Only code for which isExternal() holds is supported on this path.
+        assert isExternal(); // for now
+        // Delegate to the GPU bridge; dimX/dimY/dimZ are passed through unchanged.
+        return graalRuntime().getCompilerToGPU().executeParallelMethodVarargs(dimX, dimY, dimZ, args, this);
+
+    }
+
     @Override
     public Object executeVarargs(Object... args) throws InvalidInstalledCodeException {
         assert checkArgs(args);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/ParallelOver.java	Sun Sep 29 14:47:12 2013 -0400
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.lir.ptx;
+
+import static com.oracle.graal.lir.ptx.ThreadDimension.*;
+
+import java.lang.annotation.*;
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.PARAMETER})
+public @interface ParallelOver {
+    // Parameter annotation; PTXTestBase sizes the launch from the annotated int[] argument's length.
+    String value() default "";
+    // Launch dimension (X, Y or Z) that receives that length; defaults to X.
+    ThreadDimension dimension() default X;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/ThreadDimension.java	Sun Sep 29 14:47:12 2013 -0400
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.lir.ptx;
+
+/** Selects one of the three axes (X, Y, Z) used by the {@code Warp} and {@code ParallelOver} annotations. */
+public enum ThreadDimension {
+    X,
+    Y,
+    Z
+}
--- a/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/Warp.java	Sat Sep 28 21:06:12 2013 -0400
+++ b/graal/com.oracle.graal.lir.ptx/src/com/oracle/graal/lir/ptx/Warp.java	Sun Sep 29 14:47:12 2013 -0400
@@ -22,20 +22,16 @@
  */
 package com.oracle.graal.lir.ptx;
 
-import static com.oracle.graal.lir.ptx.Warp.ThreadDimension.*;
+import static com.oracle.graal.lir.ptx.ThreadDimension.*;
 
 import java.lang.annotation.*;
 
 @Retention(RetentionPolicy.RUNTIME)
 @Target({ElementType.PARAMETER})
 public @interface Warp {
-    public enum ThreadDimension {
-        X,
-        Y,
-        Z
-    }
 
     String value() default "";
 
     ThreadDimension dimension() default X;
 }
+
--- a/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/Variable.java	Sat Sep 28 21:06:12 2013 -0400
+++ b/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/Variable.java	Sun Sep 29 14:47:12 2013 -0400
@@ -38,6 +38,8 @@
      */
     public final int index;
 
+    private String name;
+
     /**
      * Creates a new variable.
      * 
@@ -50,9 +52,21 @@
         this.index = index;
     }
 
+    public void setName(String name) { // optional explicit name; when non-null, toString() returns it as-is
+        this.name = name;
+    }
+
+    public String getName() { // last value given to setName, or null if none was set
+        return name;
+    }
+
     @Override
     public String toString() {
-        return "v" + index + getKindSuffix();
+        // An explicitly assigned name replaces the generated "v<index><kind>" form.
+        if (name == null) {
+            return "v" + index + getKindSuffix();
+        }
+        return name;
     }
 
     @Override
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Sat Sep 28 21:06:12 2013 -0400
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Sun Sep 29 14:47:12 2013 -0400
@@ -228,15 +228,20 @@
 }
 
 bool gpu::Ptx::execute_kernel(address kernel, PTXKernelArguments &ptxka, JavaValue &ret) {
+    return gpu::Ptx::execute_warp(1, 1, 1, kernel, ptxka, ret); // single-thread launch: 1x1x1 block
+}
+
+bool gpu::Ptx::execute_warp(int dimX, int dimY, int dimZ,
+                            address kernel, PTXKernelArguments &ptxka, JavaValue &ret) {
   // grid dimensionality
   unsigned int gridX = 1;
   unsigned int gridY = 1;
   unsigned int gridZ = 1;
 
   // thread dimensionality
-  unsigned int blockX = 1;
-  unsigned int blockY = 1;
-  unsigned int blockZ = 1;
+  unsigned int blockX = dimX;
+  unsigned int blockY = dimY;
+  unsigned int blockZ = dimZ;
 
   struct CUfunc_st* cu_function = (struct CUfunc_st*) kernel;
 
@@ -264,7 +269,7 @@
   }
 
   if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] Success: Kernel Launch");
+    tty->print_cr("[CUDA] Success: Kernel Launch: X: %d Y: %d Z: %d", blockX, blockY, blockZ);
   }
 
   status = _cuda_cu_ctx_synchronize();
--- a/src/gpu/ptx/vm/gpu_ptx.hpp	Sat Sep 28 21:06:12 2013 -0400
+++ b/src/gpu/ptx/vm/gpu_ptx.hpp	Sun Sep 29 14:47:12 2013 -0400
@@ -74,6 +74,7 @@
   static bool probe_linkage();
   static bool initialize_gpu();
   static void * generate_kernel(unsigned char *code, int code_len, const char *name);
+  static bool execute_warp(int dimX, int dimY, int dimZ, address kernel, PTXKernelArguments & ka, JavaValue &ret);
   static bool execute_kernel(address kernel, PTXKernelArguments & ka, JavaValue &ret);
 public:
 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
--- a/src/share/vm/graal/graalCompilerToGPU.cpp	Sat Sep 28 21:06:12 2013 -0400
+++ b/src/share/vm/graal/graalCompilerToGPU.cpp	Sun Sep 29 14:47:12 2013 -0400
@@ -103,6 +103,45 @@
 
 C2V_END
 
+C2V_VMENTRY(jobject, executeParallelMethodVarargs, (JNIEnv *env,
+                                                          jobject,
+                                                          jint dimX, jint dimY, jint dimZ,
+                                                          jobject args, jobject hotspotInstalledCode))
+  ResourceMark rm;
+  HandleMark hm;
+  // Launches the installed kernel with a dimX x dimY x dimZ thread block; NULL on failure or for a void result.
+  if (!gpu::is_available() || !gpu::has_gpu_linkage() || !gpu::is_initialized()) {
+    tty->print_cr("executeParallelMethodVarargs - not available / no linkage / not initialized");
+    return NULL;
+  }
+  jlong nmethodValue = HotSpotInstalledCode::codeBlob(hotspotInstalledCode);
+  nmethod* nm = (nmethod*) (address) nmethodValue;
+  methodHandle mh = nm->method();
+  Symbol* signature = mh->signature();
+
+  // start value is the kernel
+  jlong startValue = HotSpotInstalledCode::codeStart(hotspotInstalledCode);
+
+  PTXKernelArguments ptxka(signature, (arrayOop) JNIHandles::resolve(args), mh->is_static());
+  JavaValue result(ptxka.get_ret_type());
+  if (!gpu::execute_warp(dimX, dimY, dimZ, (address)startValue, ptxka, result)) {
+    return NULL;
+  }
+  // void yields NULL, object/array results get a local handle, primitive results are boxed.
+  if (ptxka.get_ret_type() == T_VOID) {
+    return NULL;
+  } else if (ptxka.get_ret_type() == T_OBJECT || ptxka.get_ret_type() == T_ARRAY) {
+    return JNIHandles::make_local((oop) result.get_jobject());
+  } else {
+    oop o = java_lang_boxing_object::create(ptxka.get_ret_type(), (jvalue *) result.get_value_addr(), CHECK_NULL);
+    if (TraceGPUInteraction) {
+      tty->print_cr("GPU execution returned %d", result.get_jint());
+    }
+    return JNIHandles::make_local(o);
+  }
+
+C2V_END
+
 C2V_VMENTRY(jboolean, deviceInit, (JNIEnv *env, jobject))
   if (gpu::is_available() == false || gpu::has_gpu_linkage() == false) {
     tty->print_cr("deviceInit - not available / no linkage");
@@ -157,10 +196,11 @@
 #define GPUSPACE_METHOD       "J"
 
 JNINativeMethod CompilerToGPU_methods[] = {
-  {CC"generateKernel",                CC"([B" STRING ")"GPUSPACE_METHOD,        FN_PTR(generateKernel)},
-  {CC"deviceInit",                    CC"()Z",                                  FN_PTR(deviceInit)},
-  {CC"deviceDetach",                  CC"()Z",                                  FN_PTR(deviceDetach)},
-  {CC"executeExternalMethodVarargs",  CC"(["OBJECT HS_INSTALLED_CODE")"OBJECT,  FN_PTR(executeExternalMethodVarargs)},
+  {CC"generateKernel",                CC"([B" STRING ")"GPUSPACE_METHOD,          FN_PTR(generateKernel)},
+  {CC"deviceInit",                    CC"()Z",                                    FN_PTR(deviceInit)},
+  {CC"deviceDetach",                  CC"()Z",                                    FN_PTR(deviceDetach)},
+  {CC"executeExternalMethodVarargs",  CC"(["OBJECT HS_INSTALLED_CODE")"OBJECT,    FN_PTR(executeExternalMethodVarargs)},
+  {CC"executeParallelMethodVarargs",  CC"(III["OBJECT HS_INSTALLED_CODE")"OBJECT, FN_PTR(executeParallelMethodVarargs)},
 };
 
 int CompilerToGPU_methods_count() {
--- a/src/share/vm/runtime/gpu.cpp	Sat Sep 28 21:06:12 2013 -0400
+++ b/src/share/vm/runtime/gpu.cpp	Sun Sep 29 14:47:12 2013 -0400
@@ -61,12 +61,23 @@
 }
 
 bool gpu::execute_kernel(address kernel, PTXKernelArguments & ptxka, JavaValue& ret) {
-  if (gpu::has_gpu_linkage()) {
-    if (gpu::get_target_il_type() == gpu::PTX) {
-      return (gpu::Ptx::execute_kernel(kernel, ptxka, ret));
+    if (gpu::has_gpu_linkage()) {
+        if (gpu::get_target_il_type() == gpu::PTX) {
+            return (gpu::Ptx::execute_kernel(kernel, ptxka, ret));
+        }
+        // Add kernel execution functionality of other GPUs here
     }
-    // Add kernel execution functionality of other GPUs here
-  }
-  return false;
+    return false; // no GPU linkage, or the target IL is not PTX
 }
 
+// Runs 'kernel' on the active GPU backend with the given X/Y/Z launch dimensions.
+// Returns false when there is no GPU linkage or the target IL is not PTX.
+bool gpu::execute_warp(int dimX, int dimY, int dimZ,
+                       address kernel, PTXKernelArguments & ptxka, JavaValue& ret) {
+  // Only PTX is wired up; add kernel execution functionality of other GPUs here.
+  if (!gpu::has_gpu_linkage() || gpu::get_target_il_type() != gpu::PTX) {
+    return false;
+  }
+  return (gpu::Ptx::execute_warp(dimX, dimY, dimZ, kernel, ptxka, ret));
+}
+
--- a/src/share/vm/runtime/gpu.hpp	Sat Sep 28 21:06:12 2013 -0400
+++ b/src/share/vm/runtime/gpu.hpp	Sun Sep 29 14:47:12 2013 -0400
@@ -46,6 +46,9 @@
   
   static void * generate_kernel(unsigned char *code, int code_len, const char *name);
 
+  static bool execute_warp(int dimX, int dimY, int dimZ,
+                           address kernel, PTXKernelArguments & ptxka, JavaValue & ret);
+
   static bool execute_kernel(address kernel, PTXKernelArguments & ptxka, JavaValue & ret);
 
   static void set_available(bool value) {