changeset 10577:9c7d9e2c8326

PTX kernel execution - no args or return value
author Morris Meyer <morris.meyer@oracle.com>
date Sat, 29 Jun 2013 21:29:34 -0400
parents aee899c96b0b
children de164e8e7c4d
files graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/ExternalCompilationResult.java graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/GraalCompilerTest.java graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/InfopointReasonTest.java graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalCompiler.java graal/com.oracle.graal.hotspot.test/src/com/oracle/graal/hotspot/test/AheadOfTimeCompilationTest.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/CompilationTask.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotRuntime.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/stubs/Stub.java graal/com.oracle.graal.java.decompiler.test/src/com/oracle/graal/java/decompiler/test/TestUtil.java graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/TruffleCompilerImpl.java src/gpu/ptx/gpu_ptx.cpp src/gpu/ptx/gpu_ptx.hpp src/share/vm/classfile/systemDictionary.hpp src/share/vm/classfile/vmSymbols.hpp src/share/vm/code/nmethod.hpp src/share/vm/graal/graalCompilerToGPU.cpp src/share/vm/graal/graalCompilerToVM.cpp src/share/vm/graal/graalEnv.cpp src/share/vm/graal/graalJavaAccess.hpp src/share/vm/runtime/gpu.hpp
diffstat 26 files changed, 218 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/ExternalCompilationResult.java	Sat Jun 29 21:29:34 2013 -0400
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.api.code;
+
+
+/** A {@link CompilationResult} that additionally carries a {@code long} kernel value. */
+public class ExternalCompilationResult extends CompilationResult {
+
+    private long kernel; // also read by the VM when code is installed -- presumably by name, so keep it (confirm)
+
+    public ExternalCompilationResult() {
+    }
+
+    public long getKernel() {
+        return this.kernel;
+    }
+
+    public void setKernel(long k) {
+        this.kernel = k;
+    }
+}
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Sat Jun 29 21:29:34 2013 -0400
@@ -34,16 +34,16 @@
 
     @Test
     public void testAdd() {
-        compile("testAddConst1I");
+        compile("testConstI");
     }
 
-    @Ignore
-    public void testAddInvoke() {
-        invoke(compile("testAddConst1I"), new Integer(42));
+    @Test
+    public void testInvoke() {
+        invoke(compile("testConstI"));
     }
 
-    public int testAddConst1I(int a) {
-        return a + 1;
+    public int testConstI() {
+        return 42;
     }
 
     public static void main(String[] args) {
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java	Sat Jun 29 21:29:34 2013 -0400
@@ -32,6 +32,7 @@
 import com.oracle.graal.compiler.ptx.PTXBackend;
 import com.oracle.graal.compiler.test.GraalCompilerTest;
 import com.oracle.graal.debug.Debug;
+import com.oracle.graal.hotspot.meta.HotSpotRuntime;
 import com.oracle.graal.java.GraphBuilderConfiguration;
 import com.oracle.graal.java.GraphBuilderPhase;
 import com.oracle.graal.nodes.StructuredGraph;
@@ -63,9 +64,14 @@
          * GraalCompilerTest.suites variable contains the Suites for the HotSpotRuntime. This code
          * will not run on hotspot, so it should use the plain Graal default suites, without hotspot
          * specific phases.
+         *
+         * Ultimately we might want both the GPU kernel and natively compiled CPU code, so that execution
+         * can fall back to the CPU if an ECC failure occurs on kernel invocation.
          */
-        CompilationResult result = GraalCompiler.compileGraph(graph, cc, graph.method(), runtime, graalRuntime().getReplacements(), ptxBackend, target, null, phasePlan, OptimisticOptimizations.NONE,
-                        new SpeculationLog(), Suites.createDefaultSuites());
+        CompilationResult result = GraalCompiler.compileGraph(graph, cc, graph.method(), runtime,
+                                                              graalRuntime().getReplacements(), ptxBackend, target, null, phasePlan,
+                                                              OptimisticOptimizations.NONE, new SpeculationLog(),
+                                                              Suites.createDefaultSuites(), new ExternalCompilationResult());
         return result;
     }
 
@@ -76,11 +82,10 @@
     @SuppressWarnings("unused")
     protected void invoke(CompilationResult result, Object... args) {
         try {
-            // not quite yet - need multi-architecture Method changes from JDK-8013168
-            // Object[] executeArgs = argsWithReceiver(this, args);
-            // InstalledCode installedCode =
-            // runtime.addMethod(getStructuredGraph().method(), result);
-            // installedCode.executeVarargs(executeArgs);
+            Object[] executeArgs = argsWithReceiver(this, args);
+            HotSpotRuntime hsr = (HotSpotRuntime)runtime;
+            InstalledCode installedCode = hsr.addExternalMethod(sg.method(), result, sg);
+            installedCode.executeVarargs(executeArgs);
         } catch (Throwable th) {
             th.printStackTrace();
         }
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java	Sat Jun 29 21:29:34 2013 -0400
@@ -38,7 +38,8 @@
 
     // detach ??
 
-    public PTXTargetMethodAssembler(TargetDescription target, CodeCacheProvider runtime, FrameMap frameMap, AbstractAssembler asm, FrameContext frameContext, CompilationResult compilationResult) {
+    public PTXTargetMethodAssembler(TargetDescription target, CodeCacheProvider runtime, FrameMap frameMap,
+                                    AbstractAssembler asm, FrameContext frameContext, CompilationResult compilationResult) {
         super(target, runtime, frameMap, asm, frameContext, compilationResult);
     }
 
@@ -46,16 +47,17 @@
     public CompilationResult finishTargetMethod(StructuredGraph graph) {
         ResolvedJavaMethod method = graph.method();
         assert method != null : graph + " is not associated wth a method";
-        CompilationResult graalCompile = super.finishTargetMethod(graph);
+        ExternalCompilationResult graalCompile = (ExternalCompilationResult)super.finishTargetMethod(graph);
 
         try {
             if (validDevice) {
-                toGPU.generateKernel(graalCompile.getTargetCode(), method.getName());
+                long kernel = toGPU.generateKernel(graalCompile.getTargetCode(), method.getName());
+                graalCompile.setKernel(kernel);
             }
         } catch (Throwable th) {
             th.printStackTrace();
         }
 
-        return graalCompile;  // for now
+        return graalCompile;
     }
 }
--- a/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/GraalCompilerTest.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/GraalCompilerTest.java	Sat Jun 29 21:29:34 2013 -0400
@@ -442,7 +442,7 @@
                 editPhasePlan(method, graph, phasePlan);
                 CallingConvention cc = getCallingConvention(runtime, Type.JavaCallee, graph.method(), false);
                 final CompilationResult compResult = GraalCompiler.compileGraph(graph, cc, method, runtime, replacements, backend, runtime().getTarget(), null, phasePlan, OptimisticOptimizations.ALL,
-                                new SpeculationLog(), suites);
+                                new SpeculationLog(), suites, new CompilationResult());
                 if (printCompilation) {
                     TTY.println(String.format("@%-6d Graal %-70s %-45s %-50s | %4dms %5dB", id, "", "", "", System.currentTimeMillis() - start, compResult.getTargetCodeSize()));
                 }
--- a/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/InfopointReasonTest.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/InfopointReasonTest.java	Sat Jun 29 21:29:34 2013 -0400
@@ -60,7 +60,7 @@
         final StructuredGraph graph = parse(method);
         CallingConvention cc = getCallingConvention(runtime, Type.JavaCallee, graph.method(), false);
         final CompilationResult cr = GraalCompiler.compileGraph(graph, cc, graph.method(), runtime, replacements, backend, runtime.getTarget(), null, getDefaultPhasePlan(),
-                        OptimisticOptimizations.ALL, new SpeculationLog(), suites);
+                        OptimisticOptimizations.ALL, new SpeculationLog(), suites, new CompilationResult());
         for (Infopoint sp : cr.getInfopoints()) {
             assertNotNull(sp.reason);
             if (sp instanceof Call) {
@@ -82,7 +82,7 @@
         assertTrue(graphLineSPs > 0);
         CallingConvention cc = getCallingConvention(runtime, Type.JavaCallee, graph.method(), false);
         final CompilationResult cr = GraalCompiler.compileGraph(graph, cc, graph.method(), runtime, replacements, backend, runtime.getTarget(), null, getDefaultPhasePlan(true),
-                        OptimisticOptimizations.ALL, new SpeculationLog(), suites);
+                        OptimisticOptimizations.ALL, new SpeculationLog(), suites, new CompilationResult());
         int lineSPs = 0;
         for (Infopoint sp : cr.getInfopoints()) {
             assertNotNull(sp.reason);
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalCompiler.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalCompiler.java	Sat Jun 29 21:29:34 2013 -0400
@@ -72,10 +72,13 @@
      *            argument can be null.
      * @return the result of the compilation
      */
-    public static CompilationResult compileGraph(final StructuredGraph graph, final CallingConvention cc, final ResolvedJavaMethod installedCodeOwner, final GraalCodeCacheProvider runtime,
-                    final Replacements replacements, final Backend backend, final TargetDescription target, final GraphCache cache, final PhasePlan plan, final OptimisticOptimizations optimisticOpts,
-                    final SpeculationLog speculationLog, final Suites suites) {
-        final CompilationResult compilationResult = new CompilationResult();
+    public static CompilationResult compileGraph(final StructuredGraph graph, final CallingConvention cc,
+                                                 final ResolvedJavaMethod installedCodeOwner, final GraalCodeCacheProvider runtime,
+                                                 final Replacements replacements, final Backend backend,
+                                                 final TargetDescription target, final GraphCache cache,
+                                                 final PhasePlan plan, final OptimisticOptimizations optimisticOpts,
+                                                 final SpeculationLog speculationLog, final Suites suites,
+                                                 final CompilationResult compilationResult) {
         Debug.scope("GraalCompiler", new Object[]{graph, runtime}, new Runnable() {
 
             public void run() {
--- a/graal/com.oracle.graal.hotspot.test/src/com/oracle/graal/hotspot/test/AheadOfTimeCompilationTest.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.hotspot.test/src/com/oracle/graal/hotspot/test/AheadOfTimeCompilationTest.java	Sat Jun 29 21:29:34 2013 -0400
@@ -207,7 +207,7 @@
         // create suites everytime, as we modify options for the compiler
         final Suites suitesLocal = Graal.getRequiredCapability(SuitesProvider.class).createSuites();
         final CompilationResult compResult = GraalCompiler.compileGraph(graph, cc, method, runtime, replacements, backend, runtime().getTarget(), null, phasePlan, OptimisticOptimizations.ALL,
-                        new SpeculationLog(), suitesLocal);
+                        new SpeculationLog(), suitesLocal, new CompilationResult());
         addMethod(method, compResult);
 
         AOTCompilation.setValue(originalSetting);
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/CompilationTask.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/CompilationTask.java	Sat Jun 29 21:29:34 2013 -0400
@@ -161,7 +161,7 @@
                         HotSpotRuntime runtime = graalRuntime.getRuntime();
                         CallingConvention cc = getCallingConvention(runtime, Type.JavaCallee, graph.method(), false);
                         return GraalCompiler.compileGraph(graph, cc, method, runtime, replacements, graalRuntime.getBackend(), graalRuntime.getTarget(), graalRuntime.getCache(), plan, optimisticOpts,
-                                        method.getSpeculationLog(), suitesProvider.getDefaultSuites());
+                                        method.getSpeculationLog(), suitesProvider.getDefaultSuites(), new CompilationResult());
                     }
                 });
             } finally {
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java	Sat Jun 29 21:29:34 2013 -0400
@@ -24,6 +24,7 @@
 package com.oracle.graal.hotspot.bridge;
 
 import com.oracle.graal.api.code.InvalidInstalledCodeException;
+import com.oracle.graal.hotspot.meta.HotSpotInstalledCode;
 
 /**
  * Calls from Java into the GPU.
@@ -52,4 +53,6 @@
      * @return the value of the bound kernel in GPU space.
      */
     long generateKernel(byte[] code, String name) throws InvalidInstalledCodeException;
+
+    Object executeExternalMethodVarargs(Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException;
 }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java	Sat Jun 29 21:29:34 2013 -0400
@@ -24,6 +24,7 @@
 package com.oracle.graal.hotspot.bridge;
 
 import com.oracle.graal.api.code.InvalidInstalledCodeException;
+import com.oracle.graal.hotspot.meta.HotSpotInstalledCode;
 
 
 /**
@@ -37,4 +38,5 @@
 
     public native boolean deviceDetach();
 
+    public native Object executeExternalMethodVarargs(Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException;
 }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java	Sat Jun 29 21:29:34 2013 -0400
@@ -45,17 +45,30 @@
 
     private final HotSpotResolvedJavaMethod method;
     private final boolean isDefault;
+    private final boolean isExternal;
     private final Graph graph;
 
     public HotSpotNmethod(HotSpotResolvedJavaMethod method, boolean isDefault, Graph graph) {
         this.method = method;
         this.isDefault = isDefault;
+        this.isExternal = false;
+        this.graph = graph;
+    }
+
+    public HotSpotNmethod(HotSpotResolvedJavaMethod method, boolean isDefault, boolean isExternal, Graph graph) {
+        this.method = method;
+        this.isDefault = isDefault;
+        this.isExternal = isExternal;
         this.graph = graph;
     }
 
     public boolean isDefault() {
         return isDefault;
     }
+    /** Returns the {@code isExternal} flag that was fixed when this object was constructed. */
+    public boolean isExternal() {
+        return isExternal;
+    }
 
     public Graph getGraph() {
         return graph;
@@ -107,7 +120,11 @@
     @Override
     public Object executeVarargs(Object... args) throws InvalidInstalledCodeException {
         assert checkArgs(args);
-        return graalRuntime().getCompilerToVM().executeCompiledMethodVarargs(args, this);
+        if (isExternal()) {
+            return graalRuntime().getCompilerToGPU().executeExternalMethodVarargs(args, this);
+        } else {
+            return graalRuntime().getCompilerToVM().executeCompiledMethodVarargs(args, this);
+        }
     }
 
     @Override
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotRuntime.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotRuntime.java	Sat Jun 29 21:29:34 2013 -0400
@@ -1053,6 +1053,21 @@
         return code;
     }
 
+    /**
+     * Installs {@code compResult} for {@code method} as an external {@link HotSpotNmethod}.
+     *
+     * @return the installed code, or {@code null} unless the VM reports {@link CodeInstallResult#OK}
+     */
+    public InstalledCode addExternalMethod(ResolvedJavaMethod method, CompilationResult compResult, Graph graph) {
+        HotSpotResolvedJavaMethod hsMethod = (HotSpotResolvedJavaMethod) method;
+        // HotSpotNmethod(method, isDefault = false, isExternal = true, graph)
+        HotSpotInstalledCode installed = new HotSpotNmethod(hsMethod, false, true, graph);
+        // compResult.getTargetCode() == assembled PTX method string
+        HotSpotCompiledNmethod toInstall = new HotSpotCompiledNmethod(hsMethod, -1, compResult);
+        CodeInstallResult status = graalRuntime.getCompilerToVM().installCode(toInstall, installed, null);
+        return status == CodeInstallResult.OK ? installed : null;
+    }
+
     @Override
     public int encodeDeoptActionAndReason(DeoptimizationAction action, DeoptimizationReason reason) {
         final int actionShift = 0;
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/stubs/Stub.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/stubs/Stub.java	Sat Jun 29 21:29:34 2013 -0400
@@ -156,7 +156,7 @@
                     phasePlan.addPhase(PhasePosition.AFTER_PARSING, graphBuilderPhase);
                     CallingConvention cc = linkage.getCallingConvention();
                     final CompilationResult compResult = GraalCompiler.compileGraph(graph, cc, getInstalledCodeOwner(), runtime, replacements, backend, runtime.getTarget(), null, phasePlan,
-                                    OptimisticOptimizations.ALL, new SpeculationLog(), runtime.getDefaultSuites());
+                                    OptimisticOptimizations.ALL, new SpeculationLog(), runtime.getDefaultSuites(), new CompilationResult());
 
                     assert destroyedRegisters != null;
                     code = Debug.scope("CodeInstall", new Callable<InstalledCode>() {
--- a/graal/com.oracle.graal.java.decompiler.test/src/com/oracle/graal/java/decompiler/test/TestUtil.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.java.decompiler.test/src/com/oracle/graal/java/decompiler/test/TestUtil.java	Sat Jun 29 21:29:34 2013 -0400
@@ -50,6 +50,6 @@
         phasePlan.addPhase(PhasePosition.AFTER_PARSING, graphBuilderPhase);
         CallingConvention cc = getCallingConvention(runtime, Type.JavaCallee, graph.method(), false);
         Backend backend = Graal.getRequiredCapability(Backend.class);
-        GraalCompiler.compileGraph(graph, cc, method, runtime, replacements, backend, runtime.getTarget(), null, phasePlan, OptimisticOptimizations.ALL, new SpeculationLog(), suites);
+        GraalCompiler.compileGraph(graph, cc, method, runtime, replacements, backend, runtime.getTarget(), null, phasePlan, OptimisticOptimizations.ALL, new SpeculationLog(), suites, new CompilationResult());
     }
 }
--- a/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/TruffleCompilerImpl.java	Sat Jun 29 11:40:52 2013 +0200
+++ b/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/TruffleCompilerImpl.java	Sat Jun 29 21:29:34 2013 -0400
@@ -142,7 +142,7 @@
             @Override
             public CompilationResult call() {
                 CallingConvention cc = getCallingConvention(runtime, Type.JavaCallee, graph.method(), false);
-                return GraalCompiler.compileGraph(graph, cc, graph.method(), runtime, replacements, backend, runtime.getTarget(), null, plan, OptimisticOptimizations.ALL, new SpeculationLog(), suites);
+                return GraalCompiler.compileGraph(graph, cc, graph.method(), runtime, replacements, backend, runtime.getTarget(), null, plan, OptimisticOptimizations.ALL, new SpeculationLog(), suites, new CompilationResult());
             }
         });
 
--- a/src/gpu/ptx/gpu_ptx.cpp	Sat Jun 29 11:40:52 2013 +0200
+++ b/src/gpu/ptx/gpu_ptx.cpp	Sat Jun 29 21:29:34 2013 -0400
@@ -63,6 +63,14 @@
   }
 }
 
+// Runs 'kernel' through the PTX backend; reports failure when there is no GPU linkage.
+bool gpu::execute_kernel(address kernel) {
+  if (!gpu::has_gpu_linkage()) {
+    return false;
+  }
+  return gpu::Ptx::execute_kernel(kernel);
+}
+
 #define __CUDA_API_VERSION 5000
 
 bool gpu::Ptx::initialize_gpu() {
@@ -139,6 +147,26 @@
   return cu_function;
 }
 
+bool gpu::Ptx::execute_kernel(address kernel) {
+  // Launches 'kernel' once (1x1x1 grid of 1x1x1 blocks) with no shared memory, stream or arguments.
+  // Returns true only when the launch call reports 0 (CUDA_SUCCESS).
+  if (kernel == NULL || _cuda_cu_launch_kernel == NULL) {
+    return false;  // no kernel handle, or cuLaunchKernel was never resolved
+  }
+
+  // grid dimensionality
+  unsigned int gridX = 1, gridY = 1, gridZ = 1;
+
+  // thread dimensionality
+  unsigned int blockX = 1, blockY = 1, blockZ = 1;
+
+  int status = _cuda_cu_launch_kernel((void *) kernel,
+                                      gridX, gridY, gridZ,
+                                      blockX, blockY, blockZ,
+                                      0, NULL, NULL, NULL);
+  tty->print_cr("gpu_ptx::_cuda_cu_launch_kernel(%p): %d", (void *) kernel, status);  // %p: %x truncates 64-bit pointers
+  return status == 0;  // CUDA_SUCCESS
+}
 
 #ifdef __APPLE__
 bool gpu::Ptx::probe_linkage_apple() {
@@ -164,6 +192,8 @@
         CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, dlsym(handle, "cuModuleGetFunction"));
     _cuda_cu_module_load_data_ex =
         CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, dlsym(handle, "cuModuleLoadDataEx"));
+    _cuda_cu_launch_kernel =
+        CAST_TO_FN_PTR(cuda_cu_launch_kernel_func_t, dlsym(handle, "cuLaunchKernel"));
     return true;
   }
   return false;
--- a/src/gpu/ptx/gpu_ptx.hpp	Sat Jun 29 11:40:52 2013 +0200
+++ b/src/gpu/ptx/gpu_ptx.hpp	Sat Jun 29 21:29:34 2013 -0400
@@ -35,6 +35,7 @@
 #endif
   static bool initialize_gpu();
   static void * generate_kernel(unsigned char *code, int code_len, const char *name);
+  static bool execute_kernel(address kernel);
   
 private:
   typedef int (*cuda_cu_init_func_t)(unsigned int, int);
@@ -45,7 +46,10 @@
   typedef int (*cuda_cu_device_get_name_func_t)(char *, int, int);
   typedef int (*cuda_cu_device_get_func_t)(int *, int);
   typedef int (*cuda_cu_device_compute_capability_func_t)(int *, int *, int);
-  typedef int (*cuda_cu_launch_kernel_func_t)(int *, int *, int);
+  typedef int (*cuda_cu_launch_kernel_func_t)(void *,
+                                              unsigned int, unsigned int, unsigned int,
+                                              unsigned int, unsigned int, unsigned int,
+                                              unsigned int, void *, void **, void **);
   typedef int (*cuda_cu_module_get_function_func_t)(void *, void *, const char *);
   typedef int (*cuda_cu_module_load_data_ex_func_t)(void *, void *, unsigned int, int *, void **);
 
--- a/src/share/vm/classfile/systemDictionary.hpp	Sat Jun 29 11:40:52 2013 +0200
+++ b/src/share/vm/classfile/systemDictionary.hpp	Sat Jun 29 21:29:34 2013 -0400
@@ -220,6 +220,7 @@
   do_klass(CompilationResult_Mark_klass,          com_oracle_graal_api_code_CompilationResult_Mark,             Opt) \
   do_klass(CompilationResult_Infopoint_klass,     com_oracle_graal_api_code_CompilationResult_Infopoint,        Opt) \
   do_klass(CompilationResult_Site_klass,          com_oracle_graal_api_code_CompilationResult_Site,             Opt) \
+  do_klass(ExternalCompilationResult_klass,       com_oracle_graal_api_code_ExternalCompilationResult,          Opt) \
   do_klass(InfopointReason_klass,                 com_oracle_graal_api_code_InfopointReason,                    Opt) \
   do_klass(code_Register_klass,                   com_oracle_graal_api_code_Register,                           Opt) \
   do_klass(RegisterValue_klass,                   com_oracle_graal_api_code_RegisterValue,                      Opt) \
--- a/src/share/vm/classfile/vmSymbols.hpp	Sat Jun 29 11:40:52 2013 +0200
+++ b/src/share/vm/classfile/vmSymbols.hpp	Sat Jun 29 21:29:34 2013 -0400
@@ -338,6 +338,7 @@
   template(com_oracle_graal_api_code_CompilationResult_Mark,         "com/oracle/graal/api/code/CompilationResult$Mark")              \
   template(com_oracle_graal_api_code_CompilationResult_Infopoint,    "com/oracle/graal/api/code/CompilationResult$Infopoint")         \
   template(com_oracle_graal_api_code_CompilationResult_Site,         "com/oracle/graal/api/code/CompilationResult$Site")              \
+  template(com_oracle_graal_api_code_ExternalCompilationResult,      "com/oracle/graal/api/code/ExternalCompilationResult")           \
   template(com_oracle_graal_api_code_InfopointReason,                "com/oracle/graal/api/code/InfopointReason")                     \
   template(com_oracle_graal_api_code_BytecodeFrame,                  "com/oracle/graal/api/code/BytecodeFrame")                       \
   template(com_oracle_graal_api_code_BytecodePosition,               "com/oracle/graal/api/code/BytecodePosition")                    \
--- a/src/share/vm/code/nmethod.hpp	Sat Jun 29 11:40:52 2013 +0200
+++ b/src/share/vm/code/nmethod.hpp	Sat Jun 29 21:29:34 2013 -0400
@@ -185,6 +185,7 @@
   unsigned int _has_method_handle_invokes:1; // Has this method MethodHandle invokes?
   unsigned int _lazy_critical_native:1;      // Lazy JNI critical native
   unsigned int _has_wide_vectors:1;          // Preserve wide vectors at safepoints
+  unsigned int _external_method:1;           // Set for GPU methods
 
   // Protected by Patching_lock
   unsigned char _state;                      // {alive, not_entrant, zombie, unloaded}
@@ -462,6 +463,9 @@
   bool  is_speculatively_disconnected() const     { return _speculatively_disconnected; }
   void  set_speculatively_disconnected(bool z)    { _speculatively_disconnected = z; }
 
+  bool  is_external_method() const                { return _external_method; }  // flag declared as "Set for GPU methods"
+  void  set_external_method(bool z)               { _external_method = z; }
+
   bool  is_lazy_critical_native() const           { return _lazy_critical_native; }
   void  set_lazy_critical_native(bool z)          { _lazy_critical_native = z; }
 
--- a/src/share/vm/graal/graalCompilerToGPU.cpp	Sat Jun 29 11:40:52 2013 +0200
+++ b/src/share/vm/graal/graalCompilerToGPU.cpp	Sat Jun 29 21:29:34 2013 -0400
@@ -24,7 +24,9 @@
 #include "precompiled.hpp"
 
 #include "graal/graalCompiler.hpp"
+#include "graal/graalCompilerToVM.hpp"
 #include "graal/graalEnv.hpp"
+#include "graal/graalJavaAccess.hpp"
 #include "runtime/gpu.hpp"
 
 
@@ -53,12 +55,48 @@
   jint len = env->GetArrayLength(code);
   const char *namestr = env->GetStringUTFChars(name, &is_copy);
   void *kernel = gpu::generate_kernel((unsigned char *)bytes, len, namestr);
+  tty->print_cr("generateKernel: %x", kernel);
   env->ReleaseByteArrayElements(code, bytes, 0);
   env->ReleaseStringUTFChars(name, namestr);
 
   return (jlong)kernel;
 C2V_END
 
+C2V_VMENTRY(jobject, executeExternalMethodVarargs, (JNIEnv *env, jobject, jobject args, jobject hotspotInstalledCode))
+  // Launches the GPU kernel whose handle is stored in the installed code's 'start' field.
+  ResourceMark rm;
+  HandleMark hm;
+  // Bail out on any of the three conditions the message names (was: '... == false && gpu::is_initialized()').
+  if (!gpu::is_available() || !gpu::has_gpu_linkage() || !gpu::is_initialized()) {
+    tty->print_cr("executeExternalMethodVarargs - not available / no linkage / not initialized");
+    return NULL;
+  }
+  jlong nmethodValue = HotSpotInstalledCode::codeBlob(hotspotInstalledCode);
+  nmethod* nm = (nmethod*) (address) nmethodValue;
+  methodHandle mh = nm->method();
+  Symbol* signature = mh->signature();
+  JavaCallArguments jca(mh->size_of_parameters());
+
+  JavaArgumentUnboxer jap(signature, &jca, (arrayOop) JNIHandles::resolve(args), mh->is_static());
+  JavaValue result(jap.get_ret_type());
+  jca.set_alternative_target(nm);
+
+  // start value is the kernel
+  jlong startValue = HotSpotInstalledCode::start(hotspotInstalledCode);
+  // JavaCalls::call(&result, mh, &jca, CHECK_NULL);
+  tty->print_cr("executeExternalMethodVarargs: start: %p", (void *)(address)startValue);  // %p: full 64-bit pointer
+  if (!gpu::execute_kernel((address)startValue)) { return NULL; }  // launch failed; nothing to return
+  // NOTE(review): nothing here writes 'result' after construction, so non-void returns box unset data -- confirm.
+  if (jap.get_ret_type() == T_VOID) {
+    return NULL;
+  } else if (jap.get_ret_type() == T_OBJECT || jap.get_ret_type() == T_ARRAY) {
+    return JNIHandles::make_local((oop) result.get_jobject());
+  } else {
+    oop o = java_lang_boxing_object::create(jap.get_ret_type(), (jvalue *) result.get_value_addr(), CHECK_NULL);
+    return JNIHandles::make_local(o);
+  }
+C2V_END
+
 C2V_VMENTRY(jboolean, deviceInit, (JNIEnv *env, jobject))
   if (gpu::is_available() == false || gpu::has_gpu_linkage() == false) {
     tty->print_cr("deviceInit - not available / no linkage");
@@ -113,9 +151,10 @@
 #define GPUSPACE_METHOD       "J"
 
 JNINativeMethod CompilerToGPU_methods[] = {
-  {CC"generateKernel", CC"([B" STRING ")"GPUSPACE_METHOD, FN_PTR(generateKernel)},
-  {CC"deviceInit",     CC"()Z",                           FN_PTR(deviceInit)},
-  {CC"deviceDetach",   CC"()Z",                           FN_PTR(deviceDetach)},
+  {CC"generateKernel",                CC"([B" STRING ")"GPUSPACE_METHOD,        FN_PTR(generateKernel)},
+  {CC"deviceInit",                    CC"()Z",                                  FN_PTR(deviceInit)},
+  {CC"deviceDetach",                  CC"()Z",                                  FN_PTR(deviceDetach)},
+  {CC"executeExternalMethodVarargs",  CC"(["OBJECT HS_INSTALLED_CODE")"OBJECT,  FN_PTR(executeExternalMethodVarargs)},
 };
 
 int CompilerToGPU_methods_count() {
--- a/src/share/vm/graal/graalCompilerToVM.cpp	Sat Jun 29 11:40:52 2013 +0200
+++ b/src/share/vm/graal/graalCompilerToVM.cpp	Sat Jun 29 21:29:34 2013 -0400
@@ -969,7 +969,13 @@
     if (!installed_code_handle.is_null()) {
       assert(installed_code_handle->is_a(HotSpotInstalledCode::klass()), "wrong type");
       HotSpotInstalledCode::set_codeBlob(installed_code_handle, (jlong) cb);
-      HotSpotInstalledCode::set_start(installed_code_handle, (jlong) cb->code_begin());
+      oop comp_result = HotSpotCompiledCode::comp(compiled_code_handle);
+      jlong start_value = (jlong) cb->code_begin();  // default: first instruction of the installed code blob
+      if (comp_result->is_a(ExternalCompilationResult::klass())) {
+        tty->print_cr("installCode0: ExternalCompilationResult");
+        start_value = ExternalCompilationResult::kernel(comp_result);  // external result: use its kernel field instead
+      }
+      HotSpotInstalledCode::set_start(installed_code_handle, start_value);
       nmethod* nm = cb->as_nmethod_or_null();
       assert(nm == NULL || !installed_code_handle->is_scavengable() || nm->on_scavenge_root_list(), "nm should be scavengable if installed_code is scavengable");
     }
--- a/src/share/vm/graal/graalEnv.cpp	Sat Jun 29 11:40:52 2013 +0200
+++ b/src/share/vm/graal/graalEnv.cpp	Sat Jun 29 21:29:34 2013 -0400
@@ -565,6 +565,10 @@
 
         }
       }
+      // Debug trace: print name and signature when the installed code is flagged isExternal.
+      if (HotSpotNmethod::isExternal(installed_code())) {
+        tty->print_cr("External method:%s", method()->name_and_sig_as_C_string());
+      }
     }
   }
   // JVMTI -- compiled method notification (must be done outside lock)
--- a/src/share/vm/graal/graalJavaAccess.hpp	Sat Jun 29 11:40:52 2013 +0200
+++ b/src/share/vm/graal/graalJavaAccess.hpp	Sat Jun 29 21:29:34 2013 -0400
@@ -82,6 +82,7 @@
   end_class                                                                                                                                                    \
   start_class(HotSpotNmethod)                                                                                                                                  \
     boolean_field(HotSpotNmethod, isDefault)                                                                                                                   \
+    boolean_field(HotSpotNmethod, isExternal)                                                                                                                   \
   end_class                                                                                                                                                    \
   start_class(HotSpotCompiledCode)                                                                                                                             \
     oop_field(HotSpotCompiledCode, comp, "Lcom/oracle/graal/api/code/CompilationResult;")                                                                      \
@@ -110,6 +111,9 @@
     int_field(ExceptionHandler, catchTypeCPI)                                                                                                                  \
     oop_field(ExceptionHandler, catchType, "Lcom/oracle/graal/api/meta/JavaType;")                                                                             \
   end_class                                                                                                                                                    \
+  start_class(ExternalCompilationResult)                                                                                                                       \
+    long_field(ExternalCompilationResult, kernel)                                                                                                              \
+  end_class                                                                                                                                                    \
   start_class(CompilationResult)                                                                                                                               \
     int_field(CompilationResult, frameSize)                                                                                                                    \
     int_field(CompilationResult, customStackAreaOffset)                                                                                                        \
--- a/src/share/vm/runtime/gpu.hpp	Sat Jun 29 11:40:52 2013 +0200
+++ b/src/share/vm/runtime/gpu.hpp	Sat Jun 29 21:29:34 2013 -0400
@@ -43,6 +43,8 @@
   
   static void * generate_kernel(unsigned char *code, int code_len, const char *name);
 
+  static bool execute_kernel(address kernel);
+
   static void set_available(bool value) {
     _available = value;
   }