changeset 9430:147162b27799

GRAAL-234 - PTX code loading
author Morris Meyer <morris.meyer@oracle.com>
date Tue, 30 Apr 2013 08:17:55 -0400
parents aaf8798b0969
children c0488f573091 3ccda80d466b
files graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotGraalRuntime.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java make/bsd/makefiles/buildtree.make make/bsd/makefiles/launcher.make make/bsd/makefiles/vm.make make/linux/makefiles/buildtree.make make/linux/makefiles/vm.make mx/projects src/gpu/ptx/gpu_ptx.cpp src/gpu/ptx/gpu_ptx.hpp src/os/bsd/vm/gpu_bsd.cpp src/os/bsd/vm/gpu_bsd.hpp src/share/vm/graal/graalCompiler.cpp src/share/vm/graal/graalCompilerToGPU.cpp src/share/vm/graal/graalCompilerToGPU.hpp src/share/vm/runtime/globals.hpp src/share/vm/runtime/gpu.cpp src/share/vm/runtime/gpu.hpp src/share/vm/runtime/thread.cpp
diffstat 24 files changed, 913 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Tue Apr 30 13:00:09 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Tue Apr 30 08:17:55 2013 -0400
@@ -24,6 +24,7 @@
 
 import java.lang.reflect.Method;
 
+import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -36,7 +37,14 @@
         compile("testAddConst1I");
     }
 
-    public static int testAddConst1I(int a) {
+    // Disabled until compiled PTX code can actually be installed and run (see PTXTestBase.invoke).
+    @Ignore
+    @Test
+    public void testAddInvoke() {
+        invoke(compile("testAddConst1I"), Integer.valueOf(42));
+    }
+
+    public int testAddConst1I(int a) {
         return a + 1;
     }
 
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java	Tue Apr 30 13:00:09 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTestBase.java	Tue Apr 30 08:17:55 2013 -0400
@@ -24,33 +24,57 @@
 
 import static com.oracle.graal.hotspot.HotSpotGraalRuntime.*;
 
-import com.oracle.graal.api.code.*;
-import com.oracle.graal.api.runtime.*;
-import com.oracle.graal.compiler.*;
-import com.oracle.graal.compiler.ptx.*;
-import com.oracle.graal.compiler.test.*;
-import com.oracle.graal.debug.*;
-import com.oracle.graal.java.*;
-import com.oracle.graal.nodes.*;
-import com.oracle.graal.phases.*;
+import com.oracle.graal.api.code.CompilationResult;
+import com.oracle.graal.api.code.SpeculationLog;
+import com.oracle.graal.api.code.TargetDescription;
+import com.oracle.graal.api.runtime.Graal;
+import com.oracle.graal.compiler.GraalCompiler;
+import com.oracle.graal.compiler.ptx.PTXBackend;
+import com.oracle.graal.compiler.test.GraalCompilerTest;
+import com.oracle.graal.debug.Debug;
+import com.oracle.graal.java.GraphBuilderConfiguration;
+import com.oracle.graal.java.GraphBuilderPhase;
+import com.oracle.graal.nodes.StructuredGraph;
+import com.oracle.graal.nodes.spi.GraalCodeCacheProvider;
+import com.oracle.graal.phases.OptimisticOptimizations;
+import com.oracle.graal.phases.PhasePlan;
 import com.oracle.graal.phases.PhasePlan.PhasePosition;
-import com.oracle.graal.ptx.*;
+import com.oracle.graal.ptx.PTX;
 
 public abstract class PTXTestBase extends GraalCompilerTest {
 
+    private StructuredGraph sg;
+
     protected CompilationResult compile(String test) {
         StructuredGraph graph = parse(test);
+        sg = graph;
         Debug.dump(graph, "Graph");
         TargetDescription target = new TargetDescription(new PTX(), true, 1, 0, true);
-        PTXBackend ptxBackend = new PTXBackend(Graal.getRequiredCapability(CodeCacheProvider.class), target);
+        PTXBackend ptxBackend = new PTXBackend(Graal.getRequiredCapability(GraalCodeCacheProvider.class), target);
         PhasePlan phasePlan = new PhasePlan();
         GraphBuilderPhase graphBuilderPhase = new GraphBuilderPhase(runtime, GraphBuilderConfiguration.getDefault(), OptimisticOptimizations.NONE);
         phasePlan.addPhase(PhasePosition.AFTER_PARSING, graphBuilderPhase);
         phasePlan.addPhase(PhasePosition.AFTER_PARSING, new PTXPhase());
         new PTXPhase().apply(graph);
-        CompilationResult result = GraalCompiler.compileMethod(runtime, graalRuntime().getReplacements(), ptxBackend, target, graph.method(), graph, null, phasePlan, OptimisticOptimizations.NONE,
-                        new SpeculationLog());
+        CompilationResult result = GraalCompiler.compileMethod(runtime, graalRuntime().getReplacements(),
+                                                               ptxBackend, target, graph.method(), graph, null, phasePlan,
+                                                               OptimisticOptimizations.NONE, new SpeculationLog());
         return result;
     }
 
+    /** Returns the graph parsed by the most recent {@link #compile(String)} call, or null if none yet. */
+    protected StructuredGraph getStructuredGraph() {
+        return sg;
+    }
+
+    /**
+     * Placeholder: currently a no-op. Failures must reach JUnit once the steps below are enabled.
+     */
+    @SuppressWarnings("unused")
+    protected void invoke(CompilationResult result, Object... args) {
+        // not quite yet - need multi-architecture Method changes from JDK-8013168
+        // Object[] executeArgs = argsWithReceiver(this, args);
+        // InstalledCode installedCode = runtime.addMethod(getStructuredGraph().method(), result);
+        // installedCode.executeVarargs(executeArgs);
+    }
 }
--- a/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Tue Apr 30 13:00:09 2013 +0200
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXBackend.java	Tue Apr 30 08:17:55 2013 -0400
@@ -50,9 +50,6 @@
 
         @Override
         public void enter(TargetMethodAssembler tasm) {
-            Buffer codeBuffer = tasm.asm.codeBuffer;
-            codeBuffer.emitString(".version 1.4");
-            codeBuffer.emitString(".target sm_10");
             // codeBuffer.emitString(".address_size 32"); // PTX ISA version 2.3
         }
 
@@ -71,7 +68,7 @@
         FrameMap frameMap = lirGen.frameMap;
         AbstractAssembler masm = new PTXAssembler(target, frameMap.registerConfig);
         HotSpotFrameContext frameContext = new HotSpotFrameContext();
-        TargetMethodAssembler tasm = new TargetMethodAssembler(target, runtime(), frameMap, masm, frameContext, compilationResult);
+        TargetMethodAssembler tasm = new PTXTargetMethodAssembler(target, runtime(), frameMap, masm, frameContext, compilationResult);
         tasm.setFrameSize(frameMap.frameSize());
         return tasm;
     }
@@ -81,6 +78,8 @@
         // Emit the prologue
         final String name = method.getName();
         Buffer codeBuffer = tasm.asm.codeBuffer;
+        codeBuffer.emitString(".version 1.4");
+        codeBuffer.emitString(".target sm_10");
         codeBuffer.emitString0(".entry " + name + " (");
         codeBuffer.emitString("");
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.compiler.ptx/src/com/oracle/graal/compiler/ptx/PTXTargetMethodAssembler.java	Tue Apr 30 08:17:55 2013 -0400
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.compiler.ptx;
+
+import com.oracle.graal.api.code.CodeCacheProvider;
+import com.oracle.graal.api.code.CompilationResult;
+import com.oracle.graal.api.code.TargetDescription;
+import com.oracle.graal.asm.AbstractAssembler;
+import com.oracle.graal.hotspot.HotSpotGraalRuntime;
+import com.oracle.graal.hotspot.bridge.CompilerToGPU;
+import com.oracle.graal.hotspot.meta.HotSpotMethod;
+import com.oracle.graal.lir.FrameMap;
+import com.oracle.graal.lir.asm.FrameContext;
+import com.oracle.graal.lir.asm.TargetMethodAssembler;
+
+/** Target method assembler for PTX that also hands the finished code to the GPU. */
+public class PTXTargetMethodAssembler extends TargetMethodAssembler {
+
+    private static CompilerToGPU toGPU = HotSpotGraalRuntime.graalRuntime().getCompilerToGPU();
+    // Set once when this class is initialized. TODO: nothing visible here calls deviceDetach().
+    private static boolean validDevice = toGPU.deviceInit();
+
+    public PTXTargetMethodAssembler(TargetDescription target, CodeCacheProvider runtime, FrameMap frameMap,
+                                    AbstractAssembler asm, FrameContext frameContext, CompilationResult compilationResult) {
+        super(target, runtime, frameMap, asm, frameContext, compilationResult);
+    }
+
+    /** Finishes the method as usual, then makes a best-effort attempt to load it as a GPU kernel. */
+    @Override
+    public CompilationResult finishTargetMethod(Object name, boolean isStub) {
+        CompilationResult graalCompile = super.finishTargetMethod(name, isStub);
+
+        // 'name' is only an Object: skip anything that is not a HotSpotMethod instead of casting blindly.
+        if (validDevice && name instanceof HotSpotMethod) {
+            try {
+                toGPU.generateKernel(graalCompile.getTargetCode(), ((HotSpotMethod) name).getName());
+            } catch (Throwable th) {
+                // Best effort for now: a GPU failure is reported but does not affect the returned result.
+                th.printStackTrace();
+            }
+        }
+
+        return graalCompile;  // for now
+    }
+}
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotGraalRuntime.java	Tue Apr 30 13:00:09 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotGraalRuntime.java	Tue Apr 30 08:17:55 2013 -0400
@@ -123,8 +123,9 @@
         return unsafe.getInt(object, offset);
     }
 
-    protected/* final */CompilerToVM compilerToVm;
-    protected/* final */VMToCompiler vmToCompiler;
+    protected/* final */CompilerToVM  compilerToVm;
+    protected/* final */CompilerToGPU compilerToGpu;
+    protected/* final */VMToCompiler  vmToCompiler;
 
     protected final HotSpotRuntime runtime;
     protected final TargetDescription target;
@@ -137,12 +138,14 @@
     private final HotSpotBackend backend;
 
     protected HotSpotGraalRuntime() {
-        CompilerToVM toVM = new CompilerToVMImpl();
+        CompilerToVM  toVM  = new CompilerToVMImpl();
+        CompilerToGPU toGPU = new CompilerToGPUImpl();
 
         // initialize VmToCompiler
         VMToCompiler toCompiler = new VMToCompilerImpl(this);
 
-        compilerToVm = toVM;
+        compilerToVm  = toVM;
+        compilerToGpu = toGPU;
         vmToCompiler = toCompiler;
         config = new HotSpotVMConfig();
         compilerToVm.initializeConfiguration(config);
@@ -224,6 +227,10 @@
         return vmToCompiler;
     }
 
+    public CompilerToGPU getCompilerToGPU() {
+        return compilerToGpu;
+    }
+
     public JavaType lookupType(String name, HotSpotResolvedObjectType accessingClass, boolean eagerResolve) {
         if (name.length() == 1 && vmToCompiler instanceof VMToCompilerImpl) {
             VMToCompilerImpl impl = (VMToCompilerImpl) vmToCompiler;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java	Tue Apr 30 08:17:55 2013 -0400
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package com.oracle.graal.hotspot.bridge;
+
+import com.oracle.graal.api.code.InvalidInstalledCodeException;
+
+/**
+ * Calls from Java into the GPU.
+ */
+public interface CompilerToGPU {
+
+    /**
+     * Attempts to initialize and create a valid context with the GPU.
+     *
+     * @return whether the GPU context has been initialized and is valid.
+     */
+    boolean deviceInit();
+
+    /**
+     * Attempts to detach from a valid GPU context.
+     *
+     * @return whether the GPU context has been properly disposed.
+     */
+    boolean deviceDetach();
+
+    /**
+     * Attempts to generate and return a bound function to the loaded method kernel on the GPU.
+     *
+     * @param code the text or binary values for a method kernel
+     * @param name the name of the kernel to bind (presumably the entry point inside {@code code} - confirm)
+     * @return the value of the bound kernel in GPU space.
+     */
+    long generateKernel(byte[] code, String name) throws InvalidInstalledCodeException;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java	Tue Apr 30 08:17:55 2013 -0400
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package com.oracle.graal.hotspot.bridge;
+
+import com.oracle.graal.api.code.InvalidInstalledCodeException;
+
+
+/**
+ * Entries into the HotSpot GPU interface from Java code.
+ */
+public class CompilerToGPUImpl implements CompilerToGPU {
+    // Declarations only: all three CompilerToGPU methods are native, with no Java bodies in this class.
+    public native boolean deviceInit();
+
+    public native long generateKernel(byte[] code, String name) throws InvalidInstalledCodeException;
+
+    public native boolean deviceDetach();
+
+}
--- a/make/bsd/makefiles/buildtree.make	Tue Apr 30 13:00:09 2013 +0200
+++ b/make/bsd/makefiles/buildtree.make	Tue Apr 30 08:17:55 2013 -0400
@@ -243,7 +243,9 @@
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
-	echo "$(call gamma-path,commonsrc,os/posix/vm)"; \
+	echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \
+	echo "$(call gamma-path,altsrc,gpu/ptx) \\"; \
+	echo "$(call gamma-path,commonsrc,gpu/ptx)"; \
 	echo; \
 	echo "Src_Dirs_I = \\"; \
 	echo "$(call gamma-path,altsrc,share/vm/prims) \\"; \
@@ -259,7 +261,9 @@
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
-	echo "$(call gamma-path,commonsrc,os/posix/vm)"; \
+	echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \
+	echo "$(call gamma-path,altsrc,gpu) \\"; \
+	echo "$(call gamma-path,commonsrc,gpu)"; \
 	[ -n "$(CFLAGS_BROWSE)" ] && \
 	    echo && echo "CFLAGS_BROWSE = $(CFLAGS_BROWSE)"; \
 	[ -n "$(HOTSPOT_EXTRA_SYSDEFS)" ] && \
--- a/make/bsd/makefiles/launcher.make	Tue Apr 30 13:00:09 2013 +0200
+++ b/make/bsd/makefiles/launcher.make	Tue Apr 30 08:17:55 2013 -0400
@@ -67,7 +67,7 @@
   # framework libraries.
 
   ifeq ($(OS_VENDOR),Darwin)
-    LFLAGS_LAUNCHER         += -framework CoreFoundation
+    LFLAGS_LAUNCHER         += -framework CoreFoundation -framework ApplicationServices
   endif
 
   LIBS_LAUNCHER             += -l$(JVM) $(LIBS)
--- a/make/bsd/makefiles/vm.make	Tue Apr 30 13:00:09 2013 +0200
+++ b/make/bsd/makefiles/vm.make	Tue Apr 30 08:17:55 2013 -0400
@@ -128,6 +128,10 @@
 
 LIBS += -lm -pthread
 
+ifeq ($(OS_VENDOR),Darwin)
+  LIBS         += -framework ApplicationServices -framework IOKit
+endif
+
 # By default, link the *.o into the library, not the executable.
 LINK_INTO$(LINK_INTO) = LIBJVM
 
@@ -157,6 +161,7 @@
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os/posix/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/cpu/$(Platform_arch)/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm
+SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx
 
 ifndef JAVASE_EMBEDDED
 SOURCE_PATHS+=$(shell if [ -d $(HS_ALT_SRC)/share/vm/jfr ]; then \
@@ -179,7 +184,9 @@
 SHARK_PATHS := $(GAMMADIR)/src/share/vm/shark
 
 GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/share/vm/graal)
+GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx)
 GRAAL_PATHS += $(HS_COMMON_SRC)/share/vm/graal
+GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx
 
 # Include dirs per type.
 Src_Dirs/CORE      := $(CORE_PATHS)
--- a/make/linux/makefiles/buildtree.make	Tue Apr 30 13:00:09 2013 +0200
+++ b/make/linux/makefiles/buildtree.make	Tue Apr 30 08:17:55 2013 -0400
@@ -238,7 +238,9 @@
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
-	echo "$(call gamma-path,commonsrc,os/posix/vm)"; \
+	echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \
+	echo "$(call gamma-path,altsrc,gpu/ptx) \\"; \
+	echo "$(call gamma-path,commonsrc,gpu/ptx)"; \
 	echo; \
 	echo "Src_Dirs_I = \\"; \
 	echo "$(call gamma-path,altsrc,share/vm/prims) \\"; \
@@ -253,8 +255,10 @@
 	echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
-	echo "$(call gamma-path,commonsrc,os/posix/vm)"; \
+	echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \
+	echo "$(call gamma-path,altsrc,gpu) \\"; \
+	echo "$(call gamma-path,commonsrc,gpu)"; \
 	[ -n "$(CFLAGS_BROWSE)" ] && \
 	    echo && echo "CFLAGS_BROWSE = $(CFLAGS_BROWSE)"; \
 	[ -n "$(ENABLE_FULL_DEBUG_SYMBOLS)" ] && \
--- a/make/linux/makefiles/vm.make	Tue Apr 30 13:00:09 2013 +0200
+++ b/make/linux/makefiles/vm.make	Tue Apr 30 08:17:55 2013 -0400
@@ -157,6 +157,7 @@
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os/posix/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/cpu/$(Platform_arch)/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm
+SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx
 
 ifndef JAVASE_EMBEDDED 
 ifneq (${ARCH},arm)
@@ -181,7 +182,9 @@
 SHARK_PATHS := $(GAMMADIR)/src/share/vm/shark
 
 GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/share/vm/graal)
+GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx)
 GRAAL_PATHS += $(HS_COMMON_SRC)/share/vm/graal
+GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx
 
 # Include dirs per type.
 Src_Dirs/CORE      := $(CORE_PATHS)
--- a/mx/projects	Tue Apr 30 13:00:09 2013 +0200
+++ b/mx/projects	Tue Apr 30 08:17:55 2013 -0400
@@ -295,7 +295,7 @@
 # graal.compiler.ptx
 project@com.oracle.graal.compiler.ptx@subDir=graal
 project@com.oracle.graal.compiler.ptx@sourceDirs=src
-project@com.oracle.graal.compiler.ptx@dependencies=com.oracle.graal.compiler,com.oracle.graal.lir.ptx
+project@com.oracle.graal.compiler.ptx@dependencies=com.oracle.graal.lir.ptx,com.oracle.graal.hotspot
 project@com.oracle.graal.compiler.ptx@checkstyle=com.oracle.graal.graph
 project@com.oracle.graal.compiler.ptx@javaCompliance=1.7
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gpu/ptx/gpu_ptx.cpp	Tue Apr 30 08:17:55 2013 -0400
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+
+#include "runtime/gpu.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/ostream.hpp"
+
+void * gpu::Ptx::_device_context;
+
+gpu::Ptx::cuda_cu_init_func_t gpu::Ptx::_cuda_cu_init;
+gpu::Ptx::cuda_cu_ctx_create_func_t gpu::Ptx::_cuda_cu_ctx_create;
+gpu::Ptx::cuda_cu_ctx_detach_func_t gpu::Ptx::_cuda_cu_ctx_detach;
+gpu::Ptx::cuda_cu_ctx_synchronize_func_t gpu::Ptx::_cuda_cu_ctx_synchronize;
+gpu::Ptx::cuda_cu_device_get_count_func_t gpu::Ptx::_cuda_cu_device_get_count;
+gpu::Ptx::cuda_cu_device_get_name_func_t gpu::Ptx::_cuda_cu_device_get_name;
+gpu::Ptx::cuda_cu_device_get_func_t gpu::Ptx::_cuda_cu_device_get;
+gpu::Ptx::cuda_cu_device_compute_capability_func_t gpu::Ptx::_cuda_cu_device_compute_capability;
+gpu::Ptx::cuda_cu_launch_kernel_func_t gpu::Ptx::_cuda_cu_launch_kernel;
+gpu::Ptx::cuda_cu_module_get_function_func_t gpu::Ptx::_cuda_cu_module_get_function;
+gpu::Ptx::cuda_cu_module_load_data_ex_func_t gpu::Ptx::_cuda_cu_module_load_data_ex;
+
+void gpu::probe_linkage() {
+#ifdef __APPLE__
+  set_gpu_linkage(gpu::Ptx::probe_linkage_apple());
+#else
+  set_gpu_linkage(false);
+#endif
+}
+
+void gpu::initialize_gpu() {
+  if (gpu::has_gpu_linkage()) {
+    set_initialized(gpu::Ptx::initialize_gpu());
+  }
+}
+
+void gpu::generate_kernel(unsigned char *code, int code_len, const char *name) {
+  if (gpu::has_gpu_linkage()) {
+    gpu::Ptx::generate_kernel(code, code_len, name);
+  }
+}
+
+#define __CUDA_API_VERSION 5000
+
+// Initializes the CUDA driver and creates a context on device 0; true iff the context was created.
+bool gpu::Ptx::initialize_gpu() {
+  int status = _cuda_cu_init(0, __CUDA_API_VERSION);
+  if (TraceWarpLoading) {
+    tty->print_cr("gpu_ptx::_cuda_cu_init: %d", status);
+  }
+
+  int device_count = 0;
+  status = _cuda_cu_device_get_count(&device_count);
+  if (TraceWarpLoading) {
+    tty->print_cr("gpu_ptx::_cuda_cu_device_get_count(%d): %d", device_count, status);
+  }
+
+  int device_id = 0, cu_device = 0;
+  status = _cuda_cu_device_get(&cu_device, device_id);
+  if (TraceWarpLoading) {
+    tty->print_cr("gpu_ptx::_cuda_cu_device_get(%d): %d", cu_device, status);
+  }
+
+  // Outputs are pre-initialized so the traces below never read indeterminate values on failure.
+  int major = 0, minor = 0;
+  status = _cuda_cu_device_compute_capability(&major, &minor, cu_device);
+  if (TraceWarpLoading) {
+    tty->print_cr("gpu_ptx::_cuda_cu_device_compute_capability(major %d, minor %d): %d", major, minor, status);
+  }
+
+  char device_name[256] = "";
+  status = _cuda_cu_device_get_name(device_name, 256, cu_device);
+  if (TraceWarpLoading) {
+    tty->print_cr("gpu_ptx::_cuda_cu_device_get_name(%s): %d", device_name, status);
+  }
+
+  status = _cuda_cu_ctx_create(&_device_context, 0, cu_device);
+  if (TraceWarpLoading) {
+    tty->print_cr("gpu_ptx::_cuda_cu_ctx_create(%p): %d", _device_context, status);
+  }
+  return status == 0;  // CUDA_SUCCESS
+}
+
+// Loads 'code' as a CUDA module and looks up the kernel 'name' in it; results are only traced.
+// NOTE(review): code_len is never used - presumably the PTX text must be NUL-terminated; confirm.
+void gpu::Ptx::generate_kernel(unsigned char *code, int code_len, const char *name) {
+  const unsigned int jit_num_options = 3;
+  const int jit_log_buffer_size = 1024;
+  const int jit_register_count = 32;
+  // Automatic storage: these were new[]-allocated before and never freed (a leak on every call).
+  int   jit_options[jit_num_options];
+  void *jit_option_values[jit_num_options];
+  char  jit_log_buffer[jit_log_buffer_size];
+  jit_log_buffer[0] = '\0';  // keeps the %s trace below defined if the driver writes no log
+  jit_options[0] = 4; // CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
+  jit_option_values[0] = (void *)(size_t)jit_log_buffer_size;
+  jit_options[1] = 3; // CU_JIT_INFO_LOG_BUFFER
+  jit_option_values[1] = jit_log_buffer;
+  jit_options[2] = 0; // CU_JIT_MAX_REGISTERS
+  jit_option_values[2] = (void *)(size_t)jit_register_count;
+
+  void *cu_module = NULL;
+  int status = _cuda_cu_module_load_data_ex(&cu_module, code, jit_num_options, jit_options, jit_option_values);
+  if (TraceWarpLoading) {
+    tty->print_cr("gpu_ptx::_cuda_cu_module_load_data_ex(%p): %d", cu_module, status);
+    tty->print_cr("gpu_ptx::jit_log_buffer\n%s", jit_log_buffer);
+  }
+  if (status != 0) {  // not CUDA_SUCCESS: cu_module is not a usable module handle
+    return;
+  }
+  void *cu_function = NULL;
+  status = _cuda_cu_module_get_function(&cu_function, cu_module, name);
+  if (TraceWarpLoading) {
+    tty->print_cr("gpu_ptx::_cuda_cu_module_get_function(%s):%p %d", name, cu_function, status);
+  }
+}
+
+
+#ifdef __APPLE__
+// Opens the CUDA driver library and resolves the driver entry points used by this file.
+// Returns true only if the library was opened and every looked-up symbol was found.
+bool gpu::Ptx::probe_linkage_apple() {
+  void *handle = dlopen("/usr/local/cuda/lib/libcuda.dylib", RTLD_LAZY);
+  if (handle == NULL) {
+    return false;
+  }
+
+  _cuda_cu_init = CAST_TO_FN_PTR(cuda_cu_init_func_t, dlsym(handle, "cuInit"));
+  _cuda_cu_ctx_create = CAST_TO_FN_PTR(cuda_cu_ctx_create_func_t, dlsym(handle, "cuCtxCreate"));
+  _cuda_cu_ctx_detach = CAST_TO_FN_PTR(cuda_cu_ctx_detach_func_t, dlsym(handle, "cuCtxDetach"));
+  _cuda_cu_ctx_synchronize = CAST_TO_FN_PTR(cuda_cu_ctx_synchronize_func_t, dlsym(handle, "cuCtxSynchronize"));
+  _cuda_cu_device_get_count = CAST_TO_FN_PTR(cuda_cu_device_get_count_func_t, dlsym(handle, "cuDeviceGetCount"));
+  _cuda_cu_device_get_name = CAST_TO_FN_PTR(cuda_cu_device_get_name_func_t, dlsym(handle, "cuDeviceGetName"));
+  _cuda_cu_device_get = CAST_TO_FN_PTR(cuda_cu_device_get_func_t, dlsym(handle, "cuDeviceGet"));
+  _cuda_cu_device_compute_capability = CAST_TO_FN_PTR(cuda_cu_device_compute_capability_func_t, dlsym(handle, "cuDeviceComputeCapability"));
+  _cuda_cu_module_get_function = CAST_TO_FN_PTR(cuda_cu_module_get_function_func_t, dlsym(handle, "cuModuleGetFunction"));
+  _cuda_cu_module_load_data_ex = CAST_TO_FN_PTR(cuda_cu_module_load_data_ex_func_t, dlsym(handle, "cuModuleLoadDataEx"));
+
+  // A missing symbol must disable GPU linkage: the pointers are later called without NULL checks.
+  // NOTE(review): _cuda_cu_launch_kernel is declared but not resolved here.
+  return _cuda_cu_init != NULL && _cuda_cu_ctx_create != NULL && _cuda_cu_ctx_detach != NULL &&
+         _cuda_cu_ctx_synchronize != NULL && _cuda_cu_device_get_count != NULL &&
+         _cuda_cu_device_get_name != NULL && _cuda_cu_device_get != NULL &&
+         _cuda_cu_device_compute_capability != NULL && _cuda_cu_module_get_function != NULL &&
+         _cuda_cu_module_load_data_ex != NULL;
+}
+#endif
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gpu/ptx/gpu_ptx.hpp	Tue Apr 30 08:17:55 2013 -0400
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef GPU_PTX_HPP
+#define GPU_PTX_HPP
+
+class Ptx {
+  friend class gpu;
+
+ protected:
+  static void probe_linkage();
+#ifdef __APPLE__
+  static bool probe_linkage_apple();
+#endif
+  static bool initialize_gpu();
+  static void generate_kernel(unsigned char *code, int code_len, const char *name);
+  
+private:
+  typedef int (*cuda_cu_init_func_t)(unsigned int, int);
+  typedef int (*cuda_cu_ctx_create_func_t)(void *, int, int);
+  typedef int (*cuda_cu_ctx_detach_func_t)(int *);
+  typedef int (*cuda_cu_ctx_synchronize_func_t)(int *);
+  typedef int (*cuda_cu_device_get_count_func_t)(int *);
+  typedef int (*cuda_cu_device_get_name_func_t)(char *, int, int);
+  typedef int (*cuda_cu_device_get_func_t)(int *, int);
+  typedef int (*cuda_cu_device_compute_capability_func_t)(int *, int *, int);
+  typedef int (*cuda_cu_launch_kernel_func_t)(int *, int *, int);
+  typedef int (*cuda_cu_module_get_function_func_t)(void *, void *, const char *);
+  typedef int (*cuda_cu_module_load_data_ex_func_t)(void *, void *, unsigned int, int *, void **);
+
+  static cuda_cu_init_func_t                      _cuda_cu_init;
+  static cuda_cu_ctx_create_func_t                _cuda_cu_ctx_create;
+  static cuda_cu_ctx_detach_func_t                _cuda_cu_ctx_detach;
+  static cuda_cu_ctx_synchronize_func_t           _cuda_cu_ctx_synchronize;
+  static cuda_cu_device_get_count_func_t          _cuda_cu_device_get_count;
+  static cuda_cu_device_get_name_func_t           _cuda_cu_device_get_name;
+  static cuda_cu_device_get_func_t                _cuda_cu_device_get;
+  static cuda_cu_device_compute_capability_func_t _cuda_cu_device_compute_capability;
+  static cuda_cu_launch_kernel_func_t             _cuda_cu_launch_kernel;
+  static cuda_cu_module_get_function_func_t       _cuda_cu_module_get_function;
+  static cuda_cu_module_load_data_ex_func_t       _cuda_cu_module_load_data_ex;
+
+protected:
+  static void * _device_context;
+};
+
+#endif // GPU_PTX_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os/bsd/vm/gpu_bsd.cpp	Tue Apr 30 08:17:55 2013 -0400
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "runtime/gpu.hpp"
+#include "utilities/ostream.hpp"
+
+#ifdef __APPLE__
+#include <CoreGraphics/CoreGraphics.h>
+#include <IOKit/IOKitLib.h>
+#endif
+
+// BSD-family implementation of the GPU hardware probe: records through
+// set_available() whether a usable device was found.
+void gpu::probe_gpu() {
+#ifndef __APPLE__
+  // Only the Apple probe exists in this file; other BSDs report "no GPU".
+  if (TraceWarpLoading) {
+    tty->print_cr("gpu_bsd::probe_gpu(not APPLE)");
+  }
+  set_available(false);
+#else
+  const bool found = gpu::Bsd::probe_gpu_apple();
+  set_available(found);
+  if (TraceWarpLoading) {
+    tty->print_cr("gpu_bsd::probe_gpu(APPLE): %d", gpu::is_available());
+  }
+#endif
+}
+
+#ifdef __APPLE__
+/*
+ * Looks up the I/O Registry property `key` for `entry` and, when it is a
+ * CFData of at least four bytes, stores its first 32 bits in `*value`.
+ * Returns false (leaving `*value` untouched) when the property is absent,
+ * has another type, or is too short. The property object is always released.
+ */
+static bool read_uint32_property(io_registry_entry_t entry, CFStringRef key,
+                                 IOOptionBits options, UInt32 *value) {
+  CFTypeRef prop = IORegistryEntrySearchCFProperty(entry, kIOServicePlane, key,
+                                                   kCFAllocatorDefault, options);
+  if (prop == NULL) {
+    return false;
+  }
+  bool ok = false;
+  if (CFGetTypeID(prop) == CFDataGetTypeID() &&
+      CFDataGetLength((CFDataRef)prop) >= (CFIndex)sizeof(UInt32)) {
+    *value = *((const UInt32 *)CFDataGetBytePtr((CFDataRef)prop));
+    ok = true;
+  }
+  CFRelease(prop);  // the search hands back an object the caller must release
+  return ok;
+}
+
+/*
+ * Prints the "model" registry property of `entry` to tty when it is present.
+ * The data is printed with an explicit length because CFData carries no
+ * guarantee of a terminating NUL byte.
+ */
+static void trace_model_property(io_registry_entry_t entry, IOOptionBits options) {
+  CFTypeRef model = IORegistryEntrySearchCFProperty(entry, kIOServicePlane, CFSTR("model"),
+                                                    kCFAllocatorDefault, options);
+  if (model == NULL) {
+    return;
+  }
+  if (CFGetTypeID(model) == CFDataGetTypeID()) {
+    tty->print_cr("model: %.*s", (int)CFDataGetLength((CFDataRef)model),
+                  CFDataGetBytePtr((CFDataRef)model));
+  }
+  CFRelease(model);
+}
+
+/*
+ * This is rudimentary at best, but until we decide on a CUDA Compute Capability
+ * level, this will have to suffice.
+ *
+ * Walks the active displays and returns true as soon as one of them is driven
+ * by a supported NVIDIA device; returns false when none is, or when the display
+ * list cannot be obtained.
+ */
+bool gpu::Bsd::probe_gpu_apple() {
+  const IOOptionBits options = kIORegistryIterateRecursively | kIORegistryIterateParents;
+  CGDisplayCount     displayCount = 0;
+
+  // First call only asks how many displays are active.
+  if (CGGetActiveDisplayList(0, NULL, &displayCount) != CGDisplayNoErr || displayCount == 0) {
+    return false;
+  }
+  CGDirectDisplayID *displays =
+      (CGDirectDisplayID *)calloc((size_t)displayCount, sizeof(CGDirectDisplayID));
+  if (displays == NULL) {
+    return false;
+  }
+
+  bool supported = false;
+  if (CGGetActiveDisplayList(displayCount, displays, &displayCount) == CGDisplayNoErr) {
+    for (CGDisplayCount i = 0; i < displayCount && !supported; i++) {
+      io_registry_entry_t displayPort = CGDisplayIOServicePort(displays[i]);
+      UInt32 vendor = 0;
+      UInt32 device = 0;
+      if (!read_uint32_property(displayPort, CFSTR("vendor-id"), options, &vendor) ||
+          !read_uint32_property(displayPort, CFSTR("device-id"), options, &device)) {
+        continue;  // nothing to identify this display's adapter by
+      }
+      if (TraceWarpLoading) {
+        tty->print_cr("vendor: 0x%08X", vendor);
+        tty->print_cr("device: 0x%08X", device);
+        trace_model_property(displayPort, options);
+      }
+      if (vendor != 0x10DE) {
+        continue;  // not an NVIDIA adapter; keep looking at the other displays
+      }
+      /*
+       * see https://developer.nvidia.com/cuda-gpus
+       * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs
+       * see http://www.pcidatabase.com/reports.php?type=csv
+       *
+       * Only supporting GK104, GK106, GK107 and GK110 GPUs for now,
+       * which is CUDA Compute Capability 3.0 and greater.
+       */
+      switch (device) {
+        case 0x11C0:  // NVIDIA GeForce GTX 660
+          supported = true;
+          break;
+        default:
+          break;
+      }
+    }
+  }
+  free(displays);
+  return supported;
+}
+#endif
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os/bsd/vm/gpu_bsd.hpp	Tue Apr 30 08:17:55 2013 -0400
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_BSD_VM_GPU_BSD_HPP
+#define OS_BSD_VM_GPU_BSD_HPP
+
+
+// BSD-specific GPU probing hooks. This header is textually included inside
+// the body of class gpu (see runtime/gpu.hpp), so the class is referred to
+// as gpu::Bsd; the friend declaration lets gpu call the protected members.
+class Bsd {
+  friend class gpu;
+
+ protected:
+  // NOTE(review): no definition of Bsd::probe_gpu() is visible in this change;
+  // gpu_bsd.cpp defines gpu::probe_gpu() instead - confirm this declaration is needed.
+  static bool probe_gpu();
+#ifdef __APPLE__
+  // Returns true when a supported GPU is detected (defined in gpu_bsd.cpp).
+  static bool probe_gpu_apple();
+#endif
+};
+
+#endif // OS_BSD_VM_GPU_BSD_HPP
--- a/src/share/vm/graal/graalCompiler.cpp	Tue Apr 30 13:00:09 2013 +0200
+++ b/src/share/vm/graal/graalCompiler.cpp	Tue Apr 30 08:17:55 2013 -0400
@@ -28,6 +28,7 @@
 #include "graal/graalJavaAccess.hpp"
 #include "graal/graalVMToCompiler.hpp"
 #include "graal/graalCompilerToVM.hpp"
+#include "graal/graalCompilerToGPU.hpp"
 #include "graal/graalEnv.hpp"
 #include "graal/graalRuntime.hpp"
 #include "runtime/arguments.hpp"
@@ -65,6 +66,13 @@
     vm_abort(false);
   }
   env->RegisterNatives(klass, CompilerToVM_methods, CompilerToVM_methods_count());
+  
+  klass = env->FindClass("com/oracle/graal/hotspot/bridge/CompilerToGPUImpl");
+  if (klass == NULL) {
+    tty->print_cr("graal CompilerToGPUImpl class not found");
+    vm_abort(false);
+  }
+  env->RegisterNatives(klass, CompilerToGPU_methods, CompilerToGPU_methods_count());
 
   ResourceMark rm;
   HandleMark hm;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/graal/graalCompilerToGPU.cpp	Tue Apr 30 08:17:55 2013 -0400
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "precompiled.hpp"
+
+#include "graal/graalCompiler.hpp"
+#include "graal/graalEnv.hpp"
+#include "runtime/gpu.hpp"
+
+
+// Entry to native method implementation that transitions current thread to '_thread_in_vm'.
+// Each entry macro below expands to the *opening* of a JNI-exported function
+// named c2v_<name> with the given result type and parameter list, followed by
+// a trace statement. The function body is written after the macro invocation
+// and must be closed with C2V_END. The last line of each macro ends in a
+// line-continuation backslash, so the line that follows it must stay empty.
+#define C2V_VMENTRY(result_type, name, signature) \
+  JNIEXPORT result_type JNICALL c2v_ ## name signature { \
+  TRACE_graal_3("CompilerToGPU::" #name); \
+  GRAAL_VM_ENTRY_MARK; \
+
+// Entry to native method implementation that calls a JNI function
+// and hence cannot transition current thread to '_thread_in_vm'.
+// Differs from C2V_VMENTRY only in omitting GRAAL_VM_ENTRY_MARK; none of the
+// entry points defined in this file use it.
+#define C2V_ENTRY(result_type, name, signature) \
+  JNIEXPORT result_type JNICALL c2v_ ## name signature { \
+  TRACE_graal_3("CompilerToGPU::" #name); \
+
+// Closes the function body opened by C2V_VMENTRY / C2V_ENTRY.
+#define C2V_END }
+
+
+// Native implementation of CompilerToGPUImpl.generateKernel(byte[], String):
+// passes the bytes of `code` and the kernel name `name` to gpu::generate_kernel().
+// Returns 0 when the GPU is not available, has no driver linkage, or is not
+// initialized, and when the arguments cannot be read; otherwise returns the
+// placeholder value 42.
+// NOTE(review): this entry uses C2V_VMENTRY yet calls JNI functions; the macro
+// comments in this file say such entries should use C2V_ENTRY - confirm.
+C2V_VMENTRY(jlong, generateKernel, (JNIEnv *env, jobject, jbyteArray code, jstring name))
+  // All three preconditions must hold. The earlier form "a || b && c" parsed as
+  // "a || (b && c)" and let an uninitialized device through.
+  if (!gpu::is_available() || !gpu::has_gpu_linkage() || !gpu::is_initialized()) {
+    tty->print_cr("generateKernel - not available / no linkage / not initialized");
+    return 0;
+  }
+  if (code == NULL || name == NULL) {
+    return 0;
+  }
+  jbyte *bytes = env->GetByteArrayElements(code, NULL);
+  if (bytes == NULL) {
+    return 0;  // JNI could not provide the array contents
+  }
+  const char *namestr = env->GetStringUTFChars(name, NULL);
+  if (namestr == NULL) {
+    // Nothing was written to the buffer, so discard it without copying back.
+    env->ReleaseByteArrayElements(code, bytes, JNI_ABORT);
+    return 0;
+  }
+  jint len = env->GetArrayLength(code);
+  gpu::generate_kernel((unsigned char *)bytes, len, namestr);
+  env->ReleaseByteArrayElements(code, bytes, 0);
+  env->ReleaseStringUTFChars(name, namestr);
+
+  return 42;
+C2V_END
+
+// Native implementation of CompilerToGPUImpl.deviceInit(): initializes the GPU
+// on first use and reports whether it is initialized afterwards.
+C2V_VMENTRY(jboolean, deviceInit, (JNIEnv *env, jobject))
+  const bool usable = gpu::is_available() && gpu::has_gpu_linkage();
+  if (!usable) {
+    tty->print_cr("deviceInit - not available / no linkage");
+    return false;
+  }
+  if (!gpu::is_initialized()) {
+    gpu::initialize_gpu();
+    return gpu::is_initialized();
+  }
+  tty->print_cr("deviceInit - already initialized");
+  return true;
+C2V_END
+
+// Native implementation of CompilerToGPUImpl.deviceDetach(): currently a stub
+// that performs no work and always reports success.
+C2V_VMENTRY(jboolean, deviceDetach, (JNIEnv *env, jobject))
+  return true;
+C2V_END
+
+
+// Helpers for building the JNINativeMethod table below: CC casts a string
+// literal to the non-const char* the JNI struct expects, FN_PTR yields the
+// address of the c2v_<f> entry point as a void*.
+#define CC (char*)  /*cast a literal from (const char*)*/
+#define FN_PTR(f) CAST_FROM_FN_PTR(void*, &(c2v_ ## f))
+
+// JNI type-descriptor fragments. Only STRING and GPUSPACE_METHOD are
+// referenced by the method table in this file.
+#define RESOLVED_TYPE         "Lcom/oracle/graal/api/meta/ResolvedJavaType;"
+#define TYPE                  "Lcom/oracle/graal/api/meta/JavaType;"
+#define METHOD                "Lcom/oracle/graal/api/meta/JavaMethod;"
+#define FIELD                 "Lcom/oracle/graal/api/meta/JavaField;"
+#define SIGNATURE             "Lcom/oracle/graal/api/meta/Signature;"
+#define CONSTANT_POOL         "Lcom/oracle/graal/api/meta/ConstantPool;"
+#define CONSTANT              "Lcom/oracle/graal/api/meta/Constant;"
+#define KIND                  "Lcom/oracle/graal/api/meta/Kind;"
+#define LOCAL                  "Lcom/oracle/graal/api/meta/Local;"
+#define RUNTIME_CALL          "Lcom/oracle/graal/api/code/RuntimeCall;"
+#define EXCEPTION_HANDLERS    "[Lcom/oracle/graal/api/meta/ExceptionHandler;"
+#define REFLECT_METHOD        "Ljava/lang/reflect/Method;"
+#define REFLECT_CONSTRUCTOR   "Ljava/lang/reflect/Constructor;"
+#define REFLECT_FIELD         "Ljava/lang/reflect/Field;"
+#define STRING                "Ljava/lang/String;"
+#define OBJECT                "Ljava/lang/Object;"
+#define CLASS                 "Ljava/lang/Class;"
+#define STACK_TRACE_ELEMENT   "Ljava/lang/StackTraceElement;"
+#define HS_RESOLVED_TYPE      "Lcom/oracle/graal/hotspot/meta/HotSpotResolvedObjectType;"
+#define HS_RESOLVED_JAVA_TYPE "Lcom/oracle/graal/hotspot/meta/HotSpotResolvedJavaType;"
+#define HS_RESOLVED_METHOD    "Lcom/oracle/graal/hotspot/meta/HotSpotResolvedJavaMethod;"
+#define HS_RESOLVED_FIELD     "Lcom/oracle/graal/hotspot/meta/HotSpotResolvedJavaField;"
+#define HS_COMP_RESULT        "Lcom/oracle/graal/hotspot/HotSpotCompilationResult;"
+#define HS_CONFIG             "Lcom/oracle/graal/hotspot/HotSpotVMConfig;"
+#define HS_METHOD             "Lcom/oracle/graal/hotspot/meta/HotSpotMethod;"
+#define HS_INSTALLED_CODE     "Lcom/oracle/graal/hotspot/meta/HotSpotInstalledCode;"
+#define METHOD_DATA           "Lcom/oracle/graal/hotspot/meta/HotSpotMethodData;"
+#define METASPACE_METHOD      "J"
+#define METASPACE_METHOD_DATA "J"
+#define NMETHOD               "J"
+#define GPUSPACE_METHOD       "J"
+
+// Native method table registered for CompilerToGPUImpl: Java method name,
+// JNI signature, and the c2v_* entry point that implements it.
+// The space before GPUSPACE_METHOD matters: a string literal immediately
+// followed by an identifier is lexed as a user-defined literal in C++11.
+JNINativeMethod CompilerToGPU_methods[] = {
+  {CC"generateKernel", CC"([B" STRING ")" GPUSPACE_METHOD, FN_PTR(generateKernel)},
+  {CC"deviceInit",     CC"()Z",                            FN_PTR(deviceInit)},
+  {CC"deviceDetach",   CC"()Z",                            FN_PTR(deviceDetach)},
+};
+
+// Number of entries in CompilerToGPU_methods, derived from the array's size.
+int CompilerToGPU_methods_count() {
+  const size_t entry_size = sizeof(CompilerToGPU_methods[0]);
+  return sizeof(CompilerToGPU_methods) / entry_size;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/graal/graalCompilerToGPU.hpp	Tue Apr 30 08:17:55 2013 -0400
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef SHARE_VM_GRAAL_GRAAL_COMPILER_TO_GPU_HPP
+#define SHARE_VM_GRAAL_GRAAL_COMPILER_TO_GPU_HPP
+
+#include "prims/jni.h"
+
+// Native method table for CompilerToGPUImpl and its entry count; both are
+// defined in graalCompilerToGPU.cpp and passed to RegisterNatives.
+extern JNINativeMethod CompilerToGPU_methods[];
+int CompilerToGPU_methods_count();
+
+
+#endif // SHARE_VM_GRAAL_GRAAL_COMPILER_TO_GPU_HPP
--- a/src/share/vm/runtime/globals.hpp	Tue Apr 30 13:00:09 2013 +0200
+++ b/src/share/vm/runtime/globals.hpp	Tue Apr 30 08:17:55 2013 -0400
@@ -3691,6 +3691,9 @@
   product(bool , AllowNonVirtualCalls, false,                               \
           "Obey the ACC_SUPER flag and allow invokenonvirtual calls")       \
                                                                             \
+  develop(bool, TraceWarpLoading, false,                                    \
+          "trace external GPU warp loading")                                \
+                                                                            \
   experimental(uintx, ArrayAllocatorMallocLimit,                            \
           SOLARIS_ONLY(64*K) NOT_SOLARIS(max_uintx),                        \
           "Allocation less than this value will be allocated "              \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/gpu.cpp	Tue Apr 30 08:17:55 2013 -0400
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/gpu.hpp"
+#include "ptx/gpu_ptx.hpp"
+
+bool gpu::_available = false;   // does the hardware exist?
+bool gpu::_gpu_linkage = false; // is the driver library to access the GPU installed?
+bool gpu::_initialized = false; // is the GPU device initialized?
+
+// GPU discovery entry point: probes for hardware on BSD-family builds, then
+// probes for driver linkage on every platform.
+void gpu::init() {
+#if defined(TARGET_OS_FAMILY_bsd)
+  probe_gpu();
+#endif
+  // need multi-gpu TARGET ifdef
+  probe_linkage();
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/runtime/gpu.hpp	Tue Apr 30 08:17:55 2013 -0400
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_RUNTIME_GPU_HPP
+#define SHARE_VM_RUNTIME_GPU_HPP
+
+#include "runtime/atomic.hpp"
+
+// gpu defines the interface to the graphics processor; this includes traditional
+// GPU services such as graphics kernel load and execute.
+
+
+// All-static facade over GPU discovery, initialization and kernel loading.
+// State is kept in three flags (_available, _gpu_linkage, _initialized), each
+// with a plain setter/getter pair.
+class gpu: AllStatic {
+public:
+  // Runs the hardware probe (BSD builds only) and then the linkage probe;
+  // defined in gpu.cpp.
+  static void init(void);
+
+  // Per-OS hardware probe; the BSD version lives in gpu_bsd.cpp and records
+  // its result through set_available().
+  static void probe_gpu();
+
+  // NOTE(review): the definitions of the next three functions are not part of
+  // runtime/gpu.cpp; presumably they live with the PTX code - confirm.
+  static void probe_linkage();
+  
+  static void initialize_gpu();
+  
+  static void generate_kernel(unsigned char *code, int code_len, const char *name);
+
+  // Does the hardware exist?
+  static void set_available(bool value) {
+    _available = value;
+  }
+
+  static bool is_available() { return _available; }
+
+  // Is the GPU device initialized?
+  static void set_initialized(bool value) {
+    _initialized = value;
+  }
+
+  static bool is_initialized() { return _initialized; }
+
+  // Is the driver library to access the GPU installed?
+  static void set_gpu_linkage(bool value) {
+    _gpu_linkage = value;
+  }
+
+  static bool has_gpu_linkage() { return _gpu_linkage; }
+
+protected:
+  static bool _available;
+  static bool _gpu_linkage;
+  static bool _initialized;
+
+  // Platform dependent stuff
+  // The headers below are included *inside* the class body, so the classes
+  // they declare become nested members of gpu (e.g. gpu::Bsd).
+#ifdef TARGET_OS_FAMILY_linux
+#endif
+#ifdef TARGET_OS_FAMILY_solaris
+#endif
+#ifdef TARGET_OS_FAMILY_windows
+#endif
+#ifdef TARGET_OS_FAMILY_bsd
+# include "gpu_bsd.hpp"
+#endif
+
+# include "ptx/gpu_ptx.hpp"
+
+};
+
+
+#endif // SHARE_VM_RUNTIME_GPU_HPP
--- a/src/share/vm/runtime/thread.cpp	Tue Apr 30 13:00:09 2013 +0200
+++ b/src/share/vm/runtime/thread.cpp	Tue Apr 30 08:17:55 2013 -0400
@@ -54,6 +54,7 @@
 #include "runtime/deoptimization.hpp"
 #include "runtime/fprofiler.hpp"
 #include "runtime/frame.inline.hpp"
+#include "runtime/gpu.hpp"
 #include "runtime/init.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/java.hpp"
@@ -3307,6 +3308,9 @@
   // Initialize the os module before using TLS
   os::init();
 
+  // probe for warp capability
+  gpu::init();
+
   // Initialize system properties.
   Arguments::init_system_properties();