Mercurial > hg > truffle

--- a/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java	Thu Jan 30 00:48:41 2014 +0100
+++ b/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java	Thu Jan 30 00:52:33 2014 +0100
@@ -49,6 +49,10 @@

 public abstract class GraalKernelTester extends KernelTester {

+    public GraalKernelTester() {
+        super(getHSAILBackend().isDeviceInitialized()); // KernelTester's okraLibExists flag is fed from the HSAIL backend's device state
+    }
+
     private static HSAILHotSpotBackend getHSAILBackend() {
         Backend backend = runtime().getBackend(HSAIL.class);
         Assume.assumeTrue(backend instanceof HSAILHotSpotBackend);
@@ -100,11 +104,10 @@
     @Override
     protected void dispatchKernelOkra(int range, Object... args) {
         HSAILHotSpotBackend backend = getHSAILBackend();
-        HotSpotNmethod code = backend.compileAndInstallKernel(testMethod);
-
-        if (code != null) {
+        if (backend.isDeviceInitialized()) {
             try {
-                code.executeParallel(range, 0, 0, args);
+                HotSpotNmethod code = backend.compileAndInstallKernel(testMethod);
+                backend.executeKernel(code, range, args);
             } catch (InvalidInstalledCodeException e) {
                 Debug.log("WARNING:Invalid installed code: " + e);
                 e.printStackTrace();
--- a/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/KernelTester.java	Thu Jan 30 00:48:41 2014 +0100
+++ b/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/KernelTester.java	Thu Jan 30 00:52:33 2014 +0100
@@ -108,17 +108,17 @@

     private static boolean gaveNoOkraWarning = false;
     private boolean onSimulator;
-    private boolean okraLibExists;
+    private final boolean okraLibExists;

     public boolean runningOnSimulator() {
         return onSimulator;
     }

-    public KernelTester() {
-        okraLibExists = OkraUtil.okraLibExists();
+    public KernelTester(boolean okraLibExists) {
         dispatchMode = DispatchMode.SEQ;
         hsailMode = HsailMode.COMPILED;
         useLambdaMethod = false;
+        this.okraLibExists = okraLibExists;
     }

     public abstract void runTest();
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Thu Jan 30 00:48:41 2014 +0100
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/BasicPTXTest.java	Thu Jan 30 00:52:33 2014 +0100
@@ -22,6 +22,8 @@
  */
 package com.oracle.graal.compiler.ptx.test;

+import static org.junit.Assert.*;
+
 import org.junit.*;

 /**
@@ -56,6 +58,11 @@
         return p1 + p0;
     }

+    @Test
+    public void testGetAvailableProcessors() {
+        assertTrue(getPTXBackend().getAvailableProcessors() >= 0); // 0 is legitimate: it is the result when the device is not initialized
+    }
+
     public static void main(String[] args) {
         compileAndPrintCode(new BasicPTXTest());
     }
--- a/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTest.java	Thu Jan 30 00:48:41 2014 +0100
+++ b/graal/com.oracle.graal.compiler.ptx.test/src/com/oracle/graal/compiler/ptx/test/PTXTest.java	Thu Jan 30 00:52:33 2014 +0100
@@ -43,7 +43,7 @@
  */
 public abstract class PTXTest extends GraalCompilerTest {

-    private static PTXHotSpotBackend getPTXBackend() {
+    public static PTXHotSpotBackend getPTXBackend() {
         Backend backend = runtime().getBackend(PTX.class);
         Assume.assumeTrue(backend instanceof PTXHotSpotBackend);
         return (PTXHotSpotBackend) backend;
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/ForEachToGraal.java	Thu Jan 30 00:48:41 2014 +0100
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/ForEachToGraal.java	Thu Jan 30 00:52:33 2014 +0100
@@ -106,7 +106,7 @@
         if (code != null) {
             try {
                 // No return value from HSAIL kernels
-                code.executeParallel(jobSize, 0, 0, args);
+                getHSAILBackend().executeKernel(code, jobSize, args);
                 return true;
             } catch (InvalidInstalledCodeException iice) {
                 Debug.log("WARNING: Invalid installed code at exec time." + iice);
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Thu Jan 30 00:48:41 2014 +0100
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Thu Jan 30 00:52:33 2014 +0100
@@ -30,6 +30,7 @@
 import java.lang.reflect.*;
 import java.util.*;

+import com.amd.okra.*;
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.code.CallingConvention.Type;
 import com.oracle.graal.api.meta.*;
@@ -60,6 +61,7 @@

     private Map<String, String> paramTypeMap = new HashMap<>();
     private Buffer codeBuffer;
+    private final boolean deviceInitialized;

     public HSAILHotSpotBackend(HotSpotGraalRuntime runtime, HotSpotProviders providers) {
         super(runtime, providers);
@@ -67,6 +69,10 @@
         paramTypeMap.put("HotSpotResolvedPrimitiveType<float>", "f32");
         paramTypeMap.put("HotSpotResolvedPrimitiveType<double>", "f64");
         paramTypeMap.put("HotSpotResolvedPrimitiveType<long>", "s64");
+
+        // The order of the conjunction below is important: the OkraUtil
+        // call may provision the native library required by the initialize() call
+        deviceInitialized = OkraUtil.okraLibExists() && initialize();
     }

     @Override
@@ -75,6 +81,20 @@
     }

     /**
+     * Initializes the GPU device.
+     *
+     * @return whether or not initialization was successful
+     */
+    private static native boolean initialize();
+
+    /**
+     * Determines if the GPU device (or simulator) is available and initialized.
+     */
+    public boolean isDeviceInitialized() {
+        return deviceInitialized;
+    }
+
+    /**
      * Completes the initialization of the HSAIL backend. This includes initializing the providers
      * and registering any method substitutions specified by the HSAIL backend.
      */
@@ -117,12 +137,15 @@
         graphBuilderSuite.appendPhase(new NonNullParametersPhase());
         CallingConvention cc = getCallingConvention(providers.getCodeCache(), Type.JavaCallee, graph.method(), false);
         Suites suites = providers.getSuites().getDefaultSuites();
-        ExternalCompilationResult hsailCode = compileGraph(graph, cc, method, providers, this, this.getTarget(), null, graphBuilderSuite, OptimisticOptimizations.NONE, getProfilingInfo(graph),
-                        new SpeculationLog(), suites, true, new ExternalCompilationResult(), CompilationResultBuilderFactory.Default);
+        ExternalCompilationResult hsailCode = compileGraph(graph, cc, method, providers, this, this.getTarget(), null, graphBuilderSuite, OptimisticOptimizations.NONE, getProfilingInfo(graph), null,
+                        suites, true, new ExternalCompilationResult(), CompilationResultBuilderFactory.Default);

         if (makeBinary) {
+            if (!deviceInitialized) {
+                throw new GraalInternalError("Cannot generate GPU kernel if device is not initialized");
+            }
             try (Scope ds = Debug.scope("GeneratingKernelBinary")) {
-                long kernel = getRuntime().getCompilerToGPU().generateKernel(hsailCode.getTargetCode(), method.getName());
+                long kernel = generateKernel(hsailCode.getTargetCode(), method.getName());
                 if (kernel == 0) {
                     throw new GraalInternalError("Failed to compile HSAIL kernel");
                 }
@@ -135,6 +158,11 @@
     }

     /**
+     * Generates a GPU binary from HSAIL code; the caller treats a result of 0 as a failed compilation.
+     */
+    private static native long generateKernel(byte[] hsailCode, String name);
+
+    /**
      * Installs the {@linkplain ExternalCompilationResult#getEntryPoint() GPU binary} associated
      * with some given HSAIL code in the code cache and returns a {@link HotSpotNmethod} handle to
      * the installed code.
@@ -147,6 +175,15 @@
         return getProviders().getCodeCache().addExternalMethod(method, hsailCode);
     }

+    public boolean executeKernel(HotSpotInstalledCode kernel, int jobSize, Object[] args) throws InvalidInstalledCodeException {
+        if (!deviceInitialized) { // deviceInitialized is fixed in the constructor; fail fast rather than enter native code
+            throw new GraalInternalError("Cannot execute GPU kernel if device is not initialized");
+        }
+        return executeKernel0(kernel, jobSize, args);
+    }
+
+    private static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args) throws InvalidInstalledCodeException; // mapped to gpu::Hsail::execute_kernel_void_1d in the HSAIL_methods JNI table
+
     /**
      * Use the HSAIL register set when the compilation target is HSAIL.
      */
--- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java	Thu Jan 30 00:48:41 2014 +0100
+++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java	Thu Jan 30 00:52:33 2014 +0100
@@ -45,7 +45,6 @@
 import com.oracle.graal.graph.*;
 import com.oracle.graal.hotspot.*;
 import com.oracle.graal.hotspot.HotSpotReplacementsImpl.GraphProducer;
-import com.oracle.graal.hotspot.bridge.*;
 import com.oracle.graal.hotspot.meta.*;
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.LIRInstruction.OperandFlag;
@@ -67,6 +66,8 @@
  */
 public class PTXHotSpotBackend extends HotSpotBackend {

+    private final boolean deviceInitialized;
+
     /**
      * Descriptor for the PTX runtime method for calling a kernel. The C++ signature is:
      *
@@ -101,38 +102,58 @@

     public PTXHotSpotBackend(HotSpotGraalRuntime runtime, HotSpotProviders providers) {
         super(runtime, providers);
-        CompilerToGPU compilerToGPU = getRuntime().getCompilerToGPU();
-        deviceInitialized = OmitDeviceInit || compilerToGPU.deviceInit();
+        if (OmitDeviceInit) { // graal.ptx.omitDeviceInit is set: report the device as initialized without calling native code
+            deviceInitialized = true;
+        } else {
+            boolean init = false;
+            try {
+                init = initialize();
+            } catch (UnsatisfiedLinkError e) { // deliberately ignored: no native initialize() is linked in, so init stays false
+            }
+            deviceInitialized = init;
+        }
     }

+    /**
+     * Initializes the GPU device.
+     *
+     * @return whether or not initialization was successful
+     */
+    private static native boolean initialize();
+
     @Override
     public boolean shouldAllocateRegisters() {
         return false;
     }

     /**
-     * Used to omit {@linkplain CompilerToGPU#deviceInit() device initialization}.
+     * Used to omit {@linkplain #initialize() device initialization}.
      */
     private static final boolean OmitDeviceInit = Boolean.getBoolean("graal.ptx.omitDeviceInit");

     @Override
     public void completeInitialization() {
         HotSpotHostForeignCallsProvider hostForeignCalls = (HotSpotHostForeignCallsProvider) getRuntime().getHostProviders().getForeignCalls();
-        CompilerToGPU compilerToGPU = getRuntime().getCompilerToGPU();
         if (deviceInitialized) {
-            long launchKernel = compilerToGPU.getLaunchKernelAddress();
+            long launchKernel = getLaunchKernelAddress();
             hostForeignCalls.registerForeignCall(CALL_KERNEL, launchKernel, NativeCall, DESTROYS_REGISTERS, NOT_LEAF, NOT_REEXECUTABLE, ANY_LOCATION);
         }
         super.completeInitialization();
     }

-    private boolean deviceInitialized;
+    /**
+     * Gets the address of {@code gpu::Ptx::execute_kernel_from_vm()}.
+     */
+    private static native long getLaunchKernelAddress();

     @Override
     public FrameMap newFrameMap() {
         return new PTXFrameMap(getCodeCache());
     }

+    /**
+     * Determines if the GPU device (or simulator) is available and initialized.
+     */
     public boolean isDeviceInitialized() {
         return deviceInitialized;
     }
@@ -181,7 +202,7 @@
         if (makeBinary) {
             try (Scope ds = Debug.scope("GeneratingKernelBinary")) {
                 assert ptxCode.getTargetCode() != null;
-                long kernel = getRuntime().getCompilerToGPU().generateKernel(ptxCode.getTargetCode(), method.getName());
+                long kernel = generateKernel(ptxCode.getTargetCode(), method.getName());
                 ptxCode.setEntryPoint(kernel);
             } catch (Throwable e) {
                 throw Debug.handle(e);
@@ -191,6 +212,11 @@
     }

     /**
+     * Generates a GPU binary from PTX code.
+     */
+    private static native long generateKernel(byte[] targetCode, String name);
+
+    /**
      * A list of the {@linkplain #installKernel(ResolvedJavaMethod, ExternalCompilationResult)
      * installed} kernels. This is required so that there is a strong reference to each installed
      * kernel as long as it is {@linkplain HotSpotNmethod#isValid() valid}. The list is pruned of
@@ -446,4 +472,17 @@
         codeBuffer.emitString0("}");
         codeBuffer.emitString("");
     }
+
+    /**
+     * Gets the total number of available CUDA cores, or 0 if the GPU device is not initialized.
+     */
+    public int getAvailableProcessors() {
+        if (!deviceInitialized) {
+            return 0;
+        }
+        return getAvailableProcessors0();
+    }
+
+    private static native int getAvailableProcessors0();
+
 }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotGraalRuntime.java	Thu Jan 30 00:48:41 2014 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotGraalRuntime.java	Thu Jan 30 00:52:33 2014 +0100
@@ -181,7 +181,6 @@
     }

     protected/* final */CompilerToVM compilerToVm;
-    protected/* final */CompilerToGPU compilerToGpu;
     protected/* final */VMToCompiler vmToCompiler;

     private HotSpotRuntimeInterpreterInterface runtimeInterpreterInterface;
@@ -211,11 +210,9 @@

     private HotSpotGraalRuntime() {
         CompilerToVM toVM = new CompilerToVMImpl();
-        CompilerToGPU toGPU = new CompilerToGPUImpl();
         VMToCompiler toCompiler = new VMToCompilerImpl(this);

         compilerToVm = toVM;
-        compilerToGpu = toGPU;
         vmToCompiler = toCompiler;
         config = new HotSpotVMConfig(compilerToVm);

@@ -237,7 +234,7 @@
         String hostArchitecture = config.getHostArchitectureName();
         hostBackend = registerBackend(findFactory(hostArchitecture).createBackend(this, null));

-        String[] gpuArchitectures = getGPUArchitectureNames();
+        String[] gpuArchitectures = getGPUArchitectureNames(compilerToVm);
         for (String arch : gpuArchitectures) {
             HotSpotBackendFactory factory = findFactory(arch);
             if (factory == null) {
@@ -271,15 +268,12 @@

     /**
      * Gets the names of the supported GPU architectures for the purpose of finding the
-     * corresponding {@linkplain HotSpotBackendFactory backend} objects. This method first looks for
-     * a comma or {@link java.io.File#pathSeparatorChar} separated list of names in the
-     * {@value #GRAAL_GPU_ISALIST_PROPERTY_NAME} system property. If this property is not set, then
-     * the GPU native support code is queried.
+     * corresponding {@linkplain HotSpotBackendFactory backend} objects.
      */
-    private static String[] getGPUArchitectureNames() {
-        String gpuList = System.getProperty(GRAAL_GPU_ISALIST_PROPERTY_NAME);
-        if (gpuList != null && !gpuList.isEmpty()) {
-            String[] gpus = gpuList.split("[,:]");
+    private static String[] getGPUArchitectureNames(CompilerToVM c2vm) {
+        String gpuList = c2vm.getGPUs();
+        if (!gpuList.isEmpty()) {
+            String[] gpus = gpuList.split(",");
             return gpus;
         }
         return new String[0];
@@ -320,10 +314,6 @@
         return vmToCompiler;
     }

-    public CompilerToGPU getCompilerToGPU() {
-        return compilerToGpu;
-    }
-
     /**
      * Converts a name to a Java type.
      *
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPU.java	Thu Jan 30 00:48:41 2014 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package com.oracle.graal.hotspot.bridge;
-
-import com.oracle.graal.api.code.InvalidInstalledCodeException;
-import com.oracle.graal.hotspot.meta.HotSpotInstalledCode;
-
-/**
- * Calls from Java into the GPU.
- */
-public interface CompilerToGPU {
-
-    /**
-     * Attempts to initialize and create a valid context with the GPU.
-     *
-     * @return whether the GPU context has been initialized and is valid.
-     */
-    boolean deviceInit();
-
-    /**
-     * Attempts to detach from a valid GPU context.
-     *
-     * @return whether the GPU context has been properly disposed.
-     */
-    boolean deviceDetach();
-
-    int availableProcessors();
-
-    /**
-     * Attempts to generate and return a bound function to the loaded method kernel on the GPU.
-     *
-     * @param code the text or binary values for a method kernel
-     * @return the value of the bound kernel in GPU space.
-     */
-    long generateKernel(byte[] code, String name) throws InvalidInstalledCodeException;
-
-    Object executeExternalMethodVarargs(Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException;
-
-    Object executeParallelMethodVarargs(int dimX, int dimY, int dimZ, Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException;
-
-    /**
-     * Gets the address of the runtime function for launching a kernel function.
-     */
-    long getLaunchKernelAddress();
-}
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToGPUImpl.java	Thu Jan 30 00:48:41 2014 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-/*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package com.oracle.graal.hotspot.bridge;
-
-import com.oracle.graal.api.code.InvalidInstalledCodeException;
-import com.oracle.graal.hotspot.meta.HotSpotInstalledCode;
-
-/**
- * Entries into the HotSpot GPU interface from Java code.
- */
-public class CompilerToGPUImpl implements CompilerToGPU {
-
-    public native boolean deviceInit();
-
-    public native long generateKernel(byte[] code, String name) throws InvalidInstalledCodeException;
-
-    public native boolean deviceDetach();
-
-    public native int availableProcessors();
-
-    public native Object executeExternalMethodVarargs(Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException;
-
-    public native Object executeParallelMethodVarargs(int dimX, int dimY, int dimZ, Object[] args, HotSpotInstalledCode hotspotInstalledCode) throws InvalidInstalledCodeException;
-
-    public native long getLaunchKernelAddress();
-}
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToVM.java	Thu Jan 30 00:48:41 2014 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToVM.java	Thu Jan 30 00:52:33 2014 +0100
@@ -264,4 +264,11 @@
      * Generate a unique id to identify the result of the compile.
      */
     int allocateCompileId(HotSpotResolvedJavaMethod method, int entryBCI);
+
+    /**
+     * Gets the names of the supported GPU architectures.
+     *
+     * @return a comma separated list of names
+     */
+    String getGPUs();
 }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToVMImpl.java	Thu Jan 30 00:48:41 2014 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/CompilerToVMImpl.java	Thu Jan 30 00:52:33 2014 +0100
@@ -172,4 +172,6 @@
     public native boolean isMature(long method);

     public native int allocateCompileId(HotSpotResolvedJavaMethod method, int entryBCI);
+
+    public native String getGPUs();
 }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java	Thu Jan 30 00:48:41 2014 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java	Thu Jan 30 00:52:33 2014 +0100
@@ -115,25 +115,11 @@
         return true;
     }

-    public Object executeParallel(int dimX, int dimY, int dimZ, Object... args) throws InvalidInstalledCodeException {
-
-        // For HSAIL, we do not pass the iteration variable, it comes from the workitemid
-        // assert checkArgs(args);
-
-        assert isExternal(); // for now
-
-        return runtime().getCompilerToGPU().executeParallelMethodVarargs(dimX, dimY, dimZ, args, this);
-
-    }
-
     @Override
     public Object executeVarargs(Object... args) throws InvalidInstalledCodeException {
         assert checkArgs(args);
-        if (isExternal()) {
-            return runtime().getCompilerToGPU().executeExternalMethodVarargs(args, this);
-        } else {
-            return runtime().getCompilerToVM().executeCompiledMethodVarargs(args, this);
-        }
+        assert !isExternal();
+        return runtime().getCompilerToVM().executeCompiledMethodVarargs(args, this);
     }

     @Override
--- a/hotspot/.project	Thu Jan 30 00:48:41 2014 +0100
+++ b/hotspot/.project	Thu Jan 30 00:52:33 2014 +0100
@@ -86,41 +86,6 @@
 	</natures>
 	<linkedResources>
 		<link>
-			<name>ptx</name>
-			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/gpu/ptx</locationURI>
-		</link>
-		<link>
-			<name>x86</name>
-			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/cpu/x86/vm</locationURI>
-		</link>
-		<link>
-			<name>ptx</name>
-			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/gpu/ptx/vm</locationURI>
-		</link>
-		<link>
-			<name>hsail</name>
-			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/gpu/hsail/vm</locationURI>
-		</link>
-		<link>
-			<name>sparc</name>
-			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/cpu/sparc/vm</locationURI>
-		</link>
-		<link>
-			<name>generated</name>
-			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/build/linux/linux_amd64_graal/generated</locationURI>
-		</link>
-		<link>
-			<name>make</name>
-			<type>2</type>
-			<locationURI>WORKSPACE_LOC/make</locationURI>
-		</link>
-		<link>
 			<name>bsd</name>
 			<type>2</type>
 			<locationURI>PARENT-1-PROJECT_LOC/src/os/bsd/vm</locationURI>
@@ -131,9 +96,49 @@
 			<locationURI>PARENT-1-PROJECT_LOC/src/os_cpu/bsd_x86/vm</locationURI>
 		</link>
 		<link>
-			<name>bsd_ptx</name>
+			<name>generated</name>
+			<type>2</type>
+			<locationURI>PARENT-1-PROJECT_LOC/build/linux/linux_amd64_graal/generated</locationURI>
+		</link>
+		<link>
+			<name>hsail</name>
+			<type>2</type>
+			<locationURI>PARENT-1-PROJECT_LOC/src/gpu/hsail/vm</locationURI>
+		</link>
+		<link>
+			<name>linux</name>
+			<type>2</type>
+			<locationURI>PARENT-1-PROJECT_LOC/src/os/linux/vm</locationURI>
+		</link>
+		<link>
+			<name>linux_sparc</name>
+			<type>2</type>
+			<locationURI>PARENT-1-PROJECT_LOC/src/os_cpu/linux_sparc/vm</locationURI>
+		</link>
+		<link>
+			<name>linux_x86</name>
 			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/os_gpu/bsd_ptx/vm</locationURI>
+			<locationURI>PARENT-1-PROJECT_LOC/src/os_cpu/linux_x86/vm</locationURI>
+		</link>
+		<link>
+			<name>make</name>
+			<type>2</type>
+			<locationURI>WORKSPACE_LOC/make</locationURI>
+		</link>
+		<link>
+			<name>ptx</name>
+			<type>2</type>
+			<locationURI>PARENT-1-PROJECT_LOC/src/gpu/ptx/vm</locationURI>
+		</link>
+		<link>
+			<name>sparc</name>
+			<type>2</type>
+			<locationURI>PARENT-1-PROJECT_LOC/src/cpu/sparc/vm</locationURI>
+		</link>
+		<link>
+			<name>vm</name>
+			<type>2</type>
+			<locationURI>PARENT-1-PROJECT_LOC/src/share/vm</locationURI>
 		</link>
 		<link>
 			<name>windows</name>
@@ -146,34 +151,9 @@
 			<locationURI>PARENT-1-PROJECT_LOC/src/os_cpu/windows_x86/vm</locationURI>
 		</link>
 		<link>
-			<name>linux</name>
-			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/os/linux/vm</locationURI>
-		</link>
-		<link>
-			<name>linux_x86</name>
-			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/os_cpu/linux_x86/vm</locationURI>
-		</link>
-		<link>
-			<name>windows_hsail</name>
+			<name>x86</name>
 			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/os_gpu/windows_hsail/vm</locationURI>
-		</link>
-		<link>
-			<name>linux_ptx</name>
-			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/os_gpu/linux_ptx/vm</locationURI>
-		</link>
-		<link>
-			<name>linux_sparc</name>
-			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/os_cpu/linux_sparc/vm</locationURI>
-		</link>
-		<link>
-			<name>vm</name>
-			<type>2</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/share/vm</locationURI>
+			<locationURI>PARENT-1-PROJECT_LOC/src/cpu/x86/vm</locationURI>
 		</link>
 	</linkedResources>
 </projectDescription>
--- a/make/bsd/makefiles/buildtree.make	Thu Jan 30 00:48:41 2014 +0100
+++ b/make/bsd/makefiles/buildtree.make	Thu Jan 30 00:52:33 2014 +0100
@@ -239,8 +239,6 @@
 	echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
-	echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
-	echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
@@ -261,8 +259,6 @@
 	echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
-	echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
-	echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
--- a/make/bsd/makefiles/vm.make	Thu Jan 30 00:48:41 2014 +0100
+++ b/make/bsd/makefiles/vm.make	Thu Jan 30 00:52:33 2014 +0100
@@ -178,7 +178,6 @@
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/hsail/vm
-SOURCE_PATHS+=$(HS_COMMON_SRC)/os_gpu/bsd_ptx/vm

 CORE_PATHS=$(foreach path,$(SOURCE_PATHS),$(call altsrc,$(path)) $(path))
 CORE_PATHS+=$(GENERATED)/jvmtifiles $(GENERATED)/tracefiles
--- a/make/linux/makefiles/buildtree.make	Thu Jan 30 00:48:41 2014 +0100
+++ b/make/linux/makefiles/buildtree.make	Thu Jan 30 00:52:33 2014 +0100
@@ -235,8 +235,6 @@
 	echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
-	echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
-	echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
@@ -257,8 +255,6 @@
 	echo "$(call gamma-path,commonsrc,cpu/$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os_cpu/$(OS_FAMILY)_$(SRCARCH)/vm) \\"; \
-	echo "$(call gamma-path,altsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
-	echo "$(call gamma-path,commonsrc,os_gpu/$(OS_FAMILY)_ptx/vm) \\"; \
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \
--- a/make/linux/makefiles/vm.make	Thu Jan 30 00:48:41 2014 +0100
+++ b/make/linux/makefiles/vm.make	Thu Jan 30 00:52:33 2014 +0100
@@ -158,7 +158,6 @@
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/hsail/vm
-SOURCE_PATHS+=$(HS_COMMON_SRC)/os_gpu/linux_ptx/vm

 CORE_PATHS=$(foreach path,$(SOURCE_PATHS),$(call altsrc,$(path)) $(path))
 CORE_PATHS+=$(GENERATED)/jvmtifiles $(GENERATED)/tracefiles
--- a/make/windows/makefiles/projectcreator.make	Thu Jan 30 00:48:41 2014 +0100
+++ b/make/windows/makefiles/projectcreator.make	Thu Jan 30 00:52:33 2014 +0100
@@ -56,7 +56,6 @@
         -relativeInclude src\os\windows\vm \
         -relativeInclude src\os_cpu\windows_$(Platform_arch)\vm \
         -relativeInclude src\cpu\$(Platform_arch)\vm \
-        -relativeInclude src\os_gpu\windows_hsail\vm \
         -relativeInclude src\gpu \
         -absoluteInclude $(HOTSPOTBUILDSPACE)/%f/generated \
         -relativeSrcInclude src \
--- a/make/windows/makefiles/vm.make	Thu Jan 30 00:48:41 2014 +0100
+++ b/make/windows/makefiles/vm.make	Thu Jan 30 00:52:33 2014 +0100
@@ -126,7 +126,6 @@
   /I "$(COMMONSRC)\share\vm\prims" \
   /I "$(COMMONSRC)\os\windows\vm" \
   /I "$(COMMONSRC)\os_cpu\windows_$(Platform_arch)\vm" \
-  /I "$(COMMONSRC)\os_gpu\windows_hsail\vm" \
   /I "$(COMMONSRC)\cpu\$(Platform_arch)\vm"

 CXX_DONT_USE_PCH=/D DONT_USE_PRECOMPILED_HEADER
@@ -171,7 +170,6 @@
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/libadt
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/os/windows/vm
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm
-VM_PATH=$(VM_PATH);$(WorkSpace)/src/os_gpu/windows_hsail/vm
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/cpu/$(Platform_arch)/vm
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/opto
--- a/src/gpu/hsail/vm/gpu_hsail.cpp	Thu Jan 30 00:48:41 2014 +0100
+++ b/src/gpu/hsail/vm/gpu_hsail.cpp	Thu Jan 30 00:52:33 2014 +0100
@@ -29,9 +29,39 @@
 #include "utilities/ostream.hpp"
 #include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
+#include "graal/graalEnv.hpp"
+#include "graal/graalCompiler.hpp"
+#include "graal/graalJavaAccess.hpp"
 #include "hsailKernelArguments.hpp"

-void * gpu::Hsail::_device_context;
+// Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'.
+#define GPU_VMENTRY(result_type, name, signature) \
+  JNIEXPORT result_type JNICALL name signature { \
+  GRAAL_VM_ENTRY_MARK; \
+
+// Entry to GPU native method implementation that calls a JNI function
+// and hence cannot transition current thread to '_thread_in_vm'.
+#define GPU_ENTRY(result_type, name, signature) \
+  JNIEXPORT result_type JNICALL name signature { \
+
+#define GPU_END }
+
+#define CC (char*)  /*cast a literal from (const char*)*/
+#define FN_PTR(f) CAST_FROM_FN_PTR(void*, &(f))
+
+#define OBJECT                "Ljava/lang/Object;"
+#define STRING                "Ljava/lang/String;"
+#define HS_INSTALLED_CODE     "Lcom/oracle/graal/hotspot/meta/HotSpotInstalledCode;"
+
+// static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args) throws InvalidInstalledCodeException;
+
+JNINativeMethod gpu::Hsail::HSAIL_methods[] = {
+  {CC"initialize",       CC"()Z",                               FN_PTR(gpu::Hsail::initialize)},
+  {CC"generateKernel",   CC"([B" STRING ")J",                   FN_PTR(gpu::Hsail::generate_kernel)},
+  {CC"executeKernel0",   CC"("HS_INSTALLED_CODE"I["OBJECT")Z",  FN_PTR(gpu::Hsail::execute_kernel_void_1d)},
+};
+
+void * gpu::Hsail::_device_context = NULL;

 gpu::Hsail::okra_create_context_func_t  gpu::Hsail::_okra_create_context;
 gpu::Hsail::okra_create_kernel_func_t   gpu::Hsail::_okra_create_kernel;
@@ -47,20 +77,6 @@
 gpu::Hsail::okra_register_heap_func_t   gpu::Hsail::_okra_register_heap;


-bool gpu::Hsail::initialize_gpu() {
-  // All the initialization is done in the okra library so
-  // nothing to do here.
-  if (TraceGPUInteraction) {
-    tty->print_cr("[HSAIL] Simulator: initialize_gpu");
-  }
-  return true;
-}
-
-unsigned int gpu::Hsail::total_cores() {
-  // This is not important with simulator
-  return 1;
-}
-
 void gpu::Hsail::register_heap() {
   // After the okra functions are set up and the heap is initialized, register the java heap with HSA
   guarantee(Universe::heap() != NULL, "heap should be there by now.");
@@ -71,54 +87,67 @@
   _okra_register_heap(Universe::heap()->base(), Universe::heap()->capacity());
 }

-bool  gpu::Hsail::execute_kernel_void_1d(address kernel, int dimX, jobject args, methodHandle& mh) {
-  objArrayOop argsArray = (objArrayOop) JNIHandles::resolve(args);
+GPU_VMENTRY(jboolean, gpu::Hsail::execute_kernel_void_1d, (JNIEnv* env, jclass, jobject kernel_handle, jint dimX, jobject args_handle))
+  // Runs an installed HSAIL kernel over a 1D range of size dimX; an invalid kernel handle raises InvalidInstalledCodeException.
+  ResourceMark rm;
+  jlong nmethodValue = HotSpotInstalledCode::codeBlob(kernel_handle);
+  if (nmethodValue == 0) {
+    SharedRuntime::throw_and_post_jvmti_exception(JavaThread::current(), vmSymbols::com_oracle_graal_api_code_InvalidInstalledCodeException(), NULL);
+    return false;  // the throw helper returns normally; bail out before dereferencing the null nmethod
+  }
+  nmethod* nm = (nmethod*) (address) nmethodValue;
+  methodHandle mh = nm->method();
+
+  void* kernel = (void*) HotSpotInstalledCode::codeStart(kernel_handle);
+  if (kernel == NULL) {
+    SharedRuntime::throw_and_post_jvmti_exception(JavaThread::current(), vmSymbols::com_oracle_graal_api_code_InvalidInstalledCodeException(), NULL);
+    return false;  // never hand a NULL kernel to Okra
+  }
+  objArrayOop args = (objArrayOop) JNIHandles::resolve(args_handle);

   // Reset the kernel arguments
   _okra_clearargs(kernel);

   // This object sets up the kernel arguments
-  HSAILKernelArguments hka(kernel, mh->signature(), argsArray, mh->is_static());
+  HSAILKernelArguments hka((address) kernel, mh->signature(), args, mh->is_static());

   // Run the kernel
-  bool success = _okra_execute_with_range(kernel, dimX);
-  return success;
-}
+  return _okra_execute_with_range(kernel, dimX);
+GPU_END

-void *gpu::Hsail::generate_kernel(unsigned char *code, int code_len, const char *name) {
+GPU_ENTRY(jlong, gpu::Hsail::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle))
+  // Copies the HSAIL text and kernel name into NUL-terminated resource buffers and asks Okra to create the kernel.
+  guarantee(_okra_create_kernel != NULL, "[HSAIL] Okra not linked");
+  ResourceMark rm;
+  jsize name_len = env->GetStringLength(name_handle);  // UTF-16 code units, the unit GetStringUTFRegion counts in
+  jsize name_utf_len = env->GetStringUTFLength(name_handle);  // modified UTF-8 bytes actually written to the buffer
+  jsize code_len = env->GetArrayLength(code_handle);

-  if (_okra_create_kernel == NULL) {
-    // probe linkage and we really need it to work this time
-    bool success = probe_linkage_internal(true);
-    guarantee(success, "[HSAIL] loading okra library");
-  }
+  char* name = NEW_RESOURCE_ARRAY(char, name_utf_len + 1);
+  unsigned char *code = NEW_RESOURCE_ARRAY(unsigned char, code_len + 1);

-  gpu::Hsail::register_heap();
+  code[code_len] = 0;
+  name[name_utf_len] = 0;
+  env->GetByteArrayRegion(code_handle, 0, code_len, (jbyte*) code);
+  env->GetStringUTFRegion(name_handle, 0, name_len, name);
+  register_heap();

   // The kernel entrypoint is always run for the time being
   const char* entryPointName = "&run";

   _device_context = _okra_create_context();

-  // code is not null terminated, must be a better way to do this
-  unsigned char* nullTerminatedCodeBuffer = (unsigned char*) malloc(code_len + 1);
-  memcpy(nullTerminatedCodeBuffer, code, code_len);
-  nullTerminatedCodeBuffer[code_len] = 0;
-  void* kernel = _okra_create_kernel(_device_context, nullTerminatedCodeBuffer, entryPointName);
-  free(nullTerminatedCodeBuffer);
-  return kernel;
-}
+  return (jlong) _okra_create_kernel(_device_context, code, entryPointName);
+GPU_END

 #if defined(LINUX)
 static const char okra_library_name[] = "libokra_x86_64.so";
-#elif defined (_WINDOWS)
+#elif defined(_WINDOWS)
 static char const okra_library_name[] = "okra_x86_64.dll";
 #else
-static char const okra_library_name[] = "";
+static char const* okra_library_name = NULL;  // no Okra library on this platform; must be a pointer, an array cannot be initialized from NULL
 #endif

-#define STD_BUFFER_SIZE 1024
-
 #define STRINGIFY(x)     #x

 #define LOOKUP_OKRA_FUNCTION(name, alias)  \
@@ -126,15 +155,10 @@
     CAST_TO_FN_PTR(alias##_func_t, os::dll_lookup(handle, STRINGIFY(name))); \
   if (_##alias == NULL) {      \
   tty->print_cr("[HSAIL] ***** Error: Failed to lookup %s in %s, wrong version of OKRA?", STRINGIFY(name), okra_library_name); \
-        return 0; \
+        return false; \
   } \

-bool gpu::Hsail::probe_linkage() {
-  return probe_linkage_internal(false);
-}
-
-
-bool gpu::Hsail::probe_linkage_internal(bool isRequired) {
+GPU_ENTRY(jboolean, gpu::Hsail::initialize, (JNIEnv *env, jclass))
   if (okra_library_name == NULL) {
     if (TraceGPUInteraction) {
       tty->print_cr("Unsupported HSAIL platform");
@@ -143,32 +167,21 @@
   }

   // here we know we have a valid okra_library_name to try to load
-  // the isRequired boolean specifies whether it is an error if the
-  // probe does not find the okra library
-  char *buffer = (char*)malloc(STD_BUFFER_SIZE);
+  char ebuf[O_BUFLEN];
   if (TraceGPUInteraction) {
       tty->print_cr("[HSAIL] library is %s", okra_library_name);
   }
-  void *handle = os::dll_load(okra_library_name, buffer, STD_BUFFER_SIZE);
+  void *handle = os::dll_load(okra_library_name, ebuf, O_BUFLEN);
   // try alternate location if env variable set
   char *okra_lib_name_from_env_var = getenv("_OKRA_SIM_LIB_PATH_");
   if ((handle == NULL) && (okra_lib_name_from_env_var != NULL)) {
-    handle = os::dll_load(okra_lib_name_from_env_var, buffer, STD_BUFFER_SIZE);
+    handle = os::dll_load(okra_lib_name_from_env_var, ebuf, O_BUFLEN);
     if ((handle != NULL) && TraceGPUInteraction) {
       tty->print_cr("[HSAIL] using _OKRA_SIM_LIB_PATH_=%s", getenv("_OKRA_SIM_LIB_PATH_"));
     }
   }
-  free(buffer);

-  if ((handle == NULL) && !isRequired) {
-    // return true for now but we will probe again later
-    if (TraceGPUInteraction) {
-      tty->print_cr("[HSAIL] library load not in PATH, waiting for Java to put in tmpdir.");
-    }
-    return true;
-  }
-
-  if ((handle == NULL) && isRequired) {
+  if (handle == NULL) {
     // Unable to dlopen okra
     if (TraceGPUInteraction) {
       tty->print_cr("[HSAIL] library load failed.");
@@ -176,6 +189,8 @@
     return false;
   }

+  guarantee(_okra_create_context == NULL, "cannot repeat GPU initialization");
+
   // at this point we know handle is valid and we can lookup the functions
   LOOKUP_OKRA_FUNCTION(okra_create_context, okra_create_context);
   LOOKUP_OKRA_FUNCTION(okra_create_kernel, okra_create_kernel);
@@ -189,7 +204,27 @@
   LOOKUP_OKRA_FUNCTION(okra_execute_with_range, okra_execute_with_range);
   LOOKUP_OKRA_FUNCTION(okra_clearargs, okra_clearargs);
   LOOKUP_OKRA_FUNCTION(okra_register_heap, okra_register_heap);
-
   // if we made it this far, real success
+
+  gpu::initialized_gpu("Okra");
+
+  return true;
+GPU_END
+
+bool gpu::Hsail::register_natives(JNIEnv* env) {
+  jclass klass = env->FindClass("com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend");
+  if (klass == NULL) {
+    if (TraceGPUInteraction) {
+      tty->print_cr("HSAILHotSpotBackend class not found");
+    }
+    return false;
+  }
+  jint status = env->RegisterNatives(klass, HSAIL_methods, sizeof(HSAIL_methods) / sizeof(JNINativeMethod));
+  if (status != JNI_OK) {
+    if (TraceGPUInteraction) {
+      tty->print_cr("Error registering natives for HSAILHotSpotBackend: %d", status);
+    }
+    return false;
+  }
   return true;
 }
--- a/src/gpu/hsail/vm/gpu_hsail.hpp	Thu Jan 30 00:48:41 2014 +0100
+++ b/src/gpu/hsail/vm/gpu_hsail.hpp	Thu Jan 30 00:52:33 2014 +0100
@@ -26,17 +26,27 @@
 #define GPU_HSAIL_HPP

 class Hsail {
-  friend class gpu;
+
+private:
+
+  static JNINativeMethod HSAIL_methods[];

- protected:
-  static bool probe_linkage();
-  static bool initialize_gpu();
-  static unsigned int total_cores();
-  static void* generate_kernel(unsigned char *code, int code_len, const char *name);
-  static bool execute_kernel_void_1d(address kernel, int dimX, jobject args, methodHandle& mh);
+  // static native boolean initialize();
+  static jboolean initialize(JNIEnv *env, jclass);
+
+  // static native long generateKernel(byte[] targetCode, String name);
+  static jlong generate_kernel(JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle);
+
+  // static native boolean executeKernel0(HotSpotInstalledCode kernel, int jobSize, Object[] args);
+  static jboolean execute_kernel_void_1d(JNIEnv *env, jclass, jobject hotspotInstalledCode, jint dimX, jobject args);
+
   static void register_heap();

 public:
+
+  // Registers the implementations for the native methods in HSAILHotSpotBackend
+  static bool register_natives(JNIEnv* env);
+
 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
   typedef unsigned long long CUdeviceptr;
 #else
@@ -57,8 +67,6 @@
   typedef bool (*okra_clearargs_func_t)(void*);
   typedef bool (*okra_register_heap_func_t)(void*, size_t);

-  static bool probe_linkage_internal(bool isRequired);
-
 public:
   static okra_create_context_func_t             _okra_create_context;
   static okra_create_kernel_func_t              _okra_create_kernel;
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Thu Jan 30 00:48:41 2014 +0100
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Thu Jan 30 00:52:33 2014 +0100
@@ -30,8 +30,34 @@
 #include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
 #include "runtime/interfaceSupport.hpp"
+#include "graal/graalEnv.hpp"
+#include "graal/graalCompiler.hpp"
 #include "ptxKernelArguments.hpp"

+// Entry to GPU native method implementation that transitions current thread to '_thread_in_vm'.
+#define GPU_VMENTRY(result_type, name, signature) \
+  JNIEXPORT result_type JNICALL name signature { \
+  GRAAL_VM_ENTRY_MARK; \
+
+// Entry to GPU native method implementation that calls a JNI function
+// and hence cannot transition current thread to '_thread_in_vm'.
+#define GPU_ENTRY(result_type, name, signature) \
+  JNIEXPORT result_type JNICALL name signature { \
+
+#define GPU_END }
+
+#define CC (char*)  /*cast a literal from (const char*)*/
+#define FN_PTR(f) CAST_FROM_FN_PTR(void*, &(f))
+
+#define STRING                "Ljava/lang/String;"
+
+JNINativeMethod gpu::Ptx::PTX_methods[] = {
+  {CC"initialize",              CC"()Z",               FN_PTR(gpu::Ptx::initialize)},
+  {CC"generateKernel",          CC"([B" STRING ")J",   FN_PTR(gpu::Ptx::generate_kernel)},
+  {CC"getLaunchKernelAddress",  CC"()J",               FN_PTR(gpu::Ptx::get_execute_kernel_from_vm_address)},
+  {CC"getAvailableProcessors0", CC"()I",               FN_PTR(gpu::Ptx::get_total_cores)},
+};
+
 void * gpu::Ptx::_device_context;
 int    gpu::Ptx::_cu_device = 0;

@@ -62,7 +88,7 @@
     CAST_TO_FN_PTR(alias##_func_t, os::dll_lookup(handle, STRINGIFY(name))); \
   if (_##alias == NULL) {      \
   tty->print_cr("[CUDA] ***** Error: Failed to lookup %s", STRINGIFY(name)); \
-        return 0; \
+        return false; \
   } \

 #define LOOKUP_CUDA_V2_FUNCTION(name, alias)  LOOKUP_CUDA_FUNCTION(name##_v2, alias)
@@ -70,7 +96,7 @@
 /*
  * see http://en.wikipedia.org/wiki/CUDA#Supported_GPUs
  */
-int ncores(int major, int minor) {
+int gpu::Ptx::ncores(int major, int minor) {
     int device_type = (major << 4) + minor;

     switch (device_type) {
@@ -88,12 +114,36 @@
     }
 }

-bool gpu::Ptx::initialize_gpu() {
+bool gpu::Ptx::register_natives(JNIEnv* env) {
+  jclass klass = env->FindClass("com/oracle/graal/hotspot/ptx/PTXHotSpotBackend");
+  if (klass == NULL) {
+    if (TraceGPUInteraction) {
+      tty->print_cr("PTXHotSpotBackend class not found");
+    }
+    return false;
+  }
+  jint status = env->RegisterNatives(klass, PTX_methods, sizeof(PTX_methods) / sizeof(JNINativeMethod));
+  if (status != JNI_OK) {
+    if (TraceGPUInteraction) {
+      tty->print_cr("Error registering natives for PTXHotSpotBackend: %d", status);
+    }
+    return false;
+  }
+  return true;
+}
+
+GPU_ENTRY(jboolean, gpu::Ptx::initialize, (JNIEnv *env, jclass))
+
+  if (!link()) {
+    return false;
+  }

   /* Initialize CUDA driver API */
   int status = _cuda_cu_init(0);
   if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("Failed to initialize CUDA device");
+    if (TraceGPUInteraction) {
+      tty->print_cr("Failed to initialize CUDA device: %d", status);
+    }
     return false;
   }

@@ -160,11 +210,12 @@
     tty->print_cr("[CUDA] Using %s", device_name);
   }

+  gpu::initialized_gpu(device_name);

   return true;
-}
+GPU_END

-unsigned int gpu::Ptx::total_cores() {
+GPU_ENTRY(jint, gpu::Ptx::get_total_cores, (JNIEnv *env, jobject))

     int minor, major, nmp;
     int status = _cuda_cu_device_get_attribute(&minor,
@@ -190,7 +241,7 @@
                                            _cu_device);

     if (status != GRAAL_CUDA_SUCCESS) {
-        tty->print_cr("[CUDA] Failed to get numberof MPs on device: %d", _cu_device);
+        tty->print_cr("[CUDA] Failed to get number of MPs on device: %d", _cu_device);
         return 0;
     }

@@ -249,17 +300,28 @@
                       total, async_engines, can_map_host_memory, concurrent_kernels);
         tty->print_cr("[CUDA] Max threads per block: %d warp size: %d", max_threads_per_block, warp_size);
     }
-    return (total);
+    return total;
+GPU_END
+
+GPU_ENTRY(jlong, gpu::Ptx::generate_kernel, (JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle))
+  ResourceMark rm;
+  jsize name_len = env->GetStringLength(name_handle);
+  jsize code_len = env->GetArrayLength(code_handle);

-}
+  char* name = NEW_RESOURCE_ARRAY(char, name_len + 1);
+  unsigned char *code = NEW_RESOURCE_ARRAY(unsigned char, code_len + 1);

-void *gpu::Ptx::generate_kernel(unsigned char *code, int code_len, const char *name) {
+  code[code_len] = 0;
+  name[name_len] = 0;
+
+  env->GetByteArrayRegion(code_handle, 0, code_len, (jbyte*) code);
+  env->GetStringUTFRegion(name_handle, 0, name_len, name);

   struct CUmod_st * cu_module;
   // Use three JIT compiler options
   const unsigned int jit_num_options = 3;
-  int *jit_options = NEW_C_HEAP_ARRAY(int, jit_num_options, mtCompiler);
-  void **jit_option_values = NEW_C_HEAP_ARRAY(void *, jit_num_options, mtCompiler);
+  int *jit_options = NEW_RESOURCE_ARRAY(int, jit_num_options);
+  void **jit_option_values = NEW_RESOURCE_ARRAY(void *, jit_num_options);

   // Set up PTX JIT compiler options
   // 1. set size of compilation log buffer
@@ -268,23 +330,22 @@
   jit_option_values[0] = (void *)(size_t)jit_log_buffer_size;

   // 2. set pointer to compilation log buffer
-  char *jit_log_buffer = NEW_C_HEAP_ARRAY(char, jit_log_buffer_size, mtCompiler);
+  char *jit_log_buffer = NEW_RESOURCE_ARRAY(char, jit_log_buffer_size);
   jit_options[1] = GRAAL_CU_JIT_INFO_LOG_BUFFER;
   jit_option_values[1] = jit_log_buffer;

-  // 3. set pointer to set the Maximum # of registers (32) for the kernel
+  // 3. set pointer to set the maximum number of registers (32) for the kernel
   int jit_register_count = 32;
   jit_options[2] = GRAAL_CU_JIT_MAX_REGISTERS;
   jit_option_values[2] = (void *)(size_t)jit_register_count;

-  /* Create CUDA context to compile and execute the kernel */
+  // Create CUDA context to compile and execute the kernel
   int status = _cuda_cu_ctx_create(&_device_context, GRAAL_CU_CTX_MAP_HOST, _cu_device);

   if (status != GRAAL_CUDA_SUCCESS) {
     tty->print_cr("[CUDA] Failed to create CUDA context for device(%d): %d", _cu_device, status);
-    return NULL;
+    return 0L;
   }
-
   if (TraceGPUInteraction) {
     tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device);
   }
@@ -293,50 +354,43 @@

   if (status != GRAAL_CUDA_SUCCESS) {
     tty->print_cr("[CUDA] Failed to set current context for device: %d", _cu_device);
-    return NULL;
+    return 0L;
   }
-
   if (TraceGPUInteraction) {
     tty->print_cr("[CUDA] Success: Set current context for device: %d", _cu_device);
-  }
-
-  if (TraceGPUInteraction) {
     tty->print_cr("[CUDA] PTX Kernel\n%s", code);
     tty->print_cr("[CUDA] Function name : %s", name);
-
   }

   /* Load module's data with compiler options */
   status = _cuda_cu_module_load_data_ex(&cu_module, (void*) code, jit_num_options,
-                                            jit_options, (void **)jit_option_values);
+                                        jit_options, (void **)jit_option_values);
   if (status != GRAAL_CUDA_SUCCESS) {
     if (status == GRAAL_CUDA_ERROR_NO_BINARY_FOR_GPU) {
       tty->print_cr("[CUDA] Check for malformed PTX kernel or incorrect PTX compilation options");
     }
     tty->print_cr("[CUDA] *** Error (%d) Failed to load module data with online compiler options for method %s",
                   status, name);
-    return NULL;
+    return 0L;
   }

   if (TraceGPUInteraction) {
     tty->print_cr("[CUDA] Loaded data for PTX Kernel");
   }

-  struct CUfunc_st * cu_function;
-
+  struct CUfunc_st* cu_function;
   status = _cuda_cu_module_get_function(&cu_function, cu_module, name);

   if (status != GRAAL_CUDA_SUCCESS) {
     tty->print_cr("[CUDA] *** Error: Failed to get function %s", name);
-    return NULL;
+    return 0L;
   }

   if (TraceGPUInteraction) {
     tty->print_cr("[CUDA] Got function handle for %s kernel address %p", name, cu_function);
   }
-
-  return cu_function;
-}
+  return (jlong) cu_function;
+GPU_END

 // A PtxCall is used to manage executing a GPU kernel. In addition to launching
 // the kernel, this class releases resources allocated for the execution.
@@ -480,6 +534,9 @@
   }
 };

+GPU_VMENTRY(jlong, gpu::Ptx::get_execute_kernel_from_vm_address, (JNIEnv *env, jclass))
+  return (jlong) gpu::Ptx::execute_kernel_from_vm;
+GPU_END

 JRT_ENTRY(jlong, gpu::Ptx::execute_kernel_from_vm(JavaThread* thread, jlong kernel, jint dimX, jint dimY, jint dimZ,
                                                   jlong buffer,
@@ -493,6 +550,10 @@
     return 0L;
   }

+#if 0
+  Universe::heap()->collect(GCCause::_jvmti_force_gc);
+#endif
+
   PtxCall call(thread, (address) buffer, bufferSize, (oop*) (address) pinnedObjects, encodedReturnTypeSize);

 #define TRY(action) do { \
@@ -523,152 +584,6 @@

 JRT_END

-bool gpu::Ptx::execute_kernel(address kernel, PTXKernelArguments &ptxka, JavaValue &ret) {
-    return gpu::Ptx::execute_warp(1, 1, 1, kernel, ptxka, ret);
-}
-
-bool gpu::Ptx::execute_warp(int dimX, int dimY, int dimZ,
-                            address kernel, PTXKernelArguments &ptxka, JavaValue &ret) {
-  // grid dimensionality
-  unsigned int gridX = 1;
-  unsigned int gridY = 1;
-  unsigned int gridZ = 1;
-
-  // thread dimensionality
-  unsigned int blockX = dimX;
-  unsigned int blockY = dimY;
-  unsigned int blockZ = dimZ;
-
-  struct CUfunc_st* cu_function = (struct CUfunc_st*) kernel;
-
-  void * config[5] = {
-    GRAAL_CU_LAUNCH_PARAM_BUFFER_POINTER, ptxka._kernelArgBuffer,
-    GRAAL_CU_LAUNCH_PARAM_BUFFER_SIZE, &(ptxka._bufferOffset),
-    GRAAL_CU_LAUNCH_PARAM_END
-  };
-
-  if (kernel == NULL) {
-    return false;
-  }
-
-  if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] launching kernel");
-  }
-
-  int status = _cuda_cu_launch_kernel(cu_function,
-                                      gridX, gridY, gridZ,
-                                      blockX, blockY, blockZ,
-                                      0, NULL, NULL, (void **) &config);
-  if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] Failed to launch kernel");
-    return false;
-  }
-
-  if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] Success: Kernel Launch: X: %d Y: %d Z: %d", blockX, blockY, blockZ);
-  }
-
-  status = _cuda_cu_ctx_synchronize();
-
-  if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] Failed to synchronize launched kernel (%d)", status);
-    return false;
-  }
-
-  if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] Success: Synchronized launch kernel");
-  }
-
-
-  // Get the result. TODO: Move this code to get_return_oop()
-  BasicType return_type = ptxka.get_ret_type();
-  switch (return_type) {
-     case T_INT:
-       {
-         int return_val;
-         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_INT_BYTE_SIZE);
-         if (status != GRAAL_CUDA_SUCCESS) {
-           tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
-           return false;
-         }
-         ret.set_jint(return_val);
-       }
-       break;
-     case T_BOOLEAN:
-       {
-         int return_val;
-         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_INT_BYTE_SIZE);
-         if (status != GRAAL_CUDA_SUCCESS) {
-           tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
-           return false;
-         }
-         ret.set_jint(return_val);
-       }
-       break;
-     case T_FLOAT:
-       {
-         float return_val;
-         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_FLOAT_BYTE_SIZE);
-         if (status != GRAAL_CUDA_SUCCESS) {
-           tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
-           return false;
-         }
-         ret.set_jfloat(return_val);
-       }
-       break;
-     case T_DOUBLE:
-       {
-         double return_val;
-         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_DOUBLE_BYTE_SIZE);
-         if (status != GRAAL_CUDA_SUCCESS) {
-           tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
-           return false;
-         }
-         ret.set_jdouble(return_val);
-       }
-       break;
-     case T_LONG:
-       {
-         long return_val;
-         status = gpu::Ptx::_cuda_cu_memcpy_dtoh(&return_val, ptxka._dev_return_value, T_LONG_BYTE_SIZE);
-         if (status != GRAAL_CUDA_SUCCESS) {
-           tty->print_cr("[CUDA] *** Error (%d) Failed to copy value to device argument", status);
-           return false;
-         }
-         ret.set_jlong(return_val);
-       }
-       break;
-     case T_VOID:
-       break;
-     default:
-       tty->print_cr("[CUDA] TODO *** Unhandled return type: %d", return_type);
-  }
-
-  // Free device memory allocated for result
-  status = gpu::Ptx::_cuda_cu_memfree(ptxka._dev_return_value);
-  if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] *** Error (%d) Failed to free device memory of return value", status);
-    return false;
-  }
-
-  if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] Success: Freed device memory of return value");
-  }
-
-  // Destroy context
-  status = gpu::Ptx::_cuda_cu_ctx_destroy(_device_context);
-  if (status != GRAAL_CUDA_SUCCESS) {
-    tty->print_cr("[CUDA] *** Error (%d) Failed to destroy context", status);
-    return false;
-  }
-
-  if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] Success: Destroy context");
-  }
-
-  return (status == GRAAL_CUDA_SUCCESS);
-}
-
 #if defined(LINUX)
 static const char cuda_library_name[] = "libcuda.so";
 #elif defined(__APPLE__)
@@ -677,58 +592,56 @@
 static char const cuda_library_name[] = "";
 #endif

-#define STD_BUFFER_SIZE 1024
+bool gpu::Ptx::link() {
+  if (cuda_library_name == NULL) {
+    if (TraceGPUInteraction) {
+      tty->print_cr("Failed to find CUDA linkage");
+    }
+    return false;
+  }
+  char ebuf[O_BUFLEN];
+  void *handle = os::dll_load(cuda_library_name, ebuf, O_BUFLEN);
+  if (handle == NULL) {
+    if (TraceGPUInteraction) {
+      tty->print_cr("Unsupported CUDA platform: %s", ebuf);
+    }
+    return false;
+  }

-bool gpu::Ptx::probe_linkage() {
-  if (cuda_library_name != NULL) {
-    char *buffer = (char*)malloc(STD_BUFFER_SIZE);
-    void *handle = os::dll_load(cuda_library_name, buffer, STD_BUFFER_SIZE);
-        free(buffer);
-    if (handle != NULL) {
-      LOOKUP_CUDA_FUNCTION(cuInit, cuda_cu_init);
-      LOOKUP_CUDA_FUNCTION(cuCtxSynchronize, cuda_cu_ctx_synchronize);
-      LOOKUP_CUDA_FUNCTION(cuCtxGetCurrent, cuda_cu_ctx_get_current);
-      LOOKUP_CUDA_FUNCTION(cuCtxSetCurrent, cuda_cu_ctx_set_current);
-      LOOKUP_CUDA_FUNCTION(cuDeviceGetCount, cuda_cu_device_get_count);
-      LOOKUP_CUDA_FUNCTION(cuDeviceGetName, cuda_cu_device_get_name);
-      LOOKUP_CUDA_FUNCTION(cuDeviceGet, cuda_cu_device_get);
-      LOOKUP_CUDA_FUNCTION(cuDeviceComputeCapability, cuda_cu_device_compute_capability);
-      LOOKUP_CUDA_FUNCTION(cuDeviceGetAttribute, cuda_cu_device_get_attribute);
-      LOOKUP_CUDA_FUNCTION(cuModuleGetFunction, cuda_cu_module_get_function);
-      LOOKUP_CUDA_FUNCTION(cuModuleLoadDataEx, cuda_cu_module_load_data_ex);
-      LOOKUP_CUDA_FUNCTION(cuLaunchKernel, cuda_cu_launch_kernel);
-      LOOKUP_CUDA_FUNCTION(cuMemHostRegister, cuda_cu_mem_host_register);
-      LOOKUP_CUDA_FUNCTION(cuMemHostUnregister, cuda_cu_mem_host_unregister);
+  LOOKUP_CUDA_FUNCTION(cuInit, cuda_cu_init);
+  LOOKUP_CUDA_FUNCTION(cuCtxSynchronize, cuda_cu_ctx_synchronize);
+  LOOKUP_CUDA_FUNCTION(cuCtxGetCurrent, cuda_cu_ctx_get_current);
+  LOOKUP_CUDA_FUNCTION(cuCtxSetCurrent, cuda_cu_ctx_set_current);
+  LOOKUP_CUDA_FUNCTION(cuDeviceGetCount, cuda_cu_device_get_count);
+  LOOKUP_CUDA_FUNCTION(cuDeviceGetName, cuda_cu_device_get_name);
+  LOOKUP_CUDA_FUNCTION(cuDeviceGet, cuda_cu_device_get);
+  LOOKUP_CUDA_FUNCTION(cuDeviceComputeCapability, cuda_cu_device_compute_capability);
+  LOOKUP_CUDA_FUNCTION(cuDeviceGetAttribute, cuda_cu_device_get_attribute);
+  LOOKUP_CUDA_FUNCTION(cuModuleGetFunction, cuda_cu_module_get_function);
+  LOOKUP_CUDA_FUNCTION(cuModuleLoadDataEx, cuda_cu_module_load_data_ex);
+  LOOKUP_CUDA_FUNCTION(cuLaunchKernel, cuda_cu_launch_kernel);
+  LOOKUP_CUDA_FUNCTION(cuMemHostRegister, cuda_cu_mem_host_register);
+  LOOKUP_CUDA_FUNCTION(cuMemHostUnregister, cuda_cu_mem_host_unregister);
 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
-      LOOKUP_CUDA_V2_FUNCTION(cuCtxCreate, cuda_cu_ctx_create);
-      LOOKUP_CUDA_V2_FUNCTION(cuCtxDestroy, cuda_cu_ctx_destroy);
-      LOOKUP_CUDA_V2_FUNCTION(cuMemAlloc, cuda_cu_memalloc);
-      LOOKUP_CUDA_V2_FUNCTION(cuMemFree, cuda_cu_memfree);
-      LOOKUP_CUDA_V2_FUNCTION(cuMemcpyHtoD, cuda_cu_memcpy_htod);
-      LOOKUP_CUDA_V2_FUNCTION(cuMemcpyDtoH, cuda_cu_memcpy_dtoh);
-      LOOKUP_CUDA_V2_FUNCTION(cuMemHostGetDevicePointer, cuda_cu_mem_host_get_device_pointer);
+  LOOKUP_CUDA_V2_FUNCTION(cuCtxCreate, cuda_cu_ctx_create);
+  LOOKUP_CUDA_V2_FUNCTION(cuCtxDestroy, cuda_cu_ctx_destroy);
+  LOOKUP_CUDA_V2_FUNCTION(cuMemAlloc, cuda_cu_memalloc);
+  LOOKUP_CUDA_V2_FUNCTION(cuMemFree, cuda_cu_memfree);
+  LOOKUP_CUDA_V2_FUNCTION(cuMemcpyHtoD, cuda_cu_memcpy_htod);
+  LOOKUP_CUDA_V2_FUNCTION(cuMemcpyDtoH, cuda_cu_memcpy_dtoh);
+  LOOKUP_CUDA_V2_FUNCTION(cuMemHostGetDevicePointer, cuda_cu_mem_host_get_device_pointer);
 #else
-      LOOKUP_CUDA_FUNCTION(cuCtxCreate, cuda_cu_ctx_create);
-      LOOKUP_CUDA_FUNCTION(cuCtxDestroy, cuda_cu_ctx_destroy);
-      LOOKUP_CUDA_FUNCTION(cuMemAlloc, cuda_cu_memalloc);
-      LOOKUP_CUDA_FUNCTION(cuMemFree, cuda_cu_memfree);
-      LOOKUP_CUDA_FUNCTION(cuMemcpyHtoD, cuda_cu_memcpy_htod);
-      LOOKUP_CUDA_FUNCTION(cuMemcpyDtoH, cuda_cu_memcpy_dtoh);
-      LOOKUP_CUDA_FUNCTION(cuMemHostGetDevicePointer, cuda_cu_mem_host_get_device_pointer);
+  LOOKUP_CUDA_FUNCTION(cuCtxCreate, cuda_cu_ctx_create);
+  LOOKUP_CUDA_FUNCTION(cuCtxDestroy, cuda_cu_ctx_destroy);
+  LOOKUP_CUDA_FUNCTION(cuMemAlloc, cuda_cu_memalloc);
+  LOOKUP_CUDA_FUNCTION(cuMemFree, cuda_cu_memfree);
+  LOOKUP_CUDA_FUNCTION(cuMemcpyHtoD, cuda_cu_memcpy_htod);
+  LOOKUP_CUDA_FUNCTION(cuMemcpyDtoH, cuda_cu_memcpy_dtoh);
+  LOOKUP_CUDA_FUNCTION(cuMemHostGetDevicePointer, cuda_cu_mem_host_get_device_pointer);
 #endif

-      if (TraceGPUInteraction) {
-        tty->print_cr("[CUDA] Success: library linkage");
-      }
-      return true;
-    } else {
-      // Unable to dlopen libcuda
-      return false;
-    }
-  } else {
-    tty->print_cr("Unsupported CUDA platform");
-    return false;
+  if (TraceGPUInteraction) {
+    tty->print_cr("[CUDA] Success: library linkage");
   }
-  tty->print_cr("Failed to find CUDA linkage");
-  return false;
+  return true;
 }
--- a/src/gpu/ptx/vm/gpu_ptx.hpp	Thu Jan 30 00:48:41 2014 +0100
+++ b/src/gpu/ptx/vm/gpu_ptx.hpp	Thu Jan 30 00:52:33 2014 +0100
@@ -88,18 +88,33 @@
 #define GRAAL_CU_CTX_SCHED_BLOCKING_SYNC 0x04

 class Ptx {
-  friend class gpu;
   friend class PtxCall;

- protected:
-  static bool probe_linkage();
-  static bool initialize_gpu();
-  static unsigned int total_cores();
-  static void* get_context();
-  static void* generate_kernel(unsigned char *code, int code_len, const char *name);
-  static bool execute_warp(int dimX, int dimY, int dimZ, address kernel, PTXKernelArguments & ka, JavaValue &ret);
-  static bool execute_kernel(address kernel, PTXKernelArguments & ka, JavaValue &ret);
+private:
+
+  static JNINativeMethod PTX_methods[];
+
+  // static native boolean initialize();
+  static jboolean initialize(JNIEnv* env, jclass);
+
+  // static native long generateKernel(byte[] targetCode, String name);
+  static jlong generate_kernel(JNIEnv *env, jclass, jbyteArray code_handle, jstring name_handle);
+
+  // static native long getLaunchKernelAddress();
+  static jlong get_execute_kernel_from_vm_address(JNIEnv *env, jclass);
+
+  // static native int getAvailableProcessors0();
+  static jint get_total_cores(JNIEnv *env, jobject);
+
+  // Links the CUDA driver library functions
+  static bool link();
+
+  static int ncores(int major, int minor);
+
 public:
+  // Registers the implementations for the native methods in PTXHotSpotBackend
+  static bool register_natives(JNIEnv* env);
+
 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
   typedef unsigned long long CUdeviceptr;
 #else
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os/bsd/vm/gpu_bsd.cpp	Thu Jan 30 00:52:33 2014 +0100
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "runtime/gpu.hpp"
+#include "utilities/ostream.hpp"
+
+jobject gpu::probe_gpus(JNIEnv* env) {
+#ifdef __APPLE__
+  /*
+   * Let the CUDA driver initialization be the gate to GPU for now, pending
+   * a better detection solution for NVIDIA PTX and AMD HSAIL.
+   */
+  if (gpu::Ptx::register_natives(env)) {
+    if (TraceGPUInteraction) {
+      tty->print_cr("Assuming NVidia/PTX support (APPLE)");
+    }
+    return env->NewStringUTF("PTX");
+  }
+#else
+  if (TraceGPUInteraction) {
+    tty->print_cr("Assuming no GPU (not APPLE)");
+  }
+#endif // __APPLE__
+  return env->NewStringUTF(""); // reached when no PTX natives were registered: report no GPU
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os/linux/vm/gpu_linux.cpp	Thu Jan 30 00:52:33 2014 +0100
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "runtime/gpu.hpp"
+#include "utilities/ostream.hpp"
+
+/*
+ * Probe for CUDA device on PCI bus using /proc/bus/pci/devices. Do
+ * not rely on CUDA tool kit being installed. We will check if CUDA
+ * library is installed later.
+ */
+
+static unsigned int nvidia_vendor_id = 0x10de;
+static unsigned int amd_vendor_id = 0x1002;
+
+/*
+ * Registers the native methods of each usable GPU backend and returns
+ * their names as a Java string: "PTX,HSAIL", "PTX", "HSAIL" or "".
+ */
+jobject gpu::probe_gpus(JNIEnv* env) {
+  bool hsail = UseHSAILSimulator && gpu::Hsail::register_natives(env);
+  bool ptx = false;
+
+  /*
+   * Scan /proc/bus/pci/devices for NVIDIA devices and register the PTX
+   * natives for the first one whose registration succeeds. Will need to
+   * revisit this to support execution on multiple GPU devices.
+   */
+  FILE *pci_devices = fopen("/proc/bus/pci/devices", "r");
+  char contents[4096];
+  unsigned int bus_num_devfn_ign;
+  unsigned int vendor;
+  unsigned int device;
+  if (pci_devices == NULL) {
+    // Not fatal: an HSAIL registration made above must still be reported.
+    tty->print_cr("*** Failed to open /proc/bus/pci/devices");
+  } else {
+    while (fgets(contents, sizeof(contents)-1, pci_devices)) {
+      // Skip unparsable lines so vendor/device are never read stale or uninitialized.
+      if (sscanf(contents, "%04x%04x%04x", &bus_num_devfn_ign, &vendor, &device) != 3) {
+        continue;
+      }
+      if (vendor == nvidia_vendor_id) {
+        if (TraceGPUInteraction) {
+          tty->print_cr("Found supported nVidia GPU: vendor=0x%04x, device=0x%04x", vendor, device);
+        }
+        if (!ptx && gpu::Ptx::register_natives(env)) {
+          ptx = true;
+        }
+      }
+    }
+    fclose(pci_devices);
+  }
+
+  const char* gpus = "";
+  if (ptx && hsail) {
+    gpus = "PTX,HSAIL";
+  } else if (ptx) {
+    gpus = "PTX";
+  } else if (hsail) {
+    gpus = "HSAIL";
+  }
+  return env->NewStringUTF(gpus);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os/windows/vm/gpu_windows.cpp	Thu Jan 30 00:52:33 2014 +0100
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/gpu.hpp"
+#include "utilities/ostream.hpp"
+
+// Reports "HSAIL" when the simulator is enabled and its natives register, else "".
+jobject gpu::probe_gpus(JNIEnv* env) {
+  // TODO: add detection of PTX/NVidia
+  const bool hsail = UseHSAILSimulator && gpu::Hsail::register_natives(env);
+
+  return env->NewStringUTF(hsail ? "HSAIL" : "");
+}
--- a/src/os_gpu/bsd_ptx/vm/gpu_bsd.hpp	Thu Jan 30 00:48:41 2014 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef OS_BSD_VM_GPU_BSD_HPP
-#define OS_BSD_VM_GPU_BSD_HPP
-
-
-class Bsd {
-  friend class gpu;
-
- protected:
-  static bool probe_gpu();
-#ifdef __APPLE__
-  static bool probe_gpu_apple();
-#endif
-};
-
-#endif // OS_BSD_VM_GPU_BSD_HPP
--- a/src/os_gpu/linux_ptx/vm/gpu_linux.hpp	Thu Jan 30 00:48:41 2014 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef OS_BSD_VM_GPU_LINUX_HPP
-#define OS_BSD_VM_GPU_LINUX_HPP
-
-
-class Linux {
-  friend class gpu;
-
- protected:
-  static bool probe_gpu();
-};
-
-#endif // OS_BSD_VM_GPU_LINUX_HPP
--- a/src/os_gpu/windows_hsail/vm/gpu_windows.hpp	Thu Jan 30 00:48:41 2014 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef OS_WINDOWS_VM_GPU_WINDOWS_HPP
-#define OS_WINDOWS_VM_GPU_WINDOWS_HPP
-
-
-class Windows {
-  friend class gpu;
-
- protected:
-  static bool probe_gpu();
-};
-
-#endif // OS_WINDOWS_VM_GPU_WINDOWS_HPP
--- a/src/share/vm/graal/graalCompiler.cpp	Thu Jan 30 00:48:41 2014 +0100
+++ b/src/share/vm/graal/graalCompiler.cpp	Thu Jan 30 00:52:33 2014 +0100
@@ -28,7 +28,6 @@
 #include "graal/graalJavaAccess.hpp"
 #include "graal/graalVMToCompiler.hpp"
 #include "graal/graalCompilerToVM.hpp"
-#include "graal/graalCompilerToGPU.hpp"
 #include "graal/graalEnv.hpp"
 #include "graal/graalRuntime.hpp"
 #include "runtime/arguments.hpp"
@@ -72,13 +71,6 @@
   }
   env->RegisterNatives(klass, CompilerToVM_methods, CompilerToVM_methods_count());

-  klass = env->FindClass("com/oracle/graal/hotspot/bridge/CompilerToGPUImpl");
-  if (klass == NULL) {
-    tty->print_cr("graal CompilerToGPUImpl class not found");
-    vm_abort(false);
-  }
-  env->RegisterNatives(klass, CompilerToGPU_methods, CompilerToGPU_methods_count());
-
   ResourceMark rm;
   HandleMark hm;
   {
--- a/src/share/vm/graal/graalCompilerToGPU.cpp	Thu Jan 30 00:48:41 2014 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,269 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#include "precompiled.hpp"
-
-#include "memory/oopFactory.hpp"
-#include "graal/graalCompiler.hpp"
-#include "graal/graalEnv.hpp"
-#include "graal/graalJavaAccess.hpp"
-#include "runtime/gpu.hpp"
-#include "runtime/javaCalls.hpp"
-# include "ptx/vm/ptxKernelArguments.hpp"
-
-// Entry to native method implementation that transitions current thread to '_thread_in_vm'.
-#define C2V_VMENTRY(result_type, name, signature) \
-  JNIEXPORT result_type JNICALL c2v_ ## name signature { \
-  TRACE_graal_3("CompilerToGPU::" #name); \
-  GRAAL_VM_ENTRY_MARK; \
-
-// Entry to native method implementation that calls a JNI function
-// and hence cannot transition current thread to '_thread_in_vm'.
-#define C2V_ENTRY(result_type, name, signature) \
-  JNIEXPORT result_type JNICALL c2v_ ## name signature { \
-  TRACE_graal_3("CompilerToGPU::" #name); \
-
-#define C2V_END }
-
-
-C2V_ENTRY(jlong, generateKernel, (JNIEnv *env, jobject, jbyteArray code, jstring name))
-  if (gpu::is_available() == false || gpu::has_gpu_linkage() == false && gpu::is_initialized()) {
-    if (TraceGPUInteraction) {
-      tty->print_cr("generateKernel - not available / no linkage / not initialized");
-    }
-    return 0;
-  }
-  jboolean is_copy;
-  jbyte *bytes = env->GetByteArrayElements(code, &is_copy);
-  jint len = env->GetArrayLength(code);
-  const char *namestr = env->GetStringUTFChars(name, &is_copy);
-  void *kernel = gpu::generate_kernel((unsigned char *)bytes, len, namestr);
-  if (kernel == NULL) {
-    tty->print_cr("[CUDA] *** Error: Failed to compile kernel");
-  } else if (TraceGPUInteraction) {
-    tty->print_cr("[CUDA] Generated kernel");
-  }
-  env->ReleaseByteArrayElements(code, bytes, 0);
-  env->ReleaseStringUTFChars(name, namestr);
-
-  return (jlong)kernel;
-C2V_END
-
-C2V_VMENTRY(jobject, executeExternalMethodVarargs, (JNIEnv *env, jobject, jobject args, jobject hotspotInstalledCode))
-  ResourceMark rm;
-  HandleMark hm;
-
-  if (gpu::is_available() == false || gpu::has_gpu_linkage() == false && gpu::is_initialized()) {
-    tty->print_cr("executeExternalMethodVarargs - not available / no linkage / not initialized");
-    return NULL;
-  }
-  jlong nmethodValue = HotSpotInstalledCode::codeBlob(hotspotInstalledCode);
-  nmethod* nm = (nmethod*) (address) nmethodValue;
-  methodHandle mh = nm->method();
-  Symbol* signature = mh->signature();
-
-  // start value is the kernel
-  jlong startValue = HotSpotInstalledCode::codeStart(hotspotInstalledCode);
-
-  PTXKernelArguments ptxka(signature, (arrayOop) JNIHandles::resolve(args), mh->is_static());
-  JavaValue result(ptxka.get_ret_type());
-  if (!gpu::execute_kernel((address)startValue, ptxka, result)) {
-    return NULL;
-  }
-
-  if (ptxka.get_ret_type() == T_VOID) {
-    return NULL;
-  } else if (ptxka.get_ret_type() == T_OBJECT || ptxka.get_ret_type() == T_ARRAY) {
-    return JNIHandles::make_local((oop) result.get_jobject());
-  } else {
-    oop o = java_lang_boxing_object::create(ptxka.get_ret_type(), (jvalue *) result.get_value_addr(), CHECK_NULL);
-    if (TraceGPUInteraction) {
-      switch (ptxka.get_ret_type()) {
-        case T_INT:
-          tty->print_cr("GPU execution returned (int) %d", result.get_jint());
-          break;
-        case T_LONG:
-          tty->print_cr("GPU execution returned (long) %ld", result.get_jlong());
-          break;
-        case T_FLOAT:
-          tty->print_cr("GPU execution returned (float) %f", result.get_jfloat());
-          break;
-        case T_DOUBLE:
-          tty->print_cr("GPU execution returned (double) %f", result.get_jdouble());
-          break;
-        default:
-          tty->print_cr("**** Value returned by GPU not yet handled");
-          break;
-        }
-    }
-    return JNIHandles::make_local(o);
-  }
-C2V_END
-
-C2V_VMENTRY(jobject, executeParallelMethodVarargs, (JNIEnv *env,
-                                                          jobject,
-                                                          jint dimX, jint dimY, jint dimZ,
-                                                          jobject args, jobject hotspotInstalledCode))
-  ResourceMark rm;
-  HandleMark hm;
-
-  if (gpu::is_available() == false || gpu::has_gpu_linkage() == false && gpu::is_initialized()) {
-    tty->print_cr("executeParallelMethodVarargs - not available / no linkage / not initialized");
-    return NULL;
-  }
-  jlong nmethodValue = HotSpotInstalledCode::codeBlob(hotspotInstalledCode);
-  nmethod* nm = (nmethod*) (address) nmethodValue;
-  methodHandle mh = nm->method();
-  Symbol* signature = mh->signature();
-
-  // start value is the kernel
-  jlong startValue = HotSpotInstalledCode::codeStart(hotspotInstalledCode);
-
-  if (UseHSAILSimulator) {
-    gpu::execute_kernel_void_1d((address)startValue, dimX, args, mh);
-    return NULL;
-  }
-
-  PTXKernelArguments ptxka(signature, (arrayOop) JNIHandles::resolve(args), mh->is_static());
-  JavaValue result(ptxka.get_ret_type());
-  if (!gpu::execute_warp(dimX, dimY, dimZ, (address) startValue, ptxka, result)) {
-    return NULL;
-  }
-
-  if (ptxka.get_ret_type() == T_VOID) {
-    return NULL;
-  } else if (ptxka.get_ret_type() == T_OBJECT || ptxka.get_ret_type() == T_ARRAY) {
-    return JNIHandles::make_local((oop) result.get_jobject());
-  } else {
-    oop o = java_lang_boxing_object::create(ptxka.get_ret_type(), (jvalue *) result.get_value_addr(), CHECK_NULL);
-    if (TraceGPUInteraction) {
-      switch (ptxka.get_ret_type()) {
-        case T_INT:
-          tty->print_cr("GPU execution returned %d", result.get_jint());
-          break;
-        case T_FLOAT:
-          tty->print_cr("GPU execution returned %f", result.get_jfloat());
-          break;
-        case T_DOUBLE:
-          tty->print_cr("GPU execution returned %g", result.get_jdouble());
-          break;
-        default:
-          tty->print_cr("GPU returned unhandled");
-          break;
-      }
-    }
-    return JNIHandles::make_local(o);
-  }
-C2V_END
-
-JRT_ENTRY(jlong, invalidLaunchKernel(JavaThread* thread))
-  SharedRuntime::throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_LinkageError(), "invalid kernel launch function");
-  return 0L;
-JRT_END
-
-C2V_VMENTRY(jlong, getLaunchKernelAddress, (JNIEnv *env, jobject))
-  if (gpu::get_target_il_type() == gpu::PTX) {
-    return (jlong) gpu::Ptx::execute_kernel_from_vm;
-  }
-  return (jlong) invalidLaunchKernel;
-C2V_END
-
-C2V_VMENTRY(jboolean, deviceInit, (JNIEnv *env, jobject))
-  if (gpu::is_available() == false || gpu::has_gpu_linkage() == false) {
-    if (TraceGPUInteraction) {
-      tty->print_cr("deviceInit - not available / no linkage");
-    }
-    return false;
-  }
-  if (gpu::is_initialized()) {
-    tty->print_cr("deviceInit - already initialized");
-    return true;
-  }
-  gpu::initialize_gpu();
-  return gpu::is_initialized();
-C2V_END
-
-C2V_VMENTRY(jint, availableProcessors, (JNIEnv *env, jobject))
-  if (gpu::is_available() == false || gpu::has_gpu_linkage() == false) {
-    if (TraceGPUInteraction) {
-      tty->print_cr("deviceInit - not available / no linkage");
-    }
-    return false;
-  }
-  return gpu::available_processors();
-C2V_END
-
-C2V_VMENTRY(jboolean, deviceDetach, (JNIEnv *env, jobject))
-return true;
-C2V_END
-
-
-#define CC (char*)  /*cast a literal from (const char*)*/
-#define FN_PTR(f) CAST_FROM_FN_PTR(void*, &(c2v_ ## f))
-
-#define RESOLVED_TYPE         "Lcom/oracle/graal/api/meta/ResolvedJavaType;"
-#define TYPE                  "Lcom/oracle/graal/api/meta/JavaType;"
-#define METHOD                "Lcom/oracle/graal/api/meta/JavaMethod;"
-#define FIELD                 "Lcom/oracle/graal/api/meta/JavaField;"
-#define SIGNATURE             "Lcom/oracle/graal/api/meta/Signature;"
-#define CONSTANT_POOL         "Lcom/oracle/graal/api/meta/ConstantPool;"
-#define CONSTANT              "Lcom/oracle/graal/api/meta/Constant;"
-#define KIND                  "Lcom/oracle/graal/api/meta/Kind;"
-#define LOCAL                 "Lcom/oracle/graal/api/meta/Local;"
-#define RUNTIME_CALL          "Lcom/oracle/graal/api/code/RuntimeCall;"
-#define EXCEPTION_HANDLERS    "[Lcom/oracle/graal/api/meta/ExceptionHandler;"
-#define REFLECT_METHOD        "Ljava/lang/reflect/Method;"
-#define REFLECT_CONSTRUCTOR   "Ljava/lang/reflect/Constructor;"
-#define REFLECT_FIELD         "Ljava/lang/reflect/Field;"
-#define STRING                "Ljava/lang/String;"
-#define OBJECT                "Ljava/lang/Object;"
-#define CLASS                 "Ljava/lang/Class;"
-#define STACK_TRACE_ELEMENT   "Ljava/lang/StackTraceElement;"
-#define HS_RESOLVED_TYPE      "Lcom/oracle/graal/hotspot/meta/HotSpotResolvedObjectType;"
-#define HS_RESOLVED_JAVA_TYPE "Lcom/oracle/graal/hotspot/meta/HotSpotResolvedJavaType;"
-#define HS_RESOLVED_METHOD    "Lcom/oracle/graal/hotspot/meta/HotSpotResolvedJavaMethod;"
-#define HS_RESOLVED_FIELD     "Lcom/oracle/graal/hotspot/meta/HotSpotResolvedJavaField;"
-#define HS_COMPILED_CODE      "Lcom/oracle/graal/hotspot/HotSpotCompiledCode;"
-#define HS_CONFIG             "Lcom/oracle/graal/hotspot/HotSpotVMConfig;"
-#define HS_METHOD             "Lcom/oracle/graal/hotspot/meta/HotSpotMethod;"
-#define HS_INSTALLED_CODE     "Lcom/oracle/graal/hotspot/meta/HotSpotInstalledCode;"
-#define METHOD_DATA           "Lcom/oracle/graal/hotspot/meta/HotSpotMethodData;"
-#define METASPACE_METHOD      "J"
-#define METASPACE_METHOD_DATA "J"
-#define NMETHOD               "J"
-#define GPUSPACE_METHOD       "J"
-
-JNINativeMethod CompilerToGPU_methods[] = {
-  {CC"generateKernel",                CC"([B" STRING ")"GPUSPACE_METHOD,          FN_PTR(generateKernel)},
-  {CC"deviceInit",                    CC"()Z",                                    FN_PTR(deviceInit)},
-  {CC"deviceDetach",                  CC"()Z",                                    FN_PTR(deviceDetach)},
-  {CC"availableProcessors",           CC"()I",                                    FN_PTR(availableProcessors)},
-  {CC"executeExternalMethodVarargs",  CC"(["OBJECT HS_INSTALLED_CODE")"OBJECT,    FN_PTR(executeExternalMethodVarargs)},
-  {CC"executeParallelMethodVarargs",  CC"(III["OBJECT HS_INSTALLED_CODE")"OBJECT, FN_PTR(executeParallelMethodVarargs)},
-  {CC"getLaunchKernelAddress",        CC"()J",                                    FN_PTR(getLaunchKernelAddress)},
-};
-
-int CompilerToGPU_methods_count() {
-  return sizeof(CompilerToGPU_methods) / sizeof(JNINativeMethod);
-}
-
--- a/src/share/vm/graal/graalCompilerToGPU.hpp	Thu Jan 30 00:48:41 2014 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#ifndef SHARE_VM_GRAAL_GRAAL_COMPILER_TO_GPU_HPP
-#define SHARE_VM_GRAAL_GRAAL_COMPILER_TO_GPU_HPP
-
-#include "prims/jni.h"
-
-extern JNINativeMethod CompilerToGPU_methods[];
-int CompilerToGPU_methods_count();
-
-
-#endif // SHARE_VM_GRAAL_GRAAL_COMPILER_TO_GPU_HPP
--- a/src/share/vm/graal/graalCompilerToVM.cpp	Thu Jan 30 00:48:41 2014 +0100
+++ b/src/share/vm/graal/graalCompilerToVM.cpp	Thu Jan 30 00:52:33 2014 +0100
@@ -40,6 +40,7 @@
 #include "gc_implementation/g1/heapRegion.hpp"
 #include "runtime/javaCalls.hpp"
 #include "runtime/vmStructs.hpp"
+#include "runtime/gpu.hpp"


 Method* getMethodFromHotSpotMethod(oop hotspot_method) {
@@ -821,6 +822,14 @@
   return (jlongArray) JNIHandles::make_local(arrayOop);
 C2V_END

+C2V_ENTRY(jobject, getGPUs, (JNIEnv *env, jobject))
+#if defined(TARGET_OS_FAMILY_bsd) || defined(TARGET_OS_FAMILY_linux) || defined(TARGET_OS_FAMILY_windows)
+  return gpu::probe_gpus(env); // OS-specific probe; yields a Java string of backend names
+#else
+  return env->NewStringUTF(""); // no probe for this OS family: report no GPUs
+#endif // TARGET_OS_FAMILY_bsd || TARGET_OS_FAMILY_linux || TARGET_OS_FAMILY_windows
+C2V_END
+
 C2V_VMENTRY(int, allocateCompileId, (JNIEnv *env, jobject, jobject hotspot_method, int entry_bci))
   HandleMark hm;
   ResourceMark rm;
@@ -897,6 +906,7 @@
   {CC"readUnsafeUncompressedPointer", CC"("OBJECT"J)"OBJECT,                                            FN_PTR(readUnsafeUncompressedPointer)},
   {CC"readUnsafeKlassPointer",        CC"("OBJECT")J",                                                  FN_PTR(readUnsafeKlassPointer)},
   {CC"collectCounters",               CC"()[J",                                                         FN_PTR(collectCounters)},
+  {CC"getGPUs",                       CC"()"STRING,                                                     FN_PTR(getGPUs)},
   {CC"allocateCompileId",             CC"("HS_RESOLVED_METHOD"I)I",                                     FN_PTR(allocateCompileId)},
   {CC"isMature",                      CC"("METASPACE_METHOD_DATA")Z",                                   FN_PTR(isMature)},
 };
--- a/src/share/vm/runtime/arguments.cpp	Thu Jan 30 00:48:41 2014 +0100
+++ b/src/share/vm/runtime/arguments.cpp	Thu Jan 30 00:52:33 2014 +0100
@@ -131,9 +131,6 @@
 SystemProperty *Arguments::_java_home = NULL;
 SystemProperty *Arguments::_java_class_path = NULL;
 SystemProperty *Arguments::_sun_boot_class_path = NULL;
-#ifdef GRAAL
-SystemProperty *Arguments::_graal_gpu_isalist = NULL;
-#endif

 char* Arguments::_meta_index_path = NULL;
 char* Arguments::_meta_index_dir = NULL;
@@ -197,9 +194,6 @@
   _sun_boot_class_path = new SystemProperty("sun.boot.class.path", NULL,  true);

   _java_class_path = new SystemProperty("java.class.path", "",  true);
-#ifdef GRAAL
-  _graal_gpu_isalist = new SystemProperty("graal.gpu.isalist", NULL, true);
-#endif

   // Add to System Property list.
   PropertyList_add(&_system_properties, _java_ext_dirs);
@@ -209,9 +203,6 @@
   PropertyList_add(&_system_properties, _java_home);
   PropertyList_add(&_system_properties, _java_class_path);
   PropertyList_add(&_system_properties, _sun_boot_class_path);
-#ifdef GRAAL
-  PropertyList_add(&_system_properties, _graal_gpu_isalist);
-#endif

   // Set OS specific system properties values
   os::init_system_properties_values();
@@ -3844,24 +3835,6 @@
     }
   }

-#ifdef GRAAL
-  if (_graal_gpu_isalist->value() == NULL) {
-    // Initialize the graal.gpu.isalist system property if
-    // a) it was not explicitly defined by the user and
-    // b) at least one GPU is available.
-    // GPU offload can be disabled by setting the property
-    // to the empty string on the command line
-    if (gpu::is_available() && gpu::has_gpu_linkage()) {
-      if (gpu::get_target_il_type() == gpu::PTX) {
-        _graal_gpu_isalist->append_value("PTX");
-      }
-      if (gpu::get_target_il_type() == gpu::HSAIL) {
-        _graal_gpu_isalist->append_value("HSAIL");
-      }
-    }
-  }
-#endif
-
   return JNI_OK;
 }
--- a/src/share/vm/runtime/compilationPolicy.cpp	Thu Jan 30 00:48:41 2014 +0100
+++ b/src/share/vm/runtime/compilationPolicy.cpp	Thu Jan 30 00:52:33 2014 +0100
@@ -163,14 +163,12 @@

 bool CompilationPolicy::can_be_offloaded_to_gpu(methodHandle m) {
 #ifdef GRAAL
-  if (GPUOffload) {
-    // Check if this method can be offloaded to GPU.
-    // 1. Offload it to GPU if it is a Lambda method
+  if (GPUOffload && gpu::initialized_gpus() > 0) {
+    // Check if this method can be off-loaded to GPU.
     if (m->is_synthetic()) {
-      // A lambda method is a syntheric method.
+      // A lambda method is a synthetic method.
       Symbol * klass_name = m->method_holder()->name();
       Symbol * method_name = m->name();
-      bool offloadToGPU = false;
       {
         ResourceMark rm;
         if (klass_name != NULL) {
@@ -179,37 +177,19 @@
             char* methodPrefix = strstr(method_name->as_C_string(), lambdaPrefix);
             if (methodPrefix != 0) {
               if ((strncmp(lambdaPrefix, methodPrefix, strlen(lambdaPrefix)) == 0)) {
-                offloadToGPU = true;
+                if (TraceGPUInteraction) {
+                  char buf[O_BUFLEN];
+                  tty->print_cr("Selected lambda method %s for GPU offload", m->name_and_sig_as_C_string(buf, O_BUFLEN));
+                }
+                return true;
               }
             }
           }
         }
       }
-      if (offloadToGPU) {
-        // If GPU is available and the necessary linkage is available
-        // return true indicatin that this method must be compiled.
-        if (gpu::is_available() && gpu::has_gpu_linkage()) {
-          if (TraceGPUInteraction) {
-            tty->print("Compiling Lambda method ");
-            m->print_short_name();
-            switch (gpu::get_target_il_type()) {
-            case gpu::PTX :
-              tty->print_cr("to PTX");
-              break;
-            case gpu::HSAIL :
-              tty->print_cr("to HSAIL");
-              break;
-            default :
-              tty->print_cr("to Unknown GPU!!!");
-            }
-          }
-          return true;
-        }
-      }
     }
   }
 #endif
-
   return false;
 }
--- a/src/share/vm/runtime/gpu.cpp	Thu Jan 30 00:48:41 2014 +0100
+++ b/src/share/vm/runtime/gpu.cpp	Thu Jan 30 00:52:33 2014 +0100
@@ -26,85 +26,11 @@
 #include "runtime/gpu.hpp"
 #include "runtime/handles.hpp"

-bool gpu::_available = false;    // does the hardware exist?
-bool gpu::_gpu_linkage = false;  // is the driver library to access the GPU installed
-bool gpu::_initialized = false;  // is the GPU device initialized
-gpu::TargetGPUIL gpu::_targetIL = gpu::NONE; // No GPU detected yet.
-
-void gpu::init() {
-#if defined(TARGET_OS_FAMILY_bsd) || defined(TARGET_OS_FAMILY_linux) || defined(TARGET_OS_FAMILY_windows)
-  gpu::probe_gpu();
-  if (gpu::get_target_il_type() == gpu::PTX) {
-    set_gpu_linkage(gpu::Ptx::probe_linkage());
-  } else if (gpu::get_target_il_type() == gpu::HSAIL) {
-    set_gpu_linkage(gpu::Hsail::probe_linkage());
-  } else {
-    set_gpu_linkage(false);
-  }
-#endif
-}
-
-void gpu::initialize_gpu() {
-  if (gpu::has_gpu_linkage()) {
-    if (gpu::get_target_il_type() == gpu::PTX) {
-      set_initialized(gpu::Ptx::initialize_gpu());
-    } else if (gpu::get_target_il_type() == gpu::HSAIL) {
-      set_initialized(gpu::Hsail::initialize_gpu());
-    }
-  }
-}
-
-void * gpu::generate_kernel(unsigned char *code, int code_len, const char *name) {
-  if (gpu::has_gpu_linkage()) {
-    if (gpu::get_target_il_type() == gpu::PTX) {
-      return (gpu::Ptx::generate_kernel(code, code_len, name));
-    } else if (gpu::get_target_il_type() == gpu::HSAIL) {
-      return (gpu::Hsail::generate_kernel(code, code_len, name));
-    }
-  }
-  return NULL;
-}
+int gpu::_initialized_gpus = 0;

-bool gpu::execute_kernel(address kernel, PTXKernelArguments & ptxka, JavaValue& ret) {
-    if (gpu::has_gpu_linkage()) {
-        if (gpu::get_target_il_type() == gpu::PTX) {
-            return (gpu::Ptx::execute_kernel(kernel, ptxka, ret));
-        }
-        // Add kernel execution functionality of other GPUs here
+void gpu::initialized_gpu(const char* name) {
+    _initialized_gpus++;
+    if (TraceGPUInteraction) {
+      tty->print_cr("[GPU] registered initialization of %s (total initialized: %d)", name, _initialized_gpus);
     }
-    return false;
-}
-
-// This is HSAIL specific to work with Sumatra JDK
-bool gpu::execute_kernel_void_1d(address kernel, int dimX, jobject args, methodHandle& mh) {
-    if (gpu::has_gpu_linkage()) {
-        if (gpu::get_target_il_type() == gpu::HSAIL) {
-            return (gpu::Hsail::execute_kernel_void_1d(kernel, dimX, args, mh));
-        }
-    }
-    return false;
-
 }
-
-
-bool gpu::execute_warp(int dimX, int dimY, int dimZ,
-                       address kernel, PTXKernelArguments & ptxka, JavaValue& ret) {
-    if (gpu::has_gpu_linkage()) {
-        if (gpu::get_target_il_type() == gpu::PTX) {
-            return (gpu::Ptx::execute_warp(dimX, dimY, dimZ, kernel, ptxka, ret));
-        }
-        // Add kernel execution functionality of other GPUs here
-    }
-    return false;
-}
-
-int gpu::available_processors() {
-    if (gpu::has_gpu_linkage()) {
-        if (gpu::get_target_il_type() == gpu::PTX) {
-            return (gpu::Ptx::total_cores());
-        }
-        // Add kernel execution functionality of other GPUs here
-    }
-    return 0;
-}
-
--- a/src/share/vm/runtime/gpu.hpp	Thu Jan 30 00:48:41 2014 +0100
+++ b/src/share/vm/runtime/gpu.hpp	Thu Jan 30 00:52:33 2014 +0100
@@ -27,85 +27,28 @@

 #include "runtime/atomic.hpp"
 #include "oops/symbol.hpp"
+#include "utilities/array.hpp"

 class PTXKernelArguments;

-// gpu defines the interface to the graphics processor; this includes traditional
-// GPU services such as graphics kernel load and execute.
-
-
-class gpu: AllStatic {
-public:
-
-  enum TargetGPUIL { NONE = 0, PTX = 1, HSAIL = 2};
-  static void init(void);
-
-  static void probe_gpu();
-
-  static void initialize_gpu();
-
-  static int available_processors();
-
-  static void * generate_kernel(unsigned char *code, int code_len, const char *name);
+// Defines the interface to the graphics processor(s).
+class gpu : AllStatic {
+ private:
+  static int _initialized_gpus;

-  static bool execute_warp(int dimX, int dimY, int dimZ,
-                           address kernel, PTXKernelArguments & ptxka, JavaValue & ret);
-
-  static bool execute_kernel(address kernel, PTXKernelArguments & ptxka, JavaValue & ret);
-
-  // No return value from HSAIL kernels
-  static bool execute_kernel_void_1d(address kernel, int dimX, jobject args, methodHandle& mh);
-
-  static void set_available(bool value) {
-    _available = value;
-  }
-
-  static bool is_available() { return _available; }
-
-  static void set_initialized(bool value) {
-    _initialized = value;
-  }
+  // Records that the GPU device 'name' has been initialized (bumps the initialized_gpus() count).
+  static void initialized_gpu(const char* name);

-  static bool is_initialized() { return _initialized; }
-
-  static void set_gpu_linkage(bool value) {
-    _gpu_linkage = value;
-  }
-
-  static bool has_gpu_linkage() { return _gpu_linkage; }
-
-  static void set_target_il_type(TargetGPUIL value) {
-    _targetIL = value;
-  }
-
-  static enum gpu::TargetGPUIL get_target_il_type() {
-    return _targetIL;
-  }
+ public:

-protected:
-  static bool _available;
-  static bool _gpu_linkage;
-  static bool _initialized;
-  static TargetGPUIL _targetIL;
+  // Gets a comma-separated list of supported GPU architecture names.
+  static jobject probe_gpus(JNIEnv* env);
+
+  static int initialized_gpus() { return _initialized_gpus; }

-  // Platform dependent stuff
-#ifdef TARGET_OS_FAMILY_linux
-# include "gpu_linux.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_solaris
-#endif
-#ifdef TARGET_OS_FAMILY_windows
-# include "gpu_windows.hpp"
-#endif
-#ifdef TARGET_OS_FAMILY_bsd
-# include "gpu_bsd.hpp"
-#endif
-
-public:
 # include "ptx/vm/gpu_ptx.hpp"
 # include "hsail/vm/gpu_hsail.hpp"

 };

-
 #endif // SHARE_VM_RUNTIME_GPU_HPP
--- a/src/share/vm/runtime/thread.cpp	Thu Jan 30 00:48:41 2014 +0100
+++ b/src/share/vm/runtime/thread.cpp	Thu Jan 30 00:52:33 2014 +0100
@@ -3392,11 +3392,6 @@
   jint parse_result = Arguments::parse(args);
   if (parse_result != JNI_OK) return parse_result;

-#ifdef GRAAL
-  // Probe for supported GPUs and initialize them.
-  gpu::init();
-#endif
-
   os::init_before_ergo();

   jint ergo_result = Arguments::apply_ergo();