changeset 15177:66e3af78ea96

HSAIL: added safepoint support Contributed-by: Eric Caspole <eric.caspole@amd.com>
author Doug Simon <doug.simon@oracle.com>
date Thu, 17 Apr 2014 00:44:32 +0200
parents 78f1a1a70628
children 0c53453c4d5e
files graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/HSAILNodeLIRBuilder.java graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerationResult.java graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotNodeLIRBuilder.java graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotSafepointOp.java graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java graal/com.oracle.graal.hsail/src/com/oracle/graal/hsail/HSAIL.java graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILControlFlow.java make/linux/makefiles/buildtree.make make/linux/makefiles/vm.make src/gpu/hsail/vm/gpu_hsail.cpp src/gpu/hsail/vm/gpu_hsail.hpp src/gpu/hsail/vm/vmStructs_hsail.hpp src/gpu/ptx/vm/gpu_ptx.cpp src/gpu/ptx/vm/gpu_ptx.hpp src/os/bsd/vm/gpu_bsd.cpp src/os/linux/vm/gpu_linux.cpp src/os/windows/vm/gpu_windows.cpp src/share/vm/graal/graalCompilerToVM.cpp src/share/vm/graal/graalGlobals.hpp src/share/vm/runtime/gpu.cpp src/share/vm/runtime/gpu.hpp src/share/vm/runtime/safepoint.cpp
diffstat 23 files changed, 325 insertions(+), 95 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/HSAILNodeLIRBuilder.java	Wed Apr 16 22:54:48 2014 +0200
+++ b/graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/HSAILNodeLIRBuilder.java	Thu Apr 17 00:44:32 2014 +0200
@@ -25,7 +25,6 @@
 
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.compiler.gen.*;
-import com.oracle.graal.debug.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.hsail.*;
@@ -63,11 +62,6 @@
     }
 
     @Override
-    public void visitSafepointNode(SafepointNode i) {
-        Debug.log("visitSafePointNode unimplemented");
-    }
-
-    @Override
     public void emitNullCheck(ValueNode v, DeoptimizingNode deopting) {
         assert v.stamp() instanceof ObjectStamp;
         Variable obj = newVariable(Kind.Object);
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Wed Apr 16 22:54:48 2014 +0200
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Thu Apr 17 00:44:32 2014 +0200
@@ -61,7 +61,7 @@
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.asm.*;
 import com.oracle.graal.lir.hsail.*;
-import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizeOp;
+import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizingOp;
 import com.oracle.graal.lir.hsail.HSAILMove.AtomicGetAndAddOp;
 import com.oracle.graal.nodes.*;
 import com.oracle.graal.nodes.StructuredGraph.GuardsStage;
@@ -443,6 +443,7 @@
 
         HotSpotVMConfig config = getRuntime().getConfig();
         boolean useHSAILDeoptimization = config.useHSAILDeoptimization;
+        boolean useHSAILSafepoints = config.useHSAILSafepoints;
 
         // see what graph nodes we have to see if we are using the thread register
         // if not, we don't have to emit the code that sets that up
@@ -576,13 +577,23 @@
 
             // Aliases for d17
             RegisterValue d17_donorThreadIndex = HSAIL.d17.asValue(wordKind);
+            RegisterValue d17_safepointFlagAddrIndex = d17_donorThreadIndex;
 
             // Aliases for s34
             RegisterValue s34_deoptOccurred = HSAIL.s34.asValue(Kind.Int);
             RegisterValue s34_donorThreadIndex = s34_deoptOccurred;
 
             asm.emitLoadKernelArg(d16_deoptInfo, asm.getDeoptInfoName(), "u64");
-            asm.emitComment("// Check if a deopt has occurred and abort if true before doing any work");
+            asm.emitComment("// Check if a deopt or safepoint has occurred and abort if true before doing any work");
+
+            if (useHSAILSafepoints) {
+                // Load address of _notice_safepoints field
+                asm.emitLoad(wordKind, d17_safepointFlagAddrIndex, new HSAILAddressValue(wordKind, d16_deoptInfo, config.hsailNoticeSafepointsOffset).toAddress());
+                // Load int value from that field
+                asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(Kind.Int, d17_safepointFlagAddrIndex, 0).toAddress());
+                asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false);
+                asm.cbr(deoptInProgressLabel);
+            }
             asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(Kind.Int, d16_deoptInfo, config.hsailDeoptOccurredOffset).toAddress());
             asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false);
             asm.cbr(deoptInProgressLabel);
@@ -693,11 +704,11 @@
             AllocatableValue waveMathScratch1 = HSAIL.d18.asValue(wordKind);
             AllocatableValue waveMathScratch2 = HSAIL.d19.asValue(wordKind);
 
-            AllocatableValue actionAndReasonReg = HSAIL.s32.asValue(Kind.Int);
-            AllocatableValue codeBufferOffsetReg = HSAIL.s33.asValue(Kind.Int);
+            AllocatableValue actionAndReasonReg = HSAIL.actionAndReasonReg.asValue(Kind.Int);
+            AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(Kind.Int);
             AllocatableValue scratch32 = HSAIL.s34.asValue(Kind.Int);
             AllocatableValue workidreg = HSAIL.s35.asValue(Kind.Int);
-            AllocatableValue dregOopMapReg = HSAIL.s39.asValue(Kind.Int);
+            AllocatableValue dregOopMapReg = HSAIL.dregOopMapReg.asValue(Kind.Int);
 
             HSAILAddress deoptNextIndexAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToDeoptNextIndex).toAddress();
             HSAILAddress neverRanArrayAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToNeverRanArray).toAddress();
@@ -833,7 +844,7 @@
         compilationResult.setHostGraph(prepareHostGraph(method, lirGenRes.getDeopts(), getProviders(), config));
     }
 
-    private static StructuredGraph prepareHostGraph(ResolvedJavaMethod method, List<DeoptimizeOp> deopts, HotSpotProviders providers, HotSpotVMConfig config) {
+    private static StructuredGraph prepareHostGraph(ResolvedJavaMethod method, List<DeoptimizingOp> deopts, HotSpotProviders providers, HotSpotVMConfig config) {
         if (deopts.isEmpty()) {
             return null;
         }
@@ -847,12 +858,12 @@
         int[] keySuccessors = new int[deopts.size() + 1];
         double[] keyProbabilities = new double[deopts.size() + 1];
         int i = 0;
-        Collections.sort(deopts, new Comparator<DeoptimizeOp>() {
-            public int compare(DeoptimizeOp o1, DeoptimizeOp o2) {
+        Collections.sort(deopts, new Comparator<DeoptimizingOp>() {
+            public int compare(DeoptimizingOp o1, DeoptimizingOp o2) {
                 return o1.getCodeBufferPos() - o2.getCodeBufferPos();
             }
         });
-        for (DeoptimizeOp deopt : deopts) {
+        for (DeoptimizingOp deopt : deopts) {
             keySuccessors[i] = i;
             keyProbabilities[i] = 1.0 / deopts.size();
             keys[i] = deopt.getCodeBufferPos();
@@ -881,7 +892,7 @@
         return BeginNode.begin(vmError);
     }
 
-    private static BeginNode createHostDeoptBranch(DeoptimizeOp deopt, ParameterNode hsailFrame, ValueNode reasonAndAction, ValueNode speculation, HotSpotProviders providers, HotSpotVMConfig config) {
+    private static BeginNode createHostDeoptBranch(DeoptimizingOp deopt, ParameterNode hsailFrame, ValueNode reasonAndAction, ValueNode speculation, HotSpotProviders providers, HotSpotVMConfig config) {
         BeginNode branch = hsailFrame.graph().add(new BeginNode());
         DynamicDeoptimizeNode deoptimization = hsailFrame.graph().add(new DynamicDeoptimizeNode(reasonAndAction, speculation));
         deoptimization.setStateBefore(createFrameState(deopt.getFrameState().topFrame, hsailFrame, providers, config));
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerationResult.java	Wed Apr 16 22:54:48 2014 +0200
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerationResult.java	Thu Apr 17 00:44:32 2014 +0200
@@ -26,21 +26,21 @@
 
 import com.oracle.graal.compiler.gen.*;
 import com.oracle.graal.lir.*;
-import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizeOp;
+import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizingOp;
 
 public class HSAILHotSpotLIRGenerationResult extends LIRGenerationResultBase {
 
-    private List<DeoptimizeOp> deopts = new ArrayList<>();
+    private List<DeoptimizingOp> deopts = new ArrayList<>();
 
     public HSAILHotSpotLIRGenerationResult(LIR lir, FrameMap frameMap) {
         super(lir, frameMap);
     }
 
-    public List<DeoptimizeOp> getDeopts() {
+    public List<DeoptimizingOp> getDeopts() {
         return deopts;
     }
 
-    public void addDeopt(DeoptimizeOp deopt) {
+    public void addDeopt(DeoptimizingOp deopt) {
         deopts.add(deopt);
     }
 
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotNodeLIRBuilder.java	Wed Apr 16 22:54:48 2014 +0200
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotNodeLIRBuilder.java	Thu Apr 17 00:44:32 2014 +0200
@@ -28,6 +28,7 @@
 import com.oracle.graal.asm.*;
 import com.oracle.graal.compiler.gen.*;
 import com.oracle.graal.compiler.hsail.*;
+import com.oracle.graal.debug.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.hotspot.*;
 import com.oracle.graal.hotspot.HotSpotVMConfig.CompressEncoding;
@@ -111,6 +112,19 @@
     }
 
     @Override
+    public void visitSafepointNode(SafepointNode i) { // lowers a SafepointNode to an HSAILHotSpotSafepointOp when the UseHSAILSafepoints flag is on
+        HotSpotVMConfig config = getGen().config;
+        if (config.useHSAILSafepoints) { // plain boolean test; comparing against the literal true was redundant
+            LIRFrameState info = gen.state(i); // frame state the op carries for its infopoint
+            HSAILHotSpotSafepointOp safepoint = new HSAILHotSpotSafepointOp(info, config, this);
+            ((HSAILHotSpotLIRGenerationResult) getGen().getResult()).addDeopt(safepoint); // kept in the same list as the deopt ops, returned by getDeopts()
+            append(safepoint);
+        } else {
+            Debug.log("HSAIL safepoints turned off"); // no LIR is emitted for the node in this case
+        }
+    }
+
+    @Override
     public void emitPrefetchAllocate(ValueNode address, ValueNode distance) {
         // nop
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotSafepointOp.java	Thu Apr 17 00:44:32 2014 +0200
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.hotspot.hsail;
+
+import static com.oracle.graal.api.code.ValueUtil.*;
+
+import com.oracle.graal.api.code.*;
+import com.oracle.graal.api.meta.*;
+import com.oracle.graal.asm.hsail.*;
+import com.oracle.graal.hotspot.*;
+import com.oracle.graal.hsail.*;
+import com.oracle.graal.lir.*;
+import com.oracle.graal.lir.asm.*;
+import com.oracle.graal.lir.hsail.*;
+import com.oracle.graal.nodes.spi.*;
+
+/**
+ * Emits a safepoint poll: the kernel load-acquires the int reached through the pointer stored at {@code offsetToNoticeSafepoints} in the deopt info and, when it is non-zero, jumps to the deopt label so the workitem goes from HSA back to CPU.
+ */
+@Opcode("SAFEPOINT")
+public class HSAILHotSpotSafepointOp extends HSAILLIRInstruction implements HSAILControlFlow.DeoptimizingOp {
+    private Constant actionAndReason; // encoding of (DeoptimizationAction.None, DeoptimizationReason.None, 0), computed in the constructor
+    @State protected LIRFrameState frameState; // frame state at the safepoint; recorded with the infopoint in emitCode()
+    protected int codeBufferPos = -1; // stays -1 until emitCode() stores masm.position()
+    protected int dregOopMap = 0; // bitmap built in emitCode(): bit reg.encoding() is set for each register holding a Kind.Object frame value
+    final int offsetToNoticeSafepoints; // copy of config.hsailNoticeSafepointsOffset
+
+    public HSAILHotSpotSafepointOp(LIRFrameState state, HotSpotVMConfig config, NodeLIRBuilderTool tool) {
+        actionAndReason = tool.getLIRGeneratorTool().getMetaAccess().encodeDeoptActionAndReason(DeoptimizationAction.None, DeoptimizationReason.None, 0);
+        frameState = state;
+        offsetToNoticeSafepoints = config.hsailNoticeSafepointsOffset;
+    }
+
+    @Override
+    public void emitCode(CompilationResultBuilder crb, HSAILAssembler masm) {
+
+        // Get a unique codeBuffer position.
+        // When we save our state, we will save this as well (it can be used as a key to get the
+        // debugInfo); it is also part of the label name below and is what getCodeBufferPos() returns.
+        codeBufferPos = masm.position();
+
+        masm.emitComment(" /* HSAIL safepoint bci=" + frameState.debugInfo().getBytecodePosition().getBCI() + ", frameState=" + frameState + " */");
+        String afterSafepointLabel = "@LAfterSafepoint_at_pos_" + codeBufferPos;
+
+        AllocatableValue scratch64 = HSAIL.d16.asValue(Kind.Object); // receives the deopt info kernel argument
+        AllocatableValue spAddrReg = HSAIL.d17.asValue(Kind.Object); // receives the pointer read from the noticeSafepoints field
+        AllocatableValue scratch32 = HSAIL.s34.asValue(Kind.Int); // receives the int flag value
+        masm.emitLoadKernelArg(scratch64, masm.getDeoptInfoName(), "u64");
+
+        // Load the pointer stored in the noticeSafepoints field (deopt info + offsetToNoticeSafepoints)
+        HSAILAddress noticeSafepointsAddr = new HSAILAddressValue(Kind.Object, scratch64, offsetToNoticeSafepoints).toAddress();
+        masm.emitLoad(Kind.Object, spAddrReg, noticeSafepointsAddr);
+
+        // Load-acquire the int that pointer refers to; if it equals 0, branch past the deopt sequence
+        HSAILAddress noticeSafepointsIntAddr = new HSAILAddressValue(Kind.Int, spAddrReg, 0).toAddress();
+        masm.emitLoadAcquire(scratch32, noticeSafepointsIntAddr);
+        masm.emitCompare(Kind.Int, scratch32, Constant.forInt(0), "eq", false, false);
+        masm.cbr(afterSafepointLabel);
+
+        BytecodeFrame frame = frameState.debugInfo().frame();
+        for (int i = 0; i < frame.numLocals + frame.numStack; i++) { // scan the locals and stack slots of this frame
+            Value val = frame.values[i];
+            if (isLegal(val) && isRegister(val)) {
+                Register reg = asRegister(val);
+                if (val.getKind() == Kind.Object) {
+                    dregOopMap |= 1 << (reg.encoding()); // NOTE(review): an int shift wraps for encodings >= 32 — confirm object values only live in registers with encoding < 32
+                }
+            }
+        }
+
+        AllocatableValue actionAndReasonReg = HSAIL.actionAndReasonReg.asValue(Kind.Int); // the three registers below are the HSAIL.java aliases that DeoptimizeOp also fills before jumping to the deopt label
+        AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(Kind.Int);
+        AllocatableValue dregOopMapReg = HSAIL.dregOopMapReg.asValue(Kind.Int);
+        masm.emitMov(Kind.Int, actionAndReasonReg, actionAndReason);
+        masm.emitMov(Kind.Int, codeBufferOffsetReg, Constant.forInt(codeBufferPos));
+        masm.emitMov(Kind.Int, dregOopMapReg, Constant.forInt(dregOopMap));
+        masm.emitJumpToLabelName(masm.getDeoptLabelName());
+
+        masm.emitString0(afterSafepointLabel + ":\n"); // target of the branch taken when the flag was 0
+
+        // now record the debuginfo for this position, tagged as a SAFEPOINT infopoint
+        crb.recordInfopoint(codeBufferPos, frameState, InfopointReason.SAFEPOINT);
+    }
+
+    public LIRFrameState getFrameState() { // DeoptimizingOp accessor
+        return frameState;
+    }
+
+    public int getCodeBufferPos() { // DeoptimizingOp accessor; returns -1 if emitCode() has not run yet
+        return codeBufferPos;
+    }
+}
\ No newline at end of file
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java	Wed Apr 16 22:54:48 2014 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java	Thu Apr 17 00:44:32 2014 +0200
@@ -1013,11 +1013,13 @@
     @HotSpotVMField(name = "ThreadShadow::_pending_failed_speculation", type = "oop", get = HotSpotVMField.Type.OFFSET) @Stable public int pendingFailedSpeculationOffset;
 
     @HotSpotVMFlag(name = "UseHSAILDeoptimization") @Stable public boolean useHSAILDeoptimization;
+    @HotSpotVMFlag(name = "UseHSAILSafepoints") @Stable public boolean useHSAILSafepoints;
 
     /**
      * Offsets of Hsail deoptimization fields (defined in gpu_hsail.hpp). Used to propagate
      * exceptions from Hsail back to C++ runtime.
      */
+    @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_notice_safepoints", type = "jint*", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailNoticeSafepointsOffset;
     @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_deopt_save_states[0]", type = "Hsail::HSAILKernelDeoptimization", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailSaveStatesOffset0;
     @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_deopt_save_states[1]", type = "Hsail::HSAILKernelDeoptimization", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailSaveStatesOffset1;
     @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_deopt_occurred", type = "jint", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailDeoptOccurredOffset;
--- a/graal/com.oracle.graal.hsail/src/com/oracle/graal/hsail/HSAIL.java	Wed Apr 16 22:54:48 2014 +0200
+++ b/graal/com.oracle.graal.hsail/src/com/oracle/graal/hsail/HSAIL.java	Thu Apr 17 00:44:32 2014 +0200
@@ -132,7 +132,11 @@
     public static final Register d18 = new Register(82, 18, "d18", CPU);
     public static final Register d19 = new Register(83, 19, "d19", CPU);
     public static final Register d20 = new Register(84, 20, "d20", CPU);
+
     public static final Register threadRegister = d20;
+    public static final Register actionAndReasonReg = s32;
+    public static final Register codeBufferOffsetReg = s33;
+    public static final Register dregOopMapReg = s39;
 
     // @formatter:off
     public static final Register[] cRegisters = {
--- a/graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILControlFlow.java	Wed Apr 16 22:54:48 2014 +0200
+++ b/graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILControlFlow.java	Thu Apr 17 00:44:32 2014 +0200
@@ -129,11 +129,17 @@
         }
     }
 
+    public interface DeoptimizingOp { // common view of LIR ops that jump to the deopt label; implemented by DeoptimizeOp and HSAILHotSpotSafepointOp
+        public LIRFrameState getFrameState(); // frame state attached to the op
+
+        public int getCodeBufferPos(); // assembler position stored when the op emits its code; HSAILHotSpotBackend.prepareHostGraph sorts and keys on it
+    }
+
     /***
      * The ALIVE annotation is so we can get a scratch32 register that does not clobber
      * actionAndReason.
      */
-    public static class DeoptimizeOp extends ReturnOp {
+    public static class DeoptimizeOp extends ReturnOp implements DeoptimizingOp {
 
         @Alive({REG, CONST}) protected Value actionAndReason;
         @State protected LIRFrameState frameState;
@@ -173,6 +179,8 @@
             // debugInfo)
             codeBufferPos = masm.position();
 
+            masm.emitComment("/* HSAIL Deoptimization pos=" + codeBufferPos + ", bci=" + frameState.debugInfo().getBytecodePosition().getBCI() + ", frameState=" + frameState + " */");
+
             // get the bitmap of $d regs that contain references
             ReferenceMap referenceMap = frameState.debugInfo().getReferenceMap();
             for (int dreg = HSAIL.d0.number; dreg <= HSAIL.d15.number; dreg++) {
@@ -181,14 +189,9 @@
                 }
             }
 
-            // here we will by convention use some never-allocated registers to pass to the epilogue
-            // deopt code
-            // todo: define these in HSAIL.java
-            // we need to pass the actionAndReason and the codeBufferPos
-
-            AllocatableValue actionAndReasonReg = HSAIL.s32.asValue(Kind.Int);
-            AllocatableValue codeBufferOffsetReg = HSAIL.s33.asValue(Kind.Int);
-            AllocatableValue dregOopMapReg = HSAIL.s39.asValue(Kind.Int);
+            AllocatableValue actionAndReasonReg = HSAIL.actionAndReasonReg.asValue(Kind.Int);
+            AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(Kind.Int);
+            AllocatableValue dregOopMapReg = HSAIL.dregOopMapReg.asValue(Kind.Int);
             masm.emitMov(Kind.Int, actionAndReasonReg, actionAndReason);
             masm.emitMov(Kind.Int, codeBufferOffsetReg, Constant.forInt(codeBufferPos));
             masm.emitMov(Kind.Int, dregOopMapReg, Constant.forInt(dregOopMap));
--- a/make/linux/makefiles/buildtree.make	Wed Apr 16 22:54:48 2014 +0200
+++ b/make/linux/makefiles/buildtree.make	Thu Apr 17 00:44:32 2014 +0200
@@ -258,6 +258,10 @@
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \
+	echo "$(call gamma-path,altsrc,gpu/ptx/vm) \\"; \
+	echo "$(call gamma-path,commonsrc,gpu/ptx/vm)" \\; \
+	echo "$(call gamma-path,altsrc,gpu/hsail/vm) \\"; \
+	echo "$(call gamma-path,commonsrc,gpu/hsail/vm) \\"; \
 	echo "$(call gamma-path,altsrc,gpu) \\"; \
 	echo "$(call gamma-path,commonsrc,gpu)"; \
 	[ -n "$(CFLAGS_BROWSE)" ] && \
--- a/make/linux/makefiles/vm.make	Wed Apr 16 22:54:48 2014 +0200
+++ b/make/linux/makefiles/vm.make	Thu Apr 17 00:44:32 2014 +0200
@@ -185,8 +185,10 @@
 
 GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/share/vm/graal)
 GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx/vm)
+GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/hsail/vm)
 GRAAL_PATHS += $(HS_COMMON_SRC)/share/vm/graal
 GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx/vm
+GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/hsail/vm
 
 # Include dirs per type.
 Src_Dirs/CORE      := $(CORE_PATHS)
--- a/src/gpu/hsail/vm/gpu_hsail.cpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/gpu/hsail/vm/gpu_hsail.cpp	Thu Apr 17 00:44:32 2014 +0200
@@ -71,6 +71,7 @@
 };
 
 void * Hsail::_device_context = NULL;
+jint   Hsail::_notice_safepoints = false;
 
 Hsail::okra_create_context_func_t  Hsail::_okra_create_context;
 Hsail::okra_create_kernel_func_t   Hsail::_okra_create_kernel;
@@ -122,6 +123,18 @@
 
 static Stats kernelStats;
 
+//static jint in_kernel = 0;
+
+void Hsail::notice_safepoints() { // sets the static flag whose address each HSAILDeoptimizationInfo stores in its _notice_safepoints field
+  _notice_safepoints = true; // NOTE(review): plain store, while the generated kernel code reads the flag with a load-acquire — confirm no release/fence is needed here
+//  if (TraceGPUInteraction) {
+//    tty->print_cr("[HSAIL] Notice safepoint in_kernel=%d", in_kernel);
+//  }
+}
+
+void Hsail::ignore_safepoints() { // counterpart of notice_safepoints(): clears the static flag
+  _notice_safepoints = false; // the safepoint checks emitted into kernels compare this value against 0
+}
 
 void Hsail::register_heap() {
   // After the okra functions are set up and the heap is initialized, register the java heap with HSA
@@ -203,8 +216,7 @@
 }
 
 
-
-jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod *nm, jobject oops_save,
+jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod *nm, jobject oops_save, 
                                                 jobject donor_threads, int allocBytesPerWorkitem, TRAPS) {
   ResourceMark rm(THREAD);
   objArrayOop argsArray = (objArrayOop) JNIHandles::resolve(args);
@@ -258,6 +270,9 @@
 
   // This object sets up the kernel arguments
   HSAILKernelArguments hka((address) kernel, mh->signature(), argsArray, mh->is_static(), e);
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] range=%d", dimX);
+  }
 
   // if any object passed was null, throw an exception here
   // doing this means the kernel code can avoid null checks on the object parameters.
@@ -273,8 +288,12 @@
   // Run the kernel
   bool success = false;
   {
-    TraceTime t1("execute kernel", TraceGPUInteraction);
+    TraceTime t("execute kernel", TraceGPUInteraction);
+
+    //in_kernel = 1;
+    // Run the kernel
     success = _okra_execute_with_range(kernel, dimX);
+    //in_kernel = 0;
   }
 
   // fix up any tlab tops that overflowed
@@ -297,17 +316,23 @@
   }
 
   if (UseHSAILDeoptimization) {
+    kernelStats.incDeopts();
     // check if any workitem requested a deopt
     // currently we only support at most one such workitem
     int deoptcode = e->deopt_occurred();
-    if (deoptcode != 0) {
-      if (deoptcode != 1) {
+    if (deoptcode != 1) {
+      if (deoptcode == 0) {
+        if (TraceGPUInteraction && _notice_safepoints != 0) {
+          tty->print_cr("[HSAIL] observed safepoint during kernel");
+        }
+      } else {
         // error condition detected in deopt code
         char msg[200];
         sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1 * (deoptcode + 1));
         guarantee(deoptcode == 1, msg);
       }
-      kernelStats.incDeopts();
+    } else {
+
       {
         TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction);
         if (TraceGPUInteraction) {
@@ -402,48 +427,47 @@
           tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized);
         }
       }
-
-      {
-        TraceTime t3("handle never-rans", TraceGPUInteraction);
+    }
 
-        // Handle any never_ran workitems if there were any
-        int count_never_ran = 0;
-        bool handleNeverRansHere = true;
-        // turn off verbose trace stuff for javacall arg setup
-        bool savedTraceGPUInteraction = TraceGPUInteraction;
-        TraceGPUInteraction = false;
-        jboolean *never_ran_array = e->never_ran_array();
-        if (handleNeverRansHere) {
-          for (int k = 0; k < dimX; k++) {
-            if (never_ran_array[k]) {
-              // run it as a javaCall
-              KlassHandle methKlass = mh->method_holder();
-              Thread* THREAD = Thread::current();
-              JavaValue result(T_VOID);
-              JavaCallArguments javaArgs;
-              // re-resolve the args_handle here
-              objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args);
-              // This object sets up the javaCall arguments
-              // the way argsArray is set up, this should work for instance methods as well
-              // (the receiver will be the first oop pushed)
-              HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static());
-              if (mh->is_static()) {
-                JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
-              } else {
-                JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
-              }
-              count_never_ran++;
+    // Handle any never_ran workitems if there were any
+    {
+      TraceTime t("handle never-rans ", TraceGPUInteraction);
+      int count_never_ran = 0;
+      bool handleNeverRansHere = true;
+      // turn off verbose trace stuff for javacall arg setup
+      bool savedTraceGPUInteraction = TraceGPUInteraction;
+      TraceGPUInteraction = false;
+      jboolean *never_ran_array = e->never_ran_array();
+      if (handleNeverRansHere) {
+        for (int k = 0; k < dimX; k++) {
+          if (never_ran_array[k]) {
+            // run it as a javaCall
+            KlassHandle methKlass = mh->method_holder();
+            Thread* THREAD = Thread::current();
+            JavaValue result(T_VOID);
+            JavaCallArguments javaArgs;
+            // re-resolve the args_handle here
+            objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args);
+            // This object sets up the javaCall arguments
+            // the way argsArray is set up, this should work for instance methods as well
+            // (the receiver will be the first oop pushed)
+            HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static());
+            if (mh->is_static()) {
+              JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
+            } else {
+              JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
             }
+            count_never_ran++;
           }
-          TraceGPUInteraction = savedTraceGPUInteraction;
-          if (TraceGPUInteraction) {
-            tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran);
-            showRanges(never_ran_array, dimX);
-          }
-        } // end of never-ran handling
-      }
+        }
+        TraceGPUInteraction = savedTraceGPUInteraction;
+        if (TraceGPUInteraction) {
+          tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran);
+          showRanges(never_ran_array, dimX);
+        }
+      } // end of never-ran handling
     }
-    
+
     FREE_C_HEAP_ARRAY(jboolean, e->never_ran_array(), mtInternal);
     delete e;
   }
@@ -547,7 +571,7 @@
   LOOKUP_OKRA_FUNCTION(okra_register_heap, okra_register_heap);
   // if we made it this far, real success
 
-  gpu::initialized_gpu("Okra");
+  Gpu::initialized_gpu(new Hsail());
 
   return true;
 GPU_END
--- a/src/gpu/hsail/vm/gpu_hsail.hpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/gpu/hsail/vm/gpu_hsail.hpp	Thu Apr 17 00:44:32 2014 +0200
@@ -27,11 +27,9 @@
 
 #include "utilities/exceptions.hpp"
 #include "graal/graalEnv.hpp"
-// #include "graal/graalCodeInstaller.hpp"
 #include "gpu_hsail_Frame.hpp"
 
-class Hsail {
-  friend class gpu;
+class Hsail : public Gpu {
 
   public:
   class HSAILKernelDeoptimization {
@@ -58,6 +56,7 @@
   class HSAILDeoptimizationInfo : public ResourceObj {
     friend class VMStructs;
    private:
+    jint* _notice_safepoints;
     jint _deopt_occurred;
     jint _deopt_next_index;
     JavaThread** _donor_threads;
@@ -67,12 +66,12 @@
     HSAILKernelDeoptimization _deopt_save_states[MAX_DEOPT_SAVE_STATES_SIZE];
 
     inline HSAILDeoptimizationInfo() {
+      _notice_safepoints = &Hsail::_notice_safepoints;
       _deopt_occurred = 0;
       _deopt_next_index = 0;
     }
 
     inline jint deopt_occurred() {
-      // Check that hsail did not write in the wrong place
       return _deopt_occurred;
     }
     inline jint num_deopts() { return _deopt_next_index; }
@@ -102,8 +101,8 @@
   static void getNewTlabForDonorThread(ThreadLocalAllocBuffer* tlab, size_t tlabMinHsail);
 
   static jboolean execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod *nm, jobject oopsSave,
-                                                  jobject donorThreads, int allocBytesPerWorkitem, TRAPS);
-
+                                                  jobject donor_threads, int allocBytesPerWorkitem, TRAPS);
+  
   static void register_heap();
 
   static GraalEnv::CodeInstallResult install_code(Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle triggered_deoptimizations);
@@ -113,6 +112,11 @@
   // Registers the implementations for the native methods in HSAILHotSpotBackend
   static bool register_natives(JNIEnv* env);
 
+  virtual const char* name() { return "HSAIL"; }
+
+  virtual void notice_safepoints();
+  virtual void ignore_safepoints();
+
 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
   typedef unsigned long long CUdeviceptr;
 #else
@@ -149,5 +153,8 @@
   
 protected:
   static void* _device_context;
+
+  // true if safepoints are activated
+  static jint _notice_safepoints;
 };
 #endif // GPU_HSAIL_HPP
--- a/src/gpu/hsail/vm/vmStructs_hsail.hpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/gpu/hsail/vm/vmStructs_hsail.hpp	Thu Apr 17 00:44:32 2014 +0200
@@ -41,6 +41,7 @@
   nonstatic_field(Hsail::HSAILKernelDeoptimization, _actionAndReason,                           jint)                                 \
   nonstatic_field(Hsail::HSAILKernelDeoptimization, _first_frame,                               HSAILFrame)                           \
                                                                                                                                       \
+  nonstatic_field(Hsail::HSAILDeoptimizationInfo, _notice_safepoints,                      jint*)                                     \
   nonstatic_field(Hsail::HSAILDeoptimizationInfo, _deopt_occurred,                         jint)                                      \
   nonstatic_field(Hsail::HSAILDeoptimizationInfo, _deopt_next_index,                       jint)                                      \
   nonstatic_field(Hsail::HSAILDeoptimizationInfo, _donor_threads,                          JavaThread**)                              \
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Thu Apr 17 00:44:32 2014 +0200
@@ -271,7 +271,7 @@
     tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device);
   }
 
-  gpu::initialized_gpu(device_name);
+  Gpu::initialized_gpu(new Ptx());
 
   return true;
 GPU_END
--- a/src/gpu/ptx/vm/gpu_ptx.hpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.hpp	Thu Apr 17 00:44:32 2014 +0200
@@ -93,7 +93,7 @@
 
 #define GRAAL_SUPPORTED_COMPUTE_CAPABILITY_VERSION 3.0
 
-class Ptx {
+class Ptx : public Gpu {
   friend class PtxCall;
 
 private:
@@ -120,6 +120,9 @@
   static int ncores(int major, int minor);
 
 public:
+
+  virtual const char* name() { return "PTX"; }
+
   // Registers the implementations for the native methods in PTXHotSpotBackend
   static bool register_natives(JNIEnv* env);
 
--- a/src/os/bsd/vm/gpu_bsd.cpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/os/bsd/vm/gpu_bsd.cpp	Thu Apr 17 00:44:32 2014 +0200
@@ -27,7 +27,7 @@
 #include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/ostream.hpp"
 
-jobject gpu::probe_gpus(JNIEnv* env) {
+jobject Gpu::probe_gpus(JNIEnv* env) {
 #ifdef __APPLE__
   /*
    * Let the CUDA driver initialization be the gate to GPU for now, pending
--- a/src/os/linux/vm/gpu_linux.cpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/os/linux/vm/gpu_linux.cpp	Thu Apr 17 00:44:32 2014 +0200
@@ -38,7 +38,7 @@
 
 #define PCI_DRIVER_NAME_START_POS 255
 
-jobject gpu::probe_gpus(JNIEnv* env) {
+jobject Gpu::probe_gpus(JNIEnv* env) {
   bool hsail = false;
   bool ptx = false;
 
--- a/src/os/windows/vm/gpu_windows.cpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/os/windows/vm/gpu_windows.cpp	Thu Apr 17 00:44:32 2014 +0200
@@ -27,7 +27,7 @@
 #include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/ostream.hpp"
 
-jobject gpu::probe_gpus(JNIEnv* env) {
+jobject Gpu::probe_gpus(JNIEnv* env) {
   // TODO: add detection of PTX/NVidia
   if (Hsail::register_natives(env)) {
     return env->NewStringUTF("HSAIL");
--- a/src/share/vm/graal/graalCompilerToVM.cpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/share/vm/graal/graalCompilerToVM.cpp	Thu Apr 17 00:44:32 2014 +0200
@@ -757,7 +757,7 @@
 
 C2V_ENTRY(jobject, getGPUs, (JNIEnv *env, jobject))
 #if defined(TARGET_OS_FAMILY_bsd) || defined(TARGET_OS_FAMILY_linux) || defined(TARGET_OS_FAMILY_windows)
-  return gpu::probe_gpus(env);
+  return Gpu::probe_gpus(env);
 #else
   return env->NewStringUTF("");
 #endif
--- a/src/share/vm/graal/graalGlobals.hpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/share/vm/graal/graalGlobals.hpp	Thu Apr 17 00:44:32 2014 +0200
@@ -88,6 +88,9 @@
   product(bool, UseHSAILDeoptimization, true,                               \
           "Code gen and runtime support for deoptimizing HSAIL kernels")    \
                                                                             \
+  product(bool, UseHSAILSafepoints, true,                                   \
+          "Code gen and runtime support for safepoints in HSAIL kernels")   \
+                                                                            \
   product(bool, GPUOffload, false,                                          \
           "Offload execution to GPU whenever possible")                     \
                                                                             \
--- a/src/share/vm/runtime/gpu.cpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/share/vm/runtime/gpu.cpp	Thu Apr 17 00:44:32 2014 +0200
@@ -26,11 +26,29 @@
 #include "runtime/gpu.hpp"
 #include "runtime/handles.hpp"
 
-int gpu::_initialized_gpus = 0;
+int  Gpu::_initialized_gpus_count = 0;
+Gpu* Gpu::_initialized_gpus[MAX_GPUS];
 
-void gpu::initialized_gpu(const char* name) {
-    _initialized_gpus++;
-    if (TraceGPUInteraction) {
-      tty->print_cr("[GPU] registered initialization of %s (total initialized: %d)", name, _initialized_gpus);
+// Records a newly initialized GPU backend so that it receives later
+// safepoint notifications. Fails a guarantee if MAX_GPUS backends are
+// already registered.
+void Gpu::initialized_gpu(Gpu* gpu) {
+  // GPUs are always initialized on the same thread so no need for locking
+  guarantee(_initialized_gpus_count < MAX_GPUS, "oob");
+  _initialized_gpus[_initialized_gpus_count++] = gpu;
+  if (TraceGPUInteraction) {
+    // Log the count (not the array) after the increment so the total includes this GPU
+    tty->print_cr("[GPU] registered initialization of %s (total initialized: %d)", gpu->name(), _initialized_gpus_count);
+  }
+}
+
+// Forwards a safepoint begin or end event to every registered GPU backend.
+void Gpu::safepoint_event(SafepointEvent event) {
+  for (int i = 0; i < _initialized_gpus_count; i++) {
+    if (event == SafepointBegin) {
+      _initialized_gpus[i]->notice_safepoints();
+    } else {
+      _initialized_gpus[i]->ignore_safepoints();
     }
+  }
 }
--- a/src/share/vm/runtime/gpu.hpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/share/vm/runtime/gpu.hpp	Thu Apr 17 00:44:32 2014 +0200
@@ -29,21 +29,41 @@
 #include "oops/symbol.hpp"
 #include "utilities/array.hpp"
 
+#define MAX_GPUS 2
+
 // Defines the interface to the graphics processor(s).
-class gpu : AllStatic {
+class Gpu {
  private:
-  static int _initialized_gpus;  // number of initialize GPU devices
+  static int _initialized_gpus_count;
+  static Gpu* _initialized_gpus[MAX_GPUS];
 
  public:
 
   // Notification of a GPU device that has been initialized.
-  static void initialized_gpu(const char* name);
+  static void initialized_gpu(Gpu* gpu);
 
   // Gets a comma separated list of supported GPU architecture names.
   static jobject probe_gpus(JNIEnv* env);
   
   // Gets the number of GPU devices that have been initialized.
-  static int initialized_gpus() { return _initialized_gpus; }
+  static int initialized_gpus() { return _initialized_gpus_count; }
+
+  enum SafepointEvent {
+    SafepointBegin,
+    SafepointEnd
+  };
+
+  // Called when a safepoint begins or ends; notifies every initialized GPU.
+  static void safepoint_event(SafepointEvent event);
+
+  // Name of this GPU
+  virtual const char* name() = 0;
+
+  // Called when a safepoint has been activated.
+  virtual void notice_safepoints() {};
+
+  // Called when a safepoint has been deactivated.
+  virtual void ignore_safepoints() {};
 };
 
 #endif // SHARE_VM_RUNTIME_GPU_HPP
--- a/src/share/vm/runtime/safepoint.cpp	Wed Apr 16 22:54:48 2014 +0200
+++ b/src/share/vm/runtime/safepoint.cpp	Thu Apr 17 00:44:32 2014 +0200
@@ -39,6 +39,7 @@
 #include "runtime/compilationPolicy.hpp"
 #include "runtime/deoptimization.hpp"
 #include "runtime/frame.inline.hpp"
+#include "runtime/gpu.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/osThread.hpp"
@@ -205,6 +206,12 @@
     os::make_polling_page_unreadable();
   }
 
+#ifdef GRAAL
+  if (UseHSAILSafepoints) {
+    Gpu::safepoint_event(Gpu::SafepointBegin);
+  }
+#endif
+  
   // Consider using active_processor_count() ... but that call is expensive.
   int ncpus = os::processor_count() ;
 
@@ -438,6 +445,12 @@
   // Remove safepoint check from interpreter
   Interpreter::ignore_safepoints();
 
+#ifdef GRAAL
+  if (UseHSAILSafepoints) {
+    Gpu::safepoint_event(Gpu::SafepointEnd);
+  }
+#endif
+
   {
     MutexLocker mu(Safepoint_lock);