changeset 15190:039d8902bdb8

Merge.
author Thomas Wuerthinger <thomas.wuerthinger@oracle.com>
date Thu, 17 Apr 2014 13:17:25 +0200
parents 19a98af07b63 (current diff) bbd2583f4f1d (diff)
children 04e6706df64c 201f6858a4f3
files
diffstat 41 files changed, 513 insertions(+), 221 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore	Thu Apr 17 13:17:16 2014 +0200
+++ b/.hgignore	Thu Apr 17 13:17:25 2014 +0200
@@ -5,6 +5,7 @@
 ^mx/netbeans-config.zip
 ^mx/netbeans-config-libs.zip
 ^mx/eclipse-launches
+^mx/jmh
 ^mx/currentAnnotationProcessors
 ^mx/ecj.jar
 ^mx/includes
--- a/graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/CompilationResult.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/CompilationResult.java	Thu Apr 17 13:17:25 2014 +0200
@@ -368,7 +368,7 @@
      * starting address of the table. This type of table maybe generated when translating a
      * multi-way branch based on a key value from a dense value set (e.g. the {@code tableswitch}
      * JVM instruction).
-     * 
+     *
      * The table is indexed by the contiguous range of integers from {@link #low} to {@link #high}
      * inclusive.
      */
@@ -460,7 +460,7 @@
     private final List<ExceptionHandler> exceptionHandlers = new ArrayList<>();
     private final List<Mark> marks = new ArrayList<>();
 
-    private int frameSize = -1;
+    private int totalFrameSize = -1;
     private int customStackAreaOffset = -1;
     private int registerRestoreEpilogueOffset = -1;
 
@@ -525,18 +525,29 @@
     }
 
     /**
-     * Sets the frame size in bytes. Does not include the return address pushed onto the stack, if
-     * any.
-     * 
+     * The total frame size of the method in bytes. This includes the return address pushed onto the
+     * stack, if any.
+     *
+     * @return the frame size
+     */
+    public int getTotalFrameSize() {
+        assert totalFrameSize != -1 : "frame size not yet initialized!";
+        return totalFrameSize;
+    }
+
+    /**
+     * Sets the total frame size in bytes. This includes the return address pushed onto the stack,
+     * if any.
+     *
      * @param size the size of the frame in bytes
      */
-    public void setFrameSize(int size) {
-        frameSize = size;
+    public void setTotalFrameSize(int size) {
+        totalFrameSize = size;
     }
 
     /**
      * Sets the machine that has been generated by the compiler.
-     * 
+     *
      * @param code the machine code generated
      * @param size the size of the machine code
      */
@@ -548,7 +559,7 @@
     /**
      * Records a reference to the data section in the code section (e.g. to load an integer or
      * floating point constant).
-     * 
+     *
      * @param codePos the position in the code where the data reference occurs
      * @param data the data that is referenced
      */
@@ -559,7 +570,7 @@
 
     /**
      * Records a reference to an inlined constant in the code section (e.g. to load a constant oop).
-     * 
+     *
      * @param codePos the position in the code where the inlined constant occurs
      * @param data the data that is referenced
      */
@@ -570,7 +581,7 @@
 
     /**
      * Records a call in the code array.
-     * 
+     *
      * @param codePos the position of the call in the code array
      * @param size the size of the call instruction
      * @param target the being called
@@ -584,7 +595,7 @@
 
     /**
      * Records an exception handler for this method.
-     * 
+     *
      * @param codePos the position in the code that is covered by the handler
      * @param handlerPos the position of the handler
      */
@@ -594,7 +605,7 @@
 
     /**
      * Records an infopoint in the code array.
-     * 
+     *
      * @param codePos the position of the infopoint in the code array
      * @param debugInfo the debug info for the infopoint
      */
@@ -604,10 +615,10 @@
 
     /**
      * Records a custom infopoint in the code section.
-     * 
+     *
      * Compiler implementations can use this method to record non-standard infopoints, which are not
      * handled by the dedicated methods like {@link #recordCall}.
-     * 
+     *
      * @param infopoint the infopoint to record, usually a derived class from {@link Infopoint}
      */
     public void addInfopoint(Infopoint infopoint) {
@@ -621,7 +632,7 @@
 
     /**
      * Records an instruction mark within this method.
-     * 
+     *
      * @param codePos the position in the code that is covered by the handler
      * @param markId the identifier for this mark
      * @param references an array of other marks that this mark references
@@ -636,7 +647,7 @@
      * Allows a method to specify the offset of the epilogue that restores the callee saved
      * registers. Must be called iff the method is a callee saved method and stores callee registers
      * on the stack.
-     * 
+     *
      * @param registerRestoreEpilogueOffset the offset in the machine code where the epilogue begins
      */
     public void setRegisterRestoreEpilogueOffset(int registerRestoreEpilogueOffset) {
@@ -645,16 +656,6 @@
     }
 
     /**
-     * The frame size of the method in bytes.
-     * 
-     * @return the frame size
-     */
-    public int getFrameSize() {
-        assert frameSize != -1 : "frame size not yet initialized!";
-        return frameSize;
-    }
-
-    /**
      * @return the code offset of the start of the epilogue that restores all callee saved
      *         registers, or -1 if this is not a callee saved method
      */
@@ -664,7 +665,7 @@
 
     /**
      * Offset in bytes for the custom stack area (relative to sp).
-     * 
+     *
      * @return the offset in bytes
      */
     public int getCustomStackAreaOffset() {
--- a/graal/com.oracle.graal.api.meta/src/com/oracle/graal/api/meta/Constant.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.api.meta/src/com/oracle/graal/api/meta/Constant.java	Thu Apr 17 13:17:25 2014 +0200
@@ -138,7 +138,11 @@
     public abstract double asDouble();
 
     public String toValueString() {
-        return getKind().format(asBoxedPrimitive());
+        if (getKind() == Kind.Illegal) {
+            return "illegal";
+        } else {
+            return getKind().format(asBoxedPrimitive());
+        }
     }
 
     @Override
--- a/graal/com.oracle.graal.api.meta/src/com/oracle/graal/api/meta/PrimitiveConstant.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.api.meta/src/com/oracle/graal/api/meta/PrimitiveConstant.java	Thu Apr 17 13:17:25 2014 +0200
@@ -104,7 +104,7 @@
             case Double:
                 return Double.valueOf(asDouble());
             default:
-                throw new IllegalArgumentException();
+                throw new IllegalArgumentException("unexpected kind " + getKind());
         }
     }
 
--- a/graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/HSAILNodeLIRBuilder.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/HSAILNodeLIRBuilder.java	Thu Apr 17 13:17:25 2014 +0200
@@ -25,7 +25,6 @@
 
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.compiler.gen.*;
-import com.oracle.graal.debug.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.hsail.*;
@@ -63,11 +62,6 @@
     }
 
     @Override
-    public void visitSafepointNode(SafepointNode i) {
-        Debug.log("visitSafePointNode unimplemented");
-    }
-
-    @Override
     public void emitNullCheck(ValueNode v, DeoptimizingNode deopting) {
         assert v.stamp() instanceof ObjectStamp;
         Variable obj = newVariable(Kind.Object);
--- a/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/GraalCompilerTest.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/GraalCompilerTest.java	Thu Apr 17 13:17:25 2014 +0200
@@ -169,7 +169,7 @@
     protected void assertEquals(StructuredGraph expected, StructuredGraph graph, boolean excludeVirtual, boolean checkConstants) {
         String expectedString = getCanonicalGraphString(expected, excludeVirtual, checkConstants);
         String actualString = getCanonicalGraphString(graph, excludeVirtual, checkConstants);
-        String mismatchString = "mismatch in graphs:\n========= expected =========\n" + expectedString + "\n\n========= actual =========\n" + actualString;
+        String mismatchString = "mismatch in graphs:\n========= expected (" + expected + ") =========\n" + expectedString + "\n\n========= actual (" + graph + ") =========\n" + actualString;
 
         if (!excludeVirtual && getNodeCountExcludingUnusedConstants(expected) != getNodeCountExcludingUnusedConstants(graph)) {
             Debug.dump(expected, "Node count not matching - expected");
--- a/graal/com.oracle.graal.graph/src/com/oracle/graal/graph/NodeClass.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.graph/src/com/oracle/graal/graph/NodeClass.java	Thu Apr 17 13:17:25 2014 +0200
@@ -816,7 +816,7 @@
     }
 
     public boolean valueEqual(Node a, Node b) {
-        if (!canGVN || a.getClass() != b.getClass()) {
+        if (a.getClass() != b.getClass()) {
             return a == b;
         }
         for (int i = 0; i < dataOffsets.length; ++i) {
--- a/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64HotSpotBackend.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64HotSpotBackend.java	Thu Apr 17 13:17:25 2014 +0200
@@ -225,7 +225,7 @@
         Assembler masm = createAssembler(frameMap);
         HotSpotFrameContext frameContext = new HotSpotFrameContext(stub != null, omitFrame);
         CompilationResultBuilder crb = factory.createBuilder(getCodeCache(), getForeignCalls(), frameMap, masm, frameContext, compilationResult);
-        crb.setFrameSize(frameMap.frameSize());
+        crb.setTotalFrameSize(frameMap.totalFrameSize());
         StackSlot deoptimizationRescueSlot = gen.getDeoptimizationRescueSlot();
         if (deoptimizationRescueSlot != null && stub == null) {
             crb.compilationResult.setCustomStackAreaOffset(frameMap.offsetForStackSlot(deoptimizationRescueSlot));
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Thu Apr 17 13:17:25 2014 +0200
@@ -61,7 +61,7 @@
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.asm.*;
 import com.oracle.graal.lir.hsail.*;
-import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizeOp;
+import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizingOp;
 import com.oracle.graal.lir.hsail.HSAILMove.AtomicGetAndAddOp;
 import com.oracle.graal.nodes.*;
 import com.oracle.graal.nodes.StructuredGraph.GuardsStage;
@@ -294,7 +294,7 @@
         assert hsailCode.getDataReferences().isEmpty();
 
         // from host code
-        result.setFrameSize(hostCode.getFrameSize());
+        result.setTotalFrameSize(hostCode.getTotalFrameSize());
         result.setCustomStackAreaOffset(hostCode.getCustomStackAreaOffset());
         result.setRegisterRestoreEpilogueOffset(hostCode.getRegisterRestoreEpilogueOffset());
         result.setTargetCode(hostCode.getTargetCode(), hostCode.getTargetCodeSize());
@@ -432,7 +432,7 @@
         // save lirGen for later use by setHostGraph
         CompilationResultBuilder crb = new HSAILCompilationResultBuilder(getCodeCache(), getForeignCalls(), frameMap, masm, frameContext, compilationResult,
                         (HSAILHotSpotLIRGenerationResult) lirGenRes);
-        crb.setFrameSize(frameMap.frameSize());
+        crb.setTotalFrameSize(frameMap.totalFrameSize());
         return crb;
     }
 
@@ -443,6 +443,7 @@
 
         HotSpotVMConfig config = getRuntime().getConfig();
         boolean useHSAILDeoptimization = config.useHSAILDeoptimization;
+        boolean useHSAILSafepoints = config.useHSAILSafepoints;
 
         // see what graph nodes we have to see if we are using the thread register
         // if not, we don't have to emit the code that sets that up
@@ -576,13 +577,23 @@
 
             // Aliases for d17
             RegisterValue d17_donorThreadIndex = HSAIL.d17.asValue(wordKind);
+            RegisterValue d17_safepointFlagAddrIndex = d17_donorThreadIndex;
 
             // Aliases for s34
             RegisterValue s34_deoptOccurred = HSAIL.s34.asValue(Kind.Int);
             RegisterValue s34_donorThreadIndex = s34_deoptOccurred;
 
             asm.emitLoadKernelArg(d16_deoptInfo, asm.getDeoptInfoName(), "u64");
-            asm.emitComment("// Check if a deopt has occurred and abort if true before doing any work");
+            asm.emitComment("// Check if a deopt or safepoint has occurred and abort if true before doing any work");
+
+            if (useHSAILSafepoints) {
+                // Load address of _notice_safepoints field
+                asm.emitLoad(wordKind, d17_safepointFlagAddrIndex, new HSAILAddressValue(wordKind, d16_deoptInfo, config.hsailNoticeSafepointsOffset).toAddress());
+                // Load int value from that field
+                asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(Kind.Int, d17_safepointFlagAddrIndex, 0).toAddress());
+                asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false);
+                asm.cbr(deoptInProgressLabel);
+            }
             asm.emitLoadAcquire(s34_deoptOccurred, new HSAILAddressValue(Kind.Int, d16_deoptInfo, config.hsailDeoptOccurredOffset).toAddress());
             asm.emitCompare(Kind.Int, s34_deoptOccurred, Constant.forInt(0), "ne", false, false);
             asm.cbr(deoptInProgressLabel);
@@ -693,11 +704,11 @@
             AllocatableValue waveMathScratch1 = HSAIL.d18.asValue(wordKind);
             AllocatableValue waveMathScratch2 = HSAIL.d19.asValue(wordKind);
 
-            AllocatableValue actionAndReasonReg = HSAIL.s32.asValue(Kind.Int);
-            AllocatableValue codeBufferOffsetReg = HSAIL.s33.asValue(Kind.Int);
+            AllocatableValue actionAndReasonReg = HSAIL.actionAndReasonReg.asValue(Kind.Int);
+            AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(Kind.Int);
             AllocatableValue scratch32 = HSAIL.s34.asValue(Kind.Int);
             AllocatableValue workidreg = HSAIL.s35.asValue(Kind.Int);
-            AllocatableValue dregOopMapReg = HSAIL.s39.asValue(Kind.Int);
+            AllocatableValue dregOopMapReg = HSAIL.dregOopMapReg.asValue(Kind.Int);
 
             HSAILAddress deoptNextIndexAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToDeoptNextIndex).toAddress();
             HSAILAddress neverRanArrayAddr = new HSAILAddressValue(Kind.Int, scratch64, offsetToNeverRanArray).toAddress();
@@ -833,7 +844,7 @@
         compilationResult.setHostGraph(prepareHostGraph(method, lirGenRes.getDeopts(), getProviders(), config));
     }
 
-    private static StructuredGraph prepareHostGraph(ResolvedJavaMethod method, List<DeoptimizeOp> deopts, HotSpotProviders providers, HotSpotVMConfig config) {
+    private static StructuredGraph prepareHostGraph(ResolvedJavaMethod method, List<DeoptimizingOp> deopts, HotSpotProviders providers, HotSpotVMConfig config) {
         if (deopts.isEmpty()) {
             return null;
         }
@@ -847,12 +858,12 @@
         int[] keySuccessors = new int[deopts.size() + 1];
         double[] keyProbabilities = new double[deopts.size() + 1];
         int i = 0;
-        Collections.sort(deopts, new Comparator<DeoptimizeOp>() {
-            public int compare(DeoptimizeOp o1, DeoptimizeOp o2) {
+        Collections.sort(deopts, new Comparator<DeoptimizingOp>() {
+            public int compare(DeoptimizingOp o1, DeoptimizingOp o2) {
                 return o1.getCodeBufferPos() - o2.getCodeBufferPos();
             }
         });
-        for (DeoptimizeOp deopt : deopts) {
+        for (DeoptimizingOp deopt : deopts) {
             keySuccessors[i] = i;
             keyProbabilities[i] = 1.0 / deopts.size();
             keys[i] = deopt.getCodeBufferPos();
@@ -881,7 +892,7 @@
         return BeginNode.begin(vmError);
     }
 
-    private static BeginNode createHostDeoptBranch(DeoptimizeOp deopt, ParameterNode hsailFrame, ValueNode reasonAndAction, ValueNode speculation, HotSpotProviders providers, HotSpotVMConfig config) {
+    private static BeginNode createHostDeoptBranch(DeoptimizingOp deopt, ParameterNode hsailFrame, ValueNode reasonAndAction, ValueNode speculation, HotSpotProviders providers, HotSpotVMConfig config) {
         BeginNode branch = hsailFrame.graph().add(new BeginNode());
         DynamicDeoptimizeNode deoptimization = hsailFrame.graph().add(new DynamicDeoptimizeNode(reasonAndAction, speculation));
         deoptimization.setStateBefore(createFrameState(deopt.getFrameState().topFrame, hsailFrame, providers, config));
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerationResult.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerationResult.java	Thu Apr 17 13:17:25 2014 +0200
@@ -26,21 +26,21 @@
 
 import com.oracle.graal.compiler.gen.*;
 import com.oracle.graal.lir.*;
-import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizeOp;
+import com.oracle.graal.lir.hsail.HSAILControlFlow.DeoptimizingOp;
 
 public class HSAILHotSpotLIRGenerationResult extends LIRGenerationResultBase {
 
-    private List<DeoptimizeOp> deopts = new ArrayList<>();
+    private List<DeoptimizingOp> deopts = new ArrayList<>();
 
     public HSAILHotSpotLIRGenerationResult(LIR lir, FrameMap frameMap) {
         super(lir, frameMap);
     }
 
-    public List<DeoptimizeOp> getDeopts() {
+    public List<DeoptimizingOp> getDeopts() {
         return deopts;
     }
 
-    public void addDeopt(DeoptimizeOp deopt) {
+    public void addDeopt(DeoptimizingOp deopt) {
         deopts.add(deopt);
     }
 
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotNodeLIRBuilder.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotNodeLIRBuilder.java	Thu Apr 17 13:17:25 2014 +0200
@@ -28,6 +28,7 @@
 import com.oracle.graal.asm.*;
 import com.oracle.graal.compiler.gen.*;
 import com.oracle.graal.compiler.hsail.*;
+import com.oracle.graal.debug.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.hotspot.*;
 import com.oracle.graal.hotspot.HotSpotVMConfig.CompressEncoding;
@@ -111,6 +112,19 @@
     }
 
     @Override
+    public void visitSafepointNode(SafepointNode i) {
+        HotSpotVMConfig config = getGen().config;
+        if (config.useHSAILSafepoints == true) {
+            LIRFrameState info = gen.state(i);
+            HSAILHotSpotSafepointOp safepoint = new HSAILHotSpotSafepointOp(info, config, this);
+            ((HSAILHotSpotLIRGenerationResult) getGen().getResult()).addDeopt(safepoint);
+            append(safepoint);
+        } else {
+            Debug.log("HSAIL safepoints turned off");
+        }
+    }
+
+    @Override
     public void emitPrefetchAllocate(ValueNode address, ValueNode distance) {
         // nop
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotSafepointOp.java	Thu Apr 17 13:17:25 2014 +0200
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.hotspot.hsail;
+
+import static com.oracle.graal.api.code.ValueUtil.*;
+
+import com.oracle.graal.api.code.*;
+import com.oracle.graal.api.meta.*;
+import com.oracle.graal.asm.hsail.*;
+import com.oracle.graal.hotspot.*;
+import com.oracle.graal.hsail.*;
+import com.oracle.graal.lir.*;
+import com.oracle.graal.lir.asm.*;
+import com.oracle.graal.lir.hsail.*;
+import com.oracle.graal.nodes.spi.*;
+
+/**
+ * Emits a safepoint deoptimization from HSA back to CPU.
+ */
+@Opcode("SAFEPOINT")
+public class HSAILHotSpotSafepointOp extends HSAILLIRInstruction implements HSAILControlFlow.DeoptimizingOp {
+    private Constant actionAndReason;
+    @State protected LIRFrameState frameState;
+    protected int codeBufferPos = -1;
+    protected int dregOopMap = 0;
+    final int offsetToNoticeSafepoints;
+
+    public HSAILHotSpotSafepointOp(LIRFrameState state, HotSpotVMConfig config, NodeLIRBuilderTool tool) {
+        actionAndReason = tool.getLIRGeneratorTool().getMetaAccess().encodeDeoptActionAndReason(DeoptimizationAction.None, DeoptimizationReason.None, 0);
+        frameState = state;
+        offsetToNoticeSafepoints = config.hsailNoticeSafepointsOffset;
+    }
+
+    @Override
+    public void emitCode(CompilationResultBuilder crb, HSAILAssembler masm) {
+
+        // get a unique codeBuffer position
+        // when we save our state, we will save this as well (it can be used as a key to get the
+        // debugInfo)
+        codeBufferPos = masm.position();
+
+        masm.emitComment(" /* HSAIL safepoint bci=" + frameState.debugInfo().getBytecodePosition().getBCI() + ", frameState=" + frameState + " */");
+        String afterSafepointLabel = "@LAfterSafepoint_at_pos_" + codeBufferPos;
+
+        AllocatableValue scratch64 = HSAIL.d16.asValue(Kind.Object);
+        AllocatableValue spAddrReg = HSAIL.d17.asValue(Kind.Object);
+        AllocatableValue scratch32 = HSAIL.s34.asValue(Kind.Int);
+        masm.emitLoadKernelArg(scratch64, masm.getDeoptInfoName(), "u64");
+
+        // Build address of noticeSafepoints field
+        HSAILAddress noticeSafepointsAddr = new HSAILAddressValue(Kind.Object, scratch64, offsetToNoticeSafepoints).toAddress();
+        masm.emitLoad(Kind.Object, spAddrReg, noticeSafepointsAddr);
+
+        // Load int value from that field
+        HSAILAddress noticeSafepointsIntAddr = new HSAILAddressValue(Kind.Int, spAddrReg, 0).toAddress();
+        masm.emitLoadAcquire(scratch32, noticeSafepointsIntAddr);
+        masm.emitCompare(Kind.Int, scratch32, Constant.forInt(0), "eq", false, false);
+        masm.cbr(afterSafepointLabel);
+
+        BytecodeFrame frame = frameState.debugInfo().frame();
+        for (int i = 0; i < frame.numLocals + frame.numStack; i++) {
+            Value val = frame.values[i];
+            if (isLegal(val) && isRegister(val)) {
+                Register reg = asRegister(val);
+                if (val.getKind() == Kind.Object) {
+                    dregOopMap |= 1 << (reg.encoding());
+                }
+            }
+        }
+
+        AllocatableValue actionAndReasonReg = HSAIL.actionAndReasonReg.asValue(Kind.Int);
+        AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(Kind.Int);
+        AllocatableValue dregOopMapReg = HSAIL.dregOopMapReg.asValue(Kind.Int);
+        masm.emitMov(Kind.Int, actionAndReasonReg, actionAndReason);
+        masm.emitMov(Kind.Int, codeBufferOffsetReg, Constant.forInt(codeBufferPos));
+        masm.emitMov(Kind.Int, dregOopMapReg, Constant.forInt(dregOopMap));
+        masm.emitJumpToLabelName(masm.getDeoptLabelName());
+
+        masm.emitString0(afterSafepointLabel + ":\n");
+
+        // now record the debuginfo
+        crb.recordInfopoint(codeBufferPos, frameState, InfopointReason.SAFEPOINT);
+    }
+
+    public LIRFrameState getFrameState() {
+        return frameState;
+    }
+
+    public int getCodeBufferPos() {
+        return codeBufferPos;
+    }
+}
\ No newline at end of file
--- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackend.java	Thu Apr 17 13:17:25 2014 +0200
@@ -339,7 +339,7 @@
         Assembler masm = createAssembler(frameMap);
         PTXFrameContext frameContext = new PTXFrameContext();
         CompilationResultBuilder crb = factory.createBuilder(getCodeCache(), getForeignCalls(), frameMap, masm, frameContext, compilationResult);
-        crb.setFrameSize(0);
+        crb.setTotalFrameSize(0);
         return crb;
     }
 
--- a/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotBackend.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotBackend.java	Thu Apr 17 13:17:25 2014 +0200
@@ -174,7 +174,7 @@
         // On SPARC we always use stack frames.
         HotSpotFrameContext frameContext = new HotSpotFrameContext(stub != null);
         CompilationResultBuilder crb = factory.createBuilder(getProviders().getCodeCache(), getProviders().getForeignCalls(), frameMap, masm, frameContext, compilationResult);
-        crb.setFrameSize(frameMap.frameSize());
+        crb.setTotalFrameSize(frameMap.totalFrameSize());
         StackSlot deoptimizationRescueSlot = gen.getDeoptimizationRescueSlot();
         if (deoptimizationRescueSlot != null && stub == null) {
             crb.compilationResult.setCustomStackAreaOffset(frameMap.offsetForStackSlot(deoptimizationRescueSlot));
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java	Thu Apr 17 13:17:25 2014 +0200
@@ -1013,11 +1013,13 @@
     @HotSpotVMField(name = "ThreadShadow::_pending_failed_speculation", type = "oop", get = HotSpotVMField.Type.OFFSET) @Stable public int pendingFailedSpeculationOffset;
 
     @HotSpotVMFlag(name = "UseHSAILDeoptimization") @Stable public boolean useHSAILDeoptimization;
+    @HotSpotVMFlag(name = "UseHSAILSafepoints") @Stable public boolean useHSAILSafepoints;
 
     /**
      * Offsets of Hsail deoptimization fields (defined in gpu_hsail.hpp). Used to propagate
      * exceptions from Hsail back to C++ runtime.
      */
+    @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_notice_safepoints", type = "jint*", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailNoticeSafepointsOffset;
     @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_deopt_save_states[0]", type = "Hsail::HSAILKernelDeoptimization", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailSaveStatesOffset0;
     @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_deopt_save_states[1]", type = "Hsail::HSAILKernelDeoptimization", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailSaveStatesOffset1;
     @HotSpotVMField(name = "Hsail::HSAILDeoptimizationInfo::_deopt_occurred", type = "jint", get = HotSpotVMField.Type.OFFSET) @Stable public int hsailDeoptOccurredOffset;
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/nfi/HotSpotNativeFunctionHandle.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/nfi/HotSpotNativeFunctionHandle.java	Thu Apr 17 13:17:25 2014 +0200
@@ -63,7 +63,7 @@
         assert checkArgs(args);
         try {
             traceCall(args);
-            Object res = code.executeVarargs(args);
+            Object res = code.executeVarargs(args, null, null);
             traceResult(res);
             return res;
         } catch (InvalidInstalledCodeException e) {
--- a/graal/com.oracle.graal.hsail/src/com/oracle/graal/hsail/HSAIL.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.hsail/src/com/oracle/graal/hsail/HSAIL.java	Thu Apr 17 13:17:25 2014 +0200
@@ -132,7 +132,11 @@
     public static final Register d18 = new Register(82, 18, "d18", CPU);
     public static final Register d19 = new Register(83, 19, "d19", CPU);
     public static final Register d20 = new Register(84, 20, "d20", CPU);
+
     public static final Register threadRegister = d20;
+    public static final Register actionAndReasonReg = s32;
+    public static final Register codeBufferOffsetReg = s33;
+    public static final Register dregOopMapReg = s39;
 
     // @formatter:off
     public static final Register[] cRegisters = {
--- a/graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILControlFlow.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILControlFlow.java	Thu Apr 17 13:17:25 2014 +0200
@@ -129,11 +129,17 @@
         }
     }
 
+    public interface DeoptimizingOp {
+        public LIRFrameState getFrameState();
+
+        public int getCodeBufferPos();
+    }
+
     /***
      * The ALIVE annotation is so we can get a scratch32 register that does not clobber
      * actionAndReason.
      */
-    public static class DeoptimizeOp extends ReturnOp {
+    public static class DeoptimizeOp extends ReturnOp implements DeoptimizingOp {
 
         @Alive({REG, CONST}) protected Value actionAndReason;
         @State protected LIRFrameState frameState;
@@ -173,6 +179,8 @@
             // debugInfo)
             codeBufferPos = masm.position();
 
+            masm.emitComment("/* HSAIL Deoptimization pos=" + codeBufferPos + ", bci=" + frameState.debugInfo().getBytecodePosition().getBCI() + ", frameState=" + frameState + " */");
+
             // get the bitmap of $d regs that contain references
             ReferenceMap referenceMap = frameState.debugInfo().getReferenceMap();
             for (int dreg = HSAIL.d0.number; dreg <= HSAIL.d15.number; dreg++) {
@@ -181,14 +189,9 @@
                 }
             }
 
-            // here we will by convention use some never-allocated registers to pass to the epilogue
-            // deopt code
-            // todo: define these in HSAIL.java
-            // we need to pass the actionAndReason and the codeBufferPos
-
-            AllocatableValue actionAndReasonReg = HSAIL.s32.asValue(Kind.Int);
-            AllocatableValue codeBufferOffsetReg = HSAIL.s33.asValue(Kind.Int);
-            AllocatableValue dregOopMapReg = HSAIL.s39.asValue(Kind.Int);
+            AllocatableValue actionAndReasonReg = HSAIL.actionAndReasonReg.asValue(Kind.Int);
+            AllocatableValue codeBufferOffsetReg = HSAIL.codeBufferOffsetReg.asValue(Kind.Int);
+            AllocatableValue dregOopMapReg = HSAIL.dregOopMapReg.asValue(Kind.Int);
             masm.emitMov(Kind.Int, actionAndReasonReg, actionAndReason);
             masm.emitMov(Kind.Int, codeBufferOffsetReg, Constant.forInt(codeBufferPos));
             masm.emitMov(Kind.Int, dregOopMapReg, Constant.forInt(dregOopMap));
--- a/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/asm/CompilationResultBuilder.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/asm/CompilationResultBuilder.java	Thu Apr 17 13:17:25 2014 +0200
@@ -88,8 +88,8 @@
         assert frameContext != null;
     }
 
-    public void setFrameSize(int frameSize) {
-        compilationResult.setFrameSize(frameSize);
+    public void setTotalFrameSize(int frameSize) {
+        compilationResult.setTotalFrameSize(frameSize);
     }
 
     private static final CompilationResult.Mark[] NO_REFS = {};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/BinaryLogicNode.java	Thu Apr 17 13:17:25 2014 +0200
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.nodes;
+
+import com.oracle.graal.nodes.extended.*;
+import com.oracle.graal.nodes.spi.*;
+
+public abstract class BinaryLogicNode extends LogicNode implements LIRLowerable, MemoryArithmeticLIRLowerable {
+
+    @Input private ValueNode x;
+    @Input private ValueNode y;
+
+    public ValueNode x() {
+        return x;
+    }
+
+    public ValueNode y() {
+        return y;
+    }
+
+    protected void setX(ValueNode x) {
+        updateUsages(this.x, x);
+        this.x = x;
+    }
+
+    protected void setY(ValueNode y) {
+        updateUsages(this.y, y);
+        this.y = y;
+    }
+
+    public BinaryLogicNode(ValueNode x, ValueNode y) {
+        assert x != null && y != null && x.getKind() == y.getKind();
+        this.x = x;
+        this.y = y;
+    }
+
+    @Override
+    public boolean verify() {
+        assertTrue(x.stamp().isCompatible(y.stamp()), "stamps not compatible: %s, %s", x.stamp(), y.stamp());
+        return super.verify();
+    }
+
+    @Override
+    public void generate(NodeLIRBuilderTool gen) {
+    }
+
+    @Override
+    public boolean generate(MemoryArithmeticLIRLowerer gen, Access access) {
+        return false;
+    }
+}
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/calc/CompareNode.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/calc/CompareNode.java	Thu Apr 17 13:17:25 2014 +0200
@@ -26,8 +26,6 @@
 import com.oracle.graal.graph.*;
 import com.oracle.graal.graph.spi.*;
 import com.oracle.graal.nodes.*;
-import com.oracle.graal.nodes.extended.*;
-import com.oracle.graal.nodes.spi.*;
 
 /* TODO (thomaswue/gdub) For high-level optimization purpose the compare node should be a boolean *value* (it is currently only a helper node)
  * But in the back-end the comparison should not always be materialized (for example in x86 the comparison result will not be in a register but in a flag)
@@ -35,49 +33,32 @@
  * Compare should probably be made a value (so that it can be canonicalized for example) and in later stages some Compare usage should be transformed
  * into variants that do not materialize the value (CompareIf, CompareGuard...)
  */
-public abstract class CompareNode extends LogicNode implements Canonicalizable, LIRLowerable, MemoryArithmeticLIRLowerable {
-
-    @Input private ValueNode x;
-    @Input private ValueNode y;
-
-    public ValueNode x() {
-        return x;
-    }
-
-    public ValueNode y() {
-        return y;
-    }
+public abstract class CompareNode extends BinaryLogicNode implements Canonicalizable {
 
     /**
      * Constructs a new Compare instruction.
-     * 
+     *
      * @param x the instruction producing the first input to the instruction
      * @param y the instruction that produces the second input to this instruction
      */
     public CompareNode(ValueNode x, ValueNode y) {
-        assert x != null && y != null && x.getKind() == y.getKind();
-        this.x = x;
-        this.y = y;
+        super(x, y);
     }
 
     /**
      * Gets the condition (comparison operation) for this instruction.
-     * 
+     *
      * @return the condition
      */
     public abstract Condition condition();
 
     /**
      * Checks whether unordered inputs mean true or false (only applies to float operations).
-     * 
+     *
      * @return {@code true} if unordered inputs produce true
      */
     public abstract boolean unorderedIsTrue();
 
-    @Override
-    public void generate(NodeLIRBuilderTool gen) {
-    }
-
     private LogicNode optimizeConditional(Constant constant, ConditionalNode conditionalNode, ConstantReflectionProvider constantReflection, Condition cond) {
         Constant trueConstant = conditionalNode.trueValue().asConstant();
         Constant falseConstant = conditionalNode.falseValue().asConstant();
@@ -102,16 +83,6 @@
         return this;
     }
 
-    protected void setX(ValueNode x) {
-        updateUsages(this.x, x);
-        this.x = x;
-    }
-
-    protected void setY(ValueNode y) {
-        updateUsages(this.y, y);
-        this.y = y;
-    }
-
     protected LogicNode optimizeNormalizeCmp(Constant constant, NormalizeCompareNode normalizeNode, boolean mirrored) {
         throw new GraalInternalError("NormalizeCompareNode connected to %s (%s %s %s)", this, constant, normalizeNode, mirrored);
     }
@@ -193,14 +164,4 @@
 
         return graph.unique(comparison);
     }
-
-    public boolean generate(MemoryArithmeticLIRLowerer gen, Access access) {
-        return false;
-    }
-
-    @Override
-    public boolean verify() {
-        assertTrue(x.stamp().isCompatible(y.stamp()), "stamps not compatible: %s, %s", x.stamp(), y.stamp());
-        return super.verify();
-    }
 }
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/calc/IntegerTestNode.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/calc/IntegerTestNode.java	Thu Apr 17 13:17:25 2014 +0200
@@ -25,8 +25,6 @@
 import com.oracle.graal.graph.*;
 import com.oracle.graal.graph.spi.*;
 import com.oracle.graal.nodes.*;
-import com.oracle.graal.nodes.extended.*;
-import com.oracle.graal.nodes.spi.*;
 import com.oracle.graal.nodes.type.*;
 
 /**
@@ -34,33 +32,16 @@
  * expression "(x &amp; y) == 0", meaning that it will return true if (and only if) no bit is set in
  * both x and y.
  */
-public class IntegerTestNode extends LogicNode implements Canonicalizable, LIRLowerable, MemoryArithmeticLIRLowerable {
-
-    @Input private ValueNode x;
-    @Input private ValueNode y;
-
-    public ValueNode x() {
-        return x;
-    }
-
-    public ValueNode y() {
-        return y;
-    }
+public class IntegerTestNode extends BinaryLogicNode implements Canonicalizable {
 
     /**
      * Constructs a new Test instruction.
-     * 
+     *
      * @param x the instruction producing the first input to the instruction
      * @param y the instruction that produces the second input to this instruction
      */
     public IntegerTestNode(ValueNode x, ValueNode y) {
-        assert x != null && y != null && x.stamp().isCompatible(y.stamp());
-        this.x = x;
-        this.y = y;
-    }
-
-    @Override
-    public void generate(NodeLIRBuilderTool gen) {
+        super(x, y);
     }
 
     @Override
@@ -73,13 +54,10 @@
             IntegerStamp yStamp = (IntegerStamp) y().stamp();
             if ((xStamp.upMask() & yStamp.upMask()) == 0) {
                 return LogicConstantNode.tautology(graph());
+            } else if ((xStamp.downMask() & yStamp.downMask()) != 0) {
+                return LogicConstantNode.contradiction(graph());
             }
         }
         return this;
     }
-
-    @Override
-    public boolean generate(MemoryArithmeticLIRLowerer gen, Access access) {
-        return false;
-    }
 }
--- a/graal/com.oracle.graal.virtual/src/com/oracle/graal/virtual/phases/ea/EffectsPhase.java	Thu Apr 17 13:17:16 2014 +0200
+++ b/graal/com.oracle.graal.virtual/src/com/oracle/graal/virtual/phases/ea/EffectsPhase.java	Thu Apr 17 13:17:25 2014 +0200
@@ -90,7 +90,9 @@
                         listener.getChangedNodes().add(node);
                     }
                 }
-                canonicalizer.applyIncremental(graph, context, listener.getChangedNodes());
+                if (canonicalizer != null) {
+                    canonicalizer.applyIncremental(graph, context, listener.getChangedNodes());
+                }
             }
             changed = true;
         }
--- a/make/linux/makefiles/buildtree.make	Thu Apr 17 13:17:16 2014 +0200
+++ b/make/linux/makefiles/buildtree.make	Thu Apr 17 13:17:25 2014 +0200
@@ -258,6 +258,10 @@
 	echo "$(call gamma-path,altsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/$(OS_FAMILY)/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \
+	echo "$(call gamma-path,altsrc,gpu/ptx/vm) \\"; \
+	echo "$(call gamma-path,commonsrc,gpu/ptx/vm)" \\; \
+	echo "$(call gamma-path,altsrc,gpu/hsail/vm) \\"; \
+	echo "$(call gamma-path,commonsrc,gpu/hsail/vm) \\"; \
 	echo "$(call gamma-path,altsrc,gpu) \\"; \
 	echo "$(call gamma-path,commonsrc,gpu)"; \
 	[ -n "$(CFLAGS_BROWSE)" ] && \
--- a/make/linux/makefiles/vm.make	Thu Apr 17 13:17:16 2014 +0200
+++ b/make/linux/makefiles/vm.make	Thu Apr 17 13:17:25 2014 +0200
@@ -185,8 +185,10 @@
 
 GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/share/vm/graal)
 GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/ptx/vm)
+GRAAL_PATHS += $(call altsrc,$(HS_COMMON_SRC)/gpu/hsail/vm)
 GRAAL_PATHS += $(HS_COMMON_SRC)/share/vm/graal
 GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/ptx/vm
+GRAAL_PATHS += $(HS_COMMON_SRC)/gpu/hsail/vm
 
 # Include dirs per type.
 Src_Dirs/CORE      := $(CORE_PATHS)
--- a/mx/mx_graal.py	Thu Apr 17 13:17:16 2014 +0200
+++ b/mx/mx_graal.py	Thu Apr 17 13:17:25 2014 +0200
@@ -1404,7 +1404,16 @@
             f.write(json.dumps(results))
 
 def jmh(args):
-    """run the JMH_BENCHMARKS"""
+    """run the JMH_BENCHMARKS
+
+    The benchmarks are running with the default VM.
+    You can override this with an explicit option.
+    For example:
+
+        mx jmh -server ...
+
+    Will force the benchmarks to be run with the server VM.
+"""
 
     # TODO: add option for `mvn clean package'
 
@@ -1413,7 +1422,7 @@
     benchmarks = [b for b in benchmarksAndJsons if not b.startswith('{')]
     jmhArgJsons = [b for b in benchmarksAndJsons if b.startswith('{')]
 
-    jmhArgs = {'-v' : 'EXTRA' if mx._opts.verbose else 'NORMAL'}
+    jmhArgs = {'-rff' : join(_graal_home, 'mx', 'jmh', 'jmh.out'), '-v' : 'EXTRA' if mx._opts.verbose else 'NORMAL'}
 
     # e.g. '{"-wi" : 20}'
     for j in jmhArgJsons:
@@ -1427,18 +1436,54 @@
             mx.abort('error parsing JSON input: {}"\n{}'.format(j, e))
 
     jmhPath = mx.get_env('JMH_BENCHMARKS', None)
-    if not jmhPath or not exists(jmhPath):
-        mx.abort("$JMH_BENCHMARKS not properly defined: " + str(jmhPath))
+    if not jmhPath:
+        probe = join(dirname(_graal_home), 'java-benchmarks')
+        if exists(probe):
+            jmhPath = probe
+
+    if not jmhPath:
+        mx.abort("Please set the JMH_BENCHMARKS environment variable to point to the java-benchmarks workspace")
+    if not exists(jmhPath):
+        mx.abort("The directory denoted by the JMH_BENCHMARKS environment variable does not exist: " + jmhPath)
+    mx.log('Using benchmarks in ' + jmhPath)
 
-    def _blackhole(x):
-        mx.logv(x[:-1])
+    timestamp = mx.TimeStampFile(join(_graal_home, 'mx', 'jmh', jmhPath.replace(os.sep, '_') + '.timestamp'))
+    buildJmh = False
+    jmhTree = []
+    for root, dirnames, filenames in os.walk(jmhPath):
+        if root == jmhPath:
+            for n in ['.hg', '.metadata']:
+                if n in dirnames:
+                    dirnames.remove(n)
+        jmhTree.append(os.path.relpath(root, jmhPath) + ':')
+        jmhTree = jmhTree + filenames
+        jmhTree.append('')
 
+        files = [join(root, f) for f in filenames]
+        if timestamp.isOlderThan(files):
+            buildJmh = True
 
-    env = os.environ.copy()
-    env['JAVA_HOME'] = _jdk(vmToCheck='graal')
-    env['MAVEN_OPTS'] = '-graal'
-    mx.log("Building benchmarks...")
-    mx.run(['mvn', 'package'], cwd=jmhPath, out=_blackhole, env=env)
+    if not buildJmh:
+        with open(timestamp.path) as fp:
+            oldJmhTree = fp.read().split('\n')
+            if oldJmhTree != jmhTree:
+                import difflib
+                diff = difflib.unified_diff(oldJmhTree, jmhTree)
+                mx.log("Need to rebuild JMH due to change in JMH directory tree indicated by this diff:")
+                mx.log('\n'.join(diff))
+                buildJmh = True
+
+    if buildJmh:
+        def _blackhole(x):
+            mx.logv(x[:-1])
+        env = os.environ.copy()
+        env['JAVA_HOME'] = _jdk(vmToCheck='graal')
+        env['MAVEN_OPTS'] = '-graal'
+        mx.log("Building benchmarks...")
+        mx.run(['mvn', 'package'], cwd=jmhPath, out=_blackhole, env=env)
+        timestamp.touch()
+        with open(timestamp.path, 'w') as fp:
+            fp.write('\n'.join(jmhTree))
 
     matchedSuites = set()
     numBench = [0]
--- a/src/gpu/hsail/vm/gpu_hsail.cpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/gpu/hsail/vm/gpu_hsail.cpp	Thu Apr 17 13:17:25 2014 +0200
@@ -71,6 +71,7 @@
 };
 
 void * Hsail::_device_context = NULL;
+jint   Hsail::_notice_safepoints = false;
 
 Hsail::okra_create_context_func_t  Hsail::_okra_create_context;
 Hsail::okra_create_kernel_func_t   Hsail::_okra_create_kernel;
@@ -122,6 +123,18 @@
 
 static Stats kernelStats;
 
+//static jint in_kernel = 0;
+
+void Hsail::notice_safepoints() {
+  _notice_safepoints = true;
+//  if (TraceGPUInteraction) {
+//    tty->print_cr("[HSAIL] Notice safepoint in_kernel=%d", in_kernel);
+//  }
+}
+
+void Hsail::ignore_safepoints() {
+  _notice_safepoints = false;
+}
 
 void Hsail::register_heap() {
   // After the okra functions are set up and the heap is initialized, register the java heap with HSA
@@ -203,8 +216,7 @@
 }
 
 
-
-jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod *nm, jobject oops_save,
+jboolean Hsail::execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod *nm, jobject oops_save, 
                                                 jobject donor_threads, int allocBytesPerWorkitem, TRAPS) {
   ResourceMark rm(THREAD);
   objArrayOop argsArray = (objArrayOop) JNIHandles::resolve(args);
@@ -258,6 +270,9 @@
 
   // This object sets up the kernel arguments
   HSAILKernelArguments hka((address) kernel, mh->signature(), argsArray, mh->is_static(), e);
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] range=%d", dimX);
+  }
 
   // if any object passed was null, throw an exception here
   // doing this means the kernel code can avoid null checks on the object parameters.
@@ -273,8 +288,12 @@
   // Run the kernel
   bool success = false;
   {
-    TraceTime t1("execute kernel", TraceGPUInteraction);
+    TraceTime t("execute kernel", TraceGPUInteraction);
+
+    //in_kernel = 1;
+    // Run the kernel
     success = _okra_execute_with_range(kernel, dimX);
+    //in_kernel = 0;
   }
 
   // fix up any tlab tops that overflowed
@@ -297,17 +316,23 @@
   }
 
   if (UseHSAILDeoptimization) {
+    kernelStats.incDeopts();
     // check if any workitem requested a deopt
     // currently we only support at most one such workitem
     int deoptcode = e->deopt_occurred();
-    if (deoptcode != 0) {
-      if (deoptcode != 1) {
+    if (deoptcode != 1) {
+      if (deoptcode == 0) {
+        if (TraceGPUInteraction && _notice_safepoints != 0) {
+          tty->print_cr("[HSAIL] observed safepoint during kernel");
+        }
+      } else {
         // error condition detected in deopt code
         char msg[200];
         sprintf(msg, "deopt error detected, slot for workitem %d was not empty", -1 * (deoptcode + 1));
         guarantee(deoptcode == 1, msg);
       }
-      kernelStats.incDeopts();
+    } else {
+
       {
         TraceTime t3("handle deoptimizing workitems", TraceGPUInteraction);
         if (TraceGPUInteraction) {
@@ -402,48 +427,47 @@
           tty->print_cr("[HSAIL] Deoptimizing to host completed for %d workitems", count_deoptimized);
         }
       }
-
-      {
-        TraceTime t3("handle never-rans", TraceGPUInteraction);
+    }
 
-        // Handle any never_ran workitems if there were any
-        int count_never_ran = 0;
-        bool handleNeverRansHere = true;
-        // turn off verbose trace stuff for javacall arg setup
-        bool savedTraceGPUInteraction = TraceGPUInteraction;
-        TraceGPUInteraction = false;
-        jboolean *never_ran_array = e->never_ran_array();
-        if (handleNeverRansHere) {
-          for (int k = 0; k < dimX; k++) {
-            if (never_ran_array[k]) {
-              // run it as a javaCall
-              KlassHandle methKlass = mh->method_holder();
-              Thread* THREAD = Thread::current();
-              JavaValue result(T_VOID);
-              JavaCallArguments javaArgs;
-              // re-resolve the args_handle here
-              objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args);
-              // This object sets up the javaCall arguments
-              // the way argsArray is set up, this should work for instance methods as well
-              // (the receiver will be the first oop pushed)
-              HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static());
-              if (mh->is_static()) {
-                JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
-              } else {
-                JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
-              }
-              count_never_ran++;
+    // Handle any never_ran workitems if there were any
+    {
+      TraceTime t("handle never-rans ", TraceGPUInteraction);
+      int count_never_ran = 0;
+      bool handleNeverRansHere = true;
+      // turn off verbose trace stuff for javacall arg setup
+      bool savedTraceGPUInteraction = TraceGPUInteraction;
+      TraceGPUInteraction = false;
+      jboolean *never_ran_array = e->never_ran_array();
+      if (handleNeverRansHere) {
+        for (int k = 0; k < dimX; k++) {
+          if (never_ran_array[k]) {
+            // run it as a javaCall
+            KlassHandle methKlass = mh->method_holder();
+            Thread* THREAD = Thread::current();
+            JavaValue result(T_VOID);
+            JavaCallArguments javaArgs;
+            // re-resolve the args_handle here
+            objArrayOop resolvedArgsArray = (objArrayOop) JNIHandles::resolve(args);
+            // This object sets up the javaCall arguments
+            // the way argsArray is set up, this should work for instance methods as well
+            // (the receiver will be the first oop pushed)
+            HSAILJavaCallArguments hjca(&javaArgs, k, mh->signature(), resolvedArgsArray, mh->is_static());
+            if (mh->is_static()) {
+              JavaCalls::call_static(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
+            } else {
+              JavaCalls::call_virtual(&result, methKlass, mh->name(), mh->signature(), &javaArgs, THREAD);
             }
+            count_never_ran++;
           }
-          TraceGPUInteraction = savedTraceGPUInteraction;
-          if (TraceGPUInteraction) {
-            tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran);
-            showRanges(never_ran_array, dimX);
-          }
-        } // end of never-ran handling
-      }
+        }
+        TraceGPUInteraction = savedTraceGPUInteraction;
+        if (TraceGPUInteraction) {
+          tty->print_cr("%d workitems never ran, have been run via JavaCall", count_never_ran);
+          showRanges(never_ran_array, dimX);
+        }
+      } // end of never-ran handling
     }
-    
+
     FREE_C_HEAP_ARRAY(jboolean, e->never_ran_array(), mtInternal);
     delete e;
   }
@@ -547,7 +571,7 @@
   LOOKUP_OKRA_FUNCTION(okra_register_heap, okra_register_heap);
   // if we made it this far, real success
 
-  gpu::initialized_gpu("Okra");
+  Gpu::initialized_gpu(new Hsail());
 
   return true;
 GPU_END
--- a/src/gpu/hsail/vm/gpu_hsail.hpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/gpu/hsail/vm/gpu_hsail.hpp	Thu Apr 17 13:17:25 2014 +0200
@@ -27,11 +27,9 @@
 
 #include "utilities/exceptions.hpp"
 #include "graal/graalEnv.hpp"
-// #include "graal/graalCodeInstaller.hpp"
 #include "gpu_hsail_Frame.hpp"
 
-class Hsail {
-  friend class gpu;
+class Hsail : public Gpu {
 
   public:
   class HSAILKernelDeoptimization {
@@ -58,6 +56,7 @@
   class HSAILDeoptimizationInfo : public ResourceObj {
     friend class VMStructs;
    private:
+    jint* _notice_safepoints;
     jint _deopt_occurred;
     jint _deopt_next_index;
     JavaThread** _donor_threads;
@@ -67,12 +66,12 @@
     HSAILKernelDeoptimization _deopt_save_states[MAX_DEOPT_SAVE_STATES_SIZE];
 
     inline HSAILDeoptimizationInfo() {
+      _notice_safepoints = &Hsail::_notice_safepoints;
       _deopt_occurred = 0;
       _deopt_next_index = 0;
     }
 
     inline jint deopt_occurred() {
-      // Check that hsail did not write in the wrong place
       return _deopt_occurred;
     }
     inline jint num_deopts() { return _deopt_next_index; }
@@ -102,8 +101,8 @@
   static void getNewTlabForDonorThread(ThreadLocalAllocBuffer* tlab, size_t tlabMinHsail);
 
   static jboolean execute_kernel_void_1d_internal(address kernel, int dimX, jobject args, methodHandle& mh, nmethod *nm, jobject oopsSave,
-                                                  jobject donorThreads, int allocBytesPerWorkitem, TRAPS);
-
+                                                  jobject donor_threads, int allocBytesPerWorkitem, TRAPS);
+  
   static void register_heap();
 
   static GraalEnv::CodeInstallResult install_code(Handle& compiled_code, CodeBlob*& cb, Handle installed_code, Handle triggered_deoptimizations);
@@ -113,6 +112,11 @@
   // Registers the implementations for the native methods in HSAILHotSpotBackend
   static bool register_natives(JNIEnv* env);
 
+  virtual const char* name() { return "HSAIL"; }
+
+  virtual void notice_safepoints();
+  virtual void ignore_safepoints();
+
 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
   typedef unsigned long long CUdeviceptr;
 #else
@@ -149,5 +153,8 @@
   
 protected:
   static void* _device_context;
+
+  // true if safepoints are activated
+  static jint _notice_safepoints;
 };
 #endif // GPU_HSAIL_HPP
--- a/src/gpu/hsail/vm/vmStructs_hsail.hpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/gpu/hsail/vm/vmStructs_hsail.hpp	Thu Apr 17 13:17:25 2014 +0200
@@ -41,6 +41,7 @@
   nonstatic_field(Hsail::HSAILKernelDeoptimization, _actionAndReason,                           jint)                                 \
   nonstatic_field(Hsail::HSAILKernelDeoptimization, _first_frame,                               HSAILFrame)                           \
                                                                                                                                       \
+  nonstatic_field(Hsail::HSAILDeoptimizationInfo, _notice_safepoints,                      jint*)                                     \
   nonstatic_field(Hsail::HSAILDeoptimizationInfo, _deopt_occurred,                         jint)                                      \
   nonstatic_field(Hsail::HSAILDeoptimizationInfo, _deopt_next_index,                       jint)                                      \
   nonstatic_field(Hsail::HSAILDeoptimizationInfo, _donor_threads,                          JavaThread**)                              \
--- a/src/gpu/ptx/vm/gpu_ptx.cpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.cpp	Thu Apr 17 13:17:25 2014 +0200
@@ -271,7 +271,7 @@
     tty->print_cr("[CUDA] Success: Created context for device: %d", _cu_device);
   }
 
-  gpu::initialized_gpu(device_name);
+  Gpu::initialized_gpu(new Ptx());
 
   return true;
 GPU_END
--- a/src/gpu/ptx/vm/gpu_ptx.hpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/gpu/ptx/vm/gpu_ptx.hpp	Thu Apr 17 13:17:25 2014 +0200
@@ -93,7 +93,7 @@
 
 #define GRAAL_SUPPORTED_COMPUTE_CAPABILITY_VERSION 3.0
 
-class Ptx {
+class Ptx : public Gpu {
   friend class PtxCall;
 
 private:
@@ -120,6 +120,9 @@
   static int ncores(int major, int minor);
 
 public:
+
+  virtual const char* name() { return "PTX"; }
+
   // Registers the implementations for the native methods in PTXHotSpotBackend
   static bool register_natives(JNIEnv* env);
 
--- a/src/os/bsd/vm/gpu_bsd.cpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/os/bsd/vm/gpu_bsd.cpp	Thu Apr 17 13:17:25 2014 +0200
@@ -27,7 +27,7 @@
 #include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/ostream.hpp"
 
-jobject gpu::probe_gpus(JNIEnv* env) {
+jobject Gpu::probe_gpus(JNIEnv* env) {
 #ifdef __APPLE__
   /*
    * Let the CUDA driver initialization be the gate to GPU for now, pending
--- a/src/os/linux/vm/gpu_linux.cpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/os/linux/vm/gpu_linux.cpp	Thu Apr 17 13:17:25 2014 +0200
@@ -38,7 +38,7 @@
 
 #define PCI_DRIVER_NAME_START_POS 255
 
-jobject gpu::probe_gpus(JNIEnv* env) {
+jobject Gpu::probe_gpus(JNIEnv* env) {
   bool hsail = false;
   bool ptx = false;
 
--- a/src/os/windows/vm/gpu_windows.cpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/os/windows/vm/gpu_windows.cpp	Thu Apr 17 13:17:25 2014 +0200
@@ -27,7 +27,7 @@
 #include "hsail/vm/gpu_hsail.hpp"
 #include "utilities/ostream.hpp"
 
-jobject gpu::probe_gpus(JNIEnv* env) {
+jobject Gpu::probe_gpus(JNIEnv* env) {
   // TODO: add detection of PTX/NVidia
   if (Hsail::register_natives(env)) {
     return env->NewStringUTF("HSAIL");
--- a/src/share/vm/graal/graalCodeInstaller.cpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/share/vm/graal/graalCodeInstaller.cpp	Thu Apr 17 13:17:25 2014 +0200
@@ -455,8 +455,7 @@
 
   _code = (arrayOop) CompilationResult::targetCode(comp_result);
   _code_size = CompilationResult::targetCodeSize(comp_result);
-  // The frame size we get from the target method does not include the return address, so add one word for it here.
-  _total_frame_size = CompilationResult::frameSize(comp_result) + HeapWordSize;  // FIXME this is an x86-ism
+  _total_frame_size = CompilationResult::totalFrameSize(comp_result);
   _custom_stack_area_offset = CompilationResult::customStackAreaOffset(comp_result);
 
   // Pre-calculate the constants section size.  This is required for PC-relative addressing.
--- a/src/share/vm/graal/graalCompilerToVM.cpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/share/vm/graal/graalCompilerToVM.cpp	Thu Apr 17 13:17:25 2014 +0200
@@ -757,7 +757,7 @@
 
 C2V_ENTRY(jobject, getGPUs, (JNIEnv *env, jobject))
 #if defined(TARGET_OS_FAMILY_bsd) || defined(TARGET_OS_FAMILY_linux) || defined(TARGET_OS_FAMILY_windows)
-  return gpu::probe_gpus(env);
+  return Gpu::probe_gpus(env);
 #else
   return env->NewStringUTF("");
 #endif
--- a/src/share/vm/graal/graalGlobals.hpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/share/vm/graal/graalGlobals.hpp	Thu Apr 17 13:17:25 2014 +0200
@@ -88,6 +88,9 @@
   product(bool, UseHSAILDeoptimization, true,                               \
           "Code gen and runtime support for deoptimizing HSAIL kernels")    \
                                                                             \
+  product(bool, UseHSAILSafepoints, true,                                   \
+          "Code gen and runtime support for safepoints in HSAIL kernels")   \
+                                                                            \
   product(bool, GPUOffload, false,                                          \
           "Offload execution to GPU whenever possible")                     \
                                                                             \
--- a/src/share/vm/graal/graalJavaAccess.hpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/share/vm/graal/graalJavaAccess.hpp	Thu Apr 17 13:17:25 2014 +0200
@@ -116,7 +116,7 @@
     long_field(ExternalCompilationResult, entryPoint)                                                                                                          \
   end_class                                                                                                                                                    \
   start_class(CompilationResult)                                                                                                                               \
-    int_field(CompilationResult, frameSize)                                                                                                                    \
+    int_field(CompilationResult, totalFrameSize)                                                                                                               \
     int_field(CompilationResult, customStackAreaOffset)                                                                                                        \
     oop_field(CompilationResult, targetCode, "[B")                                                                                                             \
     oop_field(CompilationResult, assumptions, "Lcom/oracle/graal/api/code/Assumptions;")                                                                       \
--- a/src/share/vm/runtime/gpu.cpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/share/vm/runtime/gpu.cpp	Thu Apr 17 13:17:25 2014 +0200
@@ -26,11 +26,24 @@
 #include "runtime/gpu.hpp"
 #include "runtime/handles.hpp"
 
-int gpu::_initialized_gpus = 0;
+int  Gpu::_initialized_gpus_count = 0;
+Gpu* Gpu::_initialized_gpus[MAX_GPUS];
 
-void gpu::initialized_gpu(const char* name) {
-    _initialized_gpus++;
-    if (TraceGPUInteraction) {
-      tty->print_cr("[GPU] registered initialization of %s (total initialized: %d)", name, _initialized_gpus);
+void Gpu::initialized_gpu(Gpu* gpu) {
+  // GPUs are always initialized on the same thread so no need for locking
+  guarantee(_initialized_gpus_count < MAX_GPUS, "oob");
+  if (TraceGPUInteraction) {
+    tty->print_cr("[GPU] registered initialization of %s (total initialized: %d)", gpu->name(), _initialized_gpus);
+  }
+  _initialized_gpus[_initialized_gpus_count++] = gpu;
+}
+
+void Gpu::safepoint_event(SafepointEvent event) {
+  for (int i = 0; i < _initialized_gpus_count; i++) {
+    if (event == SafepointBegin) {
+      _initialized_gpus[i]->notice_safepoints();
+    } else {
+      _initialized_gpus[i]->ignore_safepoints();
     }
+  }
 }
--- a/src/share/vm/runtime/gpu.hpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/share/vm/runtime/gpu.hpp	Thu Apr 17 13:17:25 2014 +0200
@@ -29,21 +29,41 @@
 #include "oops/symbol.hpp"
 #include "utilities/array.hpp"
 
+#define MAX_GPUS 2
+
 // Defines the interface to the graphics processor(s).
-class gpu : AllStatic {
+class Gpu {
  private:
-  static int _initialized_gpus;  // number of initialize GPU devices
+  static int _initialized_gpus_count;
+  static Gpu* _initialized_gpus[MAX_GPUS];
 
  public:
 
   // Notification of a GPU device that has been initialized.
-  static void initialized_gpu(const char* name);
+  static void initialized_gpu(Gpu* gpu);
 
   // Gets a comma separated list of supported GPU architecture names.
   static jobject probe_gpus(JNIEnv* env);
   
   // Gets the number of GPU devices that have been initialized.
-  static int initialized_gpus() { return _initialized_gpus; }
+  static int initialized_gpus() { return _initialized_gpus_count; }
+
+  enum SafepointEvent {
+    SafepointBegin,
+    SafepointEnd
+  };
+
+  // Called when a safepoint has been activated.
+  static void safepoint_event(SafepointEvent event);
+
+  // Name of this GPU
+  virtual const char* name() = 0;
+
+  // Called when a safepoint has been activated.
+  virtual void notice_safepoints() {};
+
+  // Called when a safepoint has been deactivated.
+  virtual void ignore_safepoints() {};
 };
 
 #endif // SHARE_VM_RUNTIME_GPU_HPP
--- a/src/share/vm/runtime/safepoint.cpp	Thu Apr 17 13:17:16 2014 +0200
+++ b/src/share/vm/runtime/safepoint.cpp	Thu Apr 17 13:17:25 2014 +0200
@@ -39,6 +39,7 @@
 #include "runtime/compilationPolicy.hpp"
 #include "runtime/deoptimization.hpp"
 #include "runtime/frame.inline.hpp"
+#include "runtime/gpu.hpp"
 #include "runtime/interfaceSupport.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/osThread.hpp"
@@ -205,6 +206,12 @@
     os::make_polling_page_unreadable();
   }
 
+#ifdef GRAAL
+  if (UseHSAILSafepoints) {
+    Gpu::safepoint_event(Gpu::SafepointBegin);
+  }
+#endif
+  
   // Consider using active_processor_count() ... but that call is expensive.
   int ncpus = os::processor_count() ;
 
@@ -438,6 +445,12 @@
   // Remove safepoint check from interpreter
   Interpreter::ignore_safepoints();
 
+#ifdef GRAAL
+  if (UseHSAILSafepoints) {
+    Gpu::safepoint_event(Gpu::SafepointEnd);
+  }
+#endif
+
   {
     MutexLocker mu(Safepoint_lock);