changeset 12753:53f8adcbc474

Merge.
author Christian Humer <christian.humer@gmail.com>
date Mon, 11 Nov 2013 21:40:18 +0100
parents 71991b7a0f14 (current diff) d61e3ca7c89b (diff)
children 7311354f5bf8
files
diffstat 69 files changed, 1725 insertions(+), 254 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.api.meta/src/com/oracle/graal/api/meta/MetaAccessProvider.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.api.meta/src/com/oracle/graal/api/meta/MetaAccessProvider.java	Mon Nov 11 21:40:18 2013 +0100
@@ -74,7 +74,7 @@
      * 
      * @return the encoded value as an integer
      */
-    Constant encodeDeoptActionAndReason(DeoptimizationAction action, DeoptimizationReason reason, short speculationId);
+    Constant encodeDeoptActionAndReason(DeoptimizationAction action, DeoptimizationReason reason, int speculationId);
 
     DeoptimizationReason decodeDeoptReason(Constant constant);
 
--- a/graal/com.oracle.graal.asm.amd64/src/com/oracle/graal/asm/amd64/AMD64AsmOptions.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.asm.amd64/src/com/oracle/graal/asm/amd64/AMD64AsmOptions.java	Mon Nov 11 21:40:18 2013 +0100
@@ -23,11 +23,9 @@
 package com.oracle.graal.asm.amd64;
 
 public class AMD64AsmOptions {
-
-    public static int Atomics = 0;
-    public static boolean UseNormalNop = false;
-    public static boolean UseAddressNop = true;
-    public static boolean UseIncDec = true;
-    public static boolean UseXmmLoadAndClearUpper = true;
-    public static boolean UseXmmRegToRegMoveAll = true;
+    public static final boolean UseNormalNop = false;
+    public static final boolean UseAddressNop = true;
+    public static final boolean UseIncDec = true;
+    public static final boolean UseXmmLoadAndClearUpper = true;
+    public static final boolean UseXmmRegToRegMoveAll = true;
 }
--- a/graal/com.oracle.graal.asm.amd64/src/com/oracle/graal/asm/amd64/AMD64Assembler.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.asm.amd64/src/com/oracle/graal/asm/amd64/AMD64Assembler.java	Mon Nov 11 21:40:18 2013 +0100
@@ -524,26 +524,10 @@
     // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,.
     // The ZF is set if the compared values were equal, and cleared otherwise.
     public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
-        if ((Atomics & 2) != 0) {
-            // caveat: no instructionmark, so this isn't relocatable.
-            // Emit a synthetic, non-atomic, CAS equivalent.
-            // Beware. The synthetic form sets all ICCs, not just ZF.
-            // cmpxchg r,[m] is equivalent to X86.rax, = CAS (m, X86.rax, r)
-            cmpl(rax, adr);
-            movl(rax, adr);
-            if (reg.equals(rax)) {
-                Label l = new Label();
-                jccb(ConditionFlag.NotEqual, l);
-                movl(adr, reg);
-                bind(l);
-            }
-        } else {
-
-            prefix(adr, reg);
-            emitByte(0x0F);
-            emitByte(0xB1);
-            emitOperandHelper(reg, adr);
-        }
+        prefix(adr, reg);
+        emitByte(0x0F);
+        emitByte(0xB1);
+        emitOperandHelper(reg, adr);
     }
 
     public final void cvtsd2ss(Register dst, AMD64Address src) {
@@ -860,12 +844,7 @@
     }
 
     public final void lock() {
-        if ((Atomics & 1) != 0) {
-            // Emit either nothing, a NOP, or a NOP: prefix
-            emitByte(0x90);
-        } else {
-            emitByte(0xF0);
-        }
+        emitByte(0xF0);
     }
 
     public final void movapd(Register dst, Register src) {
--- a/graal/com.oracle.graal.asm.hsail/src/com/oracle/graal/asm/hsail/HSAILAssembler.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.asm.hsail/src/com/oracle/graal/asm/hsail/HSAILAssembler.java	Mon Nov 11 21:40:18 2013 +0100
@@ -25,6 +25,7 @@
 
 import com.oracle.graal.api.code.*;
 
+import static com.oracle.graal.api.code.MemoryBarriers.*;
 import static com.oracle.graal.api.code.ValueUtil.*;
 
 import com.oracle.graal.api.meta.*;
@@ -108,6 +109,23 @@
         emitString(instr + " " + HSAIL.mapRegister(reg) + ", " + mapAddress(addr) + ";");
     }
 
+    /**
+     * Emits a memory barrier instruction.
+     * 
+     * @param barriers barrier mask; only 0 and the three JMM_* values tested below emit anything
+     */
+    public final void emitMembar(int barriers) {
+        if (barriers == 0) {
+            emitString("// no barrier before volatile read");
+        } else if (barriers == JMM_POST_VOLATILE_READ) {
+            emitString("sync; // barriers=" + MemoryBarriers.barriersString(barriers));
+        } else if (barriers == JMM_PRE_VOLATILE_WRITE) {
+            emitString("sync; // barriers=" + MemoryBarriers.barriersString(barriers));
+        } else if (barriers == JMM_POST_VOLATILE_WRITE) {
+            emitString("sync; // barriers=" + MemoryBarriers.barriersString(barriers));
+        }
+    }
+
     public final void emitLoad(Kind kind, Value dest, HSAILAddress addr) {
         emitLoad(dest, addr, getArgTypeFromKind(kind));
     }
@@ -242,6 +260,17 @@
         emitString(prefix + destType + "_" + srcType + " " + HSAIL.mapRegister(dest) + ", " + HSAIL.mapRegister(src) + ";");
     }
 
+    /**
+     * Emits a convert instruction that uses an unsigned prefix, regardless of the types of dest
+     * and src.
+     * 
+     * @param dest the destination operand
+     * @param src the source operand
+     */
+    public void emitConvertForceUnsigned(Value dest, Value src) {
+        emitString("cvt_" + getArgTypeForceUnsigned(dest) + "_" + getArgTypeForceUnsigned(src) + " " + HSAIL.mapRegister(dest) + ", " + HSAIL.mapRegister(src) + ";");
+    }
+
     public static String mapAddress(HSAILAddress addr) {
         return "[$d" + addr.getBase().encoding() + " + " + addr.getDisplacement() + "]";
     }
@@ -427,6 +456,20 @@
     }
 
     /**
+     * Emits an atomic_cas_global instruction.
+     * 
+     * @param result result operand that gets the original contents of the memory location
+     * @param address the memory location
+     * @param cmpValue the value that will be compared against the memory location
+     * @param newValue the new value that will be written to the memory location if the cmpValue
+     *            comparison matches
+     */
+    public void emitAtomicCas(AllocatableValue result, HSAILAddress address, AllocatableValue cmpValue, AllocatableValue newValue) {
+        emitString(String.format("atomic_cas_global_b%d   %s, %s, %s, %s;", getArgSize(cmpValue), HSAIL.mapRegister(result), mapAddress(address), HSAIL.mapRegister(cmpValue),
+                        HSAIL.mapRegister(newValue)));
+    }
+
+    /**
      * Emits a comment. Useful for debugging purposes.
      * 
      * @param comment
--- a/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/GraalKernelTester.java	Mon Nov 11 21:40:18 2013 +0100
@@ -32,8 +32,11 @@
 import java.io.*;
 import java.lang.reflect.*;
 
+import com.oracle.graal.api.code.*;
+import com.oracle.graal.debug.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.hotspot.hsail.*;
+import com.oracle.graal.hotspot.meta.*;
 import com.oracle.graal.options.*;
 
 public abstract class GraalKernelTester extends KernelTester {
@@ -43,9 +46,9 @@
     private boolean saveInFile = false;
 
     @Override
-    public String getCompiledHSAILSource(Method testMethod) {
+    public String getCompiledHSAILSource(Method method) {
         if (hsailCompResult == null) {
-            hsailCompResult = HSAILCompilationResult.getHSAILCompilationResult(testMethod);
+            hsailCompResult = HSAILCompilationResult.getHSAILCompilationResult(method);
         }
         String hsailSource = hsailCompResult.getHSAILCode();
         if (showHsailSource) {
@@ -78,6 +81,58 @@
         return (canGenerateCalls && canExecuteCalls);
     }
 
+    @Override
+    protected void dispatchLambdaMethodKernelOkra(int range, MyIntConsumer consumer) {
+        HSAILCompilationResult hcr = HSAILCompilationResult.getCompiledLambda(consumer.getClass());
+        HotSpotNmethod code = (HotSpotNmethod) hcr.getInstalledCode();
+
+        logger.info("To determine parameters to pass to hsail kernel, we will examine   " + consumer.getClass());
+        Field[] fields = consumer.getClass().getDeclaredFields();
+        Object[] args = new Object[fields.length];
+        int argIndex = 0;
+        for (Field f : fields) {
+            logger.info("... " + f);
+            args[argIndex++] = getFieldFromObject(f, consumer);
+        }
+
+        if (code != null) {
+            try {
+                // No return value from HSAIL kernels
+                code.executeParallel(range, 0, 0, args);
+            } catch (InvalidInstalledCodeException e) {
+                Debug.log("WARNING:Invalid installed code: " + e);
+                e.printStackTrace();
+            }
+        }
+    }
+
+    @Override
+    protected void dispatchMethodKernelOkra(int range, Object... args) {
+        Object[] fixedArgs = fixArgTypes(args);
+
+        HSAILCompilationResult hcr = HSAILCompilationResult.getHSAILCompilationResult(testMethod);
+        HotSpotNmethod code = (HotSpotNmethod) hcr.getInstalledCode();
+
+        if (code != null) {
+            try {
+                if (Modifier.isStatic(testMethod.getModifiers())) {
+                    code.executeParallel(range, 0, 0, fixedArgs);
+                } else {
+                    // If it is a non-static method we have to push "this" as the first argument.
+                    Object[] newFixedArgs = new Object[fixedArgs.length + 1];
+                    System.arraycopy(fixedArgs, 0, newFixedArgs, 1, fixedArgs.length);
+                    newFixedArgs[0] = this;
+                    code.executeParallel(range, 0, 0, newFixedArgs);
+                }
+            } catch (InvalidInstalledCodeException e) {
+                Debug.log("WARNING:Invalid installed code: " + e);
+                e.printStackTrace();
+            }
+        } else {
+            super.dispatchMethodKernelOkra(range, args);
+        }
+    }
+
     public static OptionValue<?> getOptionFromField(Class declaringClass, String fieldName) {
         try {
             Field f = declaringClass.getDeclaredField(fieldName);
--- a/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/KernelTester.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.compiler.hsail.test.infra/src/com/oracle/graal/compiler/hsail/test/infra/KernelTester.java	Mon Nov 11 21:40:18 2013 +0100
@@ -72,7 +72,7 @@
     public DispatchMode dispatchMode;
     // Where the hsail comes from.
     private HsailMode hsailMode;
-    private Method testMethod;
+    protected Method testMethod;
     // What type of okra dispatch to use when client calls.
     private boolean useLambdaMethod;
     private Class<?>[] testMethodParams = null;
@@ -581,7 +581,7 @@
         }
     }
 
-    private void dispatchMethodKernelOkra(int range, Object... args) {
+    protected void dispatchMethodKernelOkra(int range, Object... args) {
         Object[] fixedArgs = fixArgTypes(args);
         if (Modifier.isStatic(testMethod.getModifiers())) {
             dispatchKernelOkra(range, fixedArgs);
@@ -598,7 +598,7 @@
      * For primitive arg parameters, make sure arg types are cast to whatever the testMethod
      * signature says they should be.
      */
-    private Object[] fixArgTypes(Object[] args) {
+    protected Object[] fixArgTypes(Object[] args) {
         Object[] fixedArgs = new Object[args.length];
         for (int i = 0; i < args.length; i++) {
             Class<?> paramClass = testMethodParams[i];
@@ -644,7 +644,7 @@
      * the lambda method itself as opposed to the wrapper that calls the lambda method. From the
      * consumer object, we need to find the fields and pass them to the kernel.
      */
-    private void dispatchLambdaMethodKernelOkra(int range, MyIntConsumer consumer) {
+    protected void dispatchLambdaMethodKernelOkra(int range, MyIntConsumer consumer) {
         logger.info("To determine parameters to pass to hsail kernel, we will examine   " + consumer.getClass());
         Field[] fields = consumer.getClass().getDeclaredFields();
         Object[] args = new Object[fields.length];
@@ -747,7 +747,7 @@
         newInstance().compareOkraToSeq(HsailMode.INJECT_OCL);
     }
 
-    private static Object getFieldFromObject(Field f, Object fromObj) {
+    protected static Object getFieldFromObject(Field f, Object fromObj) {
         try {
             f.setAccessible(true);
             Type type = f.getType();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/AtomicIntGetAndAddTest.java	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.compiler.hsail.test;
+
+import java.util.*;
+import java.util.concurrent.atomic.*;
+
+import org.junit.*;
+
+import sun.misc.*;
+
+import com.oracle.graal.compiler.hsail.test.infra.*;
+
+/**
+ * Tests {@link AtomicInteger#getAndAdd(int)} which indirectly tests
+ * {@link Unsafe#compareAndSwapInt(Object, long, int, int)}.
+ */
+public class AtomicIntGetAndAddTest extends GraalKernelTester {
+
+    static final int NUM = 20;
+    @Result public int[] outArray = new int[NUM];
+    AtomicInteger atomicInt = new AtomicInteger();
+
+    void setupArrays() {
+        for (int i = 0; i < NUM; i++) {
+            outArray[i] = -i;
+        }
+    }
+
+    @Override
+    public void runTest() {
+        setupArrays();
+
+        dispatchMethodKernel(NUM);
+
+        // note: the actual order of entries in outArray is not predictable
+        // thus we sort before we compare results
+        Arrays.sort(outArray);
+    }
+
+    public void run(int gid) {
+        outArray[gid] = atomicInt.getAndAdd(0x7);
+    }
+
+    @Test
+    public void test() {
+        testGeneratedHsail();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/AtomicLongGetAndAddTest.java	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.compiler.hsail.test;
+
+import java.util.*;
+import java.util.concurrent.atomic.*;
+
+import org.junit.*;
+
+import sun.misc.*;
+
+import com.oracle.graal.compiler.hsail.test.infra.*;
+
+/**
+ * Tests {@link AtomicLong#getAndAdd(long)} which indirectly tests
+ * {@link Unsafe#compareAndSwapLong(Object, long, long, long)}.
+ */
+public class AtomicLongGetAndAddTest extends GraalKernelTester {
+
+    static final int NUM = 20;
+    @Result public long[] outArray = new long[NUM];
+    AtomicLong atomicLong = new AtomicLong();
+
+    void setupArrays() {
+        for (int i = 0; i < NUM; i++) {
+            outArray[i] = -i;
+        }
+    }
+
+    @Override
+    public void runTest() {
+        setupArrays();
+
+        dispatchMethodKernel(NUM);
+
+        // note: the actual order of entries in outArray is not predictable
+        // thus we sort before we compare results
+        Arrays.sort(outArray);
+    }
+
+    public void run(int gid) {
+        outArray[gid] = atomicLong.getAndAdd(0x7);
+    }
+
+    @Test
+    public void test() {
+        testGeneratedHsail();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/AtomicReferenceGetAndSetTest.java	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.compiler.hsail.test;
+
+import java.util.concurrent.atomic.*;
+
+import org.junit.*;
+
+import sun.misc.*;
+
+import com.oracle.graal.compiler.hsail.test.infra.*;
+import com.oracle.graal.debug.*;
+
+/**
+ * Tests {@link AtomicReference#getAndSet(Object)} which indirectly tests
+ * {@link Unsafe#compareAndSwapObject(Object, long, Object, Object)}. The latter requires special
+ * handling if compressed oops are enabled.
+ */
+public class AtomicReferenceGetAndSetTest extends GraalKernelTester {
+
+    static final int NUM = 20;
+    @Result public int[] followedCount = new int[NUM];
+    public MyObj[] inArray = new MyObj[NUM];
+    AtomicReference<MyObj> atomicRef = new AtomicReference<>();
+
+    public static class MyObj {
+        public int val;
+        public boolean[] followedBy = new boolean[NUM + 1];
+
+        MyObj(int n) {
+            val = n;
+        }
+    }
+
+    void setupArrays() {
+        for (int i = 0; i < NUM; i++) {
+            inArray[i] = new MyObj(i + 1);
+        }
+        atomicRef.set(new MyObj(0)); // initial value
+    }
+
+    private static final boolean DEBUG = false;
+
+    @Override
+    public void runTest() {
+        setupArrays();
+
+        dispatchMethodKernel(NUM);
+
+        // the final object has no successor; mark the otherwise unused slot 0 as a fake followedBy
+        MyObj finalObj = atomicRef.get();
+        finalObj.followedBy[0] = true;
+
+        // When the kernel is done, count the true entries in each followedBy array.
+        for (int i = 0; i < NUM; i++) {
+            MyObj obj = inArray[i];
+            int count = 0;
+            for (int j = 0; j < NUM + 1; j++) {
+                boolean b = obj.followedBy[j];
+                if (b) {
+                    count++;
+                    if (DEBUG) {
+                        TTY.println("obj " + obj.val + " was followed by " + j);
+                    }
+                }
+
+            }
+            followedCount[i] = count;
+        }
+    }
+
+    public void run(int gid) {
+        MyObj newObj = inArray[gid];
+        MyObj oldObj = atomicRef.getAndSet(newObj);
+        oldObj.followedBy[newObj.val] = true;
+    }
+
+    @Test
+    public void test() {
+        testGeneratedHsail();
+    }
+}
--- a/graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/HSAILLIRGenerator.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/HSAILLIRGenerator.java	Mon Nov 11 21:40:18 2013 +0100
@@ -37,6 +37,7 @@
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.StandardOp.JumpOp;
 import com.oracle.graal.lir.hsail.*;
+import com.oracle.graal.lir.hsail.HSAILArithmetic.ConvertOp;
 import com.oracle.graal.lir.hsail.HSAILArithmetic.Op1Stack;
 import com.oracle.graal.lir.hsail.HSAILArithmetic.Op2Reg;
 import com.oracle.graal.lir.hsail.HSAILArithmetic.Op2Stack;
@@ -47,11 +48,11 @@
 import com.oracle.graal.lir.hsail.HSAILControlFlow.FloatCondMoveOp;
 import com.oracle.graal.lir.hsail.HSAILControlFlow.ReturnOp;
 import com.oracle.graal.lir.hsail.HSAILMove.LeaOp;
+import com.oracle.graal.lir.hsail.HSAILMove.MembarOp;
 import com.oracle.graal.lir.hsail.HSAILMove.MoveFromRegOp;
 import com.oracle.graal.lir.hsail.HSAILMove.MoveToRegOp;
 import com.oracle.graal.nodes.*;
 import com.oracle.graal.nodes.calc.*;
-import com.oracle.graal.nodes.java.*;
 import com.oracle.graal.phases.util.*;
 
 /**
@@ -170,7 +171,7 @@
         append(new JumpOp(label));
     }
 
-    private static HSAILCompare mapKindToCompareOp(Kind kind) {
+    protected static HSAILCompare mapKindToCompareOp(Kind kind) {
         switch (kind) {
             case Int:
                 return ICMP;
@@ -612,7 +613,8 @@
 
     @Override
     public void emitMembar(int barriers) {
-        throw GraalInternalError.unimplemented();
+        int necessaryBarriers = target().arch.requiredBarriers(barriers);
+        append(new MembarOp(necessaryBarriers));
     }
 
     @Override
@@ -702,11 +704,6 @@
     }
 
     @Override
-    public void visitCompareAndSwap(LoweredCompareAndSwapNode node, Value address) {
-        throw GraalInternalError.unimplemented();
-    }
-
-    @Override
     public void visitBreakpointNode(BreakpointNode node) {
         throw GraalInternalError.unimplemented();
     }
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/alloc/LinearScan.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/alloc/LinearScan.java	Mon Nov 11 21:40:18 2013 +0100
@@ -527,7 +527,7 @@
         Interval interval;
         interval = createUnhandledLists(mustStoreAtDefinition, null).first;
         if (DetailedAsserts.getValue()) {
-            checkIntervals(interval, getTraceLevel());
+            checkIntervals(interval);
         }
 
         LIRInsertionBuffer insertionBuffer = new LIRInsertionBuffer();
@@ -596,7 +596,7 @@
         assert interval == Interval.EndMarker : "missed an interval";
     }
 
-    private static void checkIntervals(Interval interval, int traceLevel) {
+    private void checkIntervals(Interval interval) {
         Interval prev = null;
         Interval temp = interval;
         while (temp != Interval.EndMarker) {
--- a/graal/com.oracle.graal.debug/src/com/oracle/graal/debug/internal/DebugValueMap.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.debug/src/com/oracle/graal/debug/internal/DebugValueMap.java	Mon Nov 11 21:40:18 2013 +0100
@@ -56,7 +56,9 @@
     }
 
     public void reset() {
-        Arrays.fill(values, 0L);
+        if (values != null) {
+            Arrays.fill(values, 0L);
+        }
         if (children != null) {
             for (DebugValueMap child : children) {
                 child.reset();
--- a/graal/com.oracle.graal.graph/src/com/oracle/graal/graph/Node.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.graph/src/com/oracle/graal/graph/Node.java	Mon Nov 11 21:40:18 2013 +0100
@@ -745,12 +745,12 @@
         }
         if (recordsUsages()) {
             for (Node usage : usages()) {
-                assertFalse(usage.isDeleted(), "usage must never be deleted");
+                assertFalse(usage.isDeleted(), "usage %s must never be deleted", usage);
                 assertTrue(usage.inputs().contains(this), "missing input in usage %s", usage);
             }
         }
         if (predecessor != null) {
-            assertFalse(predecessor.isDeleted(), "predecessor must never be deleted");
+            assertFalse(predecessor.isDeleted(), "predecessor %s must never be deleted", predecessor);
             assertTrue(predecessor.successors().contains(this), "missing successor in predecessor %s", predecessor);
         }
         return true;
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/ForEachToGraal.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/ForEachToGraal.java	Mon Nov 11 21:40:18 2013 +0100
@@ -23,20 +23,10 @@
 
 package com.oracle.graal.hotspot.hsail;
 
-import java.lang.reflect.*;
-
-import com.amd.okra.*;
 import com.oracle.graal.api.code.*;
-import com.oracle.graal.api.meta.*;
 import com.oracle.graal.compiler.hsail.*;
 import com.oracle.graal.debug.*;
-import com.oracle.graal.graph.*;
-import com.oracle.graal.graph.iterators.*;
 import com.oracle.graal.hotspot.meta.*;
-import com.oracle.graal.java.*;
-import com.oracle.graal.nodes.*;
-import com.oracle.graal.nodes.java.*;
-import com.oracle.graal.phases.*;
 
 /**
  * Implements compile and dispatch of Java code containing lambda constructs. Currently only used by
@@ -44,78 +34,41 @@
  */
 public class ForEachToGraal implements CompileAndDispatch {
 
-    private static CompilationResult getCompiledLambda(Class consumerClass) {
-        /**
-         * Find the accept() method in the IntConsumer, then use Graal API to find the target lambda
-         * that accept will call.
-         */
-        Method[] icMethods = consumerClass.getMethods();
-        Method acceptMethod = null;
-        for (Method m : icMethods) {
-            if (m.getName().equals("accept") && acceptMethod == null) {
-                acceptMethod = m;
-            }
-        }
-        HotSpotProviders providers = HSAILCompilationResult.backend.getProviders();
-        MetaAccessProvider metaAccess = providers.getMetaAccess();
-        ResolvedJavaMethod method = metaAccess.lookupJavaMethod(acceptMethod);
-        StructuredGraph graph = new StructuredGraph(method);
-        ForeignCallsProvider foreignCalls = providers.getForeignCalls();
-        new GraphBuilderPhase(metaAccess, foreignCalls, GraphBuilderConfiguration.getEagerDefault(), OptimisticOptimizations.ALL).apply(graph);
-        NodeIterable<Node> nin = graph.getNodes();
-        ResolvedJavaMethod lambdaMethod = null;
-        for (Node n : nin) {
-            if (n instanceof MethodCallTargetNode) {
-                lambdaMethod = ((MethodCallTargetNode) n).targetMethod();
-                Debug.log("target ... " + lambdaMethod);
-                break;
-            }
-        }
-        if (lambdaMethod == null) {
-            // Did not find call in Consumer.accept.
-            Debug.log("Should not Reach here, did not find call in accept()");
-            return null;
-        }
-        // Now that we have the target lambda, compile it.
-        HSAILCompilationResult hsailCompResult = HSAILCompilationResult.getHSAILCompilationResult(lambdaMethod);
-        if (hsailCompResult != null) {
-            hsailCompResult.dumpCompilationResult();
-        }
-        return hsailCompResult;
+    private static HSAILCompilationResult getCompiledLambda(Class consumerClass) {
+        return HSAILCompilationResult.getCompiledLambda(consumerClass);
     }
 
     // Implementations of the CompileAndDispatch interface.
     @Override
     public Object createKernel(Class<?> consumerClass) {
         try {
-            CompilationResult result = getCompiledLambda(consumerClass);
-            if (result != null) {
-                String code = new String(new String(result.getTargetCode(), 0, result.getTargetCodeSize()));
-                OkraContext okraContext = new OkraContext();
-                OkraKernel okraKernel = new OkraKernel(okraContext, code, "&run");
-                if (okraKernel.isValid()) {
-                    return okraKernel;
-                }
-            }
+            return getCompiledLambda(consumerClass);
         } catch (Throwable e) {
             // Note: Graal throws Errors. We want to revert to regular Java in these cases.
             Debug.log("WARNING:Graal compilation failed.");
             e.printStackTrace();
             return null;
         }
-        // If we got this far, return null.
-        return null;
     }
 
     @Override
     public boolean dispatchKernel(Object kernel, int jobSize, Object[] args) {
-        if (!(kernel instanceof OkraKernel)) {
-            Debug.log("unknown kernel for dispatchKernel");
+        // kernel is expected to be the HSAILCompilationResult from createKernel (null if that failed)
+        HotSpotNmethod code = (HotSpotNmethod) ((HSAILCompilationResult) kernel).getInstalledCode();
+
+        if (code != null) {
+            try {
+                // No return value from HSAIL kernels
+                code.executeParallel(jobSize, 0, 0, args);
+                return true;
+            } catch (InvalidInstalledCodeException iice) {
+                Debug.log("WARNING:Invalid installed code at exec time." + iice);
+                iice.printStackTrace();
+                return false;
+            }
+        } else {
+            // TODO: no installed code available; throw something sensible instead of returning false
             return false;
         }
-        OkraKernel okraKernel = (OkraKernel) kernel;
-        okraKernel.setLaunchAttributes(jobSize);
-        int status = okraKernel.dispatchWithArgs(args);
-        return (status == 0);
     }
 }
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILCompilationResult.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILCompilationResult.java	Mon Nov 11 21:40:18 2013 +0100
@@ -35,10 +35,14 @@
 import com.oracle.graal.compiler.*;
 import com.oracle.graal.debug.*;
 import com.oracle.graal.graph.*;
+import com.oracle.graal.graph.iterators.*;
+import com.oracle.graal.hotspot.*;
+import com.oracle.graal.hotspot.bridge.CompilerToGPU;
 import com.oracle.graal.hotspot.meta.*;
 import com.oracle.graal.hsail.*;
 import com.oracle.graal.java.*;
 import com.oracle.graal.nodes.*;
+import com.oracle.graal.nodes.java.MethodCallTargetNode;
 import com.oracle.graal.nodes.type.*;
 import com.oracle.graal.phases.*;
 import com.oracle.graal.phases.PhasePlan.PhasePosition;
@@ -49,10 +53,24 @@
 /**
  * Class that represents a HSAIL compilation result. Includes the compiled HSAIL code.
  */
-public class HSAILCompilationResult extends CompilationResult {
+public class HSAILCompilationResult extends ExternalCompilationResult {
 
     private static final long serialVersionUID = -4178700465275724625L;
 
+    private static final CompilerToGPU toGPU = HotSpotGraalRuntime.runtime().getCompilerToGPU();
+    private static final boolean validDevice = toGPU.deviceInit(); // deviceInit() result, evaluated once at class initialization
+
+    // The installedCode is the executable representation of the kernel in the code cache
+    private InstalledCode installedCode;
+
+    public void setInstalledCode(InstalledCode newCode) {
+        installedCode = newCode;
+    }
+
+    public InstalledCode getInstalledCode() {
+        return installedCode;
+    }
+
     private static final String propPkgName = HSAILCompilationResult.class.getPackage().getName();
     private static Level logLevel;
     private static ConsoleHandler consoleHandler;
@@ -131,6 +149,48 @@
         return registerConfig.getCallingConvention(type, retType, argTypes, target, stackOnly);
     }
 
+    /**
+     * Compiles the lambda that the {@code accept} method of {@code consumerClass} calls; returns {@code null} if none is found.
+     */
+    public static HSAILCompilationResult getCompiledLambda(Class<?> consumerClass) {
+        Method acceptMethod = null;
+        for (Method m : consumerClass.getMethods()) {
+            if (m.getName().equals("accept")) {
+                acceptMethod = m;
+                break;
+            }
+        }
+        if (acceptMethod == null) {
+            Debug.log("did not find accept() in " + consumerClass);
+            return null;
+        }
+        Providers providers = backend.getProviders();
+        HotSpotMetaAccessProvider metaAccess = (HotSpotMetaAccessProvider) providers.getMetaAccess();
+        ResolvedJavaMethod rm = metaAccess.lookupJavaMethod(acceptMethod);
+        StructuredGraph graph = new StructuredGraph(rm);
+        GraphBuilderPhase graphBuilderPhase = new GraphBuilderPhase(providers.getMetaAccess(), providers.getForeignCalls(), GraphBuilderConfiguration.getDefault(), OptimisticOptimizations.ALL);
+        graphBuilderPhase.apply(graph);
+        // Use the Graal graph of accept() to find the first call target, which is taken to be the lambda.
+        NodeIterable<Node> nin = graph.getNodes();
+        ResolvedJavaMethod lambdaMethod = null;
+        for (Node n : nin) {
+            if (n instanceof MethodCallTargetNode) {
+                lambdaMethod = ((MethodCallTargetNode) n).targetMethod();
+                Debug.log("target ... " + lambdaMethod);
+                break;
+            }
+        }
+        if (lambdaMethod == null) {
+            Debug.log("Should not Reach here, did not find call in accept()");
+            return null;
+        }
+        HSAILCompilationResult hsailCompResult = HSAILCompilationResult.getHSAILCompilationResult(lambdaMethod);
+        if (hsailCompResult != null) {
+            hsailCompResult.dumpCompilationResult();
+        }
+        return hsailCompResult;
+    }
+
     public static HSAILCompilationResult getHSAILCompilationResult(StructuredGraph graph) {
         Debug.dump(graph, "Graph");
         Providers providers = backend.getProviders();
@@ -145,7 +205,22 @@
         try {
             HSAILCompilationResult compResult = GraalCompiler.compileGraph(graph, cc, graph.method(), providers, backend, target, null, phasePlan, OptimisticOptimizations.NONE, new SpeculationLog(),
                             suitesProvider.getDefaultSuites(), new HSAILCompilationResult());
+            if ((validDevice) && (compResult.getTargetCode() != null)) {
+                long kernel = toGPU.generateKernel(compResult.getTargetCode(), graph.method().getName());
+
+                if (kernel == 0) {
+                    throw new GraalInternalError("Failed to compile kernel.");
+                }
+
+                ((ExternalCompilationResult) compResult).setEntryPoint(kernel);
+                HotSpotResolvedJavaMethod compiledMethod = (HotSpotResolvedJavaMethod) graph.method();
+                InstalledCode installedCode = ((HotSpotCodeCacheProvider) providers.getCodeCache()).addExternalMethod(compiledMethod, compResult);
+                compResult.setInstalledCode(installedCode);
+            }
             return compResult;
+        } catch (InvalidInstalledCodeException e) {
+            e.printStackTrace();
+            return null;
         } catch (GraalInternalError e) {
             String partialCode = backend.getPartialCodeString();
             if (partialCode != null && !partialCode.equals("")) {
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerator.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerator.java	Mon Nov 11 21:40:18 2013 +0100
@@ -23,6 +23,8 @@
 
 package com.oracle.graal.hotspot.hsail;
 
+import sun.misc.*;
+
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.compiler.hsail.*;
@@ -32,6 +34,8 @@
 import com.oracle.graal.lir.hsail.HSAILControlFlow.*;
 import com.oracle.graal.lir.hsail.HSAILMove.*;
 import com.oracle.graal.nodes.*;
+import com.oracle.graal.nodes.calc.*;
+import com.oracle.graal.nodes.java.*;
 import com.oracle.graal.phases.util.*;
 
 /**
@@ -74,6 +78,41 @@
         return access != null && ((HeapAccess) access).isCompressible();
     }
 
+    /**
+     * Appends either a {@link CompareAndSwapOp} or a {@link CompareAndSwapCompressedOp} depending
+     * on whether the memory location of a given {@link LoweredCompareAndSwapNode} contains a
+     * compressed oop. For the {@link CompareAndSwapCompressedOp} case, allocates a number of
+     * scratch registers. The result {@link #operand(ValueNode) operand} for {@code node} complies
+     * with the API for {@link Unsafe#compareAndSwapInt(Object, long, int, int)}.
+     * 
+     * @param address the memory location targeted by the operation
+     */
+    @Override
+    public void visitCompareAndSwap(LoweredCompareAndSwapNode node, Value address) {
+        Kind kind = node.getNewValue().kind();
+        assert kind == node.getExpectedValue().kind();
+        Variable expected = load(operand(node.getExpectedValue()));
+        Variable newValue = load(operand(node.getNewValue()));
+        HSAILAddressValue addressValue = asAddressValue(address);
+        Variable casResult = newVariable(kind);
+        if (config.useCompressedOops && node.isCompressible()) {
+            // make 64-bit scratch variables for expected and new
+            Variable scratchExpected64 = newVariable(Kind.Long);
+            Variable scratchNewValue64 = newVariable(Kind.Long);
+            // make 32-bit scratch variables for expected and new and result
+            Variable scratchExpected32 = newVariable(Kind.Int);
+            Variable scratchNewValue32 = newVariable(Kind.Int);
+            Variable scratchCasResult32 = newVariable(Kind.Int);
+            append(new CompareAndSwapCompressedOp(casResult, addressValue, expected, newValue, scratchExpected64, scratchNewValue64, scratchExpected32, scratchNewValue32, scratchCasResult32,
+                            getNarrowOopBase(), getNarrowOopShift(), getLogMinObjectAlignment()));
+        } else {
+            append(new CompareAndSwapOp(casResult, addressValue, expected, newValue));
+        }
+        Variable nodeResult = newVariable(node.kind());
+        append(new CondMoveOp(mapKindToCompareOp(kind), casResult, expected, nodeResult, Condition.EQ, Constant.INT_1, Constant.INT_0));
+        setResult(node, nodeResult);
+    }
+
     @Override
     public Variable emitLoad(Kind kind, Value address, DeoptimizingNode access) {
         HSAILAddressValue loadAddress = asAddressValue(address);
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotReplacementsImpl.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotReplacementsImpl.java	Mon Nov 11 21:40:18 2013 +0100
@@ -47,7 +47,11 @@
 
     @Override
     protected ResolvedJavaMethod registerMethodSubstitution(Member originalMethod, Method substituteMethod) {
-        if (substituteMethod.getDeclaringClass() == IntegerSubstitutions.class || substituteMethod.getDeclaringClass() == LongSubstitutions.class) {
+        if (substituteMethod.getDeclaringClass().getDeclaringClass() == BoxingSubstitutions.class) {
+            if (config.useHeapProfiler) {
+                return null;
+            }
+        } else if (substituteMethod.getDeclaringClass() == IntegerSubstitutions.class || substituteMethod.getDeclaringClass() == LongSubstitutions.class) {
             if (substituteMethod.getName().equals("bitCount")) {
                 if (!config.usePopCountInstruction) {
                     return null;
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotVMConfig.java	Mon Nov 11 21:40:18 2013 +0100
@@ -671,6 +671,7 @@
     }
 
     @HotSpotVMFlag(name = "GraalDeferredInitBarriers") @Stable public boolean useDeferredInitBarriers;
+    @HotSpotVMFlag(name = "GraalHProfEnabled") @Stable public boolean useHeapProfiler;
 
     // Compressed Oops related values.
     @HotSpotVMFlag(name = "UseCompressedOops") @Stable public boolean useCompressedOops;
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/VMToCompilerImpl.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/bridge/VMToCompilerImpl.java	Mon Nov 11 21:40:18 2013 +0100
@@ -384,61 +384,68 @@
     }
 
     private void printDebugValues(String phase, boolean reset) throws GraalInternalError {
-        TTY.println();
-        if (phase != null) {
-            TTY.println("<DebugValues:" + phase + ">");
-        } else {
-            TTY.println("<DebugValues>");
-        }
         if (Debug.isEnabled() && areMetricsOrTimersEnabled()) {
+            TTY.println();
+            if (phase != null) {
+                TTY.println("<DebugValues:" + phase + ">");
+            } else {
+                TTY.println("<DebugValues>");
+            }
             List<DebugValueMap> topLevelMaps = DebugValueMap.getTopLevelMaps();
             List<DebugValue> debugValues = KeyRegistry.getDebugValues();
             if (debugValues.size() > 0) {
-                ArrayList<DebugValue> sortedValues = new ArrayList<>(debugValues);
-                Collections.sort(sortedValues);
+                try {
+                    ArrayList<DebugValue> sortedValues = new ArrayList<>(debugValues);
+                    Collections.sort(sortedValues);
 
-                String summary = DebugValueSummary.getValue();
-                if (summary == null) {
-                    summary = "Complete";
-                }
-                switch (summary) {
-                    case "Name":
-                        printSummary(topLevelMaps, sortedValues);
-                        break;
-                    case "Partial": {
-                        DebugValueMap globalMap = new DebugValueMap("Global");
-                        for (DebugValueMap map : topLevelMaps) {
-                            flattenChildren(map, globalMap);
-                        }
-                        globalMap.normalize();
-                        printMap(new DebugValueScope(null, globalMap), sortedValues);
-                        break;
+                    String summary = DebugValueSummary.getValue();
+                    if (summary == null) {
+                        summary = "Complete";
                     }
-                    case "Complete": {
-                        DebugValueMap globalMap = new DebugValueMap("Global");
-                        for (DebugValueMap map : topLevelMaps) {
-                            globalMap.addChild(map);
+                    switch (summary) {
+                        case "Name":
+                            printSummary(topLevelMaps, sortedValues);
+                            break;
+                        case "Partial": {
+                            DebugValueMap globalMap = new DebugValueMap("Global");
+                            for (DebugValueMap map : topLevelMaps) {
+                                flattenChildren(map, globalMap);
+                            }
+                            globalMap.normalize();
+                            printMap(new DebugValueScope(null, globalMap), sortedValues);
+                            break;
                         }
-                        globalMap.group();
-                        globalMap.normalize();
-                        printMap(new DebugValueScope(null, globalMap), sortedValues);
-                        break;
+                        case "Complete": {
+                            DebugValueMap globalMap = new DebugValueMap("Global");
+                            for (DebugValueMap map : topLevelMaps) {
+                                globalMap.addChild(map);
+                            }
+                            globalMap.group();
+                            globalMap.normalize();
+                            printMap(new DebugValueScope(null, globalMap), sortedValues);
+                            break;
+                        }
+                        case "Thread":
+                            for (DebugValueMap map : topLevelMaps) {
+                                TTY.println("Showing the results for thread: " + map.getName());
+                                map.group();
+                                map.normalize();
+                                printMap(new DebugValueScope(null, map), sortedValues);
+                            }
+                            break;
+                        default:
+                            throw new GraalInternalError("Unknown summary type: %s", summary);
                     }
-                    case "Thread":
-                        for (DebugValueMap map : topLevelMaps) {
-                            TTY.println("Showing the results for thread: " + map.getName());
-                            map.group();
-                            map.normalize();
-                            printMap(new DebugValueScope(null, map), sortedValues);
+                    if (reset) {
+                        for (DebugValueMap topLevelMap : topLevelMaps) {
+                            topLevelMap.reset();
                         }
-                        break;
-                    default:
-                        throw new GraalInternalError("Unknown summary type: %s", summary);
-                }
-            }
-            if (reset) {
-                for (DebugValueMap topLevelMap : topLevelMaps) {
-                    topLevelMap.reset();
+                    }
+                } catch (Throwable e) {
+                    // Don't want this to change the exit status of the VM
+                    PrintStream err = System.err;
+                    err.println("Error while printing debug values:");
+                    e.printStackTrace();
                 }
             }
             if (phase != null) {
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotMetaAccessProvider.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotMetaAccessProvider.java	Mon Nov 11 21:40:18 2013 +0100
@@ -84,13 +84,15 @@
     private static final int REASON_SHIFT = 3;
     private static final int REASON_MASK = 0x1f;
     private static final int DEBUG_SHIFT = 8;
-    private static final int DEBUG_MASK = 0xffff;
+    private static final int DEBUG_MASK = 0x7fffff;
 
     @Override
-    public Constant encodeDeoptActionAndReason(DeoptimizationAction action, DeoptimizationReason reason, short speculationId) {
+    public Constant encodeDeoptActionAndReason(DeoptimizationAction action, DeoptimizationReason reason, int speculationId) {
         int actionValue = convertDeoptAction(action);
         int reasonValue = convertDeoptReason(reason);
-        Constant c = Constant.forInt(~((speculationId << DEBUG_SHIFT) | (reasonValue << REASON_SHIFT) | (actionValue << ACTION_SHIFT)));
+        int speculationValue = speculationId & DEBUG_MASK;
+        Constant c = Constant.forInt(~((speculationValue << DEBUG_SHIFT) | (reasonValue << REASON_SHIFT) | (actionValue << ACTION_SHIFT)));
+        assert c.asInt() < 0;
         return c;
     }
 
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotNmethod.java	Mon Nov 11 21:40:18 2013 +0100
@@ -116,7 +116,9 @@
     }
 
     public Object executeParallel(int dimX, int dimY, int dimZ, Object... args) throws InvalidInstalledCodeException {
-        assert checkArgs(args);
+
+        // For HSAIL, we do not pass the iteration variable, it comes from the workitemid
+        // assert checkArgs(args);
 
         assert isExternal(); // for now
 
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/NewObjectSnippets.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/NewObjectSnippets.java	Mon Nov 11 21:40:18 2013 +0100
@@ -23,7 +23,9 @@
 package com.oracle.graal.hotspot.replacements;
 
 import static com.oracle.graal.api.code.UnsignedMath.*;
+import static com.oracle.graal.api.meta.MetaUtil.*;
 import static com.oracle.graal.hotspot.replacements.HotSpotReplacementsUtil.*;
+import static com.oracle.graal.hotspot.replacements.NewObjectSnippets.Options.*;
 import static com.oracle.graal.nodes.PiArrayNode.*;
 import static com.oracle.graal.nodes.PiNode.*;
 import static com.oracle.graal.nodes.extended.BranchProbabilityNode.*;
@@ -64,11 +66,11 @@
 
     public static final LocationIdentity INIT_LOCATION = new NamedLocationIdentity("Initialization");
 
-    public static class Options {
+    static class Options {
 
         //@formatter:off
         @Option(help = "")
-        private static final OptionValue<Boolean> ProfileAllocations = new OptionValue<>(false);
+        static final OptionValue<Boolean> ProfileAllocations = new OptionValue<>(false);
         //@formatter:on
     }
 
@@ -114,7 +116,7 @@
 
     @Fold
     private static boolean doProfile() {
-        return Options.ProfileAllocations.getValue();
+        return ProfileAllocations.getValue();
     }
 
     private static void profileAllocation(String path, long size, String typeContext) {
@@ -315,7 +317,7 @@
             args.add("hub", hub);
             args.add("prototypeMarkWord", type.prototypeMarkWord());
             args.addConst("fillContents", newInstanceNode.fillContents());
-            args.addConst("typeContext", MetaUtil.toJavaName(type, false));
+            args.addConst("typeContext", ProfileAllocations.getValue() ? toJavaName(type, false) : "");
 
             SnippetTemplate template = template(args);
             Debug.log("Lowering allocateInstance in %s: node=%s, template=%s, arguments=%s", graph, newInstanceNode, template, args);
@@ -342,7 +344,7 @@
             args.addConst("headerSize", headerSize);
             args.addConst("log2ElementSize", log2ElementSize);
             args.addConst("fillContents", newArrayNode.fillContents());
-            args.addConst("typeContext", MetaUtil.toJavaName(arrayType, false));
+            args.addConst("typeContext", ProfileAllocations.getValue() ? toJavaName(arrayType, false) : "");
 
             SnippetTemplate template = template(args);
             Debug.log("Lowering allocateArray in %s: node=%s, template=%s, arguments=%s", graph, newArrayNode, template, args);
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/UnsafeArrayCopySnippets.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/UnsafeArrayCopySnippets.java	Mon Nov 11 21:40:18 2013 +0100
@@ -98,7 +98,7 @@
                 srcOffset -= VECTOR_SIZE;
                 destOffset -= VECTOR_SIZE;
                 Long a = UnsafeLoadNode.load(src, arrayBaseOffset + srcOffset, VECTOR_KIND, locationIdentity);
-                UnsafeStoreNode.store(dest, arrayBaseOffset + destOffset, a.longValue(), VECTOR_KIND, locationIdentity);
+                UnsafeStoreNode.store(dest, arrayBaseOffset + destOffset, a, VECTOR_KIND, locationIdentity);
             }
             // Pre-loop
             for (long i = 0; i < preLoopBytes; i += elementSize) {
@@ -118,7 +118,7 @@
             // Main-loop
             for (long i = 0; i < mainLoopBytes; i += VECTOR_SIZE) {
                 Long a = UnsafeLoadNode.load(src, arrayBaseOffset + srcOffset, VECTOR_KIND, locationIdentity);
-                UnsafeStoreNode.store(dest, arrayBaseOffset + destOffset, a.longValue(), VECTOR_KIND, locationIdentity);
+                UnsafeStoreNode.store(dest, arrayBaseOffset + destOffset, a, VECTOR_KIND, locationIdentity);
                 srcOffset += VECTOR_SIZE;
                 destOffset += VECTOR_SIZE;
             }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/WriteBarrierSnippets.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/WriteBarrierSnippets.java	Mon Nov 11 21:40:18 2013 +0100
@@ -65,11 +65,7 @@
     public static final LocationIdentity GC_INDEX_LOCATION = new NamedLocationIdentity("GC-Index");
 
     @Snippet
-    public static void serialWriteBarrier(Object object, Object location, @ConstantParameter boolean usePrecise, @ConstantParameter boolean alwaysNull) {
-        // No barriers are added if we are always storing a null.
-        if (alwaysNull) {
-            return;
-        }
+    public static void serialWriteBarrier(Object object, Object location, @ConstantParameter boolean usePrecise) {
         Object fixedObject = FixedValueAnchorNode.getObject(object);
         Pointer oop;
         if (usePrecise) {
@@ -165,11 +161,7 @@
     }
 
     @Snippet
-    public static void g1PostWriteBarrier(Object object, Object value, Object location, @ConstantParameter boolean usePrecise, @ConstantParameter boolean alwaysNull, @ConstantParameter boolean trace) {
-        // No barriers are added if we are always storing a null.
-        if (alwaysNull) {
-            return;
-        }
+    public static void g1PostWriteBarrier(Object object, Object value, Object location, @ConstantParameter boolean usePrecise, @ConstantParameter boolean trace) {
         Word thread = thread();
         Object fixedObject = FixedValueAnchorNode.getObject(object);
         Object fixedValue = FixedValueAnchorNode.getObject(value);
@@ -341,11 +333,14 @@
         }
 
         public void lower(SerialWriteBarrier writeBarrier, @SuppressWarnings("unused") LoweringTool tool) {
+            if (writeBarrier.alwaysNull()) {
+                writeBarrier.graph().removeFixed(writeBarrier);
+                return;
+            }
             Arguments args = new Arguments(serialWriteBarrier, writeBarrier.graph().getGuardsStage());
             args.add("object", writeBarrier.getObject());
             args.add("location", writeBarrier.getLocation());
             args.addConst("usePrecise", writeBarrier.usePrecise());
-            args.addConst("alwaysNull", writeBarrier.alwaysNull());
             template(args).instantiate(providers.getMetaAccess(), writeBarrier, DEFAULT_REPLACER, args);
         }
 
@@ -380,12 +375,15 @@
         }
 
         public void lower(G1PostWriteBarrier writeBarrierPost, @SuppressWarnings("unused") LoweringTool tool) {
+            if (writeBarrierPost.alwaysNull()) {
+                writeBarrierPost.graph().removeFixed(writeBarrierPost);
+                return;
+            }
             Arguments args = new Arguments(g1PostWriteBarrier, writeBarrierPost.graph().getGuardsStage());
             args.add("object", writeBarrierPost.getObject());
             args.add("value", writeBarrierPost.getValue());
             args.add("location", writeBarrierPost.getLocation());
             args.addConst("usePrecise", writeBarrierPost.usePrecise());
-            args.addConst("alwaysNull", writeBarrierPost.alwaysNull());
             args.addConst("trace", traceBarrier());
             template(args).instantiate(providers.getMetaAccess(), writeBarrierPost, DEFAULT_REPLACER, args);
         }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.jtt/src/com/oracle/graal/jtt/loop/LoopUnroll.java	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.jtt.loop;
+
+import com.oracle.graal.jtt.*;
+import org.junit.*;
+
+// Loop with a constant bound of 7 iterations: each pass multiplies ret by (2 + current) and divides current by 50.
+// run0 hands the method name "test" and the input 42 to runTest; presumably aimed at loop unrolling (per the class name).
+public class LoopUnroll extends JTTTest {
+
+    public static int test(int input) {
+        int ret = 2;
+        int current = input;
+        for (int i = 0; i < 7; i++) {
+            ret *= 2 + current;
+            current /= 50;
+        }
+        return ret;
+    }
+
+    @Test
+    public void run0() throws Throwable {
+        runTest("test", 42);
+    }
+
+}
--- a/graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILMove.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.lir.hsail/src/com/oracle/graal/lir/hsail/HSAILMove.java	Mon Nov 11 21:40:18 2013 +0100
@@ -153,6 +153,20 @@
         }
     }
 
+    public static class MembarOp extends HSAILLIRInstruction {
+
+        private final int barriers;
+
+        public MembarOp(final int barriers) {
+            this.barriers = barriers;
+        }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, HSAILAssembler masm) {
+            masm.emitMembar(barriers);
+        }
+    }
+
     public static class LoadOp extends MemOp {
 
         @Def({REG}) protected AllocatableValue result;
@@ -316,7 +330,54 @@
 
         @Override
         public void emitCode(TargetMethodAssembler tasm, HSAILAssembler masm) {
-            compareAndSwap(tasm, masm, result, address, cmpValue, newValue);
+            masm.emitAtomicCas(result, address.toAddress(), cmpValue, newValue);
+        }
+    }
+
+    @Opcode("CAS")
+    public static class CompareAndSwapCompressedOp extends CompareAndSwapOp {
+
+        @Temp({REG}) private AllocatableValue scratchCmpValue64;
+        @Temp({REG}) private AllocatableValue scratchNewValue64;
+        @Temp({REG}) private AllocatableValue scratchCmpValue32;
+        @Temp({REG}) private AllocatableValue scratchNewValue32;
+        @Temp({REG}) private AllocatableValue scratchCasResult32;
+        private final long base;
+        private final int shift;
+        private final int alignment;
+
+        public CompareAndSwapCompressedOp(AllocatableValue result, HSAILAddressValue address, AllocatableValue cmpValue, AllocatableValue newValue, AllocatableValue scratchCmpValue64,
+                        AllocatableValue scratchNewValue64, AllocatableValue scratchCmpValue32, AllocatableValue scratchNewValue32, AllocatableValue scratchCasResult32, long base, int shift,
+                        int alignment) {
+            super(result, address, cmpValue, newValue);
+            this.scratchCmpValue64 = scratchCmpValue64;
+            this.scratchNewValue64 = scratchNewValue64;
+            this.scratchCmpValue32 = scratchCmpValue32;
+            this.scratchNewValue32 = scratchNewValue32;
+            this.scratchCasResult32 = scratchCasResult32;
+            this.base = base;
+            this.shift = shift;
+            this.alignment = alignment;
+        }
+
+        @Override
+        public void emitCode(TargetMethodAssembler tasm, HSAILAssembler masm) {
+            // assume any encoded or decoded value could be null
+            boolean testForNull = true;
+            // set up scratch registers to be encoded versions
+            masm.emitMov(scratchCmpValue64, cmpValue);
+            encodePointer(masm, scratchCmpValue64, base, shift, alignment, testForNull);
+            masm.emitMov(scratchNewValue64, newValue);
+            encodePointer(masm, scratchNewValue64, base, shift, alignment, testForNull);
+            // get encoded versions into 32-bit registers
+            masm.emitConvertForceUnsigned(scratchCmpValue32, scratchCmpValue64);
+            masm.emitConvertForceUnsigned(scratchNewValue32, scratchNewValue64);
+            // finally do the cas
+            masm.emitAtomicCas(scratchCasResult32, address.toAddress(), scratchCmpValue32, scratchNewValue32);
+            // and convert the 32-bit CasResult back to 64-bit
+            masm.emitConvertForceUnsigned(result, scratchCasResult32);
+            // and decode/uncompress the 64-bit cas result
+            decodePointer(masm, result, base, shift, alignment, testForNull);
         }
     }
 
@@ -362,9 +423,4 @@
             throw GraalInternalError.shouldNotReachHere();
         }
     }
-
-    @SuppressWarnings("unused")
-    protected static void compareAndSwap(TargetMethodAssembler tasm, HSAILAssembler masm, AllocatableValue result, HSAILAddressValue address, AllocatableValue cmpValue, AllocatableValue newValue) {
-        throw new InternalError("NYI");
-    }
 }
--- a/graal/com.oracle.graal.loop/src/com/oracle/graal/loop/BasicInductionVariable.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.loop/src/com/oracle/graal/loop/BasicInductionVariable.java	Mon Nov 11 21:40:18 2013 +0100
@@ -123,19 +123,25 @@
         Kind fromKind = phi.kind();
         StructuredGraph graph = graph();
         ValueNode stride = strideNode();
-        ValueNode maxTripCount = loop.counted().maxTripCountNode(assumePositiveTripCount);
         ValueNode initNode = this.initNode();
         if (fromKind != kind) {
             stride = graph.unique(new ConvertNode(fromKind, kind, stride));
-            maxTripCount = graph.unique(new ConvertNode(fromKind, kind, maxTripCount));
             initNode = graph.unique(new ConvertNode(fromKind, kind, initNode));
         }
+        ValueNode maxTripCount = loop.counted().maxTripCountNode(assumePositiveTripCount);
+        if (maxTripCount.kind() != kind) {
+            maxTripCount = graph.unique(new ConvertNode(maxTripCount.kind(), kind, maxTripCount));
+        }
         return IntegerArithmeticNode.add(graph, IntegerArithmeticNode.mul(graph, stride, IntegerArithmeticNode.sub(graph, maxTripCount, ConstantNode.forIntegerKind(kind, 1, graph))), initNode);
     }
 
     @Override
     public ValueNode exitValueNode() {
+        Kind kind = phi.kind();
         ValueNode maxTripCount = loop.counted().maxTripCountNode(false);
+        if (maxTripCount.kind() != kind) {
+            maxTripCount = graph().unique(new ConvertNode(maxTripCount.kind(), kind, maxTripCount));
+        }
         return IntegerArithmeticNode.add(graph(), IntegerArithmeticNode.mul(graph(), strideNode(), maxTripCount), initNode());
     }
 
--- a/graal/com.oracle.graal.loop/src/com/oracle/graal/loop/LoopEx.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.loop/src/com/oracle/graal/loop/LoopEx.java	Mon Nov 11 21:40:18 2013 +0100
@@ -64,6 +64,11 @@
         return whole;
     }
 
+    public void invalidateFragments() {
+        inside = null;
+        whole = null;
+    }
+
     @SuppressWarnings("unused")
     public LoopFragmentInsideFrom insideFrom(FixedNode point) {
         // TODO (gd)
--- a/graal/com.oracle.graal.loop/src/com/oracle/graal/loop/LoopFragment.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.loop/src/com/oracle/graal/loop/LoopFragment.java	Mon Nov 11 21:40:18 2013 +0100
@@ -151,6 +151,10 @@
     protected static NodeBitMap computeNodes(Graph graph, Iterable<AbstractBeginNode> blocks, Iterable<AbstractBeginNode> earlyExits) {
         final NodeBitMap nodes = graph.createNodeBitMap(true);
         for (AbstractBeginNode b : blocks) {
+            if (b.isDeleted()) {
+                continue;
+            }
+
             for (Node n : b.getBlockNodes()) {
                 if (n instanceof Invoke) {
                     nodes.mark(((Invoke) n).callTarget());
@@ -165,6 +169,10 @@
             }
         }
         for (AbstractBeginNode earlyExit : earlyExits) {
+            if (earlyExit.isDeleted()) {
+                continue;
+            }
+
             FrameState stateAfter = earlyExit.stateAfter();
             if (stateAfter != null) {
                 nodes.mark(stateAfter);
@@ -184,6 +192,10 @@
 
         final NodeBitMap notloopNodes = graph.createNodeBitMap(true);
         for (AbstractBeginNode b : blocks) {
+            if (b.isDeleted()) {
+                continue;
+            }
+
             for (Node n : b.getBlockNodes()) {
                 if (n instanceof CommitAllocationNode) {
                     for (VirtualObjectNode obj : ((CommitAllocationNode) n).getVirtualObjects()) {
--- a/graal/com.oracle.graal.loop/src/com/oracle/graal/loop/LoopTransformations.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.loop/src/com/oracle/graal/loop/LoopTransformations.java	Mon Nov 11 21:40:18 2013 +0100
@@ -25,8 +25,8 @@
 import static com.oracle.graal.phases.GraalOptions.*;
 
 import com.oracle.graal.api.code.*;
+import com.oracle.graal.graph.Graph.Mark;
 import com.oracle.graal.graph.*;
-import com.oracle.graal.graph.Graph.Mark;
 import com.oracle.graal.graph.NodeClass.NodeClassIterator;
 import com.oracle.graal.graph.NodeClass.Position;
 import com.oracle.graal.nodes.*;
@@ -63,6 +63,7 @@
             Mark mark = graph.getMark();
             peel(loop);
             canonicalizer.applyIncremental(graph, context, mark);
+            loop.invalidateFragments();
             if (iterations++ > UNROLL_LIMIT || graph.getNodeCount() > MaximumDesiredSize.getValue() * 3) {
                 throw new BailoutException("FullUnroll : Graph seems to grow out of proportion");
             }
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/DeoptimizeNode.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/DeoptimizeNode.java	Mon Nov 11 21:40:18 2013 +0100
@@ -31,13 +31,13 @@
 
     private final DeoptimizationAction action;
     private final DeoptimizationReason reason;
-    private final short speculationId;
+    private final int speculationId;
 
     public DeoptimizeNode(DeoptimizationAction action, DeoptimizationReason reason) {
         this(action, reason, (short) 0);
     }
 
-    public DeoptimizeNode(DeoptimizationAction action, DeoptimizationReason reason, short speculationId) {
+    public DeoptimizeNode(DeoptimizationAction action, DeoptimizationReason reason, int speculationId) {
         assert action != null;
         assert reason != null;
         this.action = action;
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/PhiNode.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/PhiNode.java	Mon Nov 11 21:40:18 2013 +0100
@@ -51,7 +51,7 @@
     @Input(notDataflow = true) private MergeNode merge;
     @Input private final NodeInputList<ValueNode> values = new NodeInputList<>(this);
     private final PhiType type;
-    private final Object identity;
+    private final LocationIdentity identity;
 
     /**
      * Create a value phi ({@link PhiType#Value}) with the specified kind.
@@ -77,7 +77,7 @@
      * @param type the type of the new phi
      * @param merge the merge that the new phi belongs to
      */
-    public PhiNode(PhiType type, MergeNode merge, Object identity) {
+    public PhiNode(PhiType type, MergeNode merge, LocationIdentity identity) {
         super(type.stamp);
         assert type.stamp != null : merge + " " + type;
         this.type = type;
@@ -93,7 +93,7 @@
         return merge;
     }
 
-    public Object getIdentity() {
+    public LocationIdentity getIdentity() {
         assert type != PhiType.Value;
         return identity;
     }
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/ProxyNode.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/ProxyNode.java	Mon Nov 11 21:40:18 2013 +0100
@@ -22,6 +22,7 @@
  */
 package com.oracle.graal.nodes;
 
+import com.oracle.graal.api.meta.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.graph.Node.ValueNumberable;
 import com.oracle.graal.graph.spi.*;
@@ -41,9 +42,9 @@
     @Input(notDataflow = true) private AbstractBeginNode proxyPoint;
     @Input private ValueNode value;
     private final PhiType type;
-    private final Object identity;
+    private final LocationIdentity identity;
 
-    public ProxyNode(ValueNode value, AbstractBeginNode exit, PhiType type, Object identity) {
+    public ProxyNode(ValueNode value, AbstractBeginNode exit, PhiType type, LocationIdentity identity) {
         super(type == PhiType.Value ? value.stamp() : type.stamp);
         this.type = type;
         this.identity = identity;
@@ -69,7 +70,7 @@
         return type;
     }
 
-    public Object getIdentity() {
+    public LocationIdentity getIdentity() {
         assert type != PhiType.Value;
         return identity;
     }
@@ -113,7 +114,7 @@
         return graph.unique(new ProxyNode(value, exit, PhiType.Value, null));
     }
 
-    public static ProxyNode forMemory(ValueNode value, AbstractBeginNode exit, Object location, StructuredGraph graph) {
+    public static ProxyNode forMemory(ValueNode value, AbstractBeginNode exit, LocationIdentity location, StructuredGraph graph) {
         return graph.unique(new ProxyNode(value, exit, PhiType.Memory, location));
     }
 
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/calc/CompareNode.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/calc/CompareNode.java	Mon Nov 11 21:40:18 2013 +0100
@@ -140,10 +140,35 @@
                 setX(convertX.value());
                 setY(convertY.value());
             }
+        } else if (x() instanceof ConvertNode && y().isConstant()) {
+            ConvertNode convertX = (ConvertNode) x();
+            ConstantNode newY = canonicalConvertConstant(convertX, y().asConstant());
+            if (newY != null) {
+                setX(convertX.value());
+                setY(newY);
+            }
+        } else if (y() instanceof ConvertNode && x().isConstant()) {
+            ConvertNode convertY = (ConvertNode) y();
+            ConstantNode newX = canonicalConvertConstant(convertY, x().asConstant());
+            if (newX != null) {
+                setX(newX);
+                setY(convertY.value());
+            }
         }
         return this;
     }
 
+    private static ConstantNode canonicalConvertConstant(ConvertNode convert, Constant constant) {
+        if (convert.isLossless()) {
+            assert constant.getKind() == convert.getToKind();
+            Constant reverseConverted = ConvertNode.convert(convert.getToKind(), convert.getFromKind(), constant);
+            if (convert.evalConst(reverseConverted).equals(constant)) {
+                return ConstantNode.forPrimitive(reverseConverted, convert.graph());
+            }
+        }
+        return null;
+    }
+
     public static CompareNode createCompareNode(StructuredGraph graph, Condition condition, ValueNode x, ValueNode y) {
         assert x.kind() == y.kind();
         assert condition.isCanonical() : "condition is not canonical: " + condition;
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/calc/ConvertNode.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/calc/ConvertNode.java	Mon Nov 11 21:40:18 2013 +0100
@@ -87,9 +87,7 @@
         throw GraalInternalError.shouldNotReachHere();
     }
 
-    public Constant evalConst(Constant... inputs) {
-        assert inputs.length == 1;
-        Constant c = inputs[0];
+    public static Constant convert(Kind from, Kind to, Constant c) {
         switch (from) {
             case Byte:
                 byte byteVal = (byte) c.asInt();
@@ -228,10 +226,26 @@
         throw GraalInternalError.shouldNotReachHere();
     }
 
+    public Constant evalConst(Constant... inputs) {
+        assert inputs.length == 1;
+        return convert(from, to, inputs[0]);
+    }
+
     @Override
     public Node canonical(CanonicalizerTool tool) {
-        if (value.isConstant()) {
+        if (from == to) {
+            return value;
+        } else if (value.isConstant()) {
             return ConstantNode.forPrimitive(evalConst(value.asConstant()), graph());
+        } else if (value instanceof ConvertNode) {
+            ConvertNode other = (ConvertNode) value;
+            if (other.isLossless() && other.to != Kind.Char) {
+                if (other.from == this.to) {
+                    return other.value();
+                } else {
+                    return graph().unique(new ConvertNode(other.from, this.to, other.value()));
+                }
+            }
         }
         return this;
     }
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/FloatingAccessNode.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/FloatingAccessNode.java	Mon Nov 11 21:40:18 2013 +0100
@@ -22,10 +22,11 @@
  */
 package com.oracle.graal.nodes.extended;
 
+import com.oracle.graal.api.meta.*;
 import com.oracle.graal.nodes.*;
 import com.oracle.graal.nodes.type.*;
 
-public abstract class FloatingAccessNode extends FloatingGuardedNode implements Access {
+public abstract class FloatingAccessNode extends FloatingGuardedNode implements Access, MemoryAccess {
 
     @Input private ValueNode object;
     @Input private LocationNode location;
@@ -46,6 +47,10 @@
         return location;
     }
 
+    public LocationIdentity getLocationIdentity() {
+        return location.getLocationIdentity();
+    }
+
     public boolean getNullCheck() {
         return nullCheck;
     }
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/FloatingReadNode.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/FloatingReadNode.java	Mon Nov 11 21:40:18 2013 +0100
@@ -51,7 +51,7 @@
         this.lastLocationAccess = lastLocationAccess;
     }
 
-    public Node lastLocationAccess() {
+    public Node getLastLocationAccess() {
         return lastLocationAccess;
     }
 
@@ -90,7 +90,7 @@
 
     @Override
     public boolean verify() {
-        Node lla = lastLocationAccess();
+        Node lla = getLastLocationAccess();
         assert lla == null || isMemoryCheckPoint(lla) || isMemoryPhi(lla) || isMemoryProxy(lla) : "lastLocationAccess of " + this + " should be a MemoryCheckpoint, but is " + lla;
         return super.verify();
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/MemoryAccess.java	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.nodes.extended;
+
+import com.oracle.graal.api.meta.*;
+import com.oracle.graal.graph.*;
+
+/**
+ * This interface marks nodes that access some memory location, and that have an edge to the last
+ * node that kills this location.
+ */
+public interface MemoryAccess {
+
+    LocationIdentity getLocationIdentity();
+
+    Node getLastLocationAccess();
+
+    void setLastLocationAccess(Node lla);
+}
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/UnsafeAccessNode.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/UnsafeAccessNode.java	Mon Nov 11 21:40:18 2013 +0100
@@ -73,6 +73,7 @@
                 // the null check and if a field is found, the offset is so small that this is
                 // never a valid access of an arbitrary address.
                 if (field != null && field.getKind() == this.accessKind()) {
+                    assert !graph().isAfterFloatingReadPhase() : "cannot add more precise memory location after floating read phase";
                     return cloneAsFieldAccess(field);
                 }
             }
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/WriteNode.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/WriteNode.java	Mon Nov 11 21:40:18 2013 +0100
@@ -23,6 +23,7 @@
 package com.oracle.graal.nodes.extended;
 
 import com.oracle.graal.api.meta.*;
+import com.oracle.graal.graph.*;
 import com.oracle.graal.nodes.*;
 import com.oracle.graal.nodes.extended.LocationNode.Location;
 import com.oracle.graal.nodes.spi.*;
@@ -32,12 +33,14 @@
 /**
  * Writes a given {@linkplain #value() value} a {@linkplain AccessNode memory location}.
  */
-public final class WriteNode extends AccessNode implements StateSplit, LIRLowerable, MemoryCheckpoint.Single, Virtualizable {
+public final class WriteNode extends AccessNode implements StateSplit, LIRLowerable, MemoryCheckpoint.Single, MemoryAccess, Virtualizable {
 
     @Input private ValueNode value;
     @Input(notDataflow = true) private FrameState stateAfter;
     private final boolean initialization;
 
+    @Input private Node lastLocationAccess;
+
     public FrameState stateAfter() {
         return stateAfter;
     }
@@ -89,6 +92,15 @@
         return location().getLocationIdentity();
     }
 
+    public Node getLastLocationAccess() {
+        return lastLocationAccess;
+    }
+
+    public void setLastLocationAccess(Node lla) {
+        updateUsages(lastLocationAccess, lla);
+        lastLocationAccess = lla;
+    }
+
     @Override
     public void virtualize(VirtualizerTool tool) {
         if (location() instanceof ConstantLocationNode) {
--- a/graal/com.oracle.graal.options.test/src/com/oracle/graal/options/test/TestOptionValue.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.options.test/src/com/oracle/graal/options/test/TestOptionValue.java	Mon Nov 11 21:40:18 2013 +0100
@@ -25,6 +25,8 @@
 import static com.oracle.graal.options.test.TestOptionValue.Options.*;
 import static org.junit.Assert.*;
 
+import java.util.*;
+
 import org.junit.*;
 
 import com.oracle.graal.options.*;
@@ -99,4 +101,27 @@
             // expected
         }
     }
+
+    @Test
+    public void toStringTest() {
+        assertEquals("com.oracle.graal.options.test.TestOptionValue$Options.Mutable=original", Mutable.toString());
+        try (OverrideScope s1 = OptionValue.override(Mutable, "override1")) {
+            assertEquals("com.oracle.graal.options.test.TestOptionValue$Options.Mutable=override1", Mutable.toString());
+            try (OverrideScope s2 = OptionValue.override(Mutable, "override2")) {
+                assertEquals("com.oracle.graal.options.test.TestOptionValue$Options.Mutable=override2", Mutable.toString());
+            }
+        }
+    }
+
+    @Test
+    public void getValuesTest() {
+        assertEquals(Arrays.asList("original"), Mutable.getValues(null));
+        assertEquals(Arrays.asList(true), Stable.getValues(null));
+        try (OverrideScope s1 = OptionValue.override(Mutable, "override1")) {
+            assertEquals(Arrays.asList("override1", "original"), Mutable.getValues(null));
+            try (OverrideScope s2 = OptionValue.override(Mutable, "override2")) {
+                assertEquals(Arrays.asList("override2", "override1", "original"), Mutable.getValues(null));
+            }
+        }
+    }
 }
--- a/graal/com.oracle.graal.options/src/com/oracle/graal/options/OptionValue.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.options/src/com/oracle/graal/options/OptionValue.java	Mon Nov 11 21:40:18 2013 +0100
@@ -194,7 +194,7 @@
 
     @Override
     public String toString() {
-        return getName() + "=" + value;
+        return getName() + "=" + getValue();
     }
 
     /**
@@ -220,6 +220,26 @@
     }
 
     /**
+     * Gets the values of this option including overridden values.
+     * 
+     * @param c the collection to which the values are added. If null, one is allocated.
+     * @return the collection to which the values were added, ordered from the innermost active
+     *         override to the original (non-overridden) value
+     */
+    @SuppressWarnings("unchecked")
+    public Collection<T> getValues(Collection<T> c) {
+        Collection<T> values = c == null ? new ArrayList<T>() : c;
+        if (!(this instanceof StableOptionValue)) {
+            OverrideScope overrideScope = overrideScopes.get();
+            if (overrideScope != null) {
+                overrideScope.getOverrides(this, (Collection<Object>) values);
+            }
+        }
+        values.add(value);
+        return values;
+    }
+
+    /**
      * Sets the value of this option.
      */
     @SuppressWarnings("unchecked")
@@ -236,6 +256,8 @@
 
         abstract <T> T getOverride(OptionValue<T> option);
 
+        abstract void getOverrides(OptionValue<?> option, Collection<Object> c);
+
         public abstract void close();
     }
 
@@ -268,6 +290,13 @@
         }
 
         @Override
+        void getOverrides(OptionValue<?> key, Collection<Object> c) {
+            if (key == this.option) {
+                c.add(value);
+            }
+        }
+
+        @Override
         public void close() {
             overrideScopes.set(null);
         }
@@ -333,6 +362,17 @@
         }
 
         @Override
+        void getOverrides(OptionValue<?> option, Collection<Object> c) {
+            Object v = overrides.get(option);
+            if (v != null) {
+                c.add(v);
+            }
+            if (parent != null) {
+                parent.getOverrides(option, c);
+            }
+        }
+
+        @Override
         public void close() {
             if (!overrides.isEmpty()) {
                 overrideScopes.set(parent);
--- a/graal/com.oracle.graal.phases.common/src/com/oracle/graal/phases/common/FloatingReadPhase.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.phases.common/src/com/oracle/graal/phases/common/FloatingReadPhase.java	Mon Nov 11 21:40:18 2013 +0100
@@ -176,6 +176,10 @@
 
         @Override
         protected MemoryMapImpl processNode(FixedNode node, MemoryMapImpl state) {
+            if (node instanceof MemoryAccess) {
+                processAccess((MemoryAccess) node, state);
+            }
+
             if (node instanceof FloatableAccessNode && execmode == ExecutionMode.CREATE_FLOATING_READS) {
                 processFloatable((FloatableAccessNode) node, state);
             } else if (node instanceof MemoryCheckpoint.Single) {
@@ -191,6 +195,14 @@
             return state;
         }
 
+        private static void processAccess(MemoryAccess access, MemoryMapImpl state) {
+            LocationIdentity locationIdentity = access.getLocationIdentity();
+            if (locationIdentity != LocationIdentity.ANY_LOCATION) {
+                ValueNode lastLocationAccess = state.getLastLocationAccess(locationIdentity);
+                access.setLastLocationAccess(lastLocationAccess);
+            }
+        }
+
         private static void processCheckpoint(MemoryCheckpoint.Single checkpoint, MemoryMapImpl state) {
             processIdentity(checkpoint.getLocationIdentity(), checkpoint, state);
         }
--- a/graal/com.oracle.graal.phases.common/src/com/oracle/graal/phases/common/GuardLoweringPhase.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.phases.common/src/com/oracle/graal/phases/common/GuardLoweringPhase.java	Mon Nov 11 21:40:18 2013 +0100
@@ -27,7 +27,6 @@
 import java.util.*;
 import java.util.Map.Entry;
 
-import com.oracle.graal.debug.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.nodes.*;
 import com.oracle.graal.nodes.StructuredGraph.GuardsStage;
@@ -35,6 +34,7 @@
 import com.oracle.graal.nodes.cfg.*;
 import com.oracle.graal.nodes.extended.*;
 import com.oracle.graal.nodes.util.*;
+import com.oracle.graal.options.*;
 import com.oracle.graal.phases.*;
 import com.oracle.graal.phases.graph.*;
 import com.oracle.graal.phases.schedule.*;
@@ -54,6 +54,12 @@
  * does the actual control-flow expansion of the remaining {@link GuardNode GuardNodes}.
  */
 public class GuardLoweringPhase extends BasePhase<MidTierContext> {
+    static class Options {
+        //@formatter:off
+        @Option(help = "")
+        public static final OptionValue<Boolean> UseGuardIdAsSpeculationId = new OptionValue<>(false);
+        //@formatter:on
+    }
 
     private static class UseImplicitNullChecks extends ScheduledNodeIterator {
 
@@ -125,9 +131,11 @@
     private static class LowerGuards extends ScheduledNodeIterator {
 
         private final Block block;
+        private boolean useGuardIdAsSpeculationId;
 
         public LowerGuards(Block block) {
             this.block = block;
+            this.useGuardIdAsSpeculationId = Options.UseGuardIdAsSpeculationId.getValue();
         }
 
         @Override
@@ -146,7 +154,7 @@
             StructuredGraph graph = guard.graph();
             AbstractBeginNode fastPath = graph.add(new BeginNode());
             @SuppressWarnings("deprecation")
-            DeoptimizeNode deopt = graph.add(new DeoptimizeNode(guard.action(), guard.reason(), (short) (Debug.isEnabled() ? guard.getId() : 0)));
+            DeoptimizeNode deopt = graph.add(new DeoptimizeNode(guard.action(), guard.reason(), useGuardIdAsSpeculationId ? guard.getId() : 0));
             AbstractBeginNode deoptBranch = AbstractBeginNode.begin(deopt);
             AbstractBeginNode trueSuccessor;
             AbstractBeginNode falseSuccessor;
--- a/graal/com.oracle.graal.phases.common/src/com/oracle/graal/phases/common/ReadEliminationPhase.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.phases.common/src/com/oracle/graal/phases/common/ReadEliminationPhase.java	Mon Nov 11 21:40:18 2013 +0100
@@ -35,7 +35,7 @@
         for (FloatingReadNode n : graph.getNodes(FloatingReadNode.class)) {
             if (isReadEliminable(n)) {
                 NodeMap<ValueNode> nodeMap = n.graph().createNodeMap();
-                ValueNode value = getValue(n, n.lastLocationAccess(), nodeMap);
+                ValueNode value = getValue(n, n.getLastLocationAccess(), nodeMap);
                 Debug.log("Eliminated memory read %1.1s and replaced with node %s", n, value);
                 graph.replaceFloating(n, value);
             }
@@ -43,7 +43,7 @@
     }
 
     private static boolean isReadEliminable(FloatingReadNode n) {
-        return isWrites(n, n.lastLocationAccess(), n.graph().createNodeBitMap());
+        return isWrites(n, n.getLastLocationAccess(), n.graph().createNodeBitMap());
     }
 
     private static boolean isWrites(FloatingReadNode n, Node lastLocationAccess, NodeBitMap visited) {
--- a/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/schedule/SchedulePhase.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/schedule/SchedulePhase.java	Mon Nov 11 21:40:18 2013 +0100
@@ -189,7 +189,7 @@
                 MergeNode mergeNode = (MergeNode) block.getBeginNode();
                 for (PhiNode phi : mergeNode.usages().filter(PhiNode.class)) {
                     if (phi.type() == PhiType.Memory) {
-                        LocationIdentity identity = (LocationIdentity) phi.getIdentity();
+                        LocationIdentity identity = phi.getIdentity();
                         locationKilledBy(identity, phi, currentState);
                     }
                 }
@@ -266,7 +266,7 @@
                 for (Node usage : begin.usages()) {
                     if (usage instanceof ProxyNode && ((ProxyNode) usage).type() == PhiType.Memory) {
                         ProxyNode proxy = (ProxyNode) usage;
-                        LocationIdentity identity = (LocationIdentity) proxy.getIdentity();
+                        LocationIdentity identity = proxy.getIdentity();
                         locationKilledBy(identity, proxy, exitState);
                     }
                 }
@@ -347,7 +347,7 @@
                 if (n.location().getLocationIdentity() == FINAL_LOCATION) {
                     continue;
                 }
-                Node first = n.lastLocationAccess();
+                Node first = n.getLastLocationAccess();
                 assert first != null;
 
                 Map<LocationIdentity, Node> killMap = blockToKillMapInit.get(forKillLocation(first));
@@ -424,7 +424,7 @@
         } else if (n instanceof FloatingReadNode) {
             FloatingReadNode frn = (FloatingReadNode) n;
             Debug.printf(" // from %s", frn.location().getLocationIdentity());
-            Debug.printf(", lastAccess: %s", frn.lastLocationAccess());
+            Debug.printf(", lastAccess: %s", frn.getLastLocationAccess());
             Debug.printf(", object: %s", frn.object());
         } else if (n instanceof GuardNode) {
             Debug.printf(", guard: %s", ((GuardNode) n).getGuard());
@@ -544,7 +544,7 @@
         LocationIdentity locid = n.location().getLocationIdentity();
         assert locid != FINAL_LOCATION;
 
-        Node upperBound = n.lastLocationAccess();
+        Node upperBound = n.getLastLocationAccess();
         Block upperBoundBlock = forKillLocation(upperBound);
         Block earliestBlock = earliestBlock(n);
         assert upperBoundBlock.dominates(earliestBlock) : "upper bound (" + upperBoundBlock + ") should dominate earliest (" + earliestBlock + ")";
@@ -925,7 +925,7 @@
                     FloatingReadNode frn = (FloatingReadNode) i;
                     if (frn.location().getLocationIdentity() != FINAL_LOCATION) {
                         reads.add(frn);
-                        if (nodesFor(b).contains(frn.lastLocationAccess())) {
+                        if (nodesFor(b).contains(frn.getLastLocationAccess())) {
                             assert !beforeLastLocation.isMarked(frn);
                             beforeLastLocation.mark(frn);
                         }
@@ -970,7 +970,7 @@
         for (FloatingReadNode frn : new ArrayList<>(reads)) { // TODO: change to iterator?
             LocationIdentity readLocation = frn.location().getLocationIdentity();
             assert readLocation != FINAL_LOCATION;
-            if (frn.lastLocationAccess() == node) {
+            if (frn.getLastLocationAccess() == node) {
                 assert identity == ANY_LOCATION || readLocation == identity : "location doesn't match: " + readLocation + ", " + identity;
                 beforeLastLocation.clear(frn);
             } else if (!beforeLastLocation.isMarked(frn) && (readLocation == identity || (!(node instanceof StartNode) && ANY_LOCATION == identity))) {
--- a/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/NodeIntrinsificationPhase.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/NodeIntrinsificationPhase.java	Mon Nov 11 21:40:18 2013 +0100
@@ -350,6 +350,13 @@
             unbox.replaceAtUsages(intrinsifiedNode);
             graph.removeFloating(unbox);
             Debug.log("%s: Removed an UnboxNode", Debug.contextSnapshot(JavaMethod.class));
+        } else if (usage instanceof UnsafeStoreNode) {
+            UnsafeStoreNode store = (UnsafeStoreNode) usage;
+            store.replaceFirstInput(input, intrinsifiedNode);
+        } else if (usage instanceof LoadFieldNode) {
+            LoadFieldNode load = (LoadFieldNode) usage;
+            load.replaceAtUsages(intrinsifiedNode);
+            graph.removeFixed(load);
         } else if (usage instanceof MethodCallTargetNode) {
             MethodCallTargetNode checkCastCallTarget = (MethodCallTargetNode) usage;
             assert checkCastCallTarget.targetMethod().getAnnotation(NodeIntrinsic.class) != null : "checkcast at " + sourceLocation(input) +
--- a/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/ReplacementsImpl.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/ReplacementsImpl.java	Mon Nov 11 21:40:18 2013 +0100
@@ -36,6 +36,7 @@
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.api.replacements.*;
 import com.oracle.graal.debug.*;
+import com.oracle.graal.debug.internal.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.java.*;
 import com.oracle.graal.nodes.*;
@@ -80,6 +81,7 @@
     }
 
     private static final boolean UseSnippetGraphCache = Boolean.parseBoolean(System.getProperty("graal.useSnippetGraphCache", "true"));
+    private static final DebugTimer SnippetPreparationTime = Debug.timer("SnippetPreparationTime");
 
     public StructuredGraph getSnippet(ResolvedJavaMethod method) {
         assert method.getAnnotation(Snippet.class) != null : "Snippet must be annotated with @" + Snippet.class.getSimpleName();
@@ -87,13 +89,15 @@
 
         StructuredGraph graph = UseSnippetGraphCache ? graphs.get(method) : null;
         if (graph == null) {
-            StructuredGraph newGraph = makeGraph(method, null, inliningPolicy(method), method.getAnnotation(Snippet.class).removeAllFrameStates());
-            if (UseSnippetGraphCache) {
-                return newGraph;
+            try (TimerCloseable a = SnippetPreparationTime.start()) {
+                StructuredGraph newGraph = makeGraph(method, null, inliningPolicy(method), method.getAnnotation(Snippet.class).removeAllFrameStates());
+                Debug.metric("SnippetNodeCount[" + method.getName() + "]").add(newGraph.getNodeCount());
+                if (!UseSnippetGraphCache) {
+                    return newGraph;
+                }
+                graphs.putIfAbsent(method, newGraph);
+                graph = graphs.get(method);
             }
-            graphs.putIfAbsent(method, newGraph);
-            graph = graphs.get(method);
-
         }
         return graph;
     }
--- a/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/SnippetTemplate.java	Mon Nov 11 21:34:44 2013 +0100
+++ b/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/SnippetTemplate.java	Mon Nov 11 21:40:18 2013 +0100
@@ -23,7 +23,9 @@
 package com.oracle.graal.replacements;
 
 import static com.oracle.graal.api.meta.LocationIdentity.*;
+import static com.oracle.graal.api.meta.MetaUtil.*;
 
+import java.io.*;
 import java.lang.reflect.*;
 import java.util.*;
 import java.util.concurrent.*;
@@ -33,8 +35,8 @@
 import com.oracle.graal.api.replacements.*;
 import com.oracle.graal.debug.*;
 import com.oracle.graal.debug.internal.*;
+import com.oracle.graal.graph.Graph.Mark;
 import com.oracle.graal.graph.*;
-import com.oracle.graal.graph.Graph.Mark;
 import com.oracle.graal.graph.iterators.*;
 import com.oracle.graal.loop.*;
 import com.oracle.graal.nodes.*;
@@ -72,8 +74,20 @@
         protected final ResolvedJavaMethod method;
         protected final boolean[] constantParameters;
         protected final boolean[] varargsParameters;
+
+        /**
+         * Times instantiations of all templates derived from this snippet.
+         * 
+         * @see SnippetTemplate#instantiationTimer
+         */
+        private final DebugTimer instantiationTimer;
+
+        /**
+         * Counts instantiations of all templates derived from this snippet.
+         * 
+         * @see SnippetTemplate#instantiationCounter
+         */
         private final DebugMetric instantiationCounter;
-        private final DebugTimer instantiationTimer;
 
         /**
          * The parameter names, taken from the local variables table. Only used for assertion
@@ -102,6 +116,17 @@
             assert initNames();
         }
 
+        private int templateCount; // number of templates created so far from this snippet
+        /** Counts a new template; while the template cache is enabled, warns on System.err each time the count is above MaxTemplatesPerSnippet. */
+        void notifyNewTemplate() {
+            templateCount++;
+            if (!UseSnippetTemplateCache || templateCount <= MaxTemplatesPerSnippet) {
+                return;
+            }
+            PrintStream err = System.err;
+            err.printf("WARNING: Exceeded %d templates for snippet %s%n" + "         Adjust maximum with %s system property%n", MaxTemplatesPerSnippet, format("%h.%n(%p)", method), MAX_TEMPLATES_PER_SNIPPET_PROPERTY_NAME);
+        }
+
         private boolean initNames() {
             int slotIdx = 0;
             for (int i = 0; i < names.length; i++) {
@@ -333,12 +358,12 @@
         }
     }
 
-    private static final DebugTimer SnippetCreationAndSpecialization = Debug.timer("SnippetCreationAndSpecialization");
-    private static final DebugMetric SnippetSpecializations = Debug.metric("SnippetSpecializations");
-    private static final DebugMetric SnippetSpecializationsNodeCount = Debug.metric("SnippetSpecializationsNodeCount");
-    private static final DebugMetric SnippetGraphsNodeCount = Debug.metric("SnippetGraphsNodeCount");
+    private static final DebugTimer SnippetTemplateCreationTime = Debug.timer("SnippetTemplateCreationTime");
+    private static final DebugMetric SnippetTemplates = Debug.metric("SnippetTemplateCount");
 
+    private static final String MAX_TEMPLATES_PER_SNIPPET_PROPERTY_NAME = "graal.maxTemplatesPerSnippet";
     private static final boolean UseSnippetTemplateCache = Boolean.parseBoolean(System.getProperty("graal.useSnippetTemplateCache", "true"));
+    private static final int MaxTemplatesPerSnippet = Integer.getInteger(MAX_TEMPLATES_PER_SNIPPET_PROPERTY_NAME, 50);
 
     /**
      * Base class for snippet classes. It provides a cache for {@link SnippetTemplate}s.
@@ -382,8 +407,8 @@
         protected SnippetTemplate template(final Arguments args) {
             SnippetTemplate template = UseSnippetTemplateCache ? templates.get(args.cacheKey) : null;
             if (template == null) {
-                SnippetSpecializations.increment();
-                try (TimerCloseable a = SnippetCreationAndSpecialization.start()) {
+                SnippetTemplates.increment();
+                try (TimerCloseable a = SnippetTemplateCreationTime.start()) {
                     template = Debug.scope("SnippetSpecialization", args.info.method, new Callable<SnippetTemplate>() {
 
                         @Override
@@ -415,12 +440,38 @@
         return false;
     }
 
+    /** Names a debug value as {@code category[method(param=value, ...)]} using only the constant parameters; {@code null} if debugging is disabled. */
+    private static String debugValueName(String category, Arguments args) {
+        if (!Debug.isEnabled()) {
+            return null;
+        }
+        StringBuilder name = new StringBuilder(category).append('[');
+        SnippetInfo snippet = args.info;
+        name.append(snippet.method.getName()).append('(');
+        String separator = "";
+        for (int p = 0; p < snippet.getParameterCount(); p++) {
+            if (!snippet.isConstantParameter(p)) {
+                continue;
+            }
+            name.append(separator);
+            if (snippet.names[p] == null) {
+                name.append(p);
+            } else {
+                name.append(snippet.names[p]);
+            }
+            name.append('=').append(args.values[p]);
+            separator = ", ";
+        }
+        return name.append(")]").toString();
+    }
+
     /**
      * Creates a snippet template.
      */
     protected SnippetTemplate(final Providers providers, Arguments args) {
         StructuredGraph snippetGraph = providers.getReplacements().getSnippet(args.info.method);
-        SnippetGraphsNodeCount.add(snippetGraph.getNodeCount());
+        instantiationTimer = Debug.timer(debugValueName("SnippetTemplateInstantiationTime", args));
+        instantiationCounter = Debug.metric(debugValueName("SnippetTemplateInstantiationCount", args));
 
         ResolvedJavaMethod method = snippetGraph.method();
         Signature signature = method.getSignature();
@@ -462,6 +513,7 @@
         Debug.dump(snippetCopy, "Before specialization");
         if (!nodeReplacements.isEmpty()) {
             // Do deferred intrinsification of node intrinsics
+            new CanonicalizerPhase(true).apply(snippetCopy, phaseContext);
             new NodeIntrinsificationPhase(providers).apply(snippetCopy);
             new CanonicalizerPhase(true).apply(snippetCopy, phaseContext);
         }
@@ -608,7 +660,8 @@
         this.stampNodes = curStampNodes;
         this.returnNode = retNode;
 
-        SnippetSpecializationsNodeCount.add(nodes.size());
+        Debug.metric(debugValueName("SnippetTemplateNodeCount", args)).add(nodes.size());
+        args.info.notifyNewTemplate();
     }
 
     private static boolean checkAllVarargPlaceholdersAreDeleted(int parameterCount, VarargsPlaceholderNode[] placeholders) {
@@ -689,6 +742,20 @@
     private MemoryMapNode memoryMap;
 
     /**
+     * Times instantiations of this template.
+     * 
+     * @see SnippetInfo#instantiationTimer
+     */
+    private final DebugTimer instantiationTimer;
+
+    /**
+     * Counts instantiations of this template.
+     * 
+     * @see SnippetInfo#instantiationCounter
+     */
+    private final DebugMetric instantiationCounter;
+
+    /**
      * Gets the instantiation-time bindings to this template's parameters.
      * 
      * @return the map that will be used to bind arguments to parameters when inlining this template
@@ -796,7 +863,7 @@
                 return;
             }
             for (Node usage : newNode.usages().snapshot()) {
-                if (usage instanceof FloatingReadNode && ((FloatingReadNode) usage).lastLocationAccess() == newNode) {
+                if (usage instanceof FloatingReadNode && ((FloatingReadNode) usage).getLastLocationAccess() == newNode) {
                     assert newNode.graph().isAfterFloatingReadPhase();
 
                     // lastLocationAccess points into the snippet graph. find a proper
@@ -891,8 +958,9 @@
      */
     public Map<Node, Node> instantiate(MetaAccessProvider metaAccess, FixedNode replacee, UsageReplacer replacer, Arguments args) {
         assert checkSnippetKills(replacee);
-        try (TimerCloseable a = args.info.instantiationTimer.start()) {
+        try (TimerCloseable a = args.info.instantiationTimer.start(); TimerCloseable b = instantiationTimer.start()) {
             args.info.instantiationCounter.increment();
+            instantiationCounter.increment();
             // Inline the snippet nodes, replacing parameters with the given args in the process
             StartNode entryPointNode = snippet.start();
             FixedNode firstCFGNode = entryPointNode.next();
@@ -986,6 +1054,7 @@
         assert checkSnippetKills(replacee);
         try (TimerCloseable a = args.info.instantiationTimer.start()) {
             args.info.instantiationCounter.increment();
+            instantiationCounter.increment();
 
             // Inline the snippet nodes, replacing parameters with the given args in the process
             String name = snippet.name == null ? "{copy}" : snippet.name + "{copy}";
--- a/hotspot/.project	Mon Nov 11 21:34:44 2013 +0100
+++ b/hotspot/.project	Mon Nov 11 21:40:18 2013 +0100
@@ -101,6 +101,11 @@
 			<locationURI>PARENT-1-PROJECT_LOC/src/gpu/ptx/vm</locationURI>
 		</link>
 		<link>
+			<name>hsail</name>
+			<type>2</type>
+			<locationURI>PARENT-1-PROJECT_LOC/src/gpu/hsail/vm</locationURI>
+		</link>
+		<link>
 			<name>sparc</name>
 			<type>2</type>
 			<locationURI>PARENT-1-PROJECT_LOC/src/cpu/sparc/vm</locationURI>
@@ -151,6 +156,11 @@
 			<locationURI>PARENT-1-PROJECT_LOC/src/os_cpu/linux_x86/vm</locationURI>
 		</link>
 		<link>
+			<name>windows_hsail</name>
+			<type>2</type>
+			<locationURI>PARENT-1-PROJECT_LOC/src/os_gpu/windows_hsail/vm</locationURI>
+		</link>
+		<link>
 			<name>linux_ptx</name>
 			<type>2</type>
 			<locationURI>PARENT-1-PROJECT_LOC/src/os_gpu/linux_ptx/vm</locationURI>
--- a/make/linux/makefiles/buildtree.make	Mon Nov 11 21:34:44 2013 +0100
+++ b/make/linux/makefiles/buildtree.make	Mon Nov 11 21:40:18 2013 +0100
@@ -242,7 +242,9 @@
 	echo "$(call gamma-path,altsrc,os/posix/vm) \\"; \
 	echo "$(call gamma-path,commonsrc,os/posix/vm) \\"; \
 	echo "$(call gamma-path,altsrc,gpu/ptx/vm) \\"; \
-	echo "$(call gamma-path,commonsrc,gpu/ptx/vm)"; \
+	echo "$(call gamma-path,commonsrc,gpu/ptx/vm)" \\; \
+	echo "$(call gamma-path,altsrc,gpu/hsail/vm) \\"; \
+	echo "$(call gamma-path,commonsrc,gpu/hsail/vm)"; \
 	echo; \
 	echo "Src_Dirs_I = \\"; \
 	echo "$(call gamma-path,altsrc,share/vm/prims) \\"; \
--- a/make/linux/makefiles/vm.make	Mon Nov 11 21:34:44 2013 +0100
+++ b/make/linux/makefiles/vm.make	Mon Nov 11 21:40:18 2013 +0100
@@ -157,6 +157,7 @@
 SOURCE_PATHS+=$(HS_COMMON_SRC)/cpu/$(Platform_arch)/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os_cpu/$(Platform_os_arch)/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/ptx/vm
+SOURCE_PATHS+=$(HS_COMMON_SRC)/gpu/hsail/vm
 SOURCE_PATHS+=$(HS_COMMON_SRC)/os_gpu/linux_ptx/vm
 
 CORE_PATHS=$(foreach path,$(SOURCE_PATHS),$(call altsrc,$(path)) $(path))
--- a/make/windows/makefiles/projectcreator.make	Mon Nov 11 21:34:44 2013 +0100
+++ b/make/windows/makefiles/projectcreator.make	Mon Nov 11 21:40:18 2013 +0100
@@ -56,6 +56,7 @@
         -relativeInclude src\os\windows\vm \
         -relativeInclude src\os_cpu\windows_$(Platform_arch)\vm \
         -relativeInclude src\cpu\$(Platform_arch)\vm \
+        -relativeInclude src\os_gpu\windows_hsail\vm \
         -relativeInclude src\gpu \
         -absoluteInclude $(HOTSPOTBUILDSPACE)/%f/generated \
         -relativeSrcInclude src \
--- a/make/windows/makefiles/vm.make	Mon Nov 11 21:34:44 2013 +0100
+++ b/make/windows/makefiles/vm.make	Mon Nov 11 21:40:18 2013 +0100
@@ -126,6 +126,7 @@
   /I "$(COMMONSRC)\share\vm\prims" \
   /I "$(COMMONSRC)\os\windows\vm" \
   /I "$(COMMONSRC)\os_cpu\windows_$(Platform_arch)\vm" \
+  /I "$(COMMONSRC)\os_gpu\windows_hsail\vm" \
   /I "$(COMMONSRC)\cpu\$(Platform_arch)\vm"
 
 CXX_DONT_USE_PCH=/D DONT_USE_PRECOMPILED_HEADER
@@ -170,6 +171,7 @@
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/libadt
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/os/windows/vm
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm
+VM_PATH=$(VM_PATH);$(WorkSpace)/src/os_gpu/windows_hsail/vm
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/cpu/$(Platform_arch)/vm
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/opto
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mx/mx_graal.py	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,1 @@
+commands.py
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gpu/hsail/vm/gpu_hsail.cpp	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/gpu.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/ostream.hpp"
+#include "memory/allocation.hpp"
+#include "memory/allocation.inline.hpp"
+#include "hsailKernelArguments.hpp"
+
+void * gpu::Hsail::_device_context;  // assigned from _okra_ctx_create() in generate_kernel()
+// Pointers to the okra library entry points; assigned in probe_linkage() via os::dll_lookup
+gpu::Hsail::okra_ctx_create_func_t      gpu::Hsail::_okra_ctx_create;
+gpu::Hsail::okra_kernel_create_func_t   gpu::Hsail::_okra_kernel_create;
+gpu::Hsail::okra_push_object_func_t     gpu::Hsail::_okra_push_object;
+gpu::Hsail::okra_push_boolean_func_t    gpu::Hsail::_okra_push_boolean;
+gpu::Hsail::okra_push_byte_func_t       gpu::Hsail::_okra_push_byte;
+gpu::Hsail::okra_push_double_func_t     gpu::Hsail::_okra_push_double;
+gpu::Hsail::okra_push_float_func_t      gpu::Hsail::_okra_push_float;
+gpu::Hsail::okra_push_int_func_t        gpu::Hsail::_okra_push_int;
+gpu::Hsail::okra_push_long_func_t       gpu::Hsail::_okra_push_long;
+gpu::Hsail::okra_execute_with_range_func_t    gpu::Hsail::_okra_execute_with_range;
+gpu::Hsail::okra_clearargs_func_t       gpu::Hsail::_okra_clearargs;
+gpu::Hsail::okra_register_heap_func_t   gpu::Hsail::_okra_register_heap;
+
+
+bool gpu::Hsail::initialize_gpu() {
+  // All the initialization is done in the okra library, so there is nothing to do
+  // here: this always returns true and only traces when TraceGPUInteraction is set.
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] Simulator: initialize_gpu");
+  }
+  return true;
+}
+
+unsigned int gpu::Hsail::total_cores() {
+  // The core count is not important with the simulator, so a fixed value of 1 is returned
+  return 1;
+}
+
+void gpu::Hsail::register_heap() {
+  // After the okra functions are set up and the heap is initialized, register the java heap with HSA
+  guarantee(Universe::heap() != NULL, "heap should be there by now.");
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] heap=" PTR_FORMAT, Universe::heap());
+    tty->print_cr("[HSAIL] base=" PTR_FORMAT ", capacity=" SIZE_FORMAT, Universe::heap()->base(), Universe::heap()->capacity());  // width-correct formats for pointer and size_t
+  }
+  _okra_register_heap(Universe::heap()->base(), Universe::heap()->capacity());
+}
+
+// Clears the kernel's arguments, pushes the Java arguments in 'args' for method 'mh',
+// then executes 'kernel' with dimX as the range; returns the result of the okra call.
+bool  gpu::Hsail::execute_kernel_void_1d(address kernel, int dimX, jobject args, methodHandle& mh) {
+  objArrayOop javaArgs = (objArrayOop) JNIHandles::resolve(args);
+  // Reset the kernel arguments
+  _okra_clearargs(kernel);
+
+  // Constructing this object iterates the signature and pushes each argument
+  HSAILKernelArguments pusher(kernel, mh->signature(), javaArgs, mh->is_static());
+
+  // Run the kernel
+  return _okra_execute_with_range(kernel, dimX);
+}
+
+// Registers the heap, creates an okra context and builds a kernel from 'code' (code_len bytes); 'name' is currently unused.
+void *gpu::Hsail::generate_kernel(unsigned char *code, int code_len, const char *name) {
+  gpu::Hsail::register_heap();
+
+  // The kernel entrypoint is always run for the time being
+  const char* entryPointName = "&run";
+  _device_context = _okra_ctx_create();
+
+  // code is not null terminated, must be a better way to do this
+  unsigned char* nullTerminatedCodeBuffer = (unsigned char*) malloc(code_len + 1);
+  guarantee(nullTerminatedCodeBuffer != NULL, "out of memory copying HSAIL kernel code");  // fail cleanly instead of memcpy to NULL
+  memcpy(nullTerminatedCodeBuffer, code, code_len);
+  nullTerminatedCodeBuffer[code_len] = 0;
+  void* kernel = _okra_kernel_create(_device_context, nullTerminatedCodeBuffer, entryPointName);
+  free(nullTerminatedCodeBuffer);
+  return kernel;
+}
+
+#if defined(LINUX)  // file name of the okra shared library for the build platform
+static const char okra_library_name[] = "libokra_x86_64.so";
+#elif defined (_WINDOWS)
+static char const okra_library_name[] = "okra_x86_64.dll";
+#else  // no okra library name is known for other platforms: the name is empty
+static char const okra_library_name[] = "";
+#endif
+// Size in bytes of the buffer handed to os::dll_load in probe_linkage()
+#define STD_BUFFER_SIZE 1024
+
+// Loads the okra library and binds the okra entry points. Returns false when no okra
+// library is known for this platform or the library cannot be loaded.
+bool gpu::Hsail::probe_linkage() {
+  // okra_library_name is an array and can never be NULL; the unsupported-platform
+  // case is the empty name, so test the first character instead.
+  if (okra_library_name[0] == '\0') {
+    tty->print_cr("Unsupported HSAIL platform");
+    return false;
+  }
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] library is %s", okra_library_name);
+  }
+  char buffer[STD_BUFFER_SIZE];  // buffer argument for os::dll_load; its contents are not reported
+  void *handle = os::dll_load(okra_library_name, buffer, STD_BUFFER_SIZE);
+  if (handle == NULL) {
+    // Unable to dlopen okra
+    tty->print_cr("[HSAIL] library load failed.");
+    return false;
+  }
+
+  // NOTE(review): the results of the lookups below are not checked for NULL.
+  _okra_ctx_create =
+    CAST_TO_FN_PTR(okra_ctx_create_func_t, os::dll_lookup(handle, "okra_create_context"));
+  _okra_kernel_create =
+    CAST_TO_FN_PTR(okra_kernel_create_func_t, os::dll_lookup(handle, "okra_create_kernel"));
+  _okra_push_object =
+    CAST_TO_FN_PTR(okra_push_object_func_t, os::dll_lookup(handle, "okra_push_object"));
+  _okra_push_boolean =
+    CAST_TO_FN_PTR(okra_push_boolean_func_t, os::dll_lookup(handle, "okra_push_boolean"));
+  _okra_push_byte =
+    CAST_TO_FN_PTR(okra_push_byte_func_t, os::dll_lookup(handle, "okra_push_byte"));
+  _okra_push_double =
+    CAST_TO_FN_PTR(okra_push_double_func_t, os::dll_lookup(handle, "okra_push_double"));
+  _okra_push_float =
+    CAST_TO_FN_PTR(okra_push_float_func_t, os::dll_lookup(handle, "okra_push_float"));
+  _okra_push_int =
+    CAST_TO_FN_PTR(okra_push_int_func_t, os::dll_lookup(handle, "okra_push_int"));
+  _okra_push_long =
+    CAST_TO_FN_PTR(okra_push_long_func_t, os::dll_lookup(handle, "okra_push_long"));
+  _okra_execute_with_range =
+    CAST_TO_FN_PTR(okra_execute_with_range_func_t, os::dll_lookup(handle, "okra_execute_with_range"));
+  _okra_clearargs =
+    CAST_TO_FN_PTR(okra_clearargs_func_t, os::dll_lookup(handle, "okra_clearargs"));
+  _okra_register_heap =
+    CAST_TO_FN_PTR(okra_register_heap_func_t, os::dll_lookup(handle, "okra_register_heap"));
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] Success: library linkage _okra_clearargs=" PTR_FORMAT, CAST_FROM_FN_PTR(address, _okra_clearargs));
+  }
+  return true;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gpu/hsail/vm/gpu_hsail.hpp	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef GPU_HSAIL_HPP
+#define GPU_HSAIL_HPP
+
+class Hsail {  // static-only interface to the okra library used for HSAIL kernels
+  friend class gpu;
+  // Device operations; their definitions are in gpu_hsail.cpp
+ protected:
+  static bool probe_linkage();
+  static bool initialize_gpu();
+  static unsigned int total_cores();
+  static void* generate_kernel(unsigned char *code, int code_len, const char *name);
+  static bool execute_kernel_void_1d(address kernel, int dimX, jobject args, methodHandle& mh);
+  static void register_heap();
+  // Device pointer type: unsigned long long on x86_64 builds, unsigned int otherwise
+public:
+#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
+  typedef unsigned long long CUdeviceptr;
+#else
+  typedef unsigned int CUdeviceptr;
+#endif
+  // Signatures of the okra library entry points
+private:
+  typedef void* (*okra_ctx_create_func_t)();
+  typedef void* (*okra_kernel_create_func_t)(void*, unsigned char *, const char *);
+  typedef bool (*okra_push_object_func_t)(void*, void*);
+  typedef bool (*okra_push_boolean_func_t)(void*, jboolean);
+  typedef bool (*okra_push_byte_func_t)(void*, jbyte);
+  typedef bool (*okra_push_double_func_t)(void*, jdouble);
+  typedef bool (*okra_push_float_func_t)(void*, jfloat);
+  typedef bool (*okra_push_int_func_t)(void*, jint);
+  typedef bool (*okra_push_long_func_t)(void*, jlong);
+  typedef bool (*okra_execute_with_range_func_t)(void*, jint);
+  typedef bool (*okra_clearargs_func_t)(void*);
+  typedef bool (*okra_register_heap_func_t)(void*, size_t);
+  // One function pointer per okra entry point; assigned by probe_linkage()
+public:
+  static okra_ctx_create_func_t                 _okra_ctx_create;
+  static okra_kernel_create_func_t              _okra_kernel_create;
+  static okra_push_object_func_t                _okra_push_object;
+  static okra_push_boolean_func_t               _okra_push_boolean;
+  static okra_push_byte_func_t                  _okra_push_byte;
+  static okra_push_double_func_t                _okra_push_double;
+  static okra_push_float_func_t                 _okra_push_float;
+  static okra_push_int_func_t                   _okra_push_int;
+  static okra_push_long_func_t                  _okra_push_long;
+  static okra_execute_with_range_func_t         _okra_execute_with_range;
+  static okra_clearargs_func_t                  _okra_clearargs;
+  static okra_register_heap_func_t              _okra_register_heap;
+  // Value returned by _okra_ctx_create(); set in generate_kernel()
+protected:
+  static void* _device_context;
+};
+#endif // GPU_HSAIL_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gpu/hsail/vm/hsailKernelArguments.cpp	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "hsailKernelArguments.hpp"
+#include "runtime/javaCalls.hpp"
+
+
+// Returns the argument at the current index and advances the index. Asserts that the
+// index is in bounds and, unless expectedType is T_OBJECT, that the box type matches.
+oop HSAILKernelArguments::next_arg(BasicType expectedType) {
+  assert(_index < _args->length(), "out of bounds");
+  oop next = _args->obj_at(_index++);
+  // T_OBJECT accepts any oop; every other type must be the matching boxing object
+  assert(expectedType == T_OBJECT || java_lang_boxing_object::is_instance(next, expectedType),
+         "arg type mismatch");
+  return next;
+}
+
+// Pushes the next argument, a boxed boolean, to the kernel.
+void HSAILKernelArguments::do_bool() {
+  // Get the boxed value; next_arg also asserts that the index is in bounds
+  oop arg = next_arg(T_BOOLEAN);
+
+  jvalue jValue;
+  java_lang_boxing_object::get_value(arg, &jValue);
+
+  bool pushed = gpu::Hsail::_okra_push_boolean(_kernel, jValue.z);
+  assert(pushed == true, "arg push failed");
+}
+
+// Pushes the next argument, a boxed byte, to the kernel.
+void HSAILKernelArguments::do_byte() {
+  // Get the boxed value; next_arg also asserts that the index is in bounds
+  oop arg = next_arg(T_BYTE);
+
+  jvalue jValue;
+  java_lang_boxing_object::get_value(arg, &jValue);
+
+  bool pushed = gpu::Hsail::_okra_push_byte(_kernel, jValue.b);
+  assert(pushed == true, "arg push failed");
+}
+
+// Pushes the next argument, a boxed double, to the kernel.
+void HSAILKernelArguments::do_double() {
+  // Get the boxed value; next_arg also asserts that the index is in bounds
+  oop arg = next_arg(T_DOUBLE);
+
+  jvalue jValue;
+  java_lang_boxing_object::get_value(arg, &jValue);
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] HSAILKernelArguments::double value = %e", jValue.d);
+  }
+  bool pushed = gpu::Hsail::_okra_push_double(_kernel, jValue.d);
+  assert(pushed == true, "arg push failed");
+}
+
+// Pushes the next argument, a boxed float, to the kernel.
+void HSAILKernelArguments::do_float() {
+  // Get the boxed value; next_arg also asserts that the index is in bounds
+  oop arg = next_arg(T_FLOAT);
+
+  jvalue jValue;
+  java_lang_boxing_object::get_value(arg, &jValue);
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] HSAILKernelArguments::float value = %f", jValue.f);
+  }
+  bool pushed = gpu::Hsail::_okra_push_float(_kernel, jValue.f);
+  assert(pushed == true, "float push failed");
+}
+
+// Pushes the next argument, a boxed int, to the kernel, except for the trailing int described below.
+void HSAILKernelArguments::do_int() {
+  // The last int is the iteration variable in an IntStream, but we don't pass it
+  // since we use the HSAIL workitemid in place of that int value
+  if (_index == _length) {
+    if (TraceGPUInteraction) {
+      tty->print_cr("[HSAIL] HSAILKernelArguments::not pushing trailing int");
+    }
+    return;
+  }
+
+  // Get the boxed int; next_arg also asserts that the index is in bounds
+  oop arg = next_arg(T_INT);
+
+  jvalue jValue;
+  java_lang_boxing_object::get_value(arg, &jValue);
+
+  bool pushed = gpu::Hsail::_okra_push_int(_kernel, jValue.i);
+  assert(pushed == true, "arg push failed");
+}
+
+// Pushes the next argument, a boxed long, to the kernel.
+void HSAILKernelArguments::do_long() {
+  // Get the boxed value; next_arg also asserts that the index is in bounds
+  oop arg = next_arg(T_LONG);
+
+  jvalue jValue;
+  java_lang_boxing_object::get_value(arg, &jValue);
+
+  bool pushed = gpu::Hsail::_okra_push_long(_kernel, jValue.j);
+  assert(pushed == true, "arg push failed");
+}
+
+// Pushes the next argument, asserted to be an array, to the kernel as an object reference.
+void HSAILKernelArguments::do_array(int begin, int end) {
+  oop arg = next_arg(T_OBJECT);  // also asserts that the index is in bounds
+  assert(arg->is_array(), "arg type mismatch");
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] HSAILKernelArguments::do_array " PTR_FORMAT ", is a %s", (address) arg, arg->klass()->external_name());
+  }
+  bool pushed = gpu::Hsail::_okra_push_object(_kernel, arg);
+  assert(pushed == true, "arg push failed");
+}
+
+// Pushes the next argument, asserted to be an array, to the kernel as an object reference.
+void HSAILKernelArguments::do_object() {
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] HSAILKernelArguments::do_object.");
+  }
+  // NOTE(review): if _index == _length here, the read below is past the end of _args.
+  if (_index == _length) {
+    // last arg in object stream lambda is  the object stream source array
+    if (TraceGPUInteraction) {
+      tty->print_cr("[HSAIL] HSAILKernelArguments::trailing object ref should be object source array ref");
+    }
+  }
+  oop arg = next_arg(T_OBJECT);  // also asserts that the index is in bounds
+  assert(arg->is_array(), "arg type mismatch");
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] HSAILKernelArguments::do_object, " PTR_FORMAT " is a %s", (address) arg, arg->klass()->external_name());
+  }
+  bool pushed = gpu::Hsail::_okra_push_object(_kernel, arg);
+  assert(pushed == true, "arg push failed");
+}
+
+// Pushes the next argument, asserted to be an array, to the kernel; 'begin' and 'end' are unused.
+void HSAILKernelArguments::do_object(int begin, int end) {
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] HSAILKernelArguments::do_object(int begin, int end).");
+  }
+
+  if ((!_is_static && (_index >=(_length-1))) || (_is_static && (_index >=(_length)))) {
+    // last arg in object stream lambda is  the object stream source array
+    if (TraceGPUInteraction) {
+      tty->print_cr("[HSAIL] HSAILKernelArguments::trailing object ref should be object source array ref");
+    }
+  }
+
+  oop arg = next_arg(T_OBJECT);  // also asserts that the index is in bounds
+  assert(arg->is_array(), "arg type mismatch");
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] HSAILKernelArguments::do_object(int, int), " PTR_FORMAT " is a %s", (address) arg, arg->klass()->external_name());
+  }
+  bool pushed = gpu::Hsail::_okra_push_object(_kernel, arg);
+  assert(pushed == true, "arg push failed");
+}
+
+void HSAILKernelArguments::do_void() {
+  // Nothing is pushed to the kernel for a void type
+}
+
+// TODO implement other do_*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gpu/hsail/vm/hsailKernelArguments.hpp	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef KERNEL_ARGUMENTS_HSAIL_HPP
+#define KERNEL_ARGUMENTS_HSAIL_HPP
+
+#include "runtime/gpu.hpp"
+#include "runtime/signature.hpp"
+
+// SignatureIterator that pushes the Java arguments of a kernel invocation to an okra
+// kernel; the iteration, and therefore the pushing, runs inside the constructor.
+class HSAILKernelArguments : public SignatureIterator {
+  friend class gpu::Hsail;
+
+private:
+  // Array of java argument oops
+  objArrayOop _args;
+  // Length of args array
+  int   _length;
+  // Current index into _args
+  int _index;
+  // Kernel to push into
+  address _kernel;
+  // When false, the first element of _args is expected to be the receiver ('this')
+  bool _is_static;
+
+  // Get next java argument
+  oop next_arg(BasicType expectedType);
+
+ public:
+  HSAILKernelArguments(address kernel, Symbol* signature, objArrayOop args, bool is_static) : SignatureIterator(signature) {
+    this->_return_type = T_ILLEGAL;
+    _index = 0;
+    _args = args;
+    _kernel = kernel;
+    _is_static = is_static;
+
+    _length = args->length();
+    if (TraceGPUInteraction) {
+      tty->print_cr("[HSAIL] sig:%s  args length=%d", signature->as_C_string(), _length);
+    }
+    if (!_is_static) {
+      // First object in args should be 'this'
+      oop arg = args->obj_at(_index++);
+      assert(arg->is_instance() && (! arg->is_array()), "First arg should be 'this'");
+      if (TraceGPUInteraction) {
+        tty->print_cr("[HSAIL] instance method, this " PTR_FORMAT ", is a %s", (address) arg, arg->klass()->external_name());
+      }
+      bool pushed = gpu::Hsail::_okra_push_object(kernel, arg);
+      assert(pushed == true, "'this' push failed");
+    } else {
+      if (TraceGPUInteraction) {
+        tty->print_cr("[HSAIL] static method");
+      }
+    }
+    // Iterate over the entire signature
+    iterate();
+  }
+
+  void do_bool();
+  void do_byte();
+  void do_double();
+  void do_float();
+  void do_int();
+  void do_long();
+  void do_array(int begin, int end);
+  void do_object();
+  void do_object(int begin, int end);
+
+  void do_void();
+
+  inline void do_char()   {
+    /* TODO : To be implemented */
+    guarantee(false, "do_char:NYI");
+  }
+  inline void do_short()  {
+    /* TODO : To be implemented */
+    guarantee(false, "do_short:NYI");
+  }
+};
+
+#endif  // KERNEL_ARGUMENTS_HSAIL_HPP
--- a/src/os_gpu/linux_ptx/vm/gpu_linux.cpp	Mon Nov 11 21:34:44 2013 +0100
+++ b/src/os_gpu/linux_ptx/vm/gpu_linux.cpp	Mon Nov 11 21:40:18 2013 +0100
@@ -42,6 +42,19 @@
 static unsigned int amd_vendor_id = 0x1002;
 
 bool gpu::Linux::probe_gpu() {
+
+  /*
+   * The simulator only depends on shared libraries.
+   * That linkage is checked in a later step.
+   */
+  if (UseHSAILSimulator) {
+      set_target_il_type(gpu::HSAIL);
+      if (TraceGPUInteraction) {
+        tty->print_cr("Setup HSAIL Simulator");
+      }
+      return true;
+  }
+
   /* 
    * Open /proc/bus/pci/devices to look for the first GPU device. For
    * now, we will just find the first GPU device. Will need to revisit
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_gpu/windows_hsail/vm/gpu_windows.cpp	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/gpu.hpp"
+#include "utilities/ostream.hpp"
+
+// Records the result of the Windows probe as the GPU availability flag and,
+// when tracing is on, prints the stored value.
+void gpu::probe_gpu() {
+  const bool device_found = gpu::Windows::probe_gpu();
+  set_available(device_found);
+
+  if (TraceGPUInteraction) {
+    tty->print_cr("probe_gpu(): %d", gpu::is_available());
+  }
+}
+
+bool gpu::Windows::probe_gpu() {
+  /*
+   * Nothing is probed at this point: the HSA environment is checked
+   * inside the libraries, and the HSA library linkage is verified in a
+   * later step. Select HSAIL as the target IL and report a device as
+   * present.
+   */
+  set_target_il_type(gpu::HSAIL);
+  return true;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_gpu/windows_hsail/vm/gpu_windows.hpp	Mon Nov 11 21:40:18 2013 +0100
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_WINDOWS_VM_GPU_WINDOWS_HPP
+#define OS_WINDOWS_VM_GPU_WINDOWS_HPP
+
+
+// Windows-specific GPU probing hook. gpu.hpp includes this header, and
+// gpu_windows.cpp defines the member as gpu::Windows::probe_gpu(), i.e. this
+// class is nested inside class gpu.
+class Windows {
+  // Lets the enclosing gpu class call the protected probe below.
+  friend class gpu;
+
+ protected:
+  // Returns whether a GPU device is considered present on this platform.
+  static bool probe_gpu();
+};
+
+#endif // OS_WINDOWS_VM_GPU_WINDOWS_HPP
--- a/src/share/vm/graal/graalCompilerToGPU.cpp	Mon Nov 11 21:34:44 2013 +0100
+++ b/src/share/vm/graal/graalCompilerToGPU.cpp	Mon Nov 11 21:40:18 2013 +0100
@@ -117,7 +117,6 @@
     }
     return JNIHandles::make_local(o);
   }
-
 C2V_END
 
 C2V_VMENTRY(jobject, executeParallelMethodVarargs, (JNIEnv *env,
@@ -139,9 +138,14 @@
   // start value is the kernel
   jlong startValue = HotSpotInstalledCode::codeStart(hotspotInstalledCode);
 
+  if (UseHSAILSimulator) {
+    gpu::execute_kernel_void_1d((address)startValue, dimX, args, mh);
+    return NULL;
+  }
+
   PTXKernelArguments ptxka(signature, (arrayOop) JNIHandles::resolve(args), mh->is_static());
   JavaValue result(ptxka.get_ret_type());
-if (!gpu::execute_warp(dimX, dimY, dimZ, (address)startValue, ptxka, result)) {
+  if (!gpu::execute_warp(dimX, dimY, dimZ, (address) startValue, ptxka, result)) {
     return NULL;
   }
 
@@ -169,7 +173,6 @@
     }
     return JNIHandles::make_local(o);
   }
-
 C2V_END
 
 C2V_VMENTRY(jboolean, deviceInit, (JNIEnv *env, jobject))
--- a/src/share/vm/graal/graalEnv.cpp	Mon Nov 11 21:34:44 2013 +0100
+++ b/src/share/vm/graal/graalEnv.cpp	Mon Nov 11 21:40:18 2013 +0100
@@ -566,7 +566,7 @@
         }
       }
       
-      if (HotSpotNmethod::isExternal(installed_code())) {
+      if (TraceGPUInteraction && HotSpotNmethod::isExternal(installed_code())) {
         tty->print_cr("External method:%s", method()->name_and_sig_as_C_string());
       }
     }
--- a/src/share/vm/graal/graalGlobals.hpp	Mon Nov 11 21:34:44 2013 +0100
+++ b/src/share/vm/graal/graalGlobals.hpp	Mon Nov 11 21:40:18 2013 +0100
@@ -58,6 +58,9 @@
   product(bool, GraalDeferredInitBarriers, true,                            \
           "Defer write barriers of young objects")                          \
                                                                             \
+  product(bool, GraalHProfEnabled, false,                                   \
+          "Is Heap  Profiler enabled")                                      \
+                                                                            \
   develop(bool, GraalUseFastLocking, true,                                  \
           "Use fast inlined locking code")                                  \
                                                                             \
--- a/src/share/vm/runtime/arguments.cpp	Mon Nov 11 21:34:44 2013 +0100
+++ b/src/share/vm/runtime/arguments.cpp	Mon Nov 11 21:40:18 2013 +0100
@@ -2632,6 +2632,11 @@
           return JNI_ERR;
         }
 #endif // !INCLUDE_JVMTI
+#if defined(GRAAL)
+        if (strcmp(name, "hprof") == 0) {
+          FLAG_SET_CMDLINE(bool, GraalHProfEnabled, true);
+        }
+#endif
         add_init_library(name, options);
       }
     // -agentlib and -agentpath
@@ -2654,6 +2659,12 @@
           return JNI_ERR;
         }
 #endif // !INCLUDE_JVMTI
+#if defined(GRAAL)
+        if (valid_hprof_or_jdwp_agent(name, is_absolute_path)) {
+          FLAG_SET_CMDLINE(bool, GraalHProfEnabled, true);
+        }
+#endif
+
         add_init_agent(name, options, is_absolute_path);
       }
     // -javaagent
--- a/src/share/vm/runtime/globals.hpp	Mon Nov 11 21:34:44 2013 +0100
+++ b/src/share/vm/runtime/globals.hpp	Mon Nov 11 21:40:18 2013 +0100
@@ -3774,6 +3774,9 @@
   product(bool, TraceGPUInteraction, false,                                 \
           "Trace external GPU Interaction")                                 \
                                                                             \
+  product(bool, UseHSAILSimulator, false,                                   \
+          "Run code on HSAIL Simulator")                                    \
+                                                                            \
   diagnostic(ccstr, SharedArchiveFile, NULL,                                \
           "Override the default location of the CDS archive file")          \
                                                                             \
--- a/src/share/vm/runtime/gpu.cpp	Mon Nov 11 21:34:44 2013 +0100
+++ b/src/share/vm/runtime/gpu.cpp	Mon Nov 11 21:40:18 2013 +0100
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "runtime/gpu.hpp"
+#include "runtime/handles.hpp"
 
 bool gpu::_available = false;    // does the hardware exist?
 bool gpu::_gpu_linkage = false;  // is the driver library to access the GPU installed
@@ -31,10 +32,12 @@
 gpu::TargetGPUIL gpu::_targetIL = gpu::NONE; // No GPU detected yet.
 
 void gpu::init() {
-#if defined(TARGET_OS_FAMILY_bsd) || defined(TARGET_OS_FAMILY_linux)
+#if defined(TARGET_OS_FAMILY_bsd) || defined(TARGET_OS_FAMILY_linux) || defined(TARGET_OS_FAMILY_windows)
   gpu::probe_gpu();
   if (gpu::get_target_il_type() == gpu::PTX) {
     set_gpu_linkage(gpu::Ptx::probe_linkage());
+  } else if (gpu::get_target_il_type() == gpu::HSAIL) {
+    set_gpu_linkage(gpu::Hsail::probe_linkage());
   } else {
     set_gpu_linkage(false);
   }
@@ -45,8 +48,9 @@
   if (gpu::has_gpu_linkage()) {
     if (gpu::get_target_il_type() == gpu::PTX) {
       set_initialized(gpu::Ptx::initialize_gpu());
+    } else if (gpu::get_target_il_type() == gpu::HSAIL) {
+      set_initialized(gpu::Hsail::initialize_gpu());
     }
-    // Add initialization of other GPUs here
   }
 }
 
@@ -54,8 +58,9 @@
   if (gpu::has_gpu_linkage()) {
     if (gpu::get_target_il_type() == gpu::PTX) {
       return (gpu::Ptx::generate_kernel(code, code_len, name));
+    } else if (gpu::get_target_il_type() == gpu::HSAIL) {
+      return (gpu::Hsail::generate_kernel(code, code_len, name));
     }
-    // Add kernel generation functionality of other GPUs here
   }
   return NULL;
 }
@@ -70,6 +75,18 @@
     return false;
 }
 
+// HSAIL-only entry point (exists to work with the Sumatra JDK). Forwards to
+// the HSAIL backend when GPU linkage is present and the target IL is HSAIL;
+// returns false in every other case.
+bool gpu::execute_kernel_void_1d(address kernel, int dimX, jobject args, methodHandle& mh) {
+  if (!gpu::has_gpu_linkage() || gpu::get_target_il_type() != gpu::HSAIL) {
+    return false;
+  }
+  return gpu::Hsail::execute_kernel_void_1d(kernel, dimX, args, mh);
+}
+
+
 bool gpu::execute_warp(int dimX, int dimY, int dimZ,
                        address kernel, PTXKernelArguments & ptxka, JavaValue& ret) {
     if (gpu::has_gpu_linkage()) {
--- a/src/share/vm/runtime/gpu.hpp	Mon Nov 11 21:34:44 2013 +0100
+++ b/src/share/vm/runtime/gpu.hpp	Mon Nov 11 21:40:18 2013 +0100
@@ -53,6 +53,9 @@
 
   static bool execute_kernel(address kernel, PTXKernelArguments & ptxka, JavaValue & ret);
 
+  // No return value from HSAIL kernels
+  static bool execute_kernel_void_1d(address kernel, int dimX, jobject args, methodHandle& mh);
+
   static void set_available(bool value) {
     _available = value;
   }
@@ -92,6 +95,7 @@
 #ifdef TARGET_OS_FAMILY_solaris
 #endif
 #ifdef TARGET_OS_FAMILY_windows
+# include "gpu_windows.hpp"
 #endif
 #ifdef TARGET_OS_FAMILY_bsd
 # include "gpu_bsd.hpp"
@@ -99,6 +103,7 @@
 
 public:
 # include "ptx/vm/gpu_ptx.hpp"
+# include "hsail/vm/gpu_hsail.hpp"
 
 };