changeset 16131:a4a2147a6aa6

Merge with 9410f831fefa2a7af1865d65b32bf6019f197bd8
author Michael Van De Vanter <michael.van.de.vanter@oracle.com>
date Tue, 17 Jun 2014 17:42:24 -0700
parents 6f7d3f3703d3 (current diff) 9410f831fefa (diff)
children 7143d614bb20
files
diffstat 48 files changed, 1068 insertions(+), 247 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/Architecture.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/Architecture.java	Tue Jun 17 17:42:24 2014 -0700
@@ -34,11 +34,11 @@
 public abstract class Architecture {
 
     /**
-     * The number of bits required in a bit map covering all the registers that may store
-     * references. The bit position of a register in the map is the register's
-     * {@linkplain Register#number number}.
+     * The number of entries required in a {@link ReferenceMap} covering all the registers that may
+     * store references. The index of a register in the reference map is given by
+     * {@link Register#getReferenceMapIndex()}.
      */
-    private final int registerReferenceMapBitCount;
+    private final int registerReferenceMapSize;
 
     /**
      * Represents the natural size of words (typically registers and pointers) of this architecture,
@@ -85,7 +85,7 @@
     private final int returnAddressSize;
 
     protected Architecture(String name, int wordSize, ByteOrder byteOrder, boolean unalignedMemoryAccess, Register[] registers, int implicitMemoryBarriers, int nativeCallDisplacementOffset,
-                    int registerReferenceMapBitCount, int returnAddressSize) {
+                    int registerReferenceMapSize, int returnAddressSize) {
         this.name = name;
         this.registers = registers;
         this.wordSize = wordSize;
@@ -93,13 +93,13 @@
         this.unalignedMemoryAccess = unalignedMemoryAccess;
         this.implicitMemoryBarriers = implicitMemoryBarriers;
         this.machineCodeCallDisplacementOffset = nativeCallDisplacementOffset;
-        this.registerReferenceMapBitCount = registerReferenceMapBitCount;
+        this.registerReferenceMapSize = registerReferenceMapSize;
         this.returnAddressSize = returnAddressSize;
     }
 
     /**
      * Converts this architecture to a string.
-     * 
+     *
      * @return the string representation of this architecture
      */
     @Override
@@ -107,8 +107,8 @@
         return getName().toLowerCase();
     }
 
-    public int getRegisterReferenceMapBitCount() {
-        return registerReferenceMapBitCount;
+    public int getRegisterReferenceMapSize() {
+        return registerReferenceMapSize;
     }
 
     /**
@@ -163,7 +163,7 @@
     /**
      * Determines the barriers in a given barrier mask that are explicitly required on this
      * architecture.
-     * 
+     *
      * @param barriers a mask of the barrier constants
      * @return the value of {@code barriers} minus the barriers unnecessary on this architecture
      */
@@ -173,9 +173,9 @@
 
     /**
      * Gets the size in bytes of the specified kind for this target.
-     * 
+     *
      * @param kind the kind for which to get the size
-     * 
+     *
      * @return the size in bytes of {@code kind}
      */
     public int getSizeInBytes(PlatformKind kind) {
@@ -205,7 +205,7 @@
 
     /**
      * Determine whether a kind can be stored in a register of a given category.
-     * 
+     *
      * @param category the category of the register
      * @param kind the kind that should be stored in the register
      */
@@ -213,7 +213,7 @@
 
     /**
      * Return the largest kind that can be stored in a register of a given category.
-     * 
+     *
      * @param category the category of the register
      * @return the largest kind that can be stored in a register {@code category}
      */
--- a/graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/Register.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.api.code/src/com/oracle/graal/api/code/Register.java	Tue Jun 17 17:42:24 2014 -0700
@@ -137,6 +137,9 @@
         return registerCategory;
     }
 
+    /**
+     * Get the start index of this register in the {@link ReferenceMap}.
+     */
     public int getReferenceMapIndex() {
         return (encoding << registerCategory.referenceMapShift) + registerCategory.referenceMapOffset;
     }
--- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/ArrayListSetTest.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/ArrayListSetTest.java	Tue Jun 17 17:42:24 2014 -0700
@@ -49,6 +49,11 @@
         // }
     }
 
+    @Override
+    protected boolean supportsRequiredCapabilities() {
+        return (canHandleObjectAllocation());
+    }
+
     @Test
     public void testUsingLambdaMethod() {
         testGeneratedHsailUsingLambdaMethod();
--- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/NewStringEqualsTest.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/NewStringEqualsTest.java	Tue Jun 17 17:42:24 2014 -0700
@@ -52,6 +52,12 @@
         setupArrays();
         String base = "ABCDEFGHIJ";
 
+        // Resolving StringIndexOutOfBoundsException causes compilation of the
+        // lambda to fail as. Combined with use of InlineEverything and RemoveNeverExecutedCode
+        // the inlining budget is blown before String.equals can be inlined leaving
+        // a DirectCallTargetNode in the graph which cannot be lowered by HSAIL.
+        new StringIndexOutOfBoundsException().fillInStackTrace();
+
         dispatchLambdaKernel(NUM, (gid) -> {
             outArray[gid] = new String(chars, 0, 10 + (gid % 3)).equals(base);
         });
@@ -63,6 +69,7 @@
         return (canHandleObjectAllocation());
     }
 
+    @Ignore("see comment in runTest")
     @Test
     public void test() {
         try (DebugConfigScope s = disableIntercept()) {
@@ -70,6 +77,7 @@
         }
     }
 
+    @Ignore("see comment in runTest")
     @Test
     public void testUsingLambdaMethod() {
         try (DebugConfigScope s = disableIntercept()) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/ReduceMaxTest.java	Tue Jun 17 17:42:24 2014 -0700
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.compiler.hsail.test.lambda;
+
+import static com.oracle.graal.hotspot.HotSpotGraalRuntime.runtime;
+import com.oracle.graal.hotspot.HotSpotVMConfig;
+import static org.junit.Assert.*;
+import org.junit.*;
+
+import java.util.*;
+import java.util.stream.IntStream;
+
+public class ReduceMaxTest {
+    // The length of the input array
+    static int jobSize = 1027 * 1023 * 13;
+    static int loops = 1;
+
+    // The source array
+    int bigArray[] = null;
+
+    // result for baseline single threaded stream
+    int resultStream = 0;
+    // result for parallel CPU and offloaded streams
+    int resultOffload = 0;
+
+    int evaluate(boolean doParallelStream) {
+        int result = 0;
+        for (int i = 0; i < loops; i++) {
+            IntStream s = Arrays.stream(bigArray);
+            if (doParallelStream == true) {
+                OptionalInt resultParallel = s.parallel().reduce(Integer::max);
+                result = resultParallel.getAsInt();
+            } else {
+                result = s.reduce(Integer::max).getAsInt();
+            }
+        }
+        return result;
+    }
+
+    int evaluateWithIdentity(boolean doParallelStream) {
+        int result = 0;
+        for (int i = 0; i < loops; i++) {
+            IntStream s = Arrays.stream(bigArray);
+            if (doParallelStream == true) {
+                result = s.parallel().reduce(0, Integer::max);
+            } else {
+                result = s.reduce(0, Integer::max);
+            }
+        }
+        return result;
+    }
+
+    @Test
+    public void testReduce() {
+        // Handmade reduce does not support +UseCompressedOops
+        HotSpotVMConfig config = runtime().getConfig();
+        if (config.useCompressedOops == true || config.useHSAILDeoptimization == true) {
+            return;
+        }
+
+        bigArray = new int[jobSize];
+        for (int i = 0; i < jobSize; i++) {
+            // bigArray[i] = i + 1;
+            bigArray[i] = -1024 + i + 1;
+        }
+
+        // Get non parallel baseline
+        resultStream = evaluate(false);
+
+        // Get OptionalInt version kernel
+        resultOffload = evaluate(true);
+        assertTrue(resultStream == resultOffload);
+
+        // Do identity version kernel
+        // Get non parallel baseline
+        resultStream = evaluateWithIdentity(false);
+
+        resultOffload = evaluateWithIdentity(true);
+        assertTrue(resultStream == resultOffload);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/ReduceMinTest.java	Tue Jun 17 17:42:24 2014 -0700
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.compiler.hsail.test.lambda;
+
+import static com.oracle.graal.hotspot.HotSpotGraalRuntime.runtime;
+import com.oracle.graal.hotspot.HotSpotVMConfig;
+import static org.junit.Assert.*;
+import org.junit.*;
+
+import java.util.*;
+import java.util.stream.IntStream;
+
+public class ReduceMinTest {
+    // The length of the input array
+    static int jobSize = 1027 * 1023 * 13;
+    static int loops = 1;
+
+    // The input array to the kernel
+    int bigArray[] = null;
+
+    // result for baseline single threaded stream
+    int resultStream = 0;
+    // result for parallel CPU and offloaded streams
+    int resultOffload = 0;
+
+    int evaluate(boolean doParallelStream) {
+        int result = 0;
+        for (int i = 0; i < loops; i++) {
+            IntStream s = Arrays.stream(bigArray);
+            if (doParallelStream == true) {
+                OptionalInt resultParallel = s.parallel().reduce(Integer::min);
+                result = resultParallel.getAsInt();
+            } else {
+                result = s.reduce(Integer::min).getAsInt();
+            }
+        }
+        return result;
+    }
+
+    int evaluateWithIdentity(boolean doParallelStream) {
+        int result = 0;
+        for (int i = 0; i < loops; i++) {
+            IntStream s = Arrays.stream(bigArray);
+            if (doParallelStream == true) {
+                result = s.parallel().reduce(0, Integer::min);
+            } else {
+                result = s.reduce(0, Integer::min);
+            }
+        }
+        return result;
+    }
+
+    @Test
+    public void testReduce() {
+        // Handmade reduce does not support +UseCompressedOops
+        HotSpotVMConfig config = runtime().getConfig();
+        if (config.useCompressedOops == true || config.useHSAILDeoptimization == true) {
+            return;
+        }
+
+        bigArray = new int[jobSize];
+        for (int i = 0; i < jobSize; i++) {
+            bigArray[i] = -1024 + i + 1;
+        }
+
+        // Get non parallel baseline
+        resultStream = evaluate(false);
+
+        // Get OptionalInt version kernel
+        resultOffload = evaluate(true);
+        assertTrue(resultStream == resultOffload);
+
+        // Do identity version kernel
+        // Get non parallel baseline
+        resultStream = evaluateWithIdentity(false);
+
+        resultOffload = evaluateWithIdentity(true);
+        assertTrue(resultStream == resultOffload);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/ReduceSumTest.java	Tue Jun 17 17:42:24 2014 -0700
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.compiler.hsail.test.lambda;
+
+//import com.oracle.graal.compiler.common.GraalInternalError;
+import static com.oracle.graal.hotspot.HotSpotGraalRuntime.runtime;
+import com.oracle.graal.hotspot.HotSpotVMConfig;
+import org.junit.*;
+
+import java.util.*;
+import java.util.stream.IntStream;
+
+public class ReduceSumTest {
+    // The length of the input array
+    static int jobSize = 1027 * 1023 * 13;
+    static int loops = 1;
+
+    // The array to be summed
+    int bigArray[] = null;
+
+    // sum for baseline single threaded stream
+    int sumStream = 0;
+    // sum for parallel CPU and offloaded streams
+    int sumOffload = 0;
+
+    int evaluate(boolean doParallelStream) {
+        int sum = 0;
+        for (int i = 0; i < loops; i++) {
+            IntStream s = Arrays.stream(bigArray);
+            if (doParallelStream == true) {
+                OptionalInt resultParallel = s.parallel().reduce(Integer::sum);
+                sum = resultParallel.getAsInt();
+            } else {
+                OptionalInt resultStream = s.reduce(Integer::sum);
+                sum = resultStream.getAsInt();
+            }
+        }
+        return sum;
+    }
+
+    int evaluateWithIdentity(boolean doParallelStream) {
+        int sum = 0;
+        for (int i = 0; i < loops; i++) {
+            IntStream s = Arrays.stream(bigArray);
+            if (doParallelStream == true) {
+                sum = s.parallel().reduce(0, Integer::sum);
+            } else {
+                sum = s.reduce(0, Integer::sum);
+            }
+        }
+        return sum;
+    }
+
+    @Test
+    public void testReduce() {
+        // Handmade reduce does not support +UseCompressedOops
+        HotSpotVMConfig config = runtime().getConfig();
+        if (config.useCompressedOops == true || config.useHSAILDeoptimization == true) {
+            return;
+        }
+
+        bigArray = new int[jobSize];
+        for (int i = 0; i < jobSize; i++) {
+            bigArray[i] = -1024 + i + 1;
+        }
+
+        // Get non parallel baseline
+        sumStream = evaluate(false);
+
+        // Get OptionalInt version kernel
+        sumOffload = evaluate(true);
+        assert sumStream == sumOffload : "Offload sum is wrong, stream:" + sumStream + " != offload:" + sumOffload;
+
+        // Get identity version kernel
+        sumOffload = evaluateWithIdentity(true);
+        assert sumStream == sumOffload : "Offload sum is wrong, stream:" + sumStream + " != offload:" + sumOffload;
+    }
+}
--- a/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/StringBuilderTest.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.compiler.hsail.test/src/com/oracle/graal/compiler/hsail/test/lambda/StringBuilderTest.java	Tue Jun 17 17:42:24 2014 -0700
@@ -45,6 +45,11 @@
         });
     }
 
+    @Override
+    protected boolean supportsRequiredCapabilities() {
+        return (canHandleObjectAllocation());
+    }
+
     @Test
     public void testUsingLambdaMethod() {
         testGeneratedHsailUsingLambdaMethod();
--- a/graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/CompileAndDispatch.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.compiler.hsail/src/com/oracle/graal/compiler/hsail/CompileAndDispatch.java	Tue Jun 17 17:42:24 2014 -0700
@@ -32,4 +32,12 @@
     Object createKernel(Class<?> consumerClass);
 
     boolean dispatchKernel(Object kernel, int jobSize, Object[] args);
+
+    Object createKernelFromHsailString(String code, String methodName);
+
+    String getIntegerReduceIntrinsic(String reducerName);
+
+    Integer offloadIntReduceImpl(Object kernel, int identity, int[] streamSource);
+
+    String getIntReduceTargetName(Class<?> opClass);
 }
--- a/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/CheckGraalInvariants.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/CheckGraalInvariants.java	Tue Jun 17 17:42:24 2014 -0700
@@ -174,6 +174,7 @@
         new VerifyUsageWithEquals(JavaType.class).apply(graph, context);
         new VerifyUsageWithEquals(JavaMethod.class).apply(graph, context);
         new VerifyUsageWithEquals(JavaField.class).apply(graph, context);
+        new VerifyUsageWithEquals(LIRKind.class).apply(graph, context);
         new VerifyDebugUsage().apply(graph, context);
     }
 
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/alloc/Interval.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/alloc/Interval.java	Tue Jun 17 17:42:24 2014 -0700
@@ -533,7 +533,7 @@
     void assignLocation(AllocatableValue newLocation) {
         if (isRegister(newLocation)) {
             assert this.location == null : "cannot re-assign location for " + this;
-            if (newLocation.getLIRKind() == LIRKind.Illegal && kind != LIRKind.Illegal) {
+            if (newLocation.getLIRKind().equals(LIRKind.Illegal) && !kind.equals(LIRKind.Illegal)) {
                 this.location = asRegister(newLocation).asValue(kind);
                 return;
             }
@@ -542,7 +542,7 @@
         } else {
             assert this.location == null || isRegister(this.location) : "cannot re-assign location for " + this;
             assert isStackSlot(newLocation);
-            assert newLocation.getLIRKind() != LIRKind.Illegal;
+            assert !newLocation.getLIRKind().equals(LIRKind.Illegal);
             assert newLocation.getLIRKind().equals(this.kind);
         }
         this.location = newLocation;
@@ -562,7 +562,7 @@
     }
 
     void setKind(LIRKind kind) {
-        assert isRegister(operand) || this.kind() == LIRKind.Illegal || this.kind() == kind : "overwriting existing type";
+        assert isRegister(operand) || this.kind().equals(LIRKind.Illegal) || this.kind().equals(kind) : "overwriting existing type";
         this.kind = kind;
     }
 
@@ -774,7 +774,7 @@
                 Interval i1 = splitChildren.get(i);
 
                 assert i1.splitParent() == this : "not a split child of this interval";
-                assert i1.kind() == kind() : "must be equal for all split children";
+                assert i1.kind().equals(kind()) : "must be equal for all split children";
                 assert (i1.spillSlot() == null && spillSlot == null) || i1.spillSlot().equals(spillSlot()) : "must be equal for all split children";
 
                 for (int j = i + 1; j < splitChildren.size(); j++) {
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/alloc/LinearScan.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/alloc/LinearScan.java	Tue Jun 17 17:42:24 2014 -0700
@@ -965,7 +965,7 @@
         }
 
         Interval interval = getOrCreateInterval(operand);
-        if (kind != LIRKind.Illegal) {
+        if (!kind.equals(LIRKind.Illegal)) {
             interval.setKind(kind);
         }
 
@@ -983,7 +983,7 @@
         }
 
         Interval interval = getOrCreateInterval(operand);
-        if (kind != LIRKind.Illegal) {
+        if (!kind.equals(LIRKind.Illegal)) {
             interval.setKind(kind);
         }
 
@@ -1005,7 +1005,7 @@
         int defPos = op.id();
 
         Interval interval = getOrCreateInterval(operand);
-        if (kind != LIRKind.Illegal) {
+        if (!kind.equals(LIRKind.Illegal)) {
             interval.setKind(kind);
         }
 
@@ -1918,7 +1918,7 @@
                     throw new GraalInternalError("");
                 }
 
-                if (isVariable(i1.operand) && i1.kind() == LIRKind.Illegal) {
+                if (isVariable(i1.operand) && i1.kind().equals(LIRKind.Illegal)) {
                     Debug.log("Interval %d has no type assigned", i1.operandNumber);
                     Debug.log(i1.logString(this));
                     throw new GraalInternalError("");
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/alloc/MoveResolver.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/alloc/MoveResolver.java	Tue Jun 17 17:42:24 2014 -0700
@@ -209,7 +209,7 @@
     }
 
     private void insertMove(Value fromOpr, Interval toInterval) {
-        assert fromOpr.getLIRKind() == toInterval.kind() : "move between different types";
+        assert fromOpr.getLIRKind().equals(toInterval.kind()) : "move between different types";
         assert insertIdx != -1 : "must setup insert position first";
 
         AllocatableValue toOpr = toInterval.operand;
@@ -339,7 +339,7 @@
         Debug.log("add move mapping from %s to %s", fromInterval, toInterval);
 
         assert !fromInterval.operand.equals(toInterval.operand) : "from and to interval equal: " + fromInterval;
-        assert fromInterval.kind() == toInterval.kind();
+        assert fromInterval.kind().equals(toInterval.kind());
         mappingFrom.add(fromInterval);
         mappingFromOpr.add(Value.ILLEGAL);
         mappingTo.add(toInterval);
--- a/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64HotSpotBackendFactory.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64HotSpotBackendFactory.java	Tue Jun 17 17:42:24 2014 -0700
@@ -92,7 +92,7 @@
         final int stackFrameAlignment = 16;
         final int implicitNullCheckLimit = 4096;
         final boolean inlineObjects = true;
-        return new HotSpotTargetDescription(createArchitecture(config), true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects, Kind.Int);
+        return new HotSpotTargetDescription(createArchitecture(config), true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects);
     }
 
     @Override
@@ -233,15 +233,15 @@
         } else {
             /*
              * System V Application Binary Interface, AMD64 Architecture Processor Supplement
-             * 
+             *
              * Draft Version 0.96
-             * 
+             *
              * http://www.uclibc.org/docs/psABI-x86_64.pdf
-             * 
+             *
              * 3.2.1
-             * 
+             *
              * ...
-             * 
+             *
              * This subsection discusses usage of each register. Registers %rbp, %rbx and %r12
              * through %r15 "belong" to the calling function and the called function is required to
              * preserve their values. In other words, a called function must preserve these
--- a/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64HotSpotCompare.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64HotSpotCompare.java	Tue Jun 17 17:42:24 2014 -0700
@@ -30,7 +30,6 @@
 import com.oracle.graal.compiler.common.*;
 import com.oracle.graal.hotspot.data.*;
 import com.oracle.graal.hotspot.meta.*;
-import com.oracle.graal.hotspot.nodes.type.*;
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.amd64.*;
 import com.oracle.graal.lir.amd64.AMD64Move.MemOp;
@@ -38,33 +37,6 @@
 
 public class AMD64HotSpotCompare {
 
-    @Opcode("NCMP")
-    public static class HotSpotCompareNarrowOp extends AMD64LIRInstruction {
-
-        @Use({REG}) protected AllocatableValue x;
-        @Use({REG, STACK}) protected AllocatableValue y;
-
-        public HotSpotCompareNarrowOp(AllocatableValue x, AllocatableValue y) {
-            this.x = x;
-            this.y = y;
-        }
-
-        @Override
-        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
-            if (isRegister(y)) {
-                masm.cmpl(asRegister(x), asRegister(y));
-            } else {
-                assert isStackSlot(y);
-                masm.cmpl(asRegister(x), (AMD64Address) crb.asAddress(y));
-            }
-        }
-
-        @Override
-        protected void verify() {
-            assert x.getPlatformKind() == NarrowOopStamp.NarrowOop && y.getPlatformKind() == NarrowOopStamp.NarrowOop;
-        }
-    }
-
     @Opcode("CMP")
     public static class HotSpotCompareConstantOp extends AMD64LIRInstruction {
 
--- a/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64HotSpotLIRGenerator.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64HotSpotLIRGenerator.java	Tue Jun 17 17:42:24 2014 -0700
@@ -39,7 +39,6 @@
 import com.oracle.graal.hotspot.HotSpotVMConfig.CompressEncoding;
 import com.oracle.graal.hotspot.amd64.AMD64HotSpotMove.HotSpotStoreConstantOp;
 import com.oracle.graal.hotspot.meta.*;
-import com.oracle.graal.hotspot.nodes.type.*;
 import com.oracle.graal.hotspot.stubs.*;
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.StandardOp.NoOp;
@@ -50,7 +49,6 @@
 import com.oracle.graal.lir.amd64.AMD64Move.LeaDataOp;
 import com.oracle.graal.lir.amd64.AMD64Move.LoadOp;
 import com.oracle.graal.lir.amd64.AMD64Move.MoveFromRegOp;
-import com.oracle.graal.lir.amd64.AMD64Move.MoveToRegOp;
 import com.oracle.graal.lir.amd64.AMD64Move.StoreOp;
 import com.oracle.graal.lir.gen.*;
 
@@ -436,14 +434,6 @@
         }
     }
 
-    private static Kind getMemoryKind(LIRKind kind) {
-        if (kind.getPlatformKind() == NarrowOopStamp.NarrowOop) {
-            return Kind.Int;
-        } else {
-            return (Kind) kind.getPlatformKind();
-        }
-    }
-
     private static LIRKind toStackKind(LIRKind kind) {
         if (kind.getPlatformKind() instanceof Kind) {
             Kind stackKind = ((Kind) kind.getPlatformKind()).getStackKind();
@@ -465,7 +455,7 @@
     public Variable emitLoad(LIRKind kind, Value address, LIRFrameState state) {
         AMD64AddressValue loadAddress = asAddressValue(address);
         Variable result = newVariable(toStackKind(kind));
-        append(new LoadOp(getMemoryKind(kind), result, loadAddress, state));
+        append(new LoadOp((Kind) kind.getPlatformKind(), result, loadAddress, state));
         return result;
     }
 
@@ -475,12 +465,12 @@
         if (isConstant(inputVal)) {
             Constant c = asConstant(inputVal);
             if (canStoreConstant(c)) {
-                append(new HotSpotStoreConstantOp(getMemoryKind(kind), storeAddress, c, state));
+                append(new HotSpotStoreConstantOp((Kind) kind.getPlatformKind(), storeAddress, c, state));
                 return;
             }
         }
         Variable input = load(inputVal);
-        append(new StoreOp(getMemoryKind(kind), storeAddress, input, state));
+        append(new StoreOp((Kind) kind.getPlatformKind(), storeAddress, input, state));
     }
 
     @Override
@@ -489,7 +479,7 @@
         assert inputKind.getPlatformKind() == Kind.Long || inputKind.getPlatformKind() == Kind.Object;
         if (inputKind.isReference(0)) {
             // oop
-            Variable result = newVariable(LIRKind.reference(NarrowOopStamp.NarrowOop));
+            Variable result = newVariable(LIRKind.reference(Kind.Int));
             append(new AMD64HotSpotMove.CompressPointer(result, asAllocatable(pointer), getProviders().getRegisters().getHeapBaseRegister().asValue(), encoding, nonNull));
             return result;
         } else {
@@ -507,7 +497,7 @@
     @Override
     public Value emitUncompress(Value pointer, CompressEncoding encoding, boolean nonNull) {
         LIRKind inputKind = pointer.getLIRKind();
-        assert inputKind.getPlatformKind() == Kind.Int || inputKind.getPlatformKind() == NarrowOopStamp.NarrowOop;
+        assert inputKind.getPlatformKind() == Kind.Int;
         if (inputKind.isReference(0)) {
             // oop
             Variable result = newVariable(LIRKind.reference(Kind.Object));
@@ -529,12 +519,6 @@
     protected AMD64LIRInstruction createMove(AllocatableValue dst, Value src) {
         if (src instanceof Constant) {
             return new AMD64HotSpotMove.HotSpotLoadConstantOp(dst, (Constant) src);
-        } else if (dst.getPlatformKind() == NarrowOopStamp.NarrowOop) {
-            if (isRegister(src) || isStackSlot(dst)) {
-                return new MoveFromRegOp(Kind.Int, dst, src);
-            } else {
-                return new MoveToRegOp(Kind.Int, dst, src);
-            }
         } else {
             return super.createMove(dst, src);
         }
@@ -543,7 +527,7 @@
     public Value emitCompareAndSwap(Value address, Value expectedValue, Value newValue, Value trueValue, Value falseValue) {
         LIRKind kind = newValue.getLIRKind();
         assert kind.equals(expectedValue.getLIRKind());
-        Kind memKind = getMemoryKind(kind);
+        Kind memKind = (Kind) kind.getPlatformKind();
 
         AMD64AddressValue addressValue = asAddressValue(address);
         RegisterValue raxRes = AMD64.rax.asValue(kind);
@@ -558,7 +542,7 @@
 
     public Value emitAtomicReadAndAdd(Value address, Value delta) {
         LIRKind kind = delta.getLIRKind();
-        Kind memKind = getMemoryKind(kind);
+        Kind memKind = (Kind) kind.getPlatformKind();
         Variable result = newVariable(kind);
         AMD64AddressValue addressValue = asAddressValue(address);
         append(new AMD64Move.AtomicReadAndAddOp(memKind, result, addressValue, asAllocatable(delta)));
@@ -567,7 +551,7 @@
 
     public Value emitAtomicReadAndWrite(Value address, Value newValue) {
         LIRKind kind = newValue.getLIRKind();
-        Kind memKind = getMemoryKind(kind);
+        Kind memKind = (Kind) kind.getPlatformKind();
         Variable result = newVariable(kind);
         AMD64AddressValue addressValue = asAddressValue(address);
         append(new AMD64Move.AtomicReadAndWriteOp(memKind, result, addressValue, asAllocatable(newValue)));
@@ -583,8 +567,6 @@
     protected void emitCompareOp(PlatformKind cmpKind, Variable left, Value right) {
         if (right instanceof HotSpotConstant) {
             append(new AMD64HotSpotCompare.HotSpotCompareConstantOp(left, (Constant) right));
-        } else if (cmpKind == NarrowOopStamp.NarrowOop) {
-            append(new AMD64HotSpotCompare.HotSpotCompareNarrowOp(left, asAllocatable(right)));
         } else {
             super.emitCompareOp(cmpKind, left, right);
         }
--- a/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64HotSpotRegisterConfig.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64HotSpotRegisterConfig.java	Tue Jun 17 17:42:24 2014 -0700
@@ -33,7 +33,6 @@
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.compiler.common.*;
 import com.oracle.graal.hotspot.*;
-import com.oracle.graal.hotspot.nodes.type.*;
 
 public class AMD64HotSpotRegisterConfig implements RegisterConfig {
 
@@ -70,16 +69,9 @@
             return categorized.get(kind);
         }
 
-        PlatformKind primitiveKind;
-        if (kind == NarrowOopStamp.NarrowOop) {
-            primitiveKind = Kind.Int;
-        } else {
-            primitiveKind = kind;
-        }
-
         ArrayList<Register> list = new ArrayList<>();
         for (Register reg : getAllocatableRegisters()) {
-            if (architecture.canStoreValue(reg.getRegisterCategory(), primitiveKind)) {
+            if (architecture.canStoreValue(reg.getRegisterCategory(), kind)) {
                 list.add(reg);
             }
         }
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/ForEachToGraal.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/ForEachToGraal.java	Tue Jun 17 17:42:24 2014 -0700
@@ -26,15 +26,20 @@
 import static com.oracle.graal.hotspot.HotSpotGraalRuntime.*;
 
 import java.lang.reflect.*;
+import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.*;
 
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
+import com.oracle.graal.compiler.common.GraalInternalError;
 import com.oracle.graal.compiler.hsail.*;
 import com.oracle.graal.compiler.target.*;
 import com.oracle.graal.debug.*;
 import com.oracle.graal.debug.internal.*;
 import com.oracle.graal.gpu.*;
 import com.oracle.graal.graph.iterators.*;
+import com.oracle.graal.hotspot.*;
 import com.oracle.graal.hotspot.meta.*;
 import com.oracle.graal.hsail.*;
 import com.oracle.graal.java.*;
@@ -55,11 +60,54 @@
         return (HSAILHotSpotBackend) backend;
     }
 
+    ConcurrentHashMap<Class<?>, String> resolvedConsumerTargetMethods = new ConcurrentHashMap<>();
+
     /**
-     * Gets a compiled and installed kernel for the lambda called by the {@code accept(int value)}
-     * method in a class implementing {@code java.util.function.IntConsumer}.
-     * 
-     * @param intConsumerClass a class implementing {@code java.util.function.IntConsumer}
+     * Returns the name of the reduction method given a class implementing {@link IntConsumer}.
+     *
+     * @param opClass a class implementing {@link IntConsumer}.
+     * @return the name of the reduction method
+     */
+    public String getIntReduceTargetName(Class<?> opClass) {
+        String cachedMethodName = resolvedConsumerTargetMethods.get(Objects.requireNonNull(opClass));
+        if (cachedMethodName != null) {
+            return cachedMethodName;
+        } else {
+            Method acceptMethod = null;
+            for (Method m : opClass.getMethods()) {
+                if (m.getName().equals("applyAsInt")) {
+                    assert acceptMethod == null : "found more than one implementation of applyAsInt in " + opClass;
+                    acceptMethod = m;
+                }
+            }
+            // Ensure a debug configuration for this thread is initialized
+            if (DebugScope.getConfig() == null) {
+                DebugEnvironment.initialize(System.out);
+            }
+
+            HSAILHotSpotBackend backend = getHSAILBackend();
+            Providers providers = backend.getProviders();
+            StructuredGraph graph = new StructuredGraph(((HotSpotMetaAccessProvider) providers.getMetaAccess()).lookupJavaMethod(acceptMethod));
+            new GraphBuilderPhase.Instance(providers.getMetaAccess(), GraphBuilderConfiguration.getDefault(), OptimisticOptimizations.ALL).apply(graph);
+            NodeIterable<MethodCallTargetNode> calls = graph.getNodes(MethodCallTargetNode.class);
+            assert calls.count() == 1;
+            ResolvedJavaMethod lambdaMethod = calls.first().targetMethod();
+            Debug.log("target ... %s", lambdaMethod);
+
+            String className = lambdaMethod.getDeclaringClass().getName();
+            if (!className.equals("Ljava/lang/Integer;")) {
+                return null;
+            }
+            resolvedConsumerTargetMethods.put(opClass, lambdaMethod.getName());
+            return lambdaMethod.getName().intern();
+        }
+    }
+
+    /**
+     * Gets a compiled and installed kernel for the lambda called by the
+     * {@link IntConsumer#accept(int)} method in a class implementing {@link IntConsumer}.
+     *
+     * @param intConsumerClass a class implementing {@link IntConsumer}
      * @return a {@link HotSpotNmethod} handle to the compiled and installed kernel
      */
     private static HotSpotNmethod getCompiledLambda(Class<?> intConsumerClass) {
@@ -108,6 +156,38 @@
     }
 
     @Override
+    public Object createKernelFromHsailString(String code, String methodName) {
+        ExternalCompilationResult hsailCode = new ExternalCompilationResult();
+        try (Debug.Scope ds = Debug.scope("GeneratingKernelBinary")) {
+
+            HSAILHotSpotBackend backend = getHSAILBackend();
+            Providers providers = backend.getProviders();
+            Method integerOffloadMethod = null;
+
+            for (Method m : Integer.class.getMethods()) {
+                if (m.getName().equals(methodName)) {
+                    integerOffloadMethod = m;
+                    break;
+                }
+            }
+            if (integerOffloadMethod != null) {
+                ResolvedJavaMethod rm = ((HotSpotMetaAccessProvider) providers.getMetaAccess()).lookupJavaMethod(integerOffloadMethod);
+
+                long kernel = HSAILHotSpotBackend.generateKernel(code.getBytes(), "Integer::" + methodName);
+                if (kernel == 0) {
+                    throw new GraalInternalError("Failed to compile HSAIL kernel from String");
+                }
+                hsailCode.setEntryPoint(kernel);
+                return backend.installKernel(rm, hsailCode); // is a HotSpotNmethod
+            } else {
+                return null;
+            }
+        } catch (Throwable e) {
+            throw Debug.handle(e);
+        }
+    }
+
+    @Override
     public boolean dispatchKernel(Object kernel, int jobSize, Object[] args) {
         HotSpotNmethod code = (HotSpotNmethod) kernel;
         if (code != null) {
@@ -125,4 +205,258 @@
             return false;
         }
     }
+
+    /**
+     * Running with a larger global size seems to increase the performance for sum, but it might be
+     * different for other reductions so it is a knob.
+     */
+    private static final int GlobalSize = 1024 * Integer.getInteger("com.amd.sumatra.reduce.globalsize.multiple", 1);
+
+    @Override
+    public Integer offloadIntReduceImpl(Object okraKernel, int identity, int[] streamSource) {
+        // NOTE - this reduce requires local size of 64 which is the SumatraUtils default
+
+        // Handmade reduce does not support +UseCompressedOops
+        HotSpotVMConfig config = runtime().getConfig();
+        if (config.useCompressedOops == true || config.useHSAILDeoptimization == true) {
+            throw new GraalInternalError("Reduce offload not compatible with +UseCompressedOops or +UseHSAILDeoptimization");
+        }
+
+        try {
+            assert streamSource.length >= GlobalSize : "Input array length=" + streamSource.length + " smaller than requested global_size=" + GlobalSize;
+
+            int result[] = {identity};
+            Object args[] = {streamSource, result, streamSource.length};
+            args[0] = streamSource;
+
+            dispatchKernel(okraKernel, GlobalSize, args);
+
+            // kernel result is result[0].
+            return result[0];
+        } catch (Exception e) {
+            System.err.println(e);
+            e.printStackTrace();
+        }
+        return null;
+    }
+
+    @Override
+    public String getIntegerReduceIntrinsic(String reducerName) {
+
+        // Note all of these depend on group size of 256
+
+        String reduceOp = "/* Invalid */ ";
+        String atomicResultProduction = "/* Invalid */ ";
+        if (reducerName.equals("sum")) {
+            reduceOp = "add_u32 ";
+            atomicResultProduction = "atomicnoret_add_global_u32 ";
+        } else if (reducerName.equals("max")) {
+            reduceOp = "max_s32 ";
+            atomicResultProduction = "atomicnoret_max_global_s32 ";
+        } else if (reducerName.equals("min")) {
+            reduceOp = "min_s32 ";
+            atomicResultProduction = "atomicnoret_min_global_s32 ";
+        } else {
+            return "/* Invalid */ ";
+        }
+
+        // @formatter:off
+        return new String(
+                "version 0:95:$full:$large; // BRIG Object Format Version 0:4" + "\n"
+                + "" + "\n"
+                + "kernel &run(" + "\n"
+                + "	align 8 kernarg_u64 %arg_p3," + "\n"
+                + "	align 8 kernarg_u64 %arg_p4," + "\n"
+                + "	align 4 kernarg_u32 %arg_p5)" + "\n"
+                + "{" + "\n"
+                + "" + "\n"
+                + "	align 4 group_u32 %reduce_cllocal_scratch[256];" + "\n"
+                + "" + "\n"
+                + "	workitemabsid_u32 $s2, 0;" + "\n"
+                + "" + "\n"
+                + "	ld_kernarg_u32	$s1, [%arg_p5];" + "\n"
+                + "	ld_kernarg_u64	$d0, [%arg_p4];" + "\n"
+                + "	ld_kernarg_u64	$d1, [%arg_p3];" + "\n"
+                + "" + "\n"
+                + "	add_u64 $d0, $d0, 24;             // adjust over obj array headers" + "\n"
+                + "	add_u64 $d1, $d1, 24;" + "\n"
+                + "	cmp_ge_b1_s32	$c0, $s2, $s1; // if(gloId < length){" + "\n"
+                + "	cbr	$c0, @BB0_1;" + "\n"
+                + "	gridsize_u32	$s0, 0;        // s0 is globalsize" + "\n"
+                + " add_u32 $s0, $s0, $s2;         // gx += globalsize" + "\n"
+                + "	cvt_s64_s32	$d2, $s2;      // s2 is global id" + "\n"
+                + "	shl_u64	$d2, $d2, 2;" + "\n"
+                + "	add_u64	$d2, $d1, $d2;" + "\n"
+                + "	ld_global_u32	$s3, [$d2];    // load this element from input" + "\n"
+                + "	brn	@BB0_3;" + "\n"
+                + "" + "\n"
+                + "@BB0_1:" + "\n"
+                + "	mov_b32	$s0, $s2;" + "\n"                                  + "" + "\n"
+                + "@BB0_3:" + "\n"
+                + "	cmp_ge_b1_s32	$c1, $s0, $s1; // while (gx < length)" + "\n"
+                + "	cbr	$c1, @BB0_6;" + "\n"
+                + "	gridsize_u32	$s2, 0;" + "\n"
+                + "" + "\n"
+                + "@BB0_5:" + "\n"
+                + "	cvt_s64_s32	$d2, $s0;" + "\n"
+                + "	shl_u64	$d2, $d2, 2;" + "\n"
+                + "	add_u64	$d2, $d1, $d2;" + "\n"
+                + "	ld_global_u32	$s4, [$d2];" + "\n"
+                +       reduceOp + "  $s3, $s3, $s4;" + "\n"
+                + "	add_u32	$s0, $s0, $s2;" + "\n"
+                + "	cmp_lt_b1_s32	$c1, $s0, $s1;" + "\n"
+                + "	cbr	$c1, @BB0_5;" + "\n"
+                + "" + "\n"
+                + "@BB0_6:" + "\n"
+                + "	workgroupid_u32	$s0, 0;" + "\n"
+                + "	workgroupsize_u32	$s2, 0;" + "\n"
+                + "	mul_u32	$s2, $s2, $s0;" + "\n"
+                + "	sub_u32	$s2, $s1, $s2;" + "\n"
+                + "	workitemid_u32	$s1, 0;" + "\n"
+                + "	add_u32	$s4, $s1, 128;"
+                + "\n"
+                + "	cmp_lt_b1_u32	$c1, $s4, $s2;" + "\n"
+                + "	cmp_lt_b1_s32	$c2, $s1, 128;" + "\n"
+                + "	and_b1	$c1, $c2, $c1;" + "\n"
+                + "	cvt_s64_s32	$d1, $s1;" + "\n"
+                + "	shl_u64	$d1, $d1, 2;" + "\n"
+                + "	lda_group_u64	$d2, [%reduce_cllocal_scratch];" + "\n"
+                + "	add_u64	$d1, $d2, $d1;" + "\n"
+                + "	st_group_u32	$s3, [$d1];" + "\n"
+                + "	barrier_fgroup;" + "\n"
+                + "	not_b1	$c1, $c1;" + "\n"
+                + "	cbr	$c1, @BB0_8;" + "\n"
+                + "	ld_group_u32	$s3, [$d1];" + "\n"
+                + "	cvt_s64_s32	$d3, $s4;" + "\n"
+                + "	shl_u64	$d3, $d3, 2;" + "\n"
+                + "	add_u64	$d3, $d2, $d3;" + "\n"
+                + "	ld_group_u32	$s4, [$d3];" + "\n"
+                +       reduceOp + "  $s3, $s3, $s4;" + "\n"
+                + "	st_group_u32	$s3, [$d1];" + "\n"
+                + "" + "\n"
+                + "@BB0_8:" + "\n"
+                + "	add_u32	$s3, $s1, 64;" + "\n"
+                + "	cmp_lt_b1_u32	$c1, $s3, $s2;" + "\n"
+                + "	cmp_lt_b1_s32	$c2, $s1, 64;" + "\n"
+                + "	and_b1	$c1, $c2, $c1;" + "\n"
+                + "	barrier_fgroup;" + "\n"
+                + "	not_b1	$c1, $c1;" + "\n"
+                + "	cbr	$c1, @BB0_10;" + "\n"
+                + "	ld_group_u32	$s4, [$d1];" + "\n"
+                + "	cvt_s64_s32	$d3, $s3;" + "\n"
+                + "	shl_u64	$d3, $d3, 2;" + "\n"
+                + "	add_u64	$d3, $d2, $d3;" + "\n"
+                + "	ld_group_u32	$s3, [$d3];" + "\n"
+                +       reduceOp + "  $s3, $s3, $s4;"
+                + "\n"
+                + "	st_group_u32	$s3, [$d1];" + "\n"
+                + "" + "\n"
+                + "@BB0_10:" + "\n"
+                + "	add_u32	$s3, $s1, 32;" + "\n"
+                + "	cmp_lt_b1_u32	$c1, $s3, $s2;" + "\n"
+                + "	cmp_lt_b1_s32	$c2, $s1, 32;" + "\n"
+                + "	and_b1	$c1, $c2, $c1;" + "\n"
+                + "	barrier_fgroup;" + "\n"
+                + "	not_b1	$c1, $c1;" + "\n"
+                + "	cbr	$c1, @BB0_12;" + "\n"
+                + "	ld_group_u32	$s4, [$d1];" + "\n"
+                + "	cvt_s64_s32	$d3, $s3;" + "\n"
+                + "	shl_u64	$d3, $d3, 2;" + "\n"
+                + "	add_u64	$d3, $d2, $d3;" + "\n"
+                + "	ld_group_u32	$s3, [$d3];" + "\n"
+                +       reduceOp + "  $s3, $s3, $s4;" + "\n"
+                + "	st_group_u32	$s3, [$d1];" + "\n"
+                + "" + "\n"
+                + "@BB0_12:" + "\n"
+                + "	add_u32	$s3, $s1, 16;" + "\n"
+                + "	cmp_lt_b1_u32	$c1, $s3, $s2;" + "\n"
+                + "	cmp_lt_b1_s32	$c2, $s1, 16;" + "\n"
+                + "	and_b1	$c1, $c2, $c1;" + "\n"
+                + "	barrier_fgroup;" + "\n"
+                + "	not_b1	$c1, $c1;" + "\n"
+                + "	cbr	$c1, @BB0_14;" + "\n"
+                + "	ld_group_u32	$s4, [$d1];" + "\n"
+                + "	cvt_s64_s32	$d3, $s3;" + "\n"
+                + "	shl_u64	$d3, $d3, 2;" + "\n"
+                + "	add_u64	$d3, $d2, $d3;" + "\n"
+                + "	ld_group_u32	$s3, [$d3];" + "\n"
+                +       reduceOp + "  $s3, $s3, $s4;" + "\n"
+                + "	st_group_u32	$s3, [$d1];" + "\n"
+                + "" + "\n"
+                + "@BB0_14:" + "\n"
+                + "	add_u32	$s3, $s1, 8;" + "\n"
+                + "	cmp_lt_b1_u32	$c1, $s3, $s2;" + "\n"
+                + "	cmp_lt_b1_s32	$c2, $s1, 8;" + "\n"
+                + "	and_b1	$c1, $c2, $c1;" + "\n"
+                + "	barrier_fgroup;" + "\n"
+                + "	not_b1	$c1, $c1;" + "\n"
+                + "	cbr	$c1, @BB0_16;" + "\n"
+                + "	ld_group_u32	$s4, [$d1];" + "\n"
+                + "	cvt_s64_s32	$d3, $s3;" + "\n"
+                + "	shl_u64	$d3, $d3, 2;" + "\n"
+                + "	add_u64	$d3, $d2, $d3;" + "\n"
+                + "	ld_group_u32	$s3, [$d3];" + "\n"
+                +       reduceOp + "  $s3, $s3, $s4;" + "\n"
+                + "	st_group_u32	$s3, [$d1];" + "\n"
+                + "" + "\n"
+                + "@BB0_16:" + "\n"
+                + "	add_u32	$s3, $s1, 4;" + "\n"
+                + "	cmp_lt_b1_u32	$c1, $s3, $s2;" + "\n"
+                + "	cmp_lt_b1_s32	$c2, $s1, 4;" + "\n"
+                + "	and_b1	$c1, $c2, $c1;" + "\n"
+                + "	barrier_fgroup;" + "\n"
+                + "	not_b1	$c1, $c1;" + "\n"
+                + "	cbr	$c1, @BB0_18;" + "\n"
+                + "	ld_group_u32	$s4, [$d1];" + "\n"
+                + "	cvt_s64_s32	$d3, $s3;" + "\n"
+                + "	shl_u64	$d3, $d3, 2;" + "\n"
+                + "	add_u64	$d3, $d2, $d3;" + "\n"
+                + "	ld_group_u32	$s3, [$d3];" + "\n"
+                +       reduceOp + "  $s3, $s3, $s4;" + "\n"
+                + "	st_group_u32	$s3, [$d1];" + "\n"
+                + "" + "\n"
+                + "@BB0_18:" + "\n"
+                + "	add_u32	$s3, $s1, 2;" + "\n"
+                + "	cmp_lt_b1_u32	$c1, $s3, $s2;" + "\n"
+                + "	cmp_lt_b1_s32	$c2, $s1, 2;" + "\n"
+                + "	and_b1	$c1, $c2, $c1;" + "\n"
+                + "	barrier_fgroup;" + "\n"
+                + "	not_b1	$c1, $c1;" + "\n"
+                + "	cbr	$c1, @BB0_20;" + "\n"
+                + "	ld_group_u32	$s4, [$d1];" + "\n"
+                + "	cvt_s64_s32	$d3, $s3;" + "\n"
+                + "	shl_u64	$d3, $d3, 2;" + "\n"
+                + "	add_u64	$d3, $d2, $d3;" + "\n"
+                + "	ld_group_u32	$s3, [$d3];" + "\n"
+                +       reduceOp + "  $s3, $s3, $s4;" + "\n"
+                + "	st_group_u32	$s3, [$d1];" + "\n"
+                + "" + "\n"
+                + "@BB0_20:" + "\n"
+                + "	add_u32	$s3, $s1, 1;" + "\n"
+                + "	cmp_lt_b1_u32	$c1, $s3, $s2;" + "\n"
+                + "	cmp_lt_b1_s32	$c2, $s1, 1;" + "\n"
+                + "	and_b1	$c1, $c2, $c1;" + "\n"
+                + "	barrier_fgroup;" + "\n"
+                + "	not_b1	$c1, $c1;" + "\n"
+                + "	cbr	$c1, @BB0_22;" + "\n"
+                + "	ld_group_u32	$s4, [$d1];" + "\n"
+                + "	cvt_s64_s32	$d3, $s3;" + "\n"
+                + "	shl_u64	$d3, $d3, 2;" + "\n"
+                + "	add_u64	$d2, $d2, $d3;" + "\n"
+                + "	ld_group_u32	$s3, [$d2];" + "\n"
+                +       reduceOp + "  $s3, $s3, $s4;" + "\n"
+                + "	st_group_u32	$s3, [$d1];" + "\n"
+                + "" + "\n"
+                + "@BB0_22:" + "\n"
+                + "	cmp_gt_b1_u32	$c0, $s1, 0;  // s1 is local id, done if > 0" + "\n"
+                + "	cbr	$c0, @BB0_24;" + "\n"
+                + "" + "\n"
+                + "	ld_group_u32	$s2, [%reduce_cllocal_scratch];  // s2 is result[get_group_id(0)];" + "\n"
+                +       atomicResultProduction + " [$d0], $s2; // build global result from local results" + "\n"
+                + "" + "\n"
+                + "@BB0_24:" + "\n"
+                + "	ret;" + "\n"
+                + "};" + "\n");
+        //@formatter:on
+    }
 }
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackend.java	Tue Jun 17 17:42:24 2014 -0700
@@ -250,7 +250,7 @@
     /**
      * Generates a GPU binary from HSAIL code.
      */
-    private static native long generateKernel(byte[] hsailCode, String name);
+    static native long generateKernel(byte[] hsailCode, String name);
 
     /**
      * Installs the {@linkplain ExternalCompilationResult#getEntryPoint() GPU binary} associated
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackendFactory.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotBackendFactory.java	Tue Jun 17 17:42:24 2014 -0700
@@ -72,7 +72,7 @@
         final int stackFrameAlignment = 8;
         final int implicitNullCheckLimit = 0;
         final boolean inlineObjects = true;
-        return new HotSpotTargetDescription(new HSAIL(), true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects, Kind.Int);
+        return new HotSpotTargetDescription(new HSAIL(), true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects);
     }
 
     public String getArchitecture() {
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerator.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotLIRGenerator.java	Tue Jun 17 17:42:24 2014 -0700
@@ -33,7 +33,6 @@
 import com.oracle.graal.hotspot.*;
 import com.oracle.graal.hotspot.HotSpotVMConfig.CompressEncoding;
 import com.oracle.graal.hotspot.meta.*;
-import com.oracle.graal.hotspot.nodes.type.*;
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.StandardOp.SaveRegistersOp;
 import com.oracle.graal.lir.gen.*;
@@ -46,7 +45,6 @@
 import com.oracle.graal.lir.hsail.HSAILMove.CompareAndSwapOp;
 import com.oracle.graal.lir.hsail.HSAILMove.LoadAcquireOp;
 import com.oracle.graal.lir.hsail.HSAILMove.LoadOp;
-import com.oracle.graal.lir.hsail.HSAILMove.MoveFromRegOp;
 import com.oracle.graal.lir.hsail.HSAILMove.MoveToRegOp;
 import com.oracle.graal.lir.hsail.HSAILMove.StoreConstantOp;
 import com.oracle.graal.lir.hsail.HSAILMove.StoreOp;
@@ -109,26 +107,18 @@
         }
     }
 
-    private static Kind getMemoryKind(LIRKind kind) {
-        if (kind.getPlatformKind() == NarrowOopStamp.NarrowOop) {
-            return Kind.Int;
-        } else {
-            return (Kind) kind.getPlatformKind();
-        }
-    }
-
     @Override
     public Variable emitLoad(LIRKind kind, Value address, LIRFrameState state) {
         HSAILAddressValue loadAddress = asAddressValue(address);
         Variable result = newVariable(kind);
-        append(new LoadOp(getMemoryKind(kind), result, loadAddress, state));
+        append(new LoadOp((Kind) kind.getPlatformKind(), result, loadAddress, state));
         return result;
     }
 
     public Variable emitLoadAcquire(LIRKind kind, Value address, LIRFrameState state) {
         HSAILAddressValue loadAddress = asAddressValue(address);
         Variable result = newVariable(kind);
-        append(new LoadAcquireOp(getMemoryKind(kind), result, loadAddress, state));
+        append(new LoadAcquireOp((Kind) kind.getPlatformKind(), result, loadAddress, state));
         return result;
     }
 
@@ -141,25 +131,25 @@
                 c = Constant.INT_0;
             }
             if (canStoreConstant(c)) {
-                append(new StoreConstantOp(getMemoryKind(kind), storeAddress, c, state));
+                append(new StoreConstantOp((Kind) kind.getPlatformKind(), storeAddress, c, state));
                 return;
             }
         }
         Variable input = load(inputVal);
-        append(new StoreOp(getMemoryKind(kind), storeAddress, input, state));
+        append(new StoreOp((Kind) kind.getPlatformKind(), storeAddress, input, state));
     }
 
     public void emitStoreRelease(LIRKind kind, Value address, Value inputVal, LIRFrameState state) {
         HSAILAddressValue storeAddress = asAddressValue(address);
         // TODO: handle Constants here
         Variable input = load(inputVal);
-        append(new StoreReleaseOp(getMemoryKind(kind), storeAddress, input, state));
+        append(new StoreReleaseOp((Kind) kind.getPlatformKind(), storeAddress, input, state));
     }
 
     public Value emitCompareAndSwap(Value address, Value expectedValue, Value newValue, Value trueValue, Value falseValue) {
         LIRKind kind = newValue.getLIRKind();
-        assert kind == expectedValue.getLIRKind();
-        Kind memKind = getMemoryKind(kind);
+        assert kind.equals(expectedValue.getLIRKind());
+        Kind memKind = (Kind) kind.getPlatformKind();
 
         HSAILAddressValue addressValue = asAddressValue(address);
         Variable expected = emitMove(expectedValue);
@@ -175,7 +165,7 @@
     @Override
     public Value emitAtomicReadAndAdd(Value address, Value delta) {
         LIRKind kind = delta.getLIRKind();
-        Kind memKind = getMemoryKind(kind);
+        Kind memKind = (Kind) kind.getPlatformKind();
         Variable result = newVariable(kind);
         HSAILAddressValue addressValue = asAddressValue(address);
         append(new HSAILMove.AtomicReadAndAddOp(memKind, result, addressValue, asAllocatable(delta)));
@@ -185,7 +175,7 @@
     @Override
     public Value emitAtomicReadAndWrite(Value address, Value newValue) {
         LIRKind kind = newValue.getLIRKind();
-        Kind memKind = getMemoryKind(kind);
+        Kind memKind = (Kind) kind.getPlatformKind();
         Variable result = newVariable(kind);
         HSAILAddressValue addressValue = asAddressValue(address);
         append(new HSAILMove.AtomicReadAndWriteOp(memKind, result, addressValue, asAllocatable(newValue)));
@@ -251,12 +241,6 @@
             append(new MoveToRegOp(Kind.Object, uncompressed, src));
             CompressEncoding oopEncoding = config.getOopEncoding();
             return new HSAILMove.CompressPointer(dst, newVariable(LIRKind.reference(Kind.Object)), uncompressed, oopEncoding.base, oopEncoding.shift, oopEncoding.alignment, true);
-        } else if (dst.getPlatformKind() == NarrowOopStamp.NarrowOop) {
-            if (isRegister(src) || isStackSlot(dst)) {
-                return new MoveFromRegOp(Kind.Int, dst, src);
-            } else {
-                return new MoveToRegOp(Kind.Int, dst, src);
-            }
         } else {
             return super.createMove(dst, src);
         }
@@ -297,7 +281,7 @@
 
     @Override
     public Value emitCompress(Value pointer, CompressEncoding encoding, boolean nonNull) {
-        Variable result = newVariable(LIRKind.reference(NarrowOopStamp.NarrowOop));
+        Variable result = newVariable(LIRKind.reference(Kind.Int));
         append(new HSAILMove.CompressPointer(result, newVariable(pointer.getLIRKind()), asAllocatable(pointer), encoding.base, encoding.shift, encoding.alignment, nonNull));
         return result;
     }
--- a/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotRegisterConfig.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.hsail/src/com/oracle/graal/hotspot/hsail/HSAILHotSpotRegisterConfig.java	Tue Jun 17 17:42:24 2014 -0700
@@ -29,7 +29,6 @@
 import com.oracle.graal.api.code.CallingConvention.Type;
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.compiler.common.*;
-import com.oracle.graal.hotspot.nodes.type.*;
 import com.oracle.graal.hsail.*;
 
 /**
@@ -145,14 +144,7 @@
 
     @Override
     public Register[] getAllocatableRegisters(PlatformKind kind) {
-        Kind primitiveKind;
-        if (kind == NarrowOopStamp.NarrowOop) {
-            primitiveKind = Kind.Int;
-        } else {
-            primitiveKind = (Kind) kind;
-        }
-
-        switch (primitiveKind) {
+        switch ((Kind) kind) {
             case Int:
             case Short:
             case Byte:
--- a/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackendFactory.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.ptx/src/com/oracle/graal/hotspot/ptx/PTXHotSpotBackendFactory.java	Tue Jun 17 17:42:24 2014 -0700
@@ -93,7 +93,7 @@
         final int stackFrameAlignment = 1;
         final int implicitNullCheckLimit = 0;
         final boolean inlineObjects = true;
-        return new HotSpotTargetDescription(createArchitecture(), true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects, Kind.Int);
+        return new HotSpotTargetDescription(createArchitecture(), true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects);
     }
 
     public String getArchitecture() {
--- a/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotBackendFactory.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotBackendFactory.java	Tue Jun 17 17:42:24 2014 -0700
@@ -42,7 +42,7 @@
         final int stackFrameAlignment = 16;
         final int implicitNullCheckLimit = 4096;
         final boolean inlineObjects = true;
-        return new HotSpotTargetDescription(createArchitecture(), true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects, Kind.Int);
+        return new HotSpotTargetDescription(createArchitecture(), true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects);
     }
 
     public HotSpotBackend createBackend(HotSpotGraalRuntime runtime, HotSpotBackend host) {
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotGraalRuntime.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotGraalRuntime.java	Tue Jun 17 17:42:24 2014 -0700
@@ -169,7 +169,7 @@
         @Option(help = "The runtime configuration to use")
         static final OptionValue<String> GraalRuntime = new OptionValue<>("");
 
-        @Option(help = "File to which logging is sent")
+        @Option(help = "File to which logging is sent.  %p in the name will be replaced with a string the process, usually the process id.")
         public static final PrintStreamOption LogFile = new PrintStreamOption();
         // @formatter:on
     }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotReferenceMap.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotReferenceMap.java	Tue Jun 17 17:42:24 2014 -0700
@@ -34,25 +34,36 @@
 
     private static final long serialVersionUID = -1052183095979496819L;
 
+    private static final int BITS_PER_WORD = 3;
+
     /**
-     * Contains 3 bits per 64 bit register, and n*3 bits per n*64 bit vector register.
-     * <ul>
-     * <li>bit0 = 0: contains no references</li>
-     * <li>bit0 = 1, bit1+2 = 0: contains a wide oop</li>
-     * <li>bit0 = 1, bit1 = 1: contains a narrow oop in the lower 32 bit</li>
-     * <li>bit0 = 1, bit2 = 1: contains a narrow oop in the upper 32 bit</li>
-     * </ul>
+     * Contains 3 bits per scalar register, and n*3 bits per n-word vector register (e.g., on a
+     * 64-bit system, a 256-bit vector register requires 12 reference map bits).
+     * <p>
+     * These bits can have the following values (LSB first):
+     *
+     * <pre>
+     * 000 - contains no references
+     * 100 - contains a wide oop
+     * 110 - contains a narrow oop in the lower half
+     * 101 - contains a narrow oop in the upper half
+     * 111 - contains two narrow oops
+     * </pre>
      */
     private final BitSet registerRefMap;
 
     /**
-     * Contains 3 bits per stack slot.
-     * <ul>
-     * <li>bit0 = 0: contains no references</li>
-     * <li>bit0 = 1, bit1+2 = 0: contains a wide oop</li>
-     * <li>bit0 = 1, bit1 = 1: contains a narrow oop in the lower half</li>
-     * <li>bit0 = 1, bit2 = 1: contains a narrow oop in the upper half</li>
-     * </ul>
+     * Contains 3 bits per stack word.
+     * <p>
+     * These bits can have the following values (LSB first):
+     *
+     * <pre>
+     * 000 - contains no references
+     * 100 - contains a wide oop
+     * 110 - contains a narrow oop in the lower half
+     * 101 - contains a narrow oop in the upper half
+     * 111 - contains two narrow oops
+     * </pre>
      */
     private final BitSet frameRefMap;
 
@@ -60,18 +71,18 @@
 
     public HotSpotReferenceMap(int registerCount, int frameSlotCount, TargetDescription target) {
         if (registerCount > 0) {
-            this.registerRefMap = new BitSet(registerCount * 3);
+            this.registerRefMap = new BitSet(registerCount * BITS_PER_WORD);
         } else {
             this.registerRefMap = null;
         }
-        this.frameRefMap = new BitSet(frameSlotCount * 3);
+        this.frameRefMap = new BitSet(frameSlotCount * BITS_PER_WORD);
         this.target = target;
     }
 
     private static void setOop(BitSet map, int startIdx, LIRKind kind) {
         int length = kind.getPlatformKind().getVectorLength();
-        map.clear(3 * startIdx, 3 * (startIdx + length) - 1);
-        for (int i = 0, idx = 3 * startIdx; i < length; i++, idx += 3) {
+        map.clear(BITS_PER_WORD * startIdx, BITS_PER_WORD * (startIdx + length) - 1);
+        for (int i = 0, idx = BITS_PER_WORD * startIdx; i < length; i++, idx += BITS_PER_WORD) {
             if (kind.isReference(i)) {
                 map.set(idx);
             }
@@ -81,8 +92,8 @@
     private static void setNarrowOop(BitSet map, int idx, LIRKind kind) {
         int length = kind.getPlatformKind().getVectorLength();
         int nextIdx = idx + (length + 1) / 2;
-        map.clear(3 * idx, 3 * nextIdx - 1);
-        for (int i = 0, regIdx = 3 * idx; i < length; i += 2, regIdx += 3) {
+        map.clear(BITS_PER_WORD * idx, BITS_PER_WORD * nextIdx - 1);
+        for (int i = 0, regIdx = BITS_PER_WORD * idx; i < length; i += 2, regIdx += BITS_PER_WORD) {
             if (kind.isReference(i)) {
                 map.set(regIdx);
                 map.set(regIdx + 1);
@@ -130,11 +141,11 @@
                 // so setNarrowOop won't work correctly
                 int idx = offset / target.wordSize;
                 if (kind.isReference(0)) {
-                    frameRefMap.set(3 * idx);
+                    frameRefMap.set(BITS_PER_WORD * idx);
                     if (offset % target.wordSize == 0) {
-                        frameRefMap.set(3 * idx + 1);
+                        frameRefMap.set(BITS_PER_WORD * idx + 1);
                     } else {
-                        frameRefMap.set(3 * idx + 2);
+                        frameRefMap.set(BITS_PER_WORD * idx + 2);
                     }
                 }
             }
@@ -152,14 +163,14 @@
     }
 
     public void appendRegisterMap(StringBuilder sb, RefMapFormatter formatter) {
-        for (int reg = registerRefMap.nextSetBit(0); reg >= 0; reg = registerRefMap.nextSetBit(reg + 2)) {
-            sb.append(' ').append(formatter.formatRegister(reg / 2));
+        for (int reg = registerRefMap.nextSetBit(0); reg >= 0; reg = registerRefMap.nextSetBit(reg + BITS_PER_WORD)) {
+            sb.append(' ').append(formatter.formatRegister(reg / BITS_PER_WORD));
         }
     }
 
     public void appendFrameMap(StringBuilder sb, RefMapFormatter formatter) {
-        for (int slot = frameRefMap.nextSetBit(0); slot >= 0; slot = frameRefMap.nextSetBit(slot + 3)) {
-            sb.append(' ').append(formatter.formatStackSlot(slot / 3));
+        for (int slot = frameRefMap.nextSetBit(0); slot >= 0; slot = frameRefMap.nextSetBit(slot + BITS_PER_WORD)) {
+            sb.append(' ').append(formatter.formatStackSlot(slot / BITS_PER_WORD));
         }
     }
 }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotTargetDescription.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/HotSpotTargetDescription.java	Tue Jun 17 17:42:24 2014 -0700
@@ -24,28 +24,20 @@
 
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
-import com.oracle.graal.hotspot.nodes.type.*;
 
 public class HotSpotTargetDescription extends TargetDescription {
 
-    private final PlatformKind rawNarrowOopKind;
-
-    public HotSpotTargetDescription(Architecture arch, boolean isMP, int stackAlignment, int implicitNullCheckLimit, boolean inlineObjects, PlatformKind rawNarrowOopKind) {
+    public HotSpotTargetDescription(Architecture arch, boolean isMP, int stackAlignment, int implicitNullCheckLimit, boolean inlineObjects) {
         super(arch, isMP, stackAlignment, implicitNullCheckLimit, inlineObjects);
-        this.rawNarrowOopKind = rawNarrowOopKind;
     }
 
     @Override
     public int getSizeInBytes(PlatformKind kind) {
-        if (kind == NarrowOopStamp.NarrowOop) {
-            return super.getSizeInBytes(rawNarrowOopKind);
-        } else {
-            return super.getSizeInBytes(kind);
-        }
+        return super.getSizeInBytes(kind);
     }
 
     @Override
     public ReferenceMap createReferenceMap(boolean hasRegisters, int stackSlotCount) {
-        return new HotSpotReferenceMap(hasRegisters ? arch.getRegisterReferenceMapBitCount() : 0, stackSlotCount, this);
+        return new HotSpotReferenceMap(hasRegisters ? arch.getRegisterReferenceMapSize() : 0, stackSlotCount, this);
     }
 }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/PrintStreamOption.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/PrintStreamOption.java	Tue Jun 17 17:42:24 2014 -0700
@@ -23,6 +23,7 @@
 package com.oracle.graal.hotspot;
 
 import java.io.*;
+import java.lang.management.*;
 
 import com.oracle.graal.options.*;
 
@@ -44,6 +45,30 @@
     private volatile PrintStream ps;
 
     /**
+     * Replace any instance of %p with a an identifying name. Try to get it from the RuntimeMXBean
+     * name.
+     * 
+     * @return the name of the file to log to
+     */
+    private String getFilename() {
+        String name = getValue();
+        if (name.contains("%p")) {
+            String runtimeName = ManagementFactory.getRuntimeMXBean().getName();
+            try {
+                int index = runtimeName.indexOf('@');
+                if (index != -1) {
+                    long pid = Long.parseLong(runtimeName.substring(0, index));
+                    runtimeName = Long.toString(pid);
+                }
+                name = name.replaceAll("%p", runtimeName);
+            } catch (NumberFormatException e) {
+
+            }
+        }
+        return name;
+    }
+
+    /**
      * Gets the print stream configured by this option.
      */
     public PrintStream getStream() {
@@ -53,7 +78,7 @@
                     if (ps == null) {
                         try {
                             final boolean enableAutoflush = true;
-                            ps = new PrintStream(new FileOutputStream(getValue()), enableAutoflush);
+                            ps = new PrintStream(new FileOutputStream(getFilename()), enableAutoflush);
                         } catch (FileNotFoundException e) {
                             throw new RuntimeException("couldn't open file: " + getValue(), e);
                         }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/data/OopData.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/data/OopData.java	Tue Jun 17 17:42:24 2014 -0700
@@ -28,7 +28,6 @@
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.compiler.common.*;
 import com.oracle.graal.hotspot.*;
-import com.oracle.graal.hotspot.nodes.type.*;
 
 /**
  * A data item that represents an oop value.
@@ -48,7 +47,7 @@
     @Override
     public int getSize(TargetDescription target) {
         if (compressed) {
-            return target.getSizeInBytes(NarrowOopStamp.NarrowOop);
+            return target.getSizeInBytes(Kind.Int);
         } else {
             return target.getSizeInBytes(Kind.Object);
         }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotCompressedNullConstant.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotCompressedNullConstant.java	Tue Jun 17 17:42:24 2014 -0700
@@ -23,7 +23,6 @@
 package com.oracle.graal.hotspot.meta;
 
 import com.oracle.graal.api.meta.*;
-import com.oracle.graal.hotspot.nodes.type.*;
 
 /**
  * The compressed representation of the {@link Constant#NULL_OBJECT null constant}.
@@ -35,7 +34,7 @@
     public static final Constant COMPRESSED_NULL = new HotSpotCompressedNullConstant();
 
     private HotSpotCompressedNullConstant() {
-        super(LIRKind.reference(NarrowOopStamp.NarrowOop));
+        super(LIRKind.reference(Kind.Int));
     }
 
     @Override
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotObjectConstant.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/meta/HotSpotObjectConstant.java	Tue Jun 17 17:42:24 2014 -0700
@@ -23,7 +23,6 @@
 package com.oracle.graal.hotspot.meta;
 
 import com.oracle.graal.api.meta.*;
-import com.oracle.graal.hotspot.nodes.type.*;
 
 /**
  * Represents a constant non-{@code null} object reference, within the compiler and across the
@@ -79,7 +78,7 @@
     private final boolean compressed;
 
     private HotSpotObjectConstant(Object object, boolean compressed) {
-        super(LIRKind.reference(compressed ? NarrowOopStamp.NarrowOop : Kind.Object));
+        super(LIRKind.reference(compressed ? Kind.Int : Kind.Object));
         this.object = object;
         this.compressed = compressed;
         assert object != null;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/nodes/ClassGetClassLoader0Node.java	Tue Jun 17 17:42:24 2014 -0700
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.hotspot.nodes;
+
+import com.oracle.graal.graph.*;
+import com.oracle.graal.graph.spi.*;
+import com.oracle.graal.hotspot.meta.*;
+import com.oracle.graal.hotspot.replacements.*;
+import com.oracle.graal.nodes.*;
+import com.oracle.graal.replacements.nodes.*;
+
+/**
+ * {@link MacroNode Macro node} for {@link Class#getClassLoader0()}.
+ *
+ * @see ClassSubstitutions#getClassLoader0(Class)
+ */
+@SuppressWarnings("javadoc")
+public class ClassGetClassLoader0Node extends MacroNode implements Canonicalizable {
+
+    public ClassGetClassLoader0Node(Invoke invoke) {
+        super(invoke);
+    }
+
+    private ValueNode getJavaClass() {
+        return arguments.get(0);
+    }
+
+    @Override
+    public Node canonical(CanonicalizerTool tool) {
+        ValueNode javaClass = getJavaClass();
+        if (javaClass.isConstant()) {
+            Class<?> c = (Class<?>) HotSpotObjectConstant.asObject(javaClass.asConstant());
+            if (c != null) {
+                /*
+                 * This is an intrinsic for getClassLoader0, which occurs after any security checks.
+                 * We can't call that directly so just call getClassLoader.
+                 */
+                ClassLoader classLoader = c.getClassLoader();
+                return ConstantNode.forConstant(HotSpotObjectConstant.forObject(classLoader), tool.getMetaAccess(), graph());
+            }
+        }
+        return this;
+    }
+}
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/nodes/type/NarrowOopStamp.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/nodes/type/NarrowOopStamp.java	Tue Jun 17 17:42:24 2014 -0700
@@ -26,26 +26,9 @@
 import com.oracle.graal.compiler.common.spi.*;
 import com.oracle.graal.compiler.common.type.*;
 import com.oracle.graal.hotspot.HotSpotVMConfig.CompressEncoding;
-import com.oracle.graal.hotspot.meta.*;
 
 public class NarrowOopStamp extends AbstractObjectStamp {
 
-    public static final PlatformKind NarrowOop = new PlatformKind() {
-
-        public String name() {
-            return "NarrowOop";
-        }
-
-        @Override
-        public String toString() {
-            return name();
-        }
-
-        public Constant getDefaultValue() {
-            return HotSpotCompressedNullConstant.COMPRESSED_NULL;
-        }
-    };
-
     private final CompressEncoding encoding;
 
     public NarrowOopStamp(ResolvedJavaType type, boolean exactType, boolean nonNull, boolean alwaysNull, CompressEncoding encoding) {
@@ -72,7 +55,7 @@
 
     @Override
     public LIRKind getLIRKind(LIRKindTool tool) {
-        return LIRKind.reference(NarrowOop);
+        return LIRKind.reference(Kind.Int);
     }
 
     @Override
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/phases/LoadJavaMirrorWithKlassPhase.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/phases/LoadJavaMirrorWithKlassPhase.java	Tue Jun 17 17:42:24 2014 -0700
@@ -26,6 +26,7 @@
 import static com.oracle.graal.nodes.ConstantNode.*;
 
 import com.oracle.graal.api.meta.*;
+import com.oracle.graal.compiler.common.*;
 import com.oracle.graal.compiler.common.type.*;
 import com.oracle.graal.hotspot.HotSpotVMConfig.*;
 import com.oracle.graal.hotspot.meta.*;
@@ -61,13 +62,35 @@
         if (constant instanceof HotSpotObjectConstant && HotSpotObjectConstant.asObject(constant) instanceof Class<?>) {
             MetaAccessProvider metaAccess = context.getMetaAccess();
             ResolvedJavaType type = metaAccess.lookupJavaType((Class<?>) HotSpotObjectConstant.asObject(constant));
-            assert type instanceof HotSpotResolvedObjectType;
-
-            Constant klass = ((HotSpotResolvedObjectType) type).klass();
+            Constant klass;
+            LocationNode location;
+            if (type instanceof HotSpotResolvedObjectType) {
+                location = ConstantLocationNode.create(FINAL_LOCATION, Kind.Object, classMirrorOffset, graph);
+                klass = ((HotSpotResolvedObjectType) type).klass();
+            } else {
+                /*
+                 * Primitive classes are more difficult since they don't have a corresponding Klass*
+                 * so get them from Class.TYPE for the java box type.
+                 */
+                HotSpotResolvedPrimitiveType primitive = (HotSpotResolvedPrimitiveType) type;
+                ResolvedJavaType boxingClass = metaAccess.lookupJavaType(primitive.getKind().toBoxedJavaClass());
+                klass = ((HotSpotResolvedObjectType) boxingClass).klass();
+                HotSpotResolvedJavaField[] a = (HotSpotResolvedJavaField[]) boxingClass.getStaticFields();
+                HotSpotResolvedJavaField typeField = null;
+                for (HotSpotResolvedJavaField f : a) {
+                    if (f.getName().equals("TYPE")) {
+                        typeField = f;
+                        break;
+                    }
+                }
+                if (typeField == null) {
+                    throw new GraalInternalError("Can't find TYPE field in class");
+                }
+                location = ConstantLocationNode.create(FINAL_LOCATION, Kind.Object, typeField.offset(), graph);
+            }
             ConstantNode klassNode = ConstantNode.forConstant(klass, metaAccess, graph);
 
             Stamp stamp = StampFactory.exactNonNull(metaAccess.lookupJavaType(Class.class));
-            LocationNode location = ConstantLocationNode.create(FINAL_LOCATION, Kind.Object, classMirrorOffset, graph);
             FloatingReadNode freadNode = graph.unique(new FloatingReadNode(klassNode, location, null, stamp));
 
             if (HotSpotObjectConstant.isCompressed(constant)) {
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/ClassSubstitutions.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/ClassSubstitutions.java	Tue Jun 17 17:42:24 2014 -0700
@@ -82,6 +82,9 @@
         return klass.equal(0);
     }
 
+    @MacroSubstitution(macro = ClassGetClassLoader0Node.class, isStatic = false)
+    public static native ClassLoader getClassLoader0(Class<?> thisObj);
+
     @MacroSubstitution(macro = ClassGetSuperclassNode.class, isStatic = false)
     @MethodSubstitution(isStatic = false)
     public static Class<?> getSuperclass(final Class<?> thisObj) {
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/ObjectGetClassNode.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/ObjectGetClassNode.java	Tue Jun 17 17:42:24 2014 -0700
@@ -67,11 +67,19 @@
         if (usages().isEmpty()) {
             return null;
         } else {
+            ResolvedJavaType type = StampTool.typeOrNull(getObject());
             if (StampTool.isExactType(getObject())) {
-                ResolvedJavaType type = StampTool.typeOrNull(getObject());
                 Constant clazz = type.getEncoding(Representation.JavaClass);
                 return ConstantNode.forConstant(clazz, tool.getMetaAccess(), graph());
             }
+            if (type != null && tool.assumptions().useOptimisticAssumptions()) {
+                ResolvedJavaType exactType = type.findUniqueConcreteSubtype();
+                if (exactType != null) {
+                    tool.assumptions().recordConcreteSubtype(type, exactType);
+                    Constant clazz = exactType.getEncoding(Representation.JavaClass);
+                    return ConstantNode.forConstant(clazz, tool.getMetaAccess(), graph());
+                }
+            }
             return this;
         }
     }
--- a/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/FrameMap.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/FrameMap.java	Tue Jun 17 17:42:24 2014 -0700
@@ -285,7 +285,7 @@
         if (freedSlots != null) {
             for (Iterator<StackSlot> iter = freedSlots.iterator(); iter.hasNext();) {
                 StackSlot s = iter.next();
-                if (s.getLIRKind() == kind) {
+                if (s.getLIRKind().equals(kind)) {
                     iter.remove();
                     if (freedSlots.isEmpty()) {
                         freedSlots = null;
--- a/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/RedundantMoveElimination.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/RedundantMoveElimination.java	Tue Jun 17 17:42:24 2014 -0700
@@ -340,7 +340,7 @@
                 int sourceIdx = getStateIdx(moveOp.getInput());
                 int destIdx = getStateIdx(moveOp.getResult());
                 if (sourceIdx >= 0 && destIdx >= 0) {
-                    assert isObjectValue(state[sourceIdx]) || (moveOp.getInput().getKind() != Kind.Object) : "move op moves object but input is not defined as object";
+                    assert isObjectValue(state[sourceIdx]) || moveOp.getInput().getLIRKind().isValue() : "move op moves object but input is not defined as object";
                     state[destIdx] = state[sourceIdx];
                     Debug.log("move value %d from %d to %d", state[sourceIdx], sourceIdx, destIdx);
                     return initValueNum;
@@ -378,7 +378,7 @@
                         /*
                          * Assign a unique number to the output or temp location.
                          */
-                        state[stateIdx] = encodeValueNum(opValueNum++, operand.getKind() == Kind.Object);
+                        state[stateIdx] = encodeValueNum(opValueNum++, !operand.getLIRKind().isValue());
                         Debug.log("set def %d for register %s(%d): %d", opValueNum, operand, stateIdx, state[stateIdx]);
                     }
                     return operand;
@@ -510,7 +510,7 @@
             /*
              * Moves with mismatching kinds are not moves, but memory loads/stores!
              */
-            return source.getKind() == dest.getKind() && source.getLIRKind() == dest.getLIRKind() && source.getKind() != Kind.Illegal;
+            return source.getLIRKind().equals(dest.getLIRKind());
         }
         return false;
     }
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/TypeProfileProxyNode.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/TypeProfileProxyNode.java	Tue Jun 17 17:42:24 2014 -0700
@@ -47,6 +47,9 @@
     }
 
     public static ValueNode create(ValueNode object, JavaTypeProfile profile) {
+        if (StampTool.isExactType(object)) {
+            return object;
+        }
         if (profile == null) {
             // No profile, so create no node.
             return object;
--- a/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/ArraySubstitutions.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/ArraySubstitutions.java	Tue Jun 17 17:42:24 2014 -0700
@@ -34,6 +34,11 @@
 
     @MethodSubstitution
     public static Object newInstance(Class<?> componentType, int length) throws NegativeArraySizeException {
+        // The error cases must be handled here since DynamicNewArrayNode can only deoptimize the
+        // caller in response to exceptions.
+        if (componentType == void.class) {
+            throw new IllegalArgumentException();
+        }
         return DynamicNewArrayNode.newArray(GuardingPiNode.guardingNonNull(componentType), length);
     }
 
--- a/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/DefaultJavaLoweringProvider.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/DefaultJavaLoweringProvider.java	Tue Jun 17 17:42:24 2014 -0700
@@ -273,7 +273,7 @@
     }
 
     protected ReadNode createUnsafeRead(StructuredGraph graph, UnsafeLoadNode load, GuardingNode guard) {
-        boolean compressible = (!load.object().isNullConstant() && load.accessKind() == Kind.Object);
+        boolean compressible = load.accessKind() == Kind.Object;
         Kind readKind = load.accessKind();
         LocationNode location = createLocation(load);
         Stamp loadStamp = loadStamp(load.stamp(), readKind, compressible);
--- a/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/ReplacementsImpl.java	Mon Jun 16 20:52:06 2014 -0700
+++ b/graal/com.oracle.graal.replacements/src/com/oracle/graal/replacements/ReplacementsImpl.java	Tue Jun 17 17:42:24 2014 -0700
@@ -483,7 +483,10 @@
             if (!SnippetTemplate.hasConstantParameter(method)) {
                 NodeIntrinsificationVerificationPhase.verify(graph);
             }
+            int sideEffectCount = 0;
+            assert (sideEffectCount = graph.getNodes().filter(e -> e instanceof StateSplit && ((StateSplit) e).hasSideEffect()).count()) >= 0;
             new ConvertDeoptimizeToGuardPhase().apply(graph);
+            assert sideEffectCount == graph.getNodes().filter(e -> e instanceof StateSplit && ((StateSplit) e).hasSideEffect()).count() : "deleted side effecting node";
 
             switch (frameStateProcessing) {
                 case Removal:
--- a/mx/mx_graal.py	Mon Jun 16 20:52:06 2014 -0700
+++ b/mx/mx_graal.py	Tue Jun 17 17:42:24 2014 -0700
@@ -1197,34 +1197,34 @@
 
     with VM('graal', 'fastdebug'):
         t = Task('BootstrapWithSystemAssertions:fastdebug')
-        vm(['-esa', '-XX:-TieredCompilation', '-version'])
+        vm(['-esa', '-XX:+UseGraalCompilationQueue', '-XX:-TieredCompilation', '-version'])
         tasks.append(t.stop())
 
     with VM('graal', 'fastdebug'):
         t = Task('BootstrapWithSystemAssertionsNoCoop:fastdebug')
-        vm(['-esa', '-XX:-TieredCompilation', '-XX:-UseCompressedOops', '-version'])
+        vm(['-esa', '-XX:+UseGraalCompilationQueue', '-XX:-TieredCompilation', '-XX:-UseCompressedOops', '-version'])
         tasks.append(t.stop())
 
     with VM('graal', 'product'):
         t = Task('BootstrapWithGCVerification:product')
         out = mx.DuplicateSuppressingStream(['VerifyAfterGC:', 'VerifyBeforeGC:']).write
-        vm(['-XX:-TieredCompilation', '-XX:+UnlockDiagnosticVMOptions', '-XX:+VerifyBeforeGC', '-XX:+VerifyAfterGC', '-version'], out=out)
+        vm(['-XX:+UseGraalCompilationQueue', '-XX:-TieredCompilation', '-XX:+UnlockDiagnosticVMOptions', '-XX:+VerifyBeforeGC', '-XX:+VerifyAfterGC', '-version'], out=out)
         tasks.append(t.stop())
 
     with VM('graal', 'product'):
         t = Task('BootstrapWithG1GCVerification:product')
         out = mx.DuplicateSuppressingStream(['VerifyAfterGC:', 'VerifyBeforeGC:']).write
-        vm(['-XX:-TieredCompilation', '-XX:+UnlockDiagnosticVMOptions', '-XX:-UseSerialGC', '-XX:+UseG1GC', '-XX:+VerifyBeforeGC', '-XX:+VerifyAfterGC', '-version'], out=out)
+        vm(['-XX:+UseGraalCompilationQueue', '-XX:-TieredCompilation', '-XX:+UnlockDiagnosticVMOptions', '-XX:-UseSerialGC', '-XX:+UseG1GC', '-XX:+VerifyBeforeGC', '-XX:+VerifyAfterGC', '-version'], out=out)
         tasks.append(t.stop())
 
     with VM('graal', 'product'):
         t = Task('BootstrapWithRegisterPressure:product')
-        vm(['-XX:-TieredCompilation', '-G:RegisterPressure=rbx,r11,r10,r14,xmm3,xmm11,xmm14', '-esa', '-version'])
+        vm(['-XX:+UseGraalCompilationQueue', '-XX:-TieredCompilation', '-G:RegisterPressure=rbx,r11,r10,r14,xmm3,xmm11,xmm14', '-esa', '-version'])
         tasks.append(t.stop())
 
     with VM('graal', 'product'):
         t = Task('BootstrapWithImmutableCode:product')
-        vm(['-XX:-TieredCompilation', '-G:+ImmutableCode', '-G:+VerifyPhases', '-esa', '-version'])
+        vm(['-XX:+UseGraalCompilationQueue', '-XX:-TieredCompilation', '-G:+ImmutableCode', '-G:+VerifyPhases', '-esa', '-version'])
         tasks.append(t.stop())
 
     with VM('server', 'product'):  # hosted mode
--- a/src/gpu/hsail/vm/gpu_hsail.hpp	Mon Jun 16 20:52:06 2014 -0700
+++ b/src/gpu/hsail/vm/gpu_hsail.hpp	Tue Jun 17 17:42:24 2014 -0700
@@ -25,6 +25,7 @@
 #ifndef GPU_HSAIL_VM_GPU_HSAIL_HPP
 #define GPU_HSAIL_VM_GPU_HSAIL_HPP
 
+#include "runtime/gpu.hpp"
 #include "utilities/exceptions.hpp"
 #include "graal/graalEnv.hpp"
 #include "gpu_hsail_Frame.hpp"
--- a/src/gpu/hsail/vm/hsailArgumentsBase.cpp	Mon Jun 16 20:52:06 2014 -0700
+++ b/src/gpu/hsail/vm/hsailArgumentsBase.cpp	Tue Jun 17 17:42:24 2014 -0700
@@ -38,6 +38,29 @@
   return arg;
 }
 
+void HSAILArgumentsBase::collectArgs() {
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] %s::collectArgs, sig:%s  args length=%d", argsBuilderName(), _signature->as_C_string(), _length);
+  }
+  if (!_is_static) {
+    // First object in args should be 'this'
+    oop arg = _args->obj_at(_index++);
+    assert(arg->is_instance() && (!arg->is_array()), "First arg should be 'this'");
+    if (TraceGPUInteraction) {
+      tty->print_cr("[HSAIL] %s, instance method, this " PTR_FORMAT ", is a %s", argsBuilderName(), (address) arg, arg->klass()->external_name());
+    }
+    pushObject(arg);
+  } else {
+    if (TraceGPUInteraction) {
+      tty->print_cr("[HSAIL] %s, static method", argsBuilderName());
+    }
+  }
+  // Iterate over the entire signature
+  iterate();
+
+  pushTrailingArgs();
+}
+
 void HSAILArgumentsBase::do_bool() {
   // Get the boxed value
   oop arg = _args->obj_at(_index++);
--- a/src/gpu/hsail/vm/hsailArgumentsBase.hpp	Mon Jun 16 20:52:06 2014 -0700
+++ b/src/gpu/hsail/vm/hsailArgumentsBase.hpp	Tue Jun 17 17:42:24 2014 -0700
@@ -67,10 +67,6 @@
     virtual void handleFinalObjParameter(void* obj) = 0;
     virtual void pushTrailingArgs() = 0;
 
-    void recordNullObjectParameter() {
-        if (_first_null_parameter_index == -1) _first_null_parameter_index = _parameter_index;
-    }
-
  public:
   HSAILArgumentsBase(Symbol* signature, objArrayOop args, bool is_static) : SignatureIterator(signature) {
     this->_return_type = T_ILLEGAL;
@@ -86,32 +82,29 @@
 
   }
 
+  void recordNullObjectParameter() {
+    if (_first_null_parameter_index == -1) {
+      _first_null_parameter_index = _parameter_index;
+    }
+  }
+
+  bool is_static() {
+    return _is_static;
+  }
+
+  int length() {
+    return _length;
+  }
+
+  objArrayOop args() {
+    return _args;
+  }
+
   int getFirstNullParameterIndex() {
     return _first_null_parameter_index;
   }
 
-  void collectArgs() {
-    if (TraceGPUInteraction) {
-      tty->print_cr("[HSAIL] %s::collectArgs, sig:%s  args length=%d", argsBuilderName(), _signature->as_C_string(), _length);
-    }    
-    if (!_is_static) {      
-      // First object in args should be 'this'
-      oop arg = _args->obj_at(_index++);
-      assert(arg->is_instance() && (! arg->is_array()), "First arg should be 'this'");
-      if (TraceGPUInteraction) {
-        tty->print_cr("[HSAIL] %s, instance method, this " PTR_FORMAT ", is a %s", argsBuilderName(), (address) arg, arg->klass()->external_name());
-      }
-      pushObject(arg);
-    } else {
-      if (TraceGPUInteraction) {
-          tty->print_cr("[HSAIL] %s, static method", argsBuilderName());
-      }
-    }
-    // Iterate over the entire signature
-    iterate();
-    
-    pushTrailingArgs();
-  }
+  virtual void collectArgs();
 
   void do_bool();
   void do_byte();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/gpu/hsail/vm/hsailKernelArguments.cpp	Tue Jun 17 17:42:24 2014 -0700
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#include "hsailKernelArguments.hpp"
+
+void HSAILKernelArguments::collectArgs() {
+  int index = 0;
+  if (TraceGPUInteraction) {
+    tty->print_cr("[HSAIL] %s::collectArgs, args length=%d", argsBuilderName(), length());
+  }
+
+  // Manually iterate over the actual args array without looking at method signature
+  while (index < length()) {
+    oop arg = args()->obj_at(index++);
+    jvalue jValue;
+    if (arg == NULL) {
+      if (TraceGPUInteraction) {
+        tty->print_cr("[HSAIL] %s::collectArgs object, _index=%d, value = " PTR_FORMAT " is a %s", argsBuilderName(), index, (void*) arg, "null");
+      }
+      recordNullObjectParameter();
+      pushObject(arg);
+    } else {
+      java_lang_boxing_object::get_value(arg, &jValue);
+      BasicType basic_type = java_lang_boxing_object::basic_type(arg);
+      if (basic_type == T_ILLEGAL && (!(arg->is_array()))) {
+        if (TraceGPUInteraction) {
+          tty->print_cr("[HSAIL] %s::collectArgs object, _index=%d, value = " PTR_FORMAT " is a %s", argsBuilderName(), index, (void*) arg, arg == NULL ? "null" : arg->klass()->external_name());
+        }
+        pushObject(arg);
+      } else if (arg->is_array()) {
+        if (TraceGPUInteraction) {
+          int array_length = ((objArrayOop) arg)->length();
+          tty->print_cr("[HSAIL] %s::collectArgs array, length=%d, _index=%d, value = " PTR_FORMAT, argsBuilderName(), array_length, index, (void*) arg);
+        }
+        pushObject(arg);
+      } else {
+        switch (basic_type) {
+          case T_INT:
+            if (TraceGPUInteraction) {
+              tty->print_cr("[HSAIL] %s::collectArgs, T_INT _index=%d, value = %d", argsBuilderName(), index, jValue.i);
+            }
+            pushInt(jValue.i);
+            break;
+          case T_LONG:
+            if (TraceGPUInteraction) {
+              tty->print_cr("[HSAIL] %s::collectArgs, T_LONG _index=%d, value = %d", argsBuilderName(), index, jValue.j);
+            }
+            pushLong(jValue.j);
+            break;
+          case T_FLOAT:
+            if (TraceGPUInteraction) {
+              tty->print_cr("[HSAIL] %s::collectArgs, T_FLOAT _index=%d, value = %d", argsBuilderName(), index, jValue.f);
+            }
+            pushFloat(jValue.f);
+            break;
+          case T_DOUBLE:
+            if (TraceGPUInteraction) {
+              tty->print_cr("[HSAIL] %s::collectArgs, T_DOUBLE _index=%d, value = %d", argsBuilderName(), index, jValue.d);
+            }
+            pushDouble(jValue.d);
+            break;
+          case T_BYTE:
+            if (TraceGPUInteraction) {
+              tty->print_cr("[HSAIL] %s::collectArgs, T_BYTE _index=%d, value = %d", argsBuilderName(), index, jValue.b);
+            }
+            pushByte(jValue.b);
+            break;
+          case T_BOOLEAN:
+            if (TraceGPUInteraction) {
+              tty->print_cr("[HSAIL] %s::collectArgs, T_BOOLEAN _index=%d, value = %d", argsBuilderName(), index, jValue.z);
+            }
+            pushBool(jValue.z);
+            break;
+        }
+      }
+    }
+  }
+
+  pushTrailingArgs();
+}
+
--- a/src/gpu/hsail/vm/hsailKernelArguments.hpp	Mon Jun 16 20:52:06 2014 -0700
+++ b/src/gpu/hsail/vm/hsailKernelArguments.hpp	Tue Jun 17 17:42:24 2014 -0700
@@ -91,16 +91,17 @@
     // For kernel arguments we don't pass the final int parameter
     // since we use the HSAIL workitemid instruction in place of that int value
     virtual void handleFinalIntParameter() {
-      if (TraceGPUInteraction) {
-        tty->print_cr("[HSAIL] HSAILKernelArguments, not pushing trailing int");
-      }
+      ShouldNotReachHere();
     }
 
-    // for kernel arguments, final obj parameter should be an object
+    // For kernel arguments, final obj parameter should be an object
     // stream source array (already checked in the base class) so here we just pass it
     virtual void handleFinalObjParameter(void* arg) {
-      pushObject(arg);
+      ShouldNotReachHere();
     }
+
+    virtual void collectArgs();
+
 };
 
 #endif  // GPU_HSAIL_VM_HSAIL_KERNEL_ARGUMENTS_HPP