changeset 11196:498d0e531bbb

Merge.
author Christian Humer <christian.humer@gmail.com>
date Thu, 01 Aug 2013 20:54:32 +0200
parents 4f52b08bd2f9 (current diff) dae97549a94d (diff)
children 3479ab380552
diffstat 20 files changed, 461 insertions(+), 189 deletions(-)
--- a/graal/com.oracle.graal.asm/src/com/oracle/graal/asm/NumUtil.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.asm/src/com/oracle/graal/asm/NumUtil.java	Thu Aug 01 20:54:32 2013 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,6 +22,8 @@
  */
 package com.oracle.graal.asm;
 
+// JaCoCo Exclude
+
 /**
  * A collection of static utility functions that check ranges of numbers.
  */
@@ -96,4 +98,8 @@
     public static int roundUp(int number, int mod) {
         return ((number + mod - 1) / mod) * mod;
     }
+
+    public static long roundUp(long number, long mod) {
+        return ((number + mod - 1L) / mod) * mod;
+    }
 }
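
For illustration, a standalone sketch (not part of this changeset; the class name is made up) of the rounding formula used by the new long overload above, which rounds a number up to the next multiple of mod, assuming mod is positive:

    // Mirrors NumUtil.roundUp(long, long): rounds number up to the next multiple of mod.
    public class RoundUpExample {
        static long roundUp(long number, long mod) {
            return ((number + mod - 1L) / mod) * mod;
        }

        public static void main(String[] args) {
            System.out.println(roundUp(13L, 8L)); // prints 16
            System.out.println(roundUp(16L, 8L)); // prints 16 (already a multiple of 8)
        }
    }
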
--- a/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/EliminateNestedCheckCastsTest.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/EliminateNestedCheckCastsTest.java	Thu Aug 01 20:54:32 2013 +0200
@@ -106,11 +106,11 @@
     }
 
     private StructuredGraph compileSnippet(final String snippet, final int checkcasts, final int afterCanon) {
-        return Debug.scope(snippet, new Callable<StructuredGraph>() {
+        final StructuredGraph graph = parse(snippet);
+        return Debug.scope("NestedCheckCastsTest", graph, new Callable<StructuredGraph>() {
 
             @Override
             public StructuredGraph call() throws Exception {
-                StructuredGraph graph = parse(snippet);
                 Debug.dump(graph, "After parsing: " + snippet);
                 Assert.assertEquals(checkcasts, graph.getNodes().filter(CheckCastNode.class).count());
                 new CanonicalizerPhase.Instance(runtime(), new Assumptions(false), true).apply(graph);
--- a/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/MemoryScheduleTest.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/MemoryScheduleTest.java	Thu Aug 01 20:54:32 2013 +0200
@@ -85,7 +85,7 @@
     @Test
     public void testSimple() {
         for (TestMode mode : TestMode.values()) {
-            SchedulePhase schedule = getFinalSchedule("testSimpleSnippet", mode, MemoryScheduling.OPTIMAL, false);
+            SchedulePhase schedule = getFinalSchedule("testSimpleSnippet", mode, MemoryScheduling.OPTIMAL);
             StructuredGraph graph = schedule.getCFG().graph;
             assertReadAndWriteInSameBlock(schedule, true);
             assertOrderedAfterSchedule(schedule, graph.getNodes().filter(FloatingReadNode.class).first(), graph.getNodes().filter(WriteNode.class).first());
@@ -110,7 +110,7 @@
     @Test
     public void testSplit1() {
         for (TestMode mode : TestMode.values()) {
-            SchedulePhase schedule = getFinalSchedule("testSplitSnippet1", mode, MemoryScheduling.OPTIMAL, false);
+            SchedulePhase schedule = getFinalSchedule("testSplitSnippet1", mode, MemoryScheduling.OPTIMAL);
             assertReadWithinStartBlock(schedule, true);
             assertReadWithinReturnBlock(schedule, false);
         }
@@ -133,7 +133,7 @@
 
     @Test
     public void testSplit2() {
-        SchedulePhase schedule = getFinalSchedule("testSplit2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testSplit2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertReadWithinStartBlock(schedule, false);
         assertReadWithinReturnBlock(schedule, true);
     }
@@ -157,7 +157,7 @@
 
     @Test
     public void testLoop1() {
-        SchedulePhase schedule = getFinalSchedule("testLoop1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testLoop1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(6, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, true);
         assertReadWithinReturnBlock(schedule, false);
@@ -182,13 +182,35 @@
 
     @Test
     public void testLoop2() {
-        SchedulePhase schedule = getFinalSchedule("testLoop2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testLoop2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(6, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, false);
         assertReadWithinReturnBlock(schedule, true);
     }
 
     /**
+     * Here the read should float out of the loop.
+     */
+    public static int testLoop3Snippet(int a) {
+        int j = 0;
+        for (int i = 0; i < a; i++) {
+            if (i - container.a == 0) {
+                break;
+            }
+            j++;
+        }
+        return j;
+    }
+
+    @Test
+    public void testLoop3() {
+        SchedulePhase schedule = getFinalSchedule("testLoop3Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
+        assertEquals(7, schedule.getCFG().getBlocks().length);
+        assertReadWithinStartBlock(schedule, true);
+        assertReadWithinReturnBlock(schedule, false);
+    }
+
+    /**
      * Here the read should float to the end (into the same block as the return).
      */
     public static int testArrayCopySnippet(Integer intValue, char[] a, char[] b, int len) {
@@ -198,7 +220,7 @@
 
     @Test
     public void testArrayCopy() {
-        SchedulePhase schedule = getFinalSchedule("testArrayCopySnippet", TestMode.INLINED_WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testArrayCopySnippet", TestMode.INLINED_WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         StructuredGraph graph = schedule.getCFG().getStartBlock().getBeginNode().graph();
         ReturnNode ret = graph.getNodes(ReturnNode.class).first();
         assertTrue(ret.result() instanceof FloatingReadNode);
@@ -219,7 +241,7 @@
 
     @Test
     public void testIfRead1() {
-        SchedulePhase schedule = getFinalSchedule("testIfRead1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testIfRead1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(4, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, true);
         assertReadAndWriteInSameBlock(schedule, false);
@@ -240,7 +262,7 @@
 
     @Test
     public void testIfRead2() {
-        SchedulePhase schedule = getFinalSchedule("testIfRead2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testIfRead2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(4, schedule.getCFG().getBlocks().length);
         assertEquals(1, schedule.getCFG().graph.getNodes().filter(FloatingReadNode.class).count());
         assertReadWithinStartBlock(schedule, false);
@@ -262,7 +284,7 @@
 
     @Test
     public void testIfRead3() {
-        SchedulePhase schedule = getFinalSchedule("testIfRead3Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testIfRead3Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(4, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, false);
         assertReadWithinReturnBlock(schedule, true);
@@ -283,7 +305,7 @@
 
     @Test
     public void testIfRead4() {
-        SchedulePhase schedule = getFinalSchedule("testIfRead4Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testIfRead4Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(4, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, false);
         assertReadWithinReturnBlock(schedule, false);
@@ -309,7 +331,7 @@
 
     @Test
     public void testBlockSchedule() {
-        SchedulePhase schedule = getFinalSchedule("testBlockScheduleSnippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testBlockScheduleSnippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         StructuredGraph graph = schedule.getCFG().graph;
         NodeIterable<WriteNode> writeNodes = graph.getNodes().filter(WriteNode.class);
 
@@ -344,7 +366,7 @@
 
     @Test
     public void testProxy1() {
-        SchedulePhase schedule = getFinalSchedule("testProxy1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testProxy1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertReadWithinStartBlock(schedule, true); // read of container.a should be in start block
         /*
          * read of container.b for increment operation should be in return block. TODO: not sure
@@ -370,7 +392,62 @@
 
     @Test
     public void testProxy2() {
-        SchedulePhase schedule = getFinalSchedule("testProxy2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testProxy2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
+        assertReadWithinStartBlock(schedule, false);
+        assertReadWithinReturnBlock(schedule, false);
+    }
+
+    private int hash = 0;
+    private final char[] value = new char[3];
+
+    public int testStringHashCodeSnippet() {
+        int h = hash;
+        if (h == 0 && value.length > 0) {
+            char[] val = value;
+
+            for (int i = 0; i < value.length; i++) {
+                h = 31 * h + val[i];
+            }
+            hash = h;
+        }
+        return h;
+    }
+
+    @Test
+    public void testStringHashCode() {
+        SchedulePhase schedule = getFinalSchedule("testStringHashCodeSnippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
+        assertReadWithinStartBlock(schedule, true);
+        assertReadWithinReturnBlock(schedule, false);
+
+        hash = 0x1337;
+        value[0] = 'a';
+        value[1] = 'b';
+        value[2] = 'c';
+        test("testStringHashCodeSnippet");
+    }
+
+    public static int testLoop4Snippet(int count) {
+        int[] a = new int[count];
+
+        for (int i = 0; i < a.length; i++) {
+            a[i] = i;
+        }
+
+        int i = 0;
+        int iwrap = count - 1;
+        int sum = 0;
+
+        while (i < count) {
+            sum += (a[i] + a[iwrap]) / 2;
+            iwrap = i;
+            i++;
+        }
+        return sum;
+    }
+
+    @Test
+    public void testLoop4() {
+        SchedulePhase schedule = getFinalSchedule("testLoop4Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertReadWithinStartBlock(schedule, false);
         assertReadWithinReturnBlock(schedule, false);
     }
@@ -415,7 +492,7 @@
         assertTrue(!(inSame ^ schedule.getCFG().blockFor(read) == schedule.getCFG().blockFor(write)));
     }
 
-    private SchedulePhase getFinalSchedule(final String snippet, final TestMode mode, final MemoryScheduling memsched, final boolean printSchedule) {
+    private SchedulePhase getFinalSchedule(final String snippet, final TestMode mode, final MemoryScheduling memsched) {
         final StructuredGraph graph = parse(snippet);
         return Debug.scope("FloatingReadTest", graph, new Callable<SchedulePhase>() {
 
@@ -444,7 +521,12 @@
                 new FloatingReadPhase().apply(graph);
                 new RemoveValueProxyPhase().apply(graph);
 
-                SchedulePhase schedule = new SchedulePhase(SchedulingStrategy.LATEST_OUT_OF_LOOPS, memsched, printSchedule);
+                MidTierContext midContext = new MidTierContext(runtime(), assumptions, replacements, runtime().getTarget(), OptimisticOptimizations.ALL);
+                new GuardLoweringPhase().apply(graph, midContext);
+                new LoweringPhase(LoweringType.AFTER_GUARDS).apply(graph, midContext);
+                new LoweringPhase(LoweringType.AFTER_FSA).apply(graph, midContext);
+
+                SchedulePhase schedule = new SchedulePhase(SchedulingStrategy.LATEST_OUT_OF_LOOPS, memsched);
                 schedule.apply(graph);
                 assertEquals(1, graph.getNodes().filter(StartNode.class).count());
                 return schedule;
--- a/graal/com.oracle.graal.debug/src/com/oracle/graal/debug/Debug.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.debug/src/com/oracle/graal/debug/Debug.java	Thu Aug 01 20:54:32 2013 +0200
@@ -164,6 +164,12 @@
         }
     }
 
+    public static void printf(String msg, Object... args) {
+        if (ENABLED && DebugScope.getInstance().isLogEnabled()) {
+            DebugScope.getInstance().printf(msg, args);
+        }
+    }
+
     public static void dump(Object object, String msg, Object... args) {
         if (ENABLED && DebugScope.getInstance().isDumpEnabled()) {
             DebugScope.getInstance().dump(object, msg, args);
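
A hedged usage sketch of the new Debug.printf (assuming the com.oracle.graal.debug classes are on the classpath and the caller runs inside an active debug scope; iterations and desc are placeholders mirroring the printIterations helper this changeset adds to SchedulePhase):

    // Formatted, scope-aware logging: a no-op unless debugging and logging are
    // enabled for the current scope; the scope name is printed before the first
    // line written in a new scope (see DebugScope.printf below).
    Debug.printf("iterations: %d,  %s\n", iterations, desc);
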
--- a/graal/com.oracle.graal.debug/src/com/oracle/graal/debug/internal/DebugScope.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.debug/src/com/oracle/graal/debug/internal/DebugScope.java	Thu Aug 01 20:54:32 2013 +0200
@@ -114,6 +114,16 @@
         }
     }
 
+    public void printf(String msg, Object... args) {
+        if (isLogEnabled()) {
+            if (lastLogScope.get() == null || !lastLogScope.get().qualifiedName.equals(this.qualifiedName)) {
+                output.println("scope: " + qualifiedName);
+                lastLogScope.set(this);
+            }
+            output.printf(msg, args);
+        }
+    }
+
     public void dump(Object object, String formatString, Object[] args) {
         if (isDumpEnabled()) {
             DebugConfig config = getConfig();
--- a/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64SafepointOp.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.hotspot.amd64/src/com/oracle/graal/hotspot/amd64/AMD64SafepointOp.java	Thu Aug 01 20:54:32 2013 +0200
@@ -23,8 +23,6 @@
 package com.oracle.graal.hotspot.amd64;
 
 import static com.oracle.graal.amd64.AMD64.*;
-import static com.oracle.graal.phases.GraalOptions.*;
-import sun.misc.*;
 
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
@@ -42,8 +40,6 @@
 @Opcode("SAFEPOINT")
 public class AMD64SafepointOp extends AMD64LIRInstruction {
 
-    private static final Unsafe unsafe = Unsafe.getUnsafe();
-
     @State protected LIRFrameState state;
     @Temp({OperandFlag.REG}) private AllocatableValue temp;
 
@@ -57,11 +53,10 @@
 
     @Override
     public void emitCode(TargetMethodAssembler tasm, AMD64MacroAssembler asm) {
-        int pos = asm.codeBuffer.position();
-        int offset = SafepointPollOffset.getValue() % unsafe.pageSize();
+        final int pos = asm.codeBuffer.position();
         RegisterValue scratch = (RegisterValue) temp;
         if (config.isPollingPageFar) {
-            asm.movq(scratch.getRegister(), config.safepointPollingAddress + offset);
+            asm.movq(scratch.getRegister(), config.safepointPollingAddress);
             tasm.recordMark(Marks.MARK_POLL_FAR);
             tasm.recordInfopoint(pos, state, InfopointReason.SAFEPOINT);
             asm.movq(scratch.getRegister(), new AMD64Address(scratch.getRegister()));
@@ -70,7 +65,7 @@
             tasm.recordInfopoint(pos, state, InfopointReason.SAFEPOINT);
             // The C++ code transforms the polling page offset into an RIP displacement
             // to the real address at that offset in the polling page.
-            asm.movq(scratch.getRegister(), new AMD64Address(rip, offset));
+            asm.movq(scratch.getRegister(), new AMD64Address(rip, 0));
         }
     }
 }
--- a/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCSafepointOp.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCSafepointOp.java	Thu Aug 01 20:54:32 2013 +0200
@@ -24,8 +24,6 @@
 
 import static com.oracle.graal.asm.sparc.SPARCMacroAssembler.*;
 import static com.oracle.graal.sparc.SPARC.*;
-import static com.oracle.graal.phases.GraalOptions.*;
-import sun.misc.*;
 
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
@@ -42,8 +40,6 @@
 @Opcode("SAFEPOINT")
 public class SPARCSafepointOp extends SPARCLIRInstruction {
 
-    private static final Unsafe unsafe = Unsafe.getUnsafe();
-
     @State protected LIRFrameState state;
     @Temp({OperandFlag.REG}) private AllocatableValue temp;
 
@@ -58,9 +54,8 @@
     @Override
     public void emitCode(TargetMethodAssembler tasm, SPARCMacroAssembler masm) {
         final int pos = masm.codeBuffer.position();
-        final int offset = SafepointPollOffset.getValue() % unsafe.pageSize();
         Register scratch = ((RegisterValue) temp).getRegister();
-        new Setx(config.safepointPollingAddress + offset, scratch).emit(masm);
+        new Setx(config.safepointPollingAddress, scratch).emit(masm);
         tasm.recordInfopoint(pos, state, InfopointReason.SAFEPOINT);
         new Ldx(new SPARCAddress(scratch, 0), g0).emit(masm);
     }
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/UnsafeArrayCopySnippets.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/UnsafeArrayCopySnippets.java	Thu Aug 01 20:54:32 2013 +0200
@@ -24,11 +24,13 @@
 
 import static com.oracle.graal.api.meta.LocationIdentity.*;
 import static com.oracle.graal.hotspot.replacements.HotSpotReplacementsUtil.*;
+import static com.oracle.graal.hotspot.HotSpotGraalRuntime.*;
 import static com.oracle.graal.nodes.extended.BranchProbabilityNode.*;
 import static com.oracle.graal.replacements.SnippetTemplate.*;
 
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
+import com.oracle.graal.asm.*;
 import com.oracle.graal.nodes.extended.*;
 import com.oracle.graal.nodes.spi.*;
 import com.oracle.graal.replacements.*;
@@ -40,33 +42,88 @@
 
 public class UnsafeArrayCopySnippets implements Snippets {
 
+    private static final boolean supportsUnalignedMemoryAccess = graalRuntime().getTarget().arch.supportsUnalignedMemoryAccess();
+
     private static final Kind VECTOR_KIND = Kind.Long;
     private static final long VECTOR_SIZE = arrayIndexScale(Kind.Long);
 
     private static void vectorizedCopy(Object src, int srcPos, Object dest, int destPos, int length, Kind baseKind) {
-        int header = arrayBaseOffset(baseKind);
+        int arrayBaseOffset = arrayBaseOffset(baseKind);
         int elementSize = arrayIndexScale(baseKind);
         long byteLength = (long) length * elementSize;
-        long nonVectorBytes = byteLength % VECTOR_SIZE;
         long srcOffset = (long) srcPos * elementSize;
         long destOffset = (long) destPos * elementSize;
+
+        long preLoopBytes;
+        long mainLoopBytes;
+        long postLoopBytes;
+
+        // We can easily vectorize the loop if both offsets have the same alignment.
+        if (byteLength >= VECTOR_SIZE && (srcOffset % VECTOR_SIZE) == (destOffset % VECTOR_SIZE)) {
+            preLoopBytes = NumUtil.roundUp(arrayBaseOffset + srcOffset, VECTOR_SIZE) - (arrayBaseOffset + srcOffset);
+            postLoopBytes = (byteLength - preLoopBytes) % VECTOR_SIZE;
+            mainLoopBytes = byteLength - preLoopBytes - postLoopBytes;
+        } else {
+            // Does the architecture support unaligned memory accesses?
+            if (supportsUnalignedMemoryAccess) {
+                preLoopBytes = byteLength % VECTOR_SIZE;
+                mainLoopBytes = byteLength - preLoopBytes;
+                postLoopBytes = 0;
+            } else {
+                // No. Let's do element-wise copying.
+                preLoopBytes = byteLength;
+                mainLoopBytes = 0;
+                postLoopBytes = 0;
+            }
+        }
+
         if (probability(NOT_FREQUENT_PROBABILITY, src == dest) && probability(NOT_FREQUENT_PROBABILITY, srcPos < destPos)) {
             // bad aliased case
-            for (long i = byteLength - elementSize; i >= byteLength - nonVectorBytes; i -= elementSize) {
-                UnsafeStoreNode.store(dest, header, i + destOffset, UnsafeLoadNode.load(src, header, i + srcOffset, baseKind), baseKind);
+            srcOffset += byteLength;
+            destOffset += byteLength;
+
+            // Post-loop
+            for (long i = 0; i < postLoopBytes; i += elementSize) {
+                srcOffset -= elementSize;
+                destOffset -= elementSize;
+                Object a = UnsafeLoadNode.load(src, arrayBaseOffset, srcOffset, baseKind);
+                UnsafeStoreNode.store(dest, arrayBaseOffset, destOffset, a, baseKind);
             }
-            long vectorLength = byteLength - nonVectorBytes;
-            for (long i = vectorLength - VECTOR_SIZE; i >= 0; i -= VECTOR_SIZE) {
-                Long a = UnsafeLoadNode.load(src, header, i + srcOffset, VECTOR_KIND);
-                UnsafeStoreNode.store(dest, header, i + destOffset, a.longValue(), VECTOR_KIND);
+            // Main-loop
+            for (long i = 0; i < mainLoopBytes; i += VECTOR_SIZE) {
+                srcOffset -= VECTOR_SIZE;
+                destOffset -= VECTOR_SIZE;
+                Long a = UnsafeLoadNode.load(src, arrayBaseOffset, srcOffset, VECTOR_KIND);
+                UnsafeStoreNode.store(dest, arrayBaseOffset, destOffset, a.longValue(), VECTOR_KIND);
+            }
+            // Pre-loop
+            for (long i = 0; i < preLoopBytes; i += elementSize) {
+                srcOffset -= elementSize;
+                destOffset -= elementSize;
+                Object a = UnsafeLoadNode.load(src, arrayBaseOffset, srcOffset, baseKind);
+                UnsafeStoreNode.store(dest, arrayBaseOffset, destOffset, a, baseKind);
             }
         } else {
-            for (long i = 0; i < nonVectorBytes; i += elementSize) {
-                UnsafeStoreNode.store(dest, header, i + destOffset, UnsafeLoadNode.load(src, header, i + srcOffset, baseKind), baseKind);
+            // Pre-loop
+            for (long i = 0; i < preLoopBytes; i += elementSize) {
+                Object a = UnsafeLoadNode.load(src, arrayBaseOffset, srcOffset, baseKind);
+                UnsafeStoreNode.store(dest, arrayBaseOffset, destOffset, a, baseKind);
+                srcOffset += elementSize;
+                destOffset += elementSize;
             }
-            for (long i = nonVectorBytes; i < byteLength; i += VECTOR_SIZE) {
-                Long a = UnsafeLoadNode.load(src, header, i + srcOffset, VECTOR_KIND);
-                UnsafeStoreNode.store(dest, header, i + destOffset, a.longValue(), VECTOR_KIND);
+            // Main-loop
+            for (long i = 0; i < mainLoopBytes; i += VECTOR_SIZE) {
+                Long a = UnsafeLoadNode.load(src, arrayBaseOffset, srcOffset, VECTOR_KIND);
+                UnsafeStoreNode.store(dest, arrayBaseOffset, destOffset, a.longValue(), VECTOR_KIND);
+                srcOffset += VECTOR_SIZE;
+                destOffset += VECTOR_SIZE;
+            }
+            // Post-loop
+            for (long i = 0; i < postLoopBytes; i += elementSize) {
+                Object a = UnsafeLoadNode.load(src, arrayBaseOffset, srcOffset, baseKind);
+                UnsafeStoreNode.store(dest, arrayBaseOffset, destOffset, a, baseKind);
+                srcOffset += elementSize;
+                destOffset += elementSize;
             }
         }
     }
@@ -104,19 +161,19 @@
     @Snippet
     public static void arraycopyLong(long[] src, int srcPos, long[] dest, int destPos, int length) {
         Kind baseKind = Kind.Long;
-        int header = arrayBaseOffset(baseKind);
+        int arrayBaseOffset = arrayBaseOffset(baseKind);
         long byteLength = (long) length * arrayIndexScale(baseKind);
         long srcOffset = (long) srcPos * arrayIndexScale(baseKind);
         long destOffset = (long) destPos * arrayIndexScale(baseKind);
         if (src == dest && srcPos < destPos) { // bad aliased case
             for (long i = byteLength - VECTOR_SIZE; i >= 0; i -= VECTOR_SIZE) {
-                Long a = UnsafeLoadNode.load(src, header, i + srcOffset, VECTOR_KIND);
-                UnsafeStoreNode.store(dest, header, i + destOffset, a.longValue(), VECTOR_KIND);
+                Long a = UnsafeLoadNode.load(src, arrayBaseOffset, i + srcOffset, VECTOR_KIND);
+                UnsafeStoreNode.store(dest, arrayBaseOffset, i + destOffset, a.longValue(), VECTOR_KIND);
             }
         } else {
             for (long i = 0; i < byteLength; i += VECTOR_SIZE) {
-                Long a = UnsafeLoadNode.load(src, header, i + srcOffset, VECTOR_KIND);
-                UnsafeStoreNode.store(dest, header, i + destOffset, a.longValue(), VECTOR_KIND);
+                Long a = UnsafeLoadNode.load(src, arrayBaseOffset, i + srcOffset, VECTOR_KIND);
+                UnsafeStoreNode.store(dest, arrayBaseOffset, i + destOffset, a.longValue(), VECTOR_KIND);
             }
         }
     }
@@ -124,19 +181,19 @@
     @Snippet
     public static void arraycopyDouble(double[] src, int srcPos, double[] dest, int destPos, int length) {
         Kind baseKind = Kind.Double;
-        int header = arrayBaseOffset(baseKind);
+        int arrayBaseOffset = arrayBaseOffset(baseKind);
         long byteLength = (long) length * arrayIndexScale(baseKind);
         long srcOffset = (long) srcPos * arrayIndexScale(baseKind);
         long destOffset = (long) destPos * arrayIndexScale(baseKind);
         if (src == dest && srcPos < destPos) { // bad aliased case
             for (long i = byteLength - VECTOR_SIZE; i >= 0; i -= VECTOR_SIZE) {
-                Long a = UnsafeLoadNode.load(src, header, i + srcOffset, VECTOR_KIND);
-                UnsafeStoreNode.store(dest, header, i + destOffset, a.longValue(), VECTOR_KIND);
+                Long a = UnsafeLoadNode.load(src, arrayBaseOffset, i + srcOffset, VECTOR_KIND);
+                UnsafeStoreNode.store(dest, arrayBaseOffset, i + destOffset, a.longValue(), VECTOR_KIND);
             }
         } else {
             for (long i = 0; i < byteLength; i += VECTOR_SIZE) {
-                Long a = UnsafeLoadNode.load(src, header, i + srcOffset, VECTOR_KIND);
-                UnsafeStoreNode.store(dest, header, i + destOffset, a.longValue(), VECTOR_KIND);
+                Long a = UnsafeLoadNode.load(src, arrayBaseOffset, i + srcOffset, VECTOR_KIND);
+                UnsafeStoreNode.store(dest, arrayBaseOffset, i + destOffset, a.longValue(), VECTOR_KIND);
             }
         }
     }
@@ -147,18 +204,18 @@
     @Snippet
     public static void arraycopyObject(Object[] src, int srcPos, Object[] dest, int destPos, int length) {
         final int scale = arrayIndexScale(Kind.Object);
-        int header = arrayBaseOffset(Kind.Object);
+        int arrayBaseOffset = arrayBaseOffset(Kind.Object);
         if (src == dest && srcPos < destPos) { // bad aliased case
             long start = (long) (length - 1) * scale;
             for (long i = start; i >= 0; i -= scale) {
-                Object a = UnsafeLoadNode.load(src, header, i + (long) srcPos * scale, Kind.Object);
-                DirectObjectStoreNode.storeObject(dest, header, i + (long) destPos * scale, a);
+                Object a = UnsafeLoadNode.load(src, arrayBaseOffset, i + (long) srcPos * scale, Kind.Object);
+                DirectObjectStoreNode.storeObject(dest, arrayBaseOffset, i + (long) destPos * scale, a);
             }
         } else {
             long end = (long) length * scale;
             for (long i = 0; i < end; i += scale) {
-                Object a = UnsafeLoadNode.load(src, header, i + (long) srcPos * scale, Kind.Object);
-                DirectObjectStoreNode.storeObject(dest, header, i + (long) destPos * scale, a);
+                Object a = UnsafeLoadNode.load(src, arrayBaseOffset, i + (long) srcPos * scale, Kind.Object);
+                DirectObjectStoreNode.storeObject(dest, arrayBaseOffset, i + (long) destPos * scale, a);
             }
         }
     }
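
For context, a self-contained arithmetic sketch (hypothetical input values, not Graal snippet code) of the pre/main/post partitioning that vectorizedCopy above computes when source and destination offsets share the same alignment relative to VECTOR_SIZE:

    // Mirrors the partitioning formulas in vectorizedCopy for a byte-element copy
    // with an 8-byte vector size and a made-up 16-byte array base offset.
    public class CopyPartitionExample {
        static long roundUp(long number, long mod) {
            return ((number + mod - 1L) / mod) * mod;
        }

        public static void main(String[] args) {
            final long VECTOR_SIZE = 8;
            int arrayBaseOffset = 16;                  // hypothetical array header size
            long srcOffset = 3, destOffset = 11;       // same alignment: 3 == 11 (mod 8)
            long byteLength = 20;

            long preLoopBytes = roundUp(arrayBaseOffset + srcOffset, VECTOR_SIZE) - (arrayBaseOffset + srcOffset);
            long postLoopBytes = (byteLength - preLoopBytes) % VECTOR_SIZE;
            long mainLoopBytes = byteLength - preLoopBytes - postLoopBytes;

            // Prints "pre=5 main=8 post=7": 5 unaligned leading bytes, one 8-byte
            // vector copy, and 7 trailing bytes, summing to the 20-byte copy length.
            System.out.println("pre=" + preLoopBytes + " main=" + mainLoopBytes + " post=" + postLoopBytes);
        }
    }
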
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/java/CheckCastNode.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/java/CheckCastNode.java	Thu Aug 01 20:54:32 2013 +0200
@@ -146,7 +146,8 @@
             return object();
         }
 
-        // remove checkcast if next node is a more specific checkcast
+        // If the previous node is also a checkcast with a less precise but compatible type,
+        // replace both with a single checkcast that checks the more specific type.
         if (predecessor() instanceof CheckCastNode) {
             CheckCastNode ccn = (CheckCastNode) predecessor();
             if (ccn != null && ccn.type != null && ccn == object && ccn.forStoreCheck == forStoreCheck && ccn.type.isAssignableFrom(type)) {
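
For reference, a hedged source-level illustration (made-up example, not from this changeset) of the pattern this canonicalization targets: a checkcast whose predecessor checks a less precise but compatible type collapses into a single checkcast of the more specific type.

    // Hypothetical example: Integer is a subtype of Number, so this expression
    // typically compiles to "checkcast Number" followed by "checkcast Integer";
    // the canonicalization above folds the pair into a single checkcast of Integer.
    public class NestedCheckCastExample {
        static Integer nestedCast(Object o) {
            return (Integer) (Number) o;
        }
    }
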
--- a/graal/com.oracle.graal.phases.common/src/com/oracle/graal/phases/common/InliningPhase.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.phases.common/src/com/oracle/graal/phases/common/InliningPhase.java	Thu Aug 01 20:54:32 2013 +0200
@@ -674,21 +674,21 @@
         }
 
         public MethodInvocation currentInvocation() {
-            return invocationQueue.peek();
+            return invocationQueue.peekFirst();
         }
 
         public MethodInvocation pushInvocation(InlineInfo info, Assumptions assumptions, double probability, double relevance) {
             MethodInvocation methodInvocation = new MethodInvocation(info, new Assumptions(assumptions.useOptimisticAssumptions()), probability, relevance);
-            invocationQueue.push(methodInvocation);
+            invocationQueue.addFirst(methodInvocation);
             maxGraphs += info.numberOfMethods();
             assert graphQueue.size() <= maxGraphs;
             return methodInvocation;
         }
 
         public void popInvocation() {
-            maxGraphs -= invocationQueue.peek().callee.numberOfMethods();
+            maxGraphs -= invocationQueue.peekFirst().callee.numberOfMethods();
             assert graphQueue.size() <= maxGraphs;
-            invocationQueue.pop();
+            invocationQueue.removeFirst();
         }
 
         public int countRecursiveInlining(ResolvedJavaMethod method) {
--- a/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/GraalOptions.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/GraalOptions.java	Thu Aug 01 20:54:32 2013 +0200
@@ -43,7 +43,7 @@
     @Option(help = "")
     public static final OptionValue<Integer> MaximumDesiredSize = new OptionValue<>(20000);
     @Option(help = "")
-    public static final OptionValue<Integer> MaximumRecursiveInlining = new OptionValue<>(1);
+    public static final OptionValue<Integer> MaximumRecursiveInlining = new OptionValue<>(5);
 
     // inlining settings
     @Option(help = "")
@@ -230,13 +230,11 @@
     public static final OptionValue<Boolean> ResolveClassBeforeStaticInvoke = new OptionValue<>(false);
     @Option(help = "")
     public static final OptionValue<Boolean> CanOmitFrame = new OptionValue<>(true);
-    @Option(help = "")
-    public static final OptionValue<Integer> SafepointPollOffset = new OptionValue<>(256);
 
     @Option(help = "")
-    public static final OptionValue<Boolean> MemoryAwareScheduling = new OptionValue<>(true);
+    public static final OptionValue<Boolean> MemoryAwareScheduling = new OptionValue<>(false);
     @Option(help = "")
-    public static final OptionValue<Boolean> NewMemoryAwareScheduling = new OptionValue<>(false);
+    public static final OptionValue<Boolean> NewMemoryAwareScheduling = new OptionValue<>(true);
 
     // Translating tableswitch instructions
     @Option(help = "")
--- a/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/schedule/SchedulePhase.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/schedule/SchedulePhase.java	Thu Aug 01 20:54:32 2013 +0200
@@ -202,10 +202,17 @@
 
         @Override
         protected Map<LocationIdentity, Node> merge(Block merge, List<Map<LocationIdentity, Node>> states) {
+            return merge(merge, states, false);
+        }
+
+        protected Map<LocationIdentity, Node> merge(Block merge, List<Map<LocationIdentity, Node>> states, boolean loopbegin) {
             assert merge.getBeginNode() instanceof MergeNode;
             MergeNode mergeNode = (MergeNode) merge.getBeginNode();
 
-            Map<LocationIdentity, Node> initKillMap = cloneState(getBlockToKillMap().get(merge));
+            Map<LocationIdentity, Node> initKillMap = new HashMap<>();
+            if (loopbegin) {
+                initKillMap.putAll(getBlockToKillMap().get(merge));
+            }
             for (Map<LocationIdentity, Node> state : states) {
                 for (LocationIdentity locid : state.keySet()) {
                     if (initKillMap.containsKey(locid)) {
@@ -219,6 +226,9 @@
             }
 
             getMergeToKillMap().set(mergeNode, cloneState(initKillMap));
+            if (!loopbegin) {
+                initKillMap.putAll(getBlockToKillMap().get(merge));
+            }
             return initKillMap;
         }
 
@@ -232,7 +242,7 @@
             LoopInfo<Map<LocationIdentity, Node>> info = ReentrantBlockIterator.processLoop(this, loop, new HashMap<>(state));
 
             assert loop.header.getBeginNode() instanceof LoopBeginNode;
-            Map<LocationIdentity, Node> headerState = merge(loop.header, info.endStates);
+            Map<LocationIdentity, Node> headerState = merge(loop.header, info.endStates, true);
             getBlockToKillMap().put(loop.header, headerState);
 
             for (Map<LocationIdentity, Node> exitState : info.exitStates) {
@@ -258,7 +268,6 @@
     private final Map<FixedNode, List<FloatingNode>> phantomInputs = new IdentityHashMap<>();
     private final SchedulingStrategy selectedStrategy;
     private final MemoryScheduling memsched;
-    private final boolean dumpSchedule;
 
     public SchedulePhase() {
         this(OptScheduleOutOfLoops.getValue() ? SchedulingStrategy.LATEST_OUT_OF_LOOPS : SchedulingStrategy.LATEST);
@@ -276,13 +285,11 @@
             this.memsched = MemoryScheduling.NONE;
         }
         this.selectedStrategy = strategy;
-        this.dumpSchedule = false;
     }
 
-    public SchedulePhase(SchedulingStrategy strategy, MemoryScheduling memsched, boolean dumpSchedule) {
+    public SchedulePhase(SchedulingStrategy strategy, MemoryScheduling memsched) {
         this.selectedStrategy = strategy;
         this.memsched = memsched;
-        this.dumpSchedule = dumpSchedule;
     }
 
     @Override
@@ -347,52 +354,52 @@
     }
 
     private void printSchedule(String desc) {
-        if (dumpSchedule) {
-            TTY.print("=== %s / %s / %s (%s) ===\n", getCFG().getStartBlock().getBeginNode().graph(), selectedStrategy, memsched, desc);
-            for (Block b : getCFG().getBlocks()) {
-                TTY.print("==== b: %s. ", b);
-                TTY.print("dom: %s. ", b.getDominator());
-                TTY.print("post-dom: %s. ", b.getPostdominator());
-                TTY.print("preds: %s. ", b.getPredecessors());
-                TTY.print("succs: %s ====\n", b.getSuccessors());
-                BlockMap<Map<LocationIdentity, Node>> killMaps = getBlockToKillMap();
-                if (killMaps != null) {
-                    TTY.print("X block kills: \n");
-                    for (LocationIdentity locId : killMaps.get(b).keySet()) {
-                        TTY.print("X %s killed by %s\n", locId, killMaps.get(b).get(locId));
-                    }
-                }
-
-                if (getBlockToNodesMap().get(b) != null) {
-                    for (Node n : nodesFor(b)) {
-                        printNode(n);
-                    }
-                } else {
-                    for (Node n : b.getNodes()) {
-                        printNode(n);
-                    }
+        Debug.printf("=== %s / %s / %s (%s) ===\n", getCFG().getStartBlock().getBeginNode().graph(), selectedStrategy, memsched, desc);
+        for (Block b : getCFG().getBlocks()) {
+            Debug.printf("==== b: %s. ", b);
+            Debug.printf("dom: %s. ", b.getDominator());
+            Debug.printf("post-dom: %s. ", b.getPostdominator());
+            Debug.printf("preds: %s. ", b.getPredecessors());
+            Debug.printf("succs: %s ====\n", b.getSuccessors());
+            BlockMap<Map<LocationIdentity, Node>> killMaps = getBlockToKillMap();
+            if (killMaps != null) {
+                Debug.printf("X block kills: \n");
+                for (LocationIdentity locId : killMaps.get(b).keySet()) {
+                    Debug.printf("X %s killed by %s\n", locId, killMaps.get(b).get(locId));
                 }
             }
-            TTY.print("\n\n");
+
+            if (getBlockToNodesMap().get(b) != null) {
+                for (Node n : nodesFor(b)) {
+                    printNode(n);
+                }
+            } else {
+                for (Node n : b.getNodes()) {
+                    printNode(n);
+                }
+            }
         }
+        Debug.printf("\n\n");
     }
 
     private static void printNode(Node n) {
-        TTY.print("%s", n);
+        Debug.printf("%s", n);
         if (n instanceof MemoryCheckpoint.Single) {
-            TTY.print(" // kills %s", ((MemoryCheckpoint.Single) n).getLocationIdentity());
+            Debug.printf(" // kills %s", ((MemoryCheckpoint.Single) n).getLocationIdentity());
         } else if (n instanceof MemoryCheckpoint.Multi) {
-            TTY.print(" // kills ");
+            Debug.printf(" // kills ");
             for (LocationIdentity locid : ((MemoryCheckpoint.Multi) n).getLocationIdentities()) {
-                TTY.print("%s, ", locid);
+                Debug.printf("%s, ", locid);
             }
         } else if (n instanceof FloatingReadNode) {
             FloatingReadNode frn = (FloatingReadNode) n;
-            TTY.print(" // from %s", frn.location().getLocationIdentity());
-            TTY.print(", lastAccess: %s", frn.lastLocationAccess());
-            TTY.print(", object: %s", frn.object());
+            Debug.printf(" // from %s", frn.location().getLocationIdentity());
+            Debug.printf(", lastAccess: %s", frn.lastLocationAccess());
+            Debug.printf(", object: %s", frn.object());
+        } else if (n instanceof GuardNode) {
+            Debug.printf(", guard: %s", ((GuardNode) n).getGuard());
         }
-        TTY.print("\n");
+        Debug.printf("\n");
     }
 
     public ControlFlowGraph getCFG() {
@@ -464,39 +471,60 @@
                 break;
             case LATEST:
             case LATEST_OUT_OF_LOOPS:
-                if (memsched == MemoryScheduling.OPTIMAL && node instanceof FloatingReadNode) {
+                if (memsched == MemoryScheduling.OPTIMAL && node instanceof FloatingReadNode && ((FloatingReadNode) node).location().getLocationIdentity() != FINAL_LOCATION) {
                     block = optimalBlock((FloatingReadNode) node, strategy);
                 } else {
                     block = latestBlock(node, strategy);
-                }
-                if (block == null) {
-                    block = earliestBlock(node);
-                } else if (strategy == SchedulingStrategy.LATEST_OUT_OF_LOOPS && !(node instanceof VirtualObjectNode)) {
-                    // schedule at the latest position possible in the outermost loop possible
-                    Block earliestBlock = earliestBlock(node);
-                    Block before = block;
-                    block = scheduleOutOfLoops(node, block, earliestBlock);
-                    if (!earliestBlock.dominates(block)) {
-                        throw new SchedulingError("%s: Graph cannot be scheduled : inconsistent for %s, %d usages, (%s needs to dominate %s (before %s))", node.graph(), node, node.usages().count(),
-                                        earliestBlock, block, before);
+                    if (block == null) {
+                        block = earliestBlock(node);
+                    } else if (strategy == SchedulingStrategy.LATEST_OUT_OF_LOOPS && !(node instanceof VirtualObjectNode)) {
+                        // schedule at the latest position possible in the outermost loop possible
+                        Block earliestBlock = earliestBlock(node);
+                        Block before = block;
+                        block = scheduleOutOfLoops(node, block, earliestBlock);
+                        if (!earliestBlock.dominates(block)) {
+                            throw new SchedulingError("%s: Graph cannot be scheduled : inconsistent for %s, %d usages, (%s needs to dominate %s (before %s))", node.graph(), node,
+                                            node.usages().count(), earliestBlock, block, before);
+                        }
                     }
                 }
                 break;
             default:
                 throw new GraalInternalError("unknown scheduling strategy");
         }
+        assert earliestBlock(node).dominates(block) : "node " + node + " in block " + block + " is not dominated by earliest " + earliestBlock(node);
         cfg.getNodeToBlock().set(node, block);
         blockToNodesMap.get(block).add(node);
     }
 
-    // assign floating read to block according to kill maps
+    /**
+     * This method tries to find the latest position for a read by taking the information
+     * gathered by {@link NewMemoryScheduleClosure} into account.
+     * 
+     * The idea is to iterate the dominator tree starting with the latest schedule of the read.
+     * 
+     * <pre>
+     *    U      upper bound block, defined by the floating read's last location access
+     *    &#9650;
+     *    E      earliest block
+     *    &#9650;
+     *    O      optimal block, first block that contains a kill of the read's location
+     *    &#9650;
+     *    L      latest block
+     * </pre>
+     * 
+     * i.e. <code>upperbound `dom` earliest `dom` optimal `dom` latest</code>. However, there are
+     * cases where <code>earliest `dom` optimal</code> does not hold, because the position is
+     * (implicitly) bounded by an anchor of the read's guard. In such cases, the earliest schedule
+     * is taken.
+     * 
+     */
     private Block optimalBlock(FloatingReadNode n, SchedulingStrategy strategy) {
         assert memsched == MemoryScheduling.OPTIMAL;
 
         LocationIdentity locid = n.location().getLocationIdentity();
-        if (locid == FINAL_LOCATION) {
-            return latestBlock(n, strategy);
-        }
+        assert locid != FINAL_LOCATION;
+
         Node upperBound = n.lastLocationAccess();
         Block upperBoundBlock = forKillLocation(upperBound);
         Block earliestBlock = earliestBlock(n);
@@ -506,35 +534,68 @@
         assert currentBlock != null && earliestBlock.dominates(currentBlock) : "earliest (" + earliestBlock + ") should dominate latest block (" + currentBlock + ")";
         Block previousBlock = currentBlock;
 
+        Debug.printf("processing %s (accessing %s): latest %s, earliest %s, upper bound %s (%s)\n", n, locid, currentBlock, earliestBlock, upperBoundBlock, upperBound);
+
         int iterations = 0;
-        // iterate the dominator tree until the last kill matches or reaching a upper bound
+        // iterate the dominator tree
         while (true) {
             iterations++;
+            assert earliestBlock.dominates(previousBlock) : "iterations: " + iterations;
             Node lastKill = blockToKillMap.get(currentBlock).get(locid);
+            boolean isAtEarliest = earliestBlock == previousBlock && previousBlock != currentBlock;
 
             if (lastKill.equals(upperBound)) {
                 // assign node to the block which kills the location
 
-                if (previousBlock.getBeginNode() instanceof MergeNode) {
-                    // merges kill locations right at the beginning of a block
+                boolean outOfLoop = false;
+
+                // move the read out of the loop if possible, as far as the kill maps and the
+                // earliest schedule allow
+                if (currentBlock != earliestBlock && previousBlock != earliestBlock) {
+                    assert earliestBlock.dominates(currentBlock);
+                    Block t = currentBlock;
+                    while (t.getLoop() != null && t.getDominator() != null && earliestBlock.dominates(t)) {
+                        Block dom = t.getDominator();
+                        if (dom.getLoopDepth() < currentBlock.getLoopDepth() && blockToKillMap.get(dom).get(locid) == upperBound && earliestBlock.dominates(dom)) {
+                            printIterations(iterations, "moved out of loop, from " + currentBlock + " to " + dom);
+                            previousBlock = currentBlock = dom;
+                            outOfLoop = true;
+                        }
+                        t = dom;
+                    }
+                }
+
+                if (!outOfLoop && previousBlock.getBeginNode() instanceof MergeNode) {
+                    // Merges kill locations right at the beginning of a block. If a merge is the
+                    // killing node, the read is assigned to the dominating block.
+
                     MergeNode merge = (MergeNode) previousBlock.getBeginNode();
-                    if (getMergeToKillMap().get(merge).containsKey(locid)) {
-                        printIterations(iterations, "kill by merge");
+                    Node killer = getMergeToKillMap().get(merge).get(locid);
+
+                    if (killer != null && killer == merge) {
+                        // check if we violate earliest schedule condition
+                        if (isAtEarliest) {
+                            printIterations(iterations, "earliest bound in merge: " + earliestBlock);
+                            return earliestBlock;
+                        }
+                        printIterations(iterations, "kill by merge: " + currentBlock);
                         return currentBlock;
                     }
                 }
 
-                printIterations(iterations, "regular kill");
+                // The current block's last kill matches the read's last access, which means the
+                // previous (dominated) block kills the location; schedule the read there.
+                printIterations(iterations, "regular kill: " + previousBlock);
                 return previousBlock;
             }
 
-            if (upperBoundBlock == currentBlock) { // reached upper bound
-                printIterations(iterations, "upper bound");
-                return currentBlock;
+            if (isAtEarliest) {
+                printIterations(iterations, "earliest bound: " + earliestBlock);
+                return earliestBlock;
             }
 
-            if (earliestBlock == currentBlock) { // reached earliest block
-                printIterations(iterations, "earliest block");
+            if (upperBoundBlock == currentBlock) {
+                printIterations(iterations, "upper bound: " + currentBlock + ", previous: " + previousBlock);
                 return currentBlock;
             }
 
@@ -544,10 +605,8 @@
         }
     }
 
-    private void printIterations(int iterations, String desc) {
-        if (dumpSchedule) {
-            TTY.print("\niterations: %d,  %s\n\n", iterations, desc);
-        }
+    private static void printIterations(int iterations, String desc) {
+        Debug.printf("iterations: %d,  %s\n", iterations, desc);
     }
 
     /**
@@ -578,7 +637,7 @@
             }
         }
 
-        assert cdbc.block == null || earliestBlock(node).dominates(cdbc.block) : "failed to find correct latest schedule for " + node;
+        assert cdbc.block == null || earliestBlock(node).dominates(cdbc.block) : "failed to find correct latest schedule for " + node + ". cdbc: " + cdbc.block + ", earliest: " + earliestBlock(node);
         return cdbc.block;
     }
 
@@ -750,8 +809,9 @@
 
     private void sortNodesWithinBlocks(StructuredGraph graph, SchedulingStrategy strategy) {
         NodeBitMap visited = graph.createNodeBitMap();
+        NodeBitMap beforeLastLocation = graph.createNodeBitMap();
         for (Block b : cfg.getBlocks()) {
-            sortNodesWithinBlock(b, visited, strategy);
+            sortNodesWithinBlock(b, visited, beforeLastLocation, strategy);
             assert noDuplicatedNodesInBlock(b) : "duplicated nodes in " + b;
         }
     }
@@ -762,7 +822,7 @@
         return list.size() == hashset.size();
     }
 
-    private void sortNodesWithinBlock(Block b, NodeBitMap visited, SchedulingStrategy strategy) {
+    private void sortNodesWithinBlock(Block b, NodeBitMap visited, NodeBitMap beforeLastLocation, SchedulingStrategy strategy) {
         if (visited.isMarked(b.getBeginNode()) || cfg.blockFor(b.getBeginNode()) != b) {
             throw new SchedulingError();
         }
@@ -777,15 +837,37 @@
                 break;
             case LATEST:
             case LATEST_OUT_OF_LOOPS:
-                sortedInstructions = sortNodesWithinBlockLatest(b, visited);
+                sortedInstructions = sortNodesWithinBlockLatest(b, visited, beforeLastLocation);
                 break;
             default:
                 throw new GraalInternalError("unknown scheduling strategy");
         }
+        assert filterSchedulableNodes(blockToNodesMap.get(b)).size() == removeProxies(sortedInstructions).size() : "sorted block does not contain the same amount of nodes: " +
+                        filterSchedulableNodes(blockToNodesMap.get(b)) + " vs. " + removeProxies(sortedInstructions);
         assert sameOrderForFixedNodes(blockToNodesMap.get(b), sortedInstructions) : "fixed nodes in sorted block are not in the same order";
         blockToNodesMap.put(b, sortedInstructions);
     }
 
+    private static List<ScheduledNode> removeProxies(List<ScheduledNode> list) {
+        List<ScheduledNode> result = new ArrayList<>();
+        for (ScheduledNode n : list) {
+            if (!(n instanceof ProxyNode)) {
+                result.add(n);
+            }
+        }
+        return result;
+    }
+
+    private static List<ScheduledNode> filterSchedulableNodes(List<ScheduledNode> list) {
+        List<ScheduledNode> result = new ArrayList<>();
+        for (ScheduledNode n : list) {
+            if (!(n instanceof LocalNode) && !(n instanceof PhiNode)) {
+                result.add(n);
+            }
+        }
+        return result;
+    }
+
     private static boolean sameOrderForFixedNodes(List<ScheduledNode> fixed, List<ScheduledNode> sorted) {
         Iterator<ScheduledNode> fixedIterator = fixed.iterator();
         Iterator<ScheduledNode> sortedIterator = sorted.iterator();
@@ -813,12 +895,10 @@
      * all inputs. This means that a node is added to the list after all its inputs have been
      * processed.
      */
-    private List<ScheduledNode> sortNodesWithinBlockLatest(Block b, NodeBitMap visited) {
+    private List<ScheduledNode> sortNodesWithinBlockLatest(Block b, NodeBitMap visited, NodeBitMap beforeLastLocation) {
         List<ScheduledNode> instructions = blockToNodesMap.get(b);
         List<ScheduledNode> sortedInstructions = new ArrayList<>(blockToNodesMap.get(b).size() + 2);
         List<FloatingReadNode> reads = new ArrayList<>();
-        // TODO: need bitmap for just within a block
-        NodeBitMap beforeLastLocation = cfg.graph.createNodeBitMap();
 
         if (memsched == MemoryScheduling.OPTIMAL) {
             /*
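
A hedged, self-contained sketch (made-up types and names, not the Graal API) of the core dominator-tree walk that optimalBlock performs, ignoring the merge, loop, and earliest-bound special cases handled above: starting from the read's latest legal block, walk up the dominator chain until the block whose recorded last kill of the location is the read's own last location access, and place the read in the previously visited (dominated) block, where the next kill occurs.

    import java.util.HashMap;
    import java.util.Map;

    public class OptimalBlockSketch {

        static final class Block {
            final String name;
            final Block dominator;                                 // immediate dominator, null for the start block
            final Map<String, String> lastKill = new HashMap<>();  // location identity -> last killing node
            Block(String name, Block dominator) { this.name = name; this.dominator = dominator; }
        }

        // Simplified model of the walk in SchedulePhase.optimalBlock.
        static Block optimalBlock(Block latest, Block earliest, String location, String lastLocationAccess) {
            Block current = latest;
            Block previous = latest;
            while (true) {
                if (lastLocationAccess.equals(current.lastKill.get(location))) {
                    // No kill between the read's last access and the end of current, so the
                    // next kill is inside previous: schedule the read there.
                    return previous;
                }
                if (current == earliest) {
                    return earliest;                               // never move above the earliest legal block
                }
                previous = current;
                current = current.dominator;
            }
        }

        public static void main(String[] args) {
            Block start = new Block("start", null);
            Block mid = new Block("mid", start);
            Block ret = new Block("return", mid);
            start.lastKill.put("field f", "write#1");              // the read's lastLocationAccess
            mid.lastKill.put("field f", "write#2");                // a later kill of the same location
            ret.lastKill.put("field f", "write#2");
            // The read must stay after write#1 and before write#2, so the walk prints "mid".
            System.out.println(optimalBlock(ret, start, "field f", "write#1").name);
        }
    }
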
--- a/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/FrameWithoutBoxing.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/FrameWithoutBoxing.java	Thu Aug 01 20:54:32 2013 +0200
@@ -283,6 +283,14 @@
 
     @Override
     public boolean isInitialized(FrameSlot slot) {
-        return tags[slot.getIndex()] != FrameSlotKind.Illegal.ordinal();
+        try {
+            return tags[slot.getIndex()] != 0;
+        } catch (ArrayIndexOutOfBoundsException ex) {
+            CompilerDirectives.transferToInterpreter();
+            if (slot.getIndex() >= 0 && slot.getIndex() < descriptor.getSize()) {
+                return false;
+            }
+            throw ex;
+        }
     }
 }
--- a/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/phases/ReplaceIntrinsicsPhase.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/phases/ReplaceIntrinsicsPhase.java	Thu Aug 01 20:54:32 2013 +0200
@@ -47,10 +47,16 @@
             if (methodCallTarget.isAlive()) {
                 InvokeKind invokeKind = methodCallTarget.invokeKind();
                 if (invokeKind == InvokeKind.Static || invokeKind == InvokeKind.Special) {
-                    StructuredGraph inlineGraph = replacements.getMethodSubstitution(methodCallTarget.targetMethod());
-                    if (inlineGraph != null) {
-                        InliningUtil.inline(methodCallTarget.invoke(), inlineGraph, false);
+                    Class<? extends FixedWithNextNode> macroSubstitution = replacements.getMacroSubstitution(methodCallTarget.targetMethod());
+                    if (macroSubstitution != null) {
+                        InliningUtil.inlineMacroNode(methodCallTarget.invoke(), methodCallTarget.targetMethod(), graph, macroSubstitution);
                         Debug.dump(graph, "After inlining %s", methodCallTarget.targetMethod().toString());
+                    } else {
+                        StructuredGraph inlineGraph = replacements.getMethodSubstitution(methodCallTarget.targetMethod());
+                        if (inlineGraph != null) {
+                            InliningUtil.inline(methodCallTarget.invoke(), inlineGraph, true);
+                            Debug.dump(graph, "After inlining %s", methodCallTarget.targetMethod().toString());
+                        }
                     }
                 }
             }
--- a/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/substitutions/FrameWithoutBoxingSubstitutions.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.truffle/src/com/oracle/graal/truffle/substitutions/FrameWithoutBoxingSubstitutions.java	Thu Aug 01 20:54:32 2013 +0200
@@ -209,4 +209,9 @@
     public static MaterializedFrame materialize(FrameWithoutBoxing frame) {
         return MaterializeFrameNode.materialize(frame);
     }
+
+    @MethodSubstitution(isStatic = false, forced = true)
+    public static boolean isInitialized(FrameWithoutBoxing frame, FrameSlot slot) {
+        return getTag(frame, slot) != 0;
+    }
 }
--- a/graal/com.oracle.graal.virtual/src/com/oracle/graal/virtual/phases/ea/PEReadEliminationClosure.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.virtual/src/com/oracle/graal/virtual/phases/ea/PEReadEliminationClosure.java	Thu Aug 01 20:54:32 2013 +0200
@@ -55,27 +55,35 @@
         if (!deleted) {
             if (node instanceof LoadFieldNode) {
                 LoadFieldNode load = (LoadFieldNode) node;
-                ValueNode object = GraphUtil.unproxify(load.object());
-                ValueNode cachedValue = state.getReadCache(object, load.field());
-                if (cachedValue != null) {
-                    effects.replaceAtUsages(load, cachedValue);
-                    state.addScalarAlias(load, cachedValue);
-                    deleted = true;
+                if (!load.isVolatile()) {
+                    ValueNode object = GraphUtil.unproxify(load.object());
+                    ValueNode cachedValue = state.getReadCache(object, load.field());
+                    if (cachedValue != null) {
+                        effects.replaceAtUsages(load, cachedValue);
+                        state.addScalarAlias(load, cachedValue);
+                        deleted = true;
+                    } else {
+                        state.addReadCache(object, load.field(), load);
+                    }
                 } else {
-                    state.addReadCache(object, load.field(), load);
+                    processIdentity(state, ANY_LOCATION);
                 }
             } else if (node instanceof StoreFieldNode) {
                 StoreFieldNode store = (StoreFieldNode) node;
-                ValueNode object = GraphUtil.unproxify(store.object());
-                ValueNode cachedValue = state.getReadCache(object, store.field());
+                if (!store.isVolatile()) {
+                    ValueNode object = GraphUtil.unproxify(store.object());
+                    ValueNode cachedValue = state.getReadCache(object, store.field());
 
-                ValueNode value = state.getScalarAlias(store.value());
-                if (GraphUtil.unproxify(value) == GraphUtil.unproxify(cachedValue)) {
-                    effects.deleteFixedNode(store);
-                    deleted = true;
+                    ValueNode value = state.getScalarAlias(store.value());
+                    if (GraphUtil.unproxify(value) == GraphUtil.unproxify(cachedValue)) {
+                        effects.deleteFixedNode(store);
+                        deleted = true;
+                    }
+                    state.killReadCache(store.field());
+                    state.addReadCache(object, store.field(), value);
+                } else {
+                    processIdentity(state, ANY_LOCATION);
                 }
-                state.killReadCache(store.field());
-                state.addReadCache(object, store.field(), value);
             } else if (node instanceof MemoryCheckpoint.Single) {
                 METRIC_MEMORYCHECKOINT.increment();
                 LocationIdentity identity = ((MemoryCheckpoint.Single) node).getLocationIdentity();
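
This hunk (and the very similar change to ReadEliminationClosure further down) makes the closure treat volatile field accesses as barriers: a volatile load or store is never served from or recorded in the read cache, and the whole cache is invalidated via processIdentity(state, ANY_LOCATION). A self-contained sketch of a field-value cache with that flush-on-volatile behavior, using invented names (ReadCache, FieldKey) rather than the Graal data structures:

    import java.util.HashMap;
    import java.util.Map;

    // Sketch of the idea behind the volatile handling above: a per-field value cache
    // that is flushed completely whenever a volatile access (a full barrier) is seen.
    final class ReadCache {
        record FieldKey(Object object, String field) {}

        private final Map<FieldKey, Object> cache = new HashMap<>();

        Object load(Object object, String field, boolean isVolatile, Object freshValue) {
            if (isVolatile) {
                cache.clear();            // volatile load: behave as if ANY_LOCATION was killed
                return freshValue;        // and never reuse a cached value
            }
            FieldKey key = new FieldKey(object, field);
            Object cached = cache.get(key);
            if (cached != null) {
                return cached;            // non-volatile load can be satisfied from the cache
            }
            cache.put(key, freshValue);   // remember the freshly loaded value
            return freshValue;
        }

        void store(Object object, String field, boolean isVolatile, Object value) {
            if (isVolatile) {
                cache.clear();            // volatile store invalidates everything
                return;
            }
            cache.put(new FieldKey(object, field), value);  // non-volatile store updates the cache
        }
    }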
--- a/graal/com.oracle.graal.virtual/src/com/oracle/graal/virtual/phases/ea/ReadEliminationClosure.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.graal.virtual/src/com/oracle/graal/virtual/phases/ea/ReadEliminationClosure.java	Thu Aug 01 20:54:32 2013 +0200
@@ -55,29 +55,37 @@
         boolean deleted = false;
         if (node instanceof LoadFieldNode) {
             LoadFieldNode load = (LoadFieldNode) node;
-            ValueNode object = GraphUtil.unproxify(load.object());
-            LoadCacheEntry identifier = new LoadCacheEntry(object, load.field());
-            ValueNode cachedValue = state.getCacheEntry(identifier);
-            if (cachedValue != null) {
-                effects.replaceAtUsages(load, cachedValue);
-                state.addScalarAlias(load, cachedValue);
-                deleted = true;
+            if (!load.isVolatile()) {
+                ValueNode object = GraphUtil.unproxify(load.object());
+                LoadCacheEntry identifier = new LoadCacheEntry(object, load.field());
+                ValueNode cachedValue = state.getCacheEntry(identifier);
+                if (cachedValue != null) {
+                    effects.replaceAtUsages(load, cachedValue);
+                    state.addScalarAlias(load, cachedValue);
+                    deleted = true;
+                } else {
+                    state.addCacheEntry(identifier, load);
+                }
             } else {
-                state.addCacheEntry(identifier, load);
+                processIdentity(state, ANY_LOCATION);
             }
         } else if (node instanceof StoreFieldNode) {
             StoreFieldNode store = (StoreFieldNode) node;
-            ValueNode object = GraphUtil.unproxify(store.object());
-            LoadCacheEntry identifier = new LoadCacheEntry(object, store.field());
-            ValueNode cachedValue = state.getCacheEntry(identifier);
+            if (!store.isVolatile()) {
+                ValueNode object = GraphUtil.unproxify(store.object());
+                LoadCacheEntry identifier = new LoadCacheEntry(object, store.field());
+                ValueNode cachedValue = state.getCacheEntry(identifier);
 
-            ValueNode value = state.getScalarAlias(store.value());
-            if (GraphUtil.unproxify(value) == GraphUtil.unproxify(cachedValue)) {
-                effects.deleteFixedNode(store);
-                deleted = true;
+                ValueNode value = state.getScalarAlias(store.value());
+                if (GraphUtil.unproxify(value) == GraphUtil.unproxify(cachedValue)) {
+                    effects.deleteFixedNode(store);
+                    deleted = true;
+                }
+                state.killReadCache((LocationIdentity) store.field());
+                state.addCacheEntry(identifier, value);
+            } else {
+                processIdentity(state, ANY_LOCATION);
             }
-            state.killReadCache((LocationIdentity) store.field());
-            state.addCacheEntry(identifier, value);
         } else if (node instanceof ReadNode) {
             ReadNode read = (ReadNode) node;
             if (read.location() instanceof ConstantLocationNode) {
@@ -104,13 +112,14 @@
                     effects.deleteFixedNode(write);
                     deleted = true;
                 }
-                state.killReadCache(write.location().getLocationIdentity());
+                processIdentity(state, write.location().getLocationIdentity());
                 state.addCacheEntry(identifier, value);
             } else {
-                state.killReadCache(write.location().getLocationIdentity());
+                processIdentity(state, write.location().getLocationIdentity());
             }
         } else if (node instanceof MemoryCheckpoint.Single) {
-            processIdentity(state, ((MemoryCheckpoint.Single) node).getLocationIdentity());
+            LocationIdentity identity = ((MemoryCheckpoint.Single) node).getLocationIdentity();
+            processIdentity(state, identity);
         } else if (node instanceof MemoryCheckpoint.Multi) {
             for (LocationIdentity identity : ((MemoryCheckpoint.Multi) node).getLocationIdentities()) {
                 processIdentity(state, identity);
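
In the second hunk, write handling goes through processIdentity instead of calling killReadCache directly, so a write kills exactly the cache entries filed under its location identity (or everything, for the wildcard identity). A hedged sketch of that kill-by-identity idea; LocationCache and the string-based identities are illustrative, not the Graal LocationIdentity API:

    import java.util.HashMap;
    import java.util.Map;

    // Entries are grouped under an identity; a write removes that one group,
    // while the wildcard identity clears the whole cache.
    final class LocationCache {
        static final String ANY = "<any>";

        private final Map<String, Map<Long, Object>> entries = new HashMap<>();

        void add(String identity, long offset, Object value) {
            entries.computeIfAbsent(identity, k -> new HashMap<>()).put(offset, value);
        }

        /** Mirrors processIdentity: a wildcard kills everything, otherwise only one identity. */
        void processIdentity(String identity) {
            if (ANY.equals(identity)) {
                entries.clear();
            } else {
                entries.remove(identity);
            }
        }
    }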
--- a/graal/com.oracle.truffle.api/src/com/oracle/truffle/api/impl/DefaultVirtualFrame.java	Thu Aug 01 20:53:54 2013 +0200
+++ b/graal/com.oracle.truffle.api/src/com/oracle/truffle/api/impl/DefaultVirtualFrame.java	Thu Aug 01 20:54:32 2013 +0200
@@ -204,6 +204,13 @@
 
     @Override
     public boolean isInitialized(FrameSlot slot) {
-        return tags[slot.getIndex()] != FrameSlotKind.Illegal.ordinal();
+        try {
+            return tags[slot.getIndex()] != 0;
+        } catch (ArrayIndexOutOfBoundsException ex) {
+            if (slot.getIndex() >= 0 && slot.getIndex() < descriptor.getSize()) {
+                return false;
+            }
+            throw ex;
+        }
     }
 }
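
The default (interpreted) frame now mirrors the tag convention of the substitution and additionally tolerates a tags array that is shorter than the frame descriptor: an ArrayIndexOutOfBoundsException for a slot index that is still within the descriptor's size is answered with false, since such a slot simply has not been written yet. A sketch of the same pattern under an invented class name (GrowableTags):

    // The tags array is grown lazily, so a slot beyond its current length is
    // simply "not initialized yet" rather than an error.
    final class GrowableTags {
        private byte[] tags = new byte[0];   // grown lazily as slots are written
        private final int declaredSize;

        GrowableTags(int declaredSize) {
            this.declaredSize = declaredSize;
        }

        void set(int slot, byte kind) {
            if (slot >= tags.length) {
                byte[] grown = new byte[declaredSize];
                System.arraycopy(tags, 0, grown, 0, tags.length);
                tags = grown;                // resize on first write to a high slot
            }
            tags[slot] = kind;
        }

        boolean isInitialized(int slot) {
            try {
                return tags[slot] != 0;
            } catch (ArrayIndexOutOfBoundsException ex) {
                if (slot >= 0 && slot < declaredSize) {
                    return false;            // valid slot, array just has not grown that far yet
                }
                throw ex;                    // genuinely out-of-range index: keep the exception
            }
        }
    }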
--- a/src/cpu/x86/vm/graalGlobals_x86.hpp	Thu Aug 01 20:53:54 2013 +0200
+++ b/src/cpu/x86/vm/graalGlobals_x86.hpp	Thu Aug 01 20:54:32 2013 +0200
@@ -31,8 +31,6 @@
 // Sets the default values for platform dependent flags used by the Graal compiler.
 // (see graalGlobals.hpp)
 
-define_pd_global(intx, GraalSafepointPollOffset,     256  );
-
 #if !defined(COMPILER1) && !defined(COMPILER2)
 define_pd_global(bool, BackgroundCompilation,        true );
 define_pd_global(bool, UseTLAB,                      true );
--- a/src/share/vm/graal/graalCompilerToVM.cpp	Thu Aug 01 20:53:54 2013 +0200
+++ b/src/share/vm/graal/graalCompilerToVM.cpp	Thu Aug 01 20:54:32 2013 +0200
@@ -702,6 +702,7 @@
   set_int("klassHasFinalizerFlag", JVM_ACC_HAS_FINALIZER);
   set_int("threadExceptionOopOffset", in_bytes(JavaThread::exception_oop_offset()));
   set_int("threadExceptionPcOffset", in_bytes(JavaThread::exception_pc_offset()));
+  set_long("safepointPollingAddress", (jlong)(os::get_polling_page()));
 #ifdef TARGET_ARCH_x86
   set_boolean("isPollingPageFar", Assembler::is_polling_page_far());
   set_int("runtimeCallStackSize", (jint)frame::arg_reg_save_area_bytes);
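
The C++ change above exports the address of the VM's safepoint polling page to the Java side as the long constant safepointPollingAddress. Purely as an illustration of how such a value might be consumed, here is a small Java sketch; HotSpotConfigSketch and its fields are invented names, not the actual Graal configuration class:

    // Holds the exported polling-page address; a backend could, for example, test
    // whether a faulting address falls on the polling page when decoding a safepoint poll.
    final class HotSpotConfigSketch {
        final long safepointPollingAddress;
        final int pageSize;

        HotSpotConfigSketch(long safepointPollingAddress, int pageSize) {
            this.safepointPollingAddress = safepointPollingAddress;
            this.pageSize = pageSize;
        }

        /** True if the given address lies on the polling page. */
        boolean isOnPollingPage(long address) {
            return address >= safepointPollingAddress && address < safepointPollingAddress + pageSize;
        }
    }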