changeset 10957:c6e004578eb4

Merge
author twisti
date Thu, 01 Aug 2013 15:23:05 -0700
parents 563c6d1994c0 2556046ca25a
children e2c63a0b799c
diffstat 7 files changed, 275 insertions(+), 96 deletions(-)
--- a/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/EliminateNestedCheckCastsTest.java	Wed Jul 31 14:04:24 2013 -0700
+++ b/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/EliminateNestedCheckCastsTest.java	Thu Aug 01 15:23:05 2013 -0700
@@ -106,11 +106,11 @@
     }
 
     private StructuredGraph compileSnippet(final String snippet, final int checkcasts, final int afterCanon) {
-        return Debug.scope(snippet, new Callable<StructuredGraph>() {
+        final StructuredGraph graph = parse(snippet);
+        return Debug.scope("NestedCheckCastsTest", graph, new Callable<StructuredGraph>() {
 
             @Override
             public StructuredGraph call() throws Exception {
-                StructuredGraph graph = parse(snippet);
                 Debug.dump(graph, "After parsing: " + snippet);
                 Assert.assertEquals(checkcasts, graph.getNodes().filter(CheckCastNode.class).count());
                 new CanonicalizerPhase.Instance(runtime(), new Assumptions(false), true).apply(graph);
--- a/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/MemoryScheduleTest.java	Wed Jul 31 14:04:24 2013 -0700
+++ b/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/MemoryScheduleTest.java	Thu Aug 01 15:23:05 2013 -0700
@@ -85,7 +85,7 @@
     @Test
     public void testSimple() {
         for (TestMode mode : TestMode.values()) {
-            SchedulePhase schedule = getFinalSchedule("testSimpleSnippet", mode, MemoryScheduling.OPTIMAL, false);
+            SchedulePhase schedule = getFinalSchedule("testSimpleSnippet", mode, MemoryScheduling.OPTIMAL);
             StructuredGraph graph = schedule.getCFG().graph;
             assertReadAndWriteInSameBlock(schedule, true);
             assertOrderedAfterSchedule(schedule, graph.getNodes().filter(FloatingReadNode.class).first(), graph.getNodes().filter(WriteNode.class).first());
@@ -110,7 +110,7 @@
     @Test
     public void testSplit1() {
         for (TestMode mode : TestMode.values()) {
-            SchedulePhase schedule = getFinalSchedule("testSplitSnippet1", mode, MemoryScheduling.OPTIMAL, false);
+            SchedulePhase schedule = getFinalSchedule("testSplitSnippet1", mode, MemoryScheduling.OPTIMAL);
             assertReadWithinStartBlock(schedule, true);
             assertReadWithinReturnBlock(schedule, false);
         }
@@ -133,7 +133,7 @@
 
     @Test
     public void testSplit2() {
-        SchedulePhase schedule = getFinalSchedule("testSplit2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testSplit2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertReadWithinStartBlock(schedule, false);
         assertReadWithinReturnBlock(schedule, true);
     }
@@ -157,7 +157,7 @@
 
     @Test
     public void testLoop1() {
-        SchedulePhase schedule = getFinalSchedule("testLoop1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testLoop1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(6, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, true);
         assertReadWithinReturnBlock(schedule, false);
@@ -182,13 +182,35 @@
 
     @Test
     public void testLoop2() {
-        SchedulePhase schedule = getFinalSchedule("testLoop2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testLoop2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(6, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, false);
         assertReadWithinReturnBlock(schedule, true);
     }
 
     /**
+     * Here the read should float out of the loop.
+     */
+    public static int testLoop3Snippet(int a) {
+        int j = 0;
+        for (int i = 0; i < a; i++) {
+            if (i - container.a == 0) {
+                break;
+            }
+            j++;
+        }
+        return j;
+    }
+
+    @Test
+    public void testLoop3() {
+        SchedulePhase schedule = getFinalSchedule("testLoop3Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
+        assertEquals(7, schedule.getCFG().getBlocks().length);
+        assertReadWithinStartBlock(schedule, true);
+        assertReadWithinReturnBlock(schedule, false);
+    }
+
+    /**
      * Here the read should float to the end (into the same block as the return).
      */
     public static int testArrayCopySnippet(Integer intValue, char[] a, char[] b, int len) {
@@ -198,7 +220,7 @@
 
     @Test
     public void testArrayCopy() {
-        SchedulePhase schedule = getFinalSchedule("testArrayCopySnippet", TestMode.INLINED_WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testArrayCopySnippet", TestMode.INLINED_WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         StructuredGraph graph = schedule.getCFG().getStartBlock().getBeginNode().graph();
         ReturnNode ret = graph.getNodes(ReturnNode.class).first();
         assertTrue(ret.result() instanceof FloatingReadNode);
@@ -219,7 +241,7 @@
 
     @Test
     public void testIfRead1() {
-        SchedulePhase schedule = getFinalSchedule("testIfRead1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testIfRead1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(4, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, true);
         assertReadAndWriteInSameBlock(schedule, false);
@@ -240,7 +262,7 @@
 
     @Test
     public void testIfRead2() {
-        SchedulePhase schedule = getFinalSchedule("testIfRead2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testIfRead2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(4, schedule.getCFG().getBlocks().length);
         assertEquals(1, schedule.getCFG().graph.getNodes().filter(FloatingReadNode.class).count());
         assertReadWithinStartBlock(schedule, false);
@@ -262,7 +284,7 @@
 
     @Test
     public void testIfRead3() {
-        SchedulePhase schedule = getFinalSchedule("testIfRead3Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testIfRead3Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(4, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, false);
         assertReadWithinReturnBlock(schedule, true);
@@ -283,7 +305,7 @@
 
     @Test
     public void testIfRead4() {
-        SchedulePhase schedule = getFinalSchedule("testIfRead4Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testIfRead4Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertEquals(4, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, false);
         assertReadWithinReturnBlock(schedule, false);
@@ -309,7 +331,7 @@
 
     @Test
     public void testBlockSchedule() {
-        SchedulePhase schedule = getFinalSchedule("testBlockScheduleSnippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testBlockScheduleSnippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         StructuredGraph graph = schedule.getCFG().graph;
         NodeIterable<WriteNode> writeNodes = graph.getNodes().filter(WriteNode.class);
 
@@ -344,7 +366,7 @@
 
     @Test
     public void testProxy1() {
-        SchedulePhase schedule = getFinalSchedule("testProxy1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testProxy1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertReadWithinStartBlock(schedule, true); // read of container.a should be in start block
         /*
          * read of container.b for increment operation should be in return block. TODO: not sure
@@ -370,7 +392,62 @@
 
     @Test
     public void testProxy2() {
-        SchedulePhase schedule = getFinalSchedule("testProxy2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        SchedulePhase schedule = getFinalSchedule("testProxy2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
+        assertReadWithinStartBlock(schedule, false);
+        assertReadWithinReturnBlock(schedule, false);
+    }
+
+    private int hash = 0;
+    private final char[] value = new char[3];
+
+    public int testStringHashCodeSnippet() {
+        int h = hash;
+        if (h == 0 && value.length > 0) {
+            char[] val = value;
+
+            for (int i = 0; i < value.length; i++) {
+                h = 31 * h + val[i];
+            }
+            hash = h;
+        }
+        return h;
+    }
+
+    @Test
+    public void testStringHashCode() {
+        SchedulePhase schedule = getFinalSchedule("testStringHashCodeSnippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
+        assertReadWithinStartBlock(schedule, true);
+        assertReadWithinReturnBlock(schedule, false);
+
+        hash = 0x1337;
+        value[0] = 'a';
+        value[1] = 'b';
+        value[2] = 'c';
+        test("testStringHashCodeSnippet");
+    }
+
+    public static int testLoop4Snippet(int count) {
+        int[] a = new int[count];
+
+        for (int i = 0; i < a.length; i++) {
+            a[i] = i;
+        }
+
+        int i = 0;
+        int iwrap = count - 1;
+        int sum = 0;
+
+        while (i < count) {
+            sum += (a[i] + a[iwrap]) / 2;
+            iwrap = i;
+            i++;
+        }
+        return sum;
+    }
+
+    @Test
+    public void testLoop4() {
+        SchedulePhase schedule = getFinalSchedule("testLoop4Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL);
         assertReadWithinStartBlock(schedule, false);
         assertReadWithinReturnBlock(schedule, false);
     }
@@ -415,7 +492,7 @@
         assertTrue(!(inSame ^ schedule.getCFG().blockFor(read) == schedule.getCFG().blockFor(write)));
     }
 
-    private SchedulePhase getFinalSchedule(final String snippet, final TestMode mode, final MemoryScheduling memsched, final boolean printSchedule) {
+    private SchedulePhase getFinalSchedule(final String snippet, final TestMode mode, final MemoryScheduling memsched) {
         final StructuredGraph graph = parse(snippet);
         return Debug.scope("FloatingReadTest", graph, new Callable<SchedulePhase>() {
 
@@ -444,7 +521,12 @@
                 new FloatingReadPhase().apply(graph);
                 new RemoveValueProxyPhase().apply(graph);
 
-                SchedulePhase schedule = new SchedulePhase(SchedulingStrategy.LATEST_OUT_OF_LOOPS, memsched, printSchedule);
+                MidTierContext midContext = new MidTierContext(runtime(), assumptions, replacements, runtime().getTarget(), OptimisticOptimizations.ALL);
+                new GuardLoweringPhase().apply(graph, midContext);
+                new LoweringPhase(LoweringType.AFTER_GUARDS).apply(graph, midContext);
+                new LoweringPhase(LoweringType.AFTER_FSA).apply(graph, midContext);
+
+                SchedulePhase schedule = new SchedulePhase(SchedulingStrategy.LATEST_OUT_OF_LOOPS, memsched);
                 schedule.apply(graph);
                 assertEquals(1, graph.getNodes().filter(StartNode.class).count());
                 return schedule;
--- a/graal/com.oracle.graal.debug/src/com/oracle/graal/debug/Debug.java	Wed Jul 31 14:04:24 2013 -0700
+++ b/graal/com.oracle.graal.debug/src/com/oracle/graal/debug/Debug.java	Thu Aug 01 15:23:05 2013 -0700
@@ -164,6 +164,12 @@
         }
     }
 
+    public static void printf(String msg, Object... args) {
+        if (ENABLED && DebugScope.getInstance().isLogEnabled()) {
+            DebugScope.getInstance().printf(msg, args);
+        }
+    }
+
     public static void dump(Object object, String msg, Object... args) {
         if (ENABLED && DebugScope.getInstance().isDumpEnabled()) {
             DebugScope.getInstance().dump(object, msg, args);
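A minimal usage sketch for the new Debug.printf (the call site and identifiers below are hypothetical, not taken from the patch): the call is a no-op unless logging is enabled for the current debug scope, and DebugScope.printf additionally emits a "scope: <qualified name>" header the first time output is produced after a scope change.

    // inside a compiler phase executing under a Debug scope (names are illustrative)
    Debug.printf("processing %s in block %s\n", node, block);
    // with logging disabled for the scope nothing is printed; with logging enabled the
    // line is preceded by a "scope: ..." header whenever the enclosing scope changes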
--- a/graal/com.oracle.graal.debug/src/com/oracle/graal/debug/internal/DebugScope.java	Wed Jul 31 14:04:24 2013 -0700
+++ b/graal/com.oracle.graal.debug/src/com/oracle/graal/debug/internal/DebugScope.java	Thu Aug 01 15:23:05 2013 -0700
@@ -114,6 +114,16 @@
         }
     }
 
+    public void printf(String msg, Object... args) {
+        if (isLogEnabled()) {
+            if (lastLogScope.get() == null || !lastLogScope.get().qualifiedName.equals(this.qualifiedName)) {
+                output.println("scope: " + qualifiedName);
+                lastLogScope.set(this);
+            }
+            output.printf(msg, args);
+        }
+    }
+
     public void dump(Object object, String formatString, Object[] args) {
         if (isDumpEnabled()) {
             DebugConfig config = getConfig();
--- a/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/UnsafeArrayCopySnippets.java	Wed Jul 31 14:04:24 2013 -0700
+++ b/graal/com.oracle.graal.hotspot/src/com/oracle/graal/hotspot/replacements/UnsafeArrayCopySnippets.java	Thu Aug 01 15:23:05 2013 -0700
@@ -59,7 +59,7 @@
         long postLoopBytes;
 
         // We can easily vectorize the loop if both offsets have the same alignment.
-        if ((srcOffset % VECTOR_SIZE) == (destOffset % VECTOR_SIZE)) {
+        if (byteLength >= VECTOR_SIZE && (srcOffset % VECTOR_SIZE) == (destOffset % VECTOR_SIZE)) {
             preLoopBytes = NumUtil.roundUp(arrayBaseOffset + srcOffset, VECTOR_SIZE) - (arrayBaseOffset + srcOffset);
             postLoopBytes = (byteLength - preLoopBytes) % VECTOR_SIZE;
             mainLoopBytes = byteLength - preLoopBytes - postLoopBytes;
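For illustration, the byte split with concrete numbers (VECTOR_SIZE = 8 and the offsets are assumed values for this sketch, not taken from the snippet):

    // assume VECTOR_SIZE = 8, arrayBaseOffset + srcOffset = 19, byteLength = 30
    long preLoopBytes  = NumUtil.roundUp(19, 8) - 19;        // 24 - 19 = 5 bytes up to the next aligned address
    long postLoopBytes = (30 - preLoopBytes) % 8;            // 25 % 8  = 1 trailing byte
    long mainLoopBytes = 30 - preLoopBytes - postLoopBytes;  // 24 bytes copied in vector-sized chunks
    // with byteLength = 3 and the same offsets, the old condition still took this branch and
    // computed preLoopBytes = 5, more bytes than the copy contains; the added
    // byteLength >= VECTOR_SIZE check routes such short copies to the non-vectorized branch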
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/java/CheckCastNode.java	Wed Jul 31 14:04:24 2013 -0700
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/java/CheckCastNode.java	Thu Aug 01 15:23:05 2013 -0700
@@ -146,7 +146,8 @@
             return object();
         }
 
-        // remove checkcast if next node is a more specific checkcast
+        // if the previous node is also a checkcast, with a less precise but compatible type,
+        // replace both with one checkcast checking the more specific type.
         if (predecessor() instanceof CheckCastNode) {
             CheckCastNode ccn = (CheckCastNode) predecessor();
             if (ccn != null && ccn.type != null && ccn == object && ccn.forStoreCheck == forStoreCheck && ccn.type.isAssignableFrom(type)) {
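The effect of the strengthened canonicalization on a hypothetical pair of casts (classes A and B are illustrative, with B extending A so that A.isAssignableFrom(B) holds):

    static B castTwice(Object o) {
        A a = (A) o;   // CheckCastNode with the less precise type A
        return (B) a;  // CheckCastNode with the more specific type B, whose object input is the cast above
    }
    // after canonicalization a single checkcast against B remains: a successful
    // cast to B implies the cast to A, so the first check is redundant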
--- a/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/schedule/SchedulePhase.java	Wed Jul 31 14:04:24 2013 -0700
+++ b/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/schedule/SchedulePhase.java	Thu Aug 01 15:23:05 2013 -0700
@@ -202,10 +202,17 @@
 
         @Override
         protected Map<LocationIdentity, Node> merge(Block merge, List<Map<LocationIdentity, Node>> states) {
+            return merge(merge, states, false);
+        }
+
+        protected Map<LocationIdentity, Node> merge(Block merge, List<Map<LocationIdentity, Node>> states, boolean loopbegin) {
             assert merge.getBeginNode() instanceof MergeNode;
             MergeNode mergeNode = (MergeNode) merge.getBeginNode();
 
-            Map<LocationIdentity, Node> initKillMap = cloneState(getBlockToKillMap().get(merge));
+            Map<LocationIdentity, Node> initKillMap = new HashMap<>();
+            if (loopbegin) {
+                initKillMap.putAll(getBlockToKillMap().get(merge));
+            }
             for (Map<LocationIdentity, Node> state : states) {
                 for (LocationIdentity locid : state.keySet()) {
                     if (initKillMap.containsKey(locid)) {
@@ -219,6 +226,9 @@
             }
 
             getMergeToKillMap().set(mergeNode, cloneState(initKillMap));
+            if (!loopbegin) {
+                initKillMap.putAll(getBlockToKillMap().get(merge));
+            }
             return initKillMap;
         }
 
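A standalone sketch of the merge rule for per-location kill maps (String keys and plain Objects stand in for LocationIdentity and Node; the class and method names are assumptions): if different predecessors kill the same location with different nodes, the merged map records the merge node itself as the killer. The loopbegin flag only changes when the block's own kills are folded in: before merging the end states for a loop header, and only after mergeToKillMap has been recorded for a regular merge.

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    class KillMapMergeSketch {
        static Map<String, Object> merge(Object mergeNode, List<Map<String, Object>> predStates) {
            Map<String, Object> result = new HashMap<>();
            for (Map<String, Object> state : predStates) {
                for (Map.Entry<String, Object> e : state.entrySet()) {
                    Object known = result.get(e.getKey());
                    if (known == null) {
                        result.put(e.getKey(), e.getValue());   // first predecessor killing this location
                    } else if (!known.equals(e.getValue())) {
                        result.put(e.getKey(), mergeNode);      // killers disagree: the merge itself kills
                    }
                }
            }
            return result;
        }
    }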
@@ -232,7 +242,7 @@
             LoopInfo<Map<LocationIdentity, Node>> info = ReentrantBlockIterator.processLoop(this, loop, new HashMap<>(state));
 
             assert loop.header.getBeginNode() instanceof LoopBeginNode;
-            Map<LocationIdentity, Node> headerState = merge(loop.header, info.endStates);
+            Map<LocationIdentity, Node> headerState = merge(loop.header, info.endStates, true);
             getBlockToKillMap().put(loop.header, headerState);
 
             for (Map<LocationIdentity, Node> exitState : info.exitStates) {
@@ -258,7 +268,6 @@
     private final Map<FixedNode, List<FloatingNode>> phantomInputs = new IdentityHashMap<>();
     private final SchedulingStrategy selectedStrategy;
     private final MemoryScheduling memsched;
-    private final boolean dumpSchedule;
 
     public SchedulePhase() {
         this(OptScheduleOutOfLoops.getValue() ? SchedulingStrategy.LATEST_OUT_OF_LOOPS : SchedulingStrategy.LATEST);
@@ -276,13 +285,11 @@
             this.memsched = MemoryScheduling.NONE;
         }
         this.selectedStrategy = strategy;
-        this.dumpSchedule = false;
     }
 
-    public SchedulePhase(SchedulingStrategy strategy, MemoryScheduling memsched, boolean dumpSchedule) {
+    public SchedulePhase(SchedulingStrategy strategy, MemoryScheduling memsched) {
         this.selectedStrategy = strategy;
         this.memsched = memsched;
-        this.dumpSchedule = dumpSchedule;
     }
 
     @Override
@@ -347,52 +354,52 @@
     }
 
     private void printSchedule(String desc) {
-        if (dumpSchedule) {
-            TTY.print("=== %s / %s / %s (%s) ===\n", getCFG().getStartBlock().getBeginNode().graph(), selectedStrategy, memsched, desc);
-            for (Block b : getCFG().getBlocks()) {
-                TTY.print("==== b: %s. ", b);
-                TTY.print("dom: %s. ", b.getDominator());
-                TTY.print("post-dom: %s. ", b.getPostdominator());
-                TTY.print("preds: %s. ", b.getPredecessors());
-                TTY.print("succs: %s ====\n", b.getSuccessors());
-                BlockMap<Map<LocationIdentity, Node>> killMaps = getBlockToKillMap();
-                if (killMaps != null) {
-                    TTY.print("X block kills: \n");
-                    for (LocationIdentity locId : killMaps.get(b).keySet()) {
-                        TTY.print("X %s killed by %s\n", locId, killMaps.get(b).get(locId));
-                    }
-                }
-
-                if (getBlockToNodesMap().get(b) != null) {
-                    for (Node n : nodesFor(b)) {
-                        printNode(n);
-                    }
-                } else {
-                    for (Node n : b.getNodes()) {
-                        printNode(n);
-                    }
+        Debug.printf("=== %s / %s / %s (%s) ===\n", getCFG().getStartBlock().getBeginNode().graph(), selectedStrategy, memsched, desc);
+        for (Block b : getCFG().getBlocks()) {
+            Debug.printf("==== b: %s. ", b);
+            Debug.printf("dom: %s. ", b.getDominator());
+            Debug.printf("post-dom: %s. ", b.getPostdominator());
+            Debug.printf("preds: %s. ", b.getPredecessors());
+            Debug.printf("succs: %s ====\n", b.getSuccessors());
+            BlockMap<Map<LocationIdentity, Node>> killMaps = getBlockToKillMap();
+            if (killMaps != null) {
+                Debug.printf("X block kills: \n");
+                for (LocationIdentity locId : killMaps.get(b).keySet()) {
+                    Debug.printf("X %s killed by %s\n", locId, killMaps.get(b).get(locId));
                 }
             }
-            TTY.print("\n\n");
+
+            if (getBlockToNodesMap().get(b) != null) {
+                for (Node n : nodesFor(b)) {
+                    printNode(n);
+                }
+            } else {
+                for (Node n : b.getNodes()) {
+                    printNode(n);
+                }
+            }
         }
+        Debug.printf("\n\n");
     }
 
     private static void printNode(Node n) {
-        TTY.print("%s", n);
+        Debug.printf("%s", n);
         if (n instanceof MemoryCheckpoint.Single) {
-            TTY.print(" // kills %s", ((MemoryCheckpoint.Single) n).getLocationIdentity());
+            Debug.printf(" // kills %s", ((MemoryCheckpoint.Single) n).getLocationIdentity());
         } else if (n instanceof MemoryCheckpoint.Multi) {
-            TTY.print(" // kills ");
+            Debug.printf(" // kills ");
             for (LocationIdentity locid : ((MemoryCheckpoint.Multi) n).getLocationIdentities()) {
-                TTY.print("%s, ", locid);
+                Debug.printf("%s, ", locid);
             }
         } else if (n instanceof FloatingReadNode) {
             FloatingReadNode frn = (FloatingReadNode) n;
-            TTY.print(" // from %s", frn.location().getLocationIdentity());
-            TTY.print(", lastAccess: %s", frn.lastLocationAccess());
-            TTY.print(", object: %s", frn.object());
+            Debug.printf(" // from %s", frn.location().getLocationIdentity());
+            Debug.printf(", lastAccess: %s", frn.lastLocationAccess());
+            Debug.printf(", object: %s", frn.object());
+        } else if (n instanceof GuardNode) {
+            Debug.printf(", guard: %s", ((GuardNode) n).getGuard());
         }
-        TTY.print("\n");
+        Debug.printf("\n");
     }
 
     public ControlFlowGraph getCFG() {
@@ -464,39 +471,60 @@
                 break;
             case LATEST:
             case LATEST_OUT_OF_LOOPS:
-                if (memsched == MemoryScheduling.OPTIMAL && node instanceof FloatingReadNode) {
+                if (memsched == MemoryScheduling.OPTIMAL && node instanceof FloatingReadNode && ((FloatingReadNode) node).location().getLocationIdentity() != FINAL_LOCATION) {
                     block = optimalBlock((FloatingReadNode) node, strategy);
                 } else {
                     block = latestBlock(node, strategy);
-                }
-                if (block == null) {
-                    block = earliestBlock(node);
-                } else if (strategy == SchedulingStrategy.LATEST_OUT_OF_LOOPS && !(node instanceof VirtualObjectNode)) {
-                    // schedule at the latest position possible in the outermost loop possible
-                    Block earliestBlock = earliestBlock(node);
-                    Block before = block;
-                    block = scheduleOutOfLoops(node, block, earliestBlock);
-                    if (!earliestBlock.dominates(block)) {
-                        throw new SchedulingError("%s: Graph cannot be scheduled : inconsistent for %s, %d usages, (%s needs to dominate %s (before %s))", node.graph(), node, node.usages().count(),
-                                        earliestBlock, block, before);
+                    if (block == null) {
+                        block = earliestBlock(node);
+                    } else if (strategy == SchedulingStrategy.LATEST_OUT_OF_LOOPS && !(node instanceof VirtualObjectNode)) {
+                        // schedule at the latest position possible in the outermost loop possible
+                        Block earliestBlock = earliestBlock(node);
+                        Block before = block;
+                        block = scheduleOutOfLoops(node, block, earliestBlock);
+                        if (!earliestBlock.dominates(block)) {
+                            throw new SchedulingError("%s: Graph cannot be scheduled : inconsistent for %s, %d usages, (%s needs to dominate %s (before %s))", node.graph(), node,
+                                            node.usages().count(), earliestBlock, block, before);
+                        }
                     }
                 }
                 break;
             default:
                 throw new GraalInternalError("unknown scheduling strategy");
         }
+        assert earliestBlock(node).dominates(block) : "node " + node + " in block " + block + " is not dominated by earliest " + earliestBlock(node);
         cfg.getNodeToBlock().set(node, block);
         blockToNodesMap.get(block).add(node);
     }
 
-    // assign floating read to block according to kill maps
+    /**
+     * This method tries to find the latest position for a read, taking the information gathered
+     * by {@link NewMemoryScheduleClosure} into account.
+     * 
+     * The idea is to iterate the dominator tree starting with the latest schedule of the read.
+     * 
+     * <pre>
+     *    U      upperbound block, defined by last access location of the floating read
+     *    &#9650;
+     *    E      earliest block
+     *    &#9650;
+     *    O      optimal block, first block that contains a kill of the read's location
+     *    &#9650;
+     *    L      latest block
+     * </pre>
+     * 
+     * i.e. <code>upperbound `dom` earliest `dom` optimal `dom` latest</code>. However, there are
+     * cases where <code>earliest `dom` optimal</code> does not hold, because the position is
+     * (implicitly) bounded by an anchor of the read's guard. In such cases, the earliest schedule
+     * is taken.
+     * 
+     */
     private Block optimalBlock(FloatingReadNode n, SchedulingStrategy strategy) {
         assert memsched == MemoryScheduling.OPTIMAL;
 
         LocationIdentity locid = n.location().getLocationIdentity();
-        if (locid == FINAL_LOCATION) {
-            return latestBlock(n, strategy);
-        }
+        assert locid != FINAL_LOCATION;
+
         Node upperBound = n.lastLocationAccess();
         Block upperBoundBlock = forKillLocation(upperBound);
         Block earliestBlock = earliestBlock(n);
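A condensed sketch of the dominator walk described above, with merge handling and loop hoisting stripped out (variable names follow the surrounding code; the simplified control flow is an assumption, not the actual body of optimalBlock):

    // climb the dominator tree from the latest legal block; once the kill recorded for the
    // read's location matches the read's lastLocationAccess, place the read in the previously
    // visited (dominated) block, unless the earliest block or the upper bound is reached first
    Block current = currentBlock;   // start at the latest schedule
    Block previous = current;
    while (true) {
        Node lastKill = blockToKillMap.get(current).get(locid);
        if (lastKill.equals(upperBound)) {
            return previous;        // the dominated block just below kills the location
        }
        if (current == earliestBlock || current == upperBoundBlock) {
            return current;         // bounded by the earliest schedule or the last access
        }
        previous = current;
        current = current.getDominator();
    }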
@@ -506,35 +534,68 @@
         assert currentBlock != null && earliestBlock.dominates(currentBlock) : "earliest (" + earliestBlock + ") should dominate latest block (" + currentBlock + ")";
         Block previousBlock = currentBlock;
 
+        Debug.printf("processing %s (accessing %s): latest %s, earliest %s, upper bound %s (%s)\n", n, locid, currentBlock, earliestBlock, upperBoundBlock, upperBound);
+
         int iterations = 0;
-        // iterate the dominator tree until the last kill matches or reaching a upper bound
+        // iterate the dominator tree
         while (true) {
             iterations++;
+            assert earliestBlock.dominates(previousBlock) : "iterations: " + iterations;
             Node lastKill = blockToKillMap.get(currentBlock).get(locid);
+            boolean isAtEarliest = earliestBlock == previousBlock && previousBlock != currentBlock;
 
             if (lastKill.equals(upperBound)) {
                 // assign node to the block which kills the location
 
-                if (previousBlock.getBeginNode() instanceof MergeNode) {
-                    // merges kill locations right at the beginning of a block
+                boolean outOfLoop = false;
+
+                // schedule the read out of the loop if the kill maps and the earliest
+                // schedule allow it
+                if (currentBlock != earliestBlock && previousBlock != earliestBlock) {
+                    assert earliestBlock.dominates(currentBlock);
+                    Block t = currentBlock;
+                    while (t.getLoop() != null && t.getDominator() != null && earliestBlock.dominates(t)) {
+                        Block dom = t.getDominator();
+                        if (dom.getLoopDepth() < currentBlock.getLoopDepth() && blockToKillMap.get(dom).get(locid) == upperBound && earliestBlock.dominates(dom)) {
+                            printIterations(iterations, "moved out of loop, from " + currentBlock + " to " + dom);
+                            previousBlock = currentBlock = dom;
+                            outOfLoop = true;
+                        }
+                        t = dom;
+                    }
+                }
+
+                if (!outOfLoop && previousBlock.getBeginNode() instanceof MergeNode) {
+                    // merges kill locations right at the beginning of a block. If a merge is the
+                    // killing node, assign the read to the dominating block.
+
                     MergeNode merge = (MergeNode) previousBlock.getBeginNode();
-                    if (getMergeToKillMap().get(merge).containsKey(locid)) {
-                        printIterations(iterations, "kill by merge");
+                    Node killer = getMergeToKillMap().get(merge).get(locid);
+
+                    if (killer != null && killer == merge) {
+                        // check if we violate earliest schedule condition
+                        if (isAtEarliest) {
+                            printIterations(iterations, "earliest bound in merge: " + earliestBlock);
+                            return earliestBlock;
+                        }
+                        printIterations(iterations, "kill by merge: " + currentBlock);
                         return currentBlock;
                     }
                 }
 
-                printIterations(iterations, "regular kill");
+                // the current block matches the last access; that means the previous (dominated)
+                // block kills the location, so schedule the read in the previous block.
+                printIterations(iterations, "regular kill: " + previousBlock);
                 return previousBlock;
             }
 
-            if (upperBoundBlock == currentBlock) { // reached upper bound
-                printIterations(iterations, "upper bound");
-                return currentBlock;
+            if (isAtEarliest) {
+                printIterations(iterations, "earliest bound: " + earliestBlock);
+                return earliestBlock;
             }
 
-            if (earliestBlock == currentBlock) { // reached earliest block
-                printIterations(iterations, "earliest block");
+            if (upperBoundBlock == currentBlock) {
+                printIterations(iterations, "upper bound: " + currentBlock + ", previous: " + previousBlock);
                 return currentBlock;
             }
 
@@ -544,10 +605,8 @@
         }
     }
 
-    private void printIterations(int iterations, String desc) {
-        if (dumpSchedule) {
-            TTY.print("\niterations: %d,  %s\n\n", iterations, desc);
-        }
+    private static void printIterations(int iterations, String desc) {
+        Debug.printf("iterations: %d,  %s\n", iterations, desc);
     }
 
     /**
@@ -578,7 +637,7 @@
             }
         }
 
-        assert cdbc.block == null || earliestBlock(node).dominates(cdbc.block) : "failed to find correct latest schedule for " + node;
+        assert cdbc.block == null || earliestBlock(node).dominates(cdbc.block) : "failed to find correct latest schedule for " + node + ". cdbc: " + cdbc.block + ", earliest: " + earliestBlock(node);
         return cdbc.block;
     }
 
@@ -750,8 +809,9 @@
 
     private void sortNodesWithinBlocks(StructuredGraph graph, SchedulingStrategy strategy) {
         NodeBitMap visited = graph.createNodeBitMap();
+        NodeBitMap beforeLastLocation = graph.createNodeBitMap();
         for (Block b : cfg.getBlocks()) {
-            sortNodesWithinBlock(b, visited, strategy);
+            sortNodesWithinBlock(b, visited, beforeLastLocation, strategy);
             assert noDuplicatedNodesInBlock(b) : "duplicated nodes in " + b;
         }
     }
@@ -762,7 +822,7 @@
         return list.size() == hashset.size();
     }
 
-    private void sortNodesWithinBlock(Block b, NodeBitMap visited, SchedulingStrategy strategy) {
+    private void sortNodesWithinBlock(Block b, NodeBitMap visited, NodeBitMap beforeLastLocation, SchedulingStrategy strategy) {
         if (visited.isMarked(b.getBeginNode()) || cfg.blockFor(b.getBeginNode()) != b) {
             throw new SchedulingError();
         }
@@ -777,15 +837,37 @@
                 break;
             case LATEST:
             case LATEST_OUT_OF_LOOPS:
-                sortedInstructions = sortNodesWithinBlockLatest(b, visited);
+                sortedInstructions = sortNodesWithinBlockLatest(b, visited, beforeLastLocation);
                 break;
             default:
                 throw new GraalInternalError("unknown scheduling strategy");
         }
+        assert filterSchedulableNodes(blockToNodesMap.get(b)).size() == removeProxies(sortedInstructions).size() : "sorted block does not contain the same amount of nodes: " +
+                        filterSchedulableNodes(blockToNodesMap.get(b)) + " vs. " + removeProxies(sortedInstructions);
         assert sameOrderForFixedNodes(blockToNodesMap.get(b), sortedInstructions) : "fixed nodes in sorted block are not in the same order";
         blockToNodesMap.put(b, sortedInstructions);
     }
 
+    private static List<ScheduledNode> removeProxies(List<ScheduledNode> list) {
+        List<ScheduledNode> result = new ArrayList<>();
+        for (ScheduledNode n : list) {
+            if (!(n instanceof ProxyNode)) {
+                result.add(n);
+            }
+        }
+        return result;
+    }
+
+    private static List<ScheduledNode> filterSchedulableNodes(List<ScheduledNode> list) {
+        List<ScheduledNode> result = new ArrayList<>();
+        for (ScheduledNode n : list) {
+            if (!(n instanceof LocalNode) && !(n instanceof PhiNode)) {
+                result.add(n);
+            }
+        }
+        return result;
+    }
+
     private static boolean sameOrderForFixedNodes(List<ScheduledNode> fixed, List<ScheduledNode> sorted) {
         Iterator<ScheduledNode> fixedIterator = fixed.iterator();
         Iterator<ScheduledNode> sortedIterator = sorted.iterator();
@@ -813,12 +895,10 @@
      * all inputs. This means that a node is added to the list after all its inputs have been
      * processed.
      */
-    private List<ScheduledNode> sortNodesWithinBlockLatest(Block b, NodeBitMap visited) {
+    private List<ScheduledNode> sortNodesWithinBlockLatest(Block b, NodeBitMap visited, NodeBitMap beforeLastLocation) {
         List<ScheduledNode> instructions = blockToNodesMap.get(b);
         List<ScheduledNode> sortedInstructions = new ArrayList<>(blockToNodesMap.get(b).size() + 2);
         List<FloatingReadNode> reads = new ArrayList<>();
-        // TODO: need bitmap for just within a block
-        NodeBitMap beforeLastLocation = cfg.graph.createNodeBitMap();
 
         if (memsched == MemoryScheduling.OPTIMAL) {
             /*