changeset 10896:8106edbdeac9

add NewMemoryAwareScheduling (GRAAL-159)
author Bernhard Urban <bernhard.urban@jku.at>
date Fri, 26 Jul 2013 20:18:46 +0200
parents 0aba970c89f9
children 9c4f90e48c60
files graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/GraphScheduleTest.java graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/MemoryScheduleTest.java graal/com.oracle.graal.phases/src/com/oracle/graal/phases/graph/ScheduledNodeIterator.java graal/com.oracle.graal.phases/src/com/oracle/graal/phases/schedule/SchedulePhase.java
diffstat 4 files changed, 543 insertions(+), 35 deletions(-)
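
Before the diff, a minimal usage sketch of the API this changeset introduces, mirroring getFinalSchedule() in MemoryScheduleTest below. It is not part of the commit; the helper name and the assumption that the graph has already been lowered are illustrative, while SchedulePhase, MemoryScheduling, SchedulingStrategy and getBlockToKillMap() are taken from the diff itself.

    // Sketch only (not part of this changeset): drive the new memory-aware scheduling
    // on a lowered StructuredGraph, the same way the tests below do.
    private static SchedulePhase scheduleWithMemoryAwareness(StructuredGraph graph) {
        new FloatingReadPhase().apply(graph);      // turns memory reads into FloatingReadNodes
        new RemoveValueProxyPhase().apply(graph);
        SchedulePhase schedule = new SchedulePhase(SchedulingStrategy.LATEST_OUT_OF_LOOPS,
                        MemoryScheduling.OPTIMAL, false /* dumpSchedule */);
        schedule.apply(graph);
        // blockToKillMap (filled by NewMemoryScheduleClosure) now records, per block, the
        // last node killing each LocationIdentity; optimalBlock() uses it to place reads.
        return schedule;
    }
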
--- a/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/GraphScheduleTest.java	Fri Jul 26 20:18:42 2013 +0200
+++ b/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/GraphScheduleTest.java	Fri Jul 26 20:18:46 2013 +0200
@@ -36,6 +36,10 @@
     protected void assertOrderedAfterSchedule(StructuredGraph graph, Node a, Node b) {
         SchedulePhase ibp = new SchedulePhase();
         ibp.apply(graph);
+        assertOrderedAfterSchedule(ibp, a, b);
+    }
+
+    protected void assertOrderedAfterSchedule(SchedulePhase ibp, Node a, Node b) {
         NodeMap<Block> nodeToBlock = ibp.getCFG().getNodeToBlock();
         Block bBlock = nodeToBlock.get(b);
         Block aBlock = nodeToBlock.get(a);
--- a/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/MemoryScheduleTest.java	Fri Jul 26 20:18:42 2013 +0200
+++ b/graal/com.oracle.graal.compiler.test/src/com/oracle/graal/compiler/test/MemoryScheduleTest.java	Fri Jul 26 20:18:46 2013 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
 
 import static org.junit.Assert.*;
 
+import java.util.*;
 import java.util.concurrent.*;
 
 import org.junit.*;
@@ -31,13 +32,17 @@
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.debug.*;
 import com.oracle.graal.graph.*;
+import com.oracle.graal.graph.iterators.*;
 import com.oracle.graal.nodes.*;
+import com.oracle.graal.nodes.cfg.*;
 import com.oracle.graal.nodes.extended.*;
 import com.oracle.graal.nodes.spi.Lowerable.LoweringType;
 import com.oracle.graal.nodes.util.*;
 import com.oracle.graal.phases.*;
 import com.oracle.graal.phases.common.*;
 import com.oracle.graal.phases.schedule.*;
+import com.oracle.graal.phases.schedule.SchedulePhase.MemoryScheduling;
+import com.oracle.graal.phases.schedule.SchedulePhase.SchedulingStrategy;
 import com.oracle.graal.phases.tiers.*;
 
 /**
@@ -59,9 +64,12 @@
         public int a;
         public int b;
         public int c;
+
+        public Object obj;
     }
 
     private static final Container container = new Container();
+    private static final List<Container> containerList = new ArrayList<>();
 
     /**
      * In this test the read should be scheduled before the write.
@@ -77,8 +85,10 @@
     @Test
     public void testSimple() {
         for (TestMode mode : TestMode.values()) {
-            SchedulePhase schedule = getFinalSchedule("testSimpleSnippet", mode);
-            assertReadAfterWrite(schedule, false);
+            SchedulePhase schedule = getFinalSchedule("testSimpleSnippet", mode, MemoryScheduling.OPTIMAL, false);
+            StructuredGraph graph = schedule.getCFG().graph;
+            assertReadAndWriteInSameBlock(schedule, true);
+            assertOrderedAfterSchedule(schedule, graph.getNodes().filter(FloatingReadNode.class).first(), graph.getNodes().filter(WriteNode.class).first());
         }
     }
 
@@ -100,8 +110,9 @@
     @Test
     public void testSplit1() {
         for (TestMode mode : TestMode.values()) {
-            SchedulePhase schedule = getFinalSchedule("testSplitSnippet1", mode);
+            SchedulePhase schedule = getFinalSchedule("testSplitSnippet1", mode, MemoryScheduling.OPTIMAL, false);
             assertReadWithinStartBlock(schedule, true);
+            assertReadWithinReturnBlock(schedule, false);
         }
     }
 
@@ -122,8 +133,9 @@
 
     @Test
     public void testSplit2() {
-        SchedulePhase schedule = getFinalSchedule("testSplit2Snippet", TestMode.WITHOUT_FRAMESTATES);
+        SchedulePhase schedule = getFinalSchedule("testSplit2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
         assertReadWithinStartBlock(schedule, false);
+        assertReadWithinReturnBlock(schedule, true);
     }
 
     /**
@@ -145,9 +157,10 @@
 
     @Test
     public void testLoop1() {
-        SchedulePhase schedule = getFinalSchedule("testLoop1Snippet", TestMode.WITHOUT_FRAMESTATES);
+        SchedulePhase schedule = getFinalSchedule("testLoop1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
         assertEquals(6, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, true);
+        assertReadWithinReturnBlock(schedule, false);
     }
 
     /**
@@ -169,9 +182,10 @@
 
     @Test
     public void testLoop2() {
-        SchedulePhase schedule = getFinalSchedule("testLoop2Snippet", TestMode.WITHOUT_FRAMESTATES);
+        SchedulePhase schedule = getFinalSchedule("testLoop2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
         assertEquals(6, schedule.getCFG().getBlocks().length);
         assertReadWithinStartBlock(schedule, false);
+        assertReadWithinReturnBlock(schedule, true);
     }
 
     /**
@@ -184,23 +198,204 @@
 
     @Test
     public void testArrayCopy() {
-        SchedulePhase schedule = getFinalSchedule("testArrayCopySnippet", TestMode.INLINED_WITHOUT_FRAMESTATES);
+        SchedulePhase schedule = getFinalSchedule("testArrayCopySnippet", TestMode.INLINED_WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
         StructuredGraph graph = schedule.getCFG().getStartBlock().getBeginNode().graph();
         ReturnNode ret = graph.getNodes(ReturnNode.class).first();
         assertTrue(ret.result() instanceof FloatingReadNode);
         assertEquals(schedule.getCFG().blockFor(ret), schedule.getCFG().blockFor(ret.result()));
+        assertReadWithinReturnBlock(schedule, true);
+    }
+
+    /**
+     * Here the read should not float to the end.
+     */
+    public static int testIfRead1Snippet(int a) {
+        int res = container.a;
+        if (a < 0) {
+            container.a = 10;
+        }
+        return res;
+    }
+
+    @Test
+    public void testIfRead1() {
+        SchedulePhase schedule = getFinalSchedule("testIfRead1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        assertEquals(4, schedule.getCFG().getBlocks().length);
+        assertReadWithinStartBlock(schedule, true);
+        assertReadAndWriteInSameBlock(schedule, false);
+    }
+
+    /**
+     * Here the read should float into the else block.
+     */
+    public static int testIfRead2Snippet(int a) {
+        int res = 0;
+        if (a < 0) {
+            container.a = 10;
+        } else {
+            res = container.a;
+        }
+        return res;
+    }
+
+    @Test
+    public void testIfRead2() {
+        SchedulePhase schedule = getFinalSchedule("testIfRead2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        assertEquals(4, schedule.getCFG().getBlocks().length);
+        assertEquals(1, schedule.getCFG().graph.getNodes().filter(FloatingReadNode.class).count());
+        assertReadWithinStartBlock(schedule, false);
+        assertReadWithinReturnBlock(schedule, false);
+        assertReadAndWriteInSameBlock(schedule, false);
+    }
+
+    /**
+     * Here the read should float to the end, right before the write.
+     */
+    public static int testIfRead3Snippet(int a) {
+        if (a < 0) {
+            container.a = 10;
+        }
+        int res = container.a;
+        container.a = 20;
+        return res;
+    }
+
+    @Test
+    public void testIfRead3() {
+        SchedulePhase schedule = getFinalSchedule("testIfRead3Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        assertEquals(4, schedule.getCFG().getBlocks().length);
+        assertReadWithinStartBlock(schedule, false);
+        assertReadWithinReturnBlock(schedule, true);
+    }
+
+    /**
+     * Here the read should stay in the if branch (together with the write).
+     */
+    public static int testIfRead4Snippet(int a) {
+        if (a > 0) {
+            int res = container.a;
+            container.a = 0x20;
+            return res;
+        } else {
+            return 0x10;
+        }
+    }
+
+    @Test
+    public void testIfRead4() {
+        SchedulePhase schedule = getFinalSchedule("testIfRead4Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        assertEquals(4, schedule.getCFG().getBlocks().length);
+        assertReadWithinStartBlock(schedule, false);
+        assertReadWithinReturnBlock(schedule, false);
+        assertReadAndWriteInSameBlock(schedule, true);
     }
 
-    private void assertReadAfterWrite(SchedulePhase schedule, boolean readAfterWrite) {
-        boolean writeEncountered = false;
+    /**
+     * Tests scheduling within a single block.
+     */
+    public static int testBlockScheduleSnippet() {
+        int res = 0;
+        container.a = 0x00;
+        container.a = 0x10;
+        container.a = 0x20;
+        container.a = 0x30;
+        container.a = 0x40;
+        res = container.a;
+        container.a = 0x50;
+        container.a = 0x60;
+        container.a = 0x70;
+        return res;
+    }
+
+    @Test
+    public void testBlockSchedule() {
+        SchedulePhase schedule = getFinalSchedule("testBlockScheduleSnippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        StructuredGraph graph = schedule.getCFG().graph;
+        NodeIterable<WriteNode> writeNodes = graph.getNodes().filter(WriteNode.class);
+
         assertEquals(1, schedule.getCFG().getBlocks().length);
-        for (Node node : schedule.getBlockToNodesMap().get(schedule.getCFG().getStartBlock())) {
-            if (node instanceof WriteNode) {
-                writeEncountered = true;
-            } else if (node instanceof FloatingReadNode) {
-                assertEquals(readAfterWrite, writeEncountered);
+        assertEquals(8, writeNodes.count());
+        assertEquals(1, graph.getNodes().filter(FloatingReadNode.class).count());
+
+        FloatingReadNode read = graph.getNodes().filter(FloatingReadNode.class).first();
+
+        WriteNode[] writes = new WriteNode[8];
+        int i = 0;
+        for (WriteNode n : writeNodes) {
+            writes[i] = n;
+            i++;
+        }
+        assertOrderedAfterSchedule(schedule, writes[4], read);
+        assertOrderedAfterSchedule(schedule, read, writes[5]);
+        for (int j = 0; j < 7; j++) {
+            assertOrderedAfterSchedule(schedule, writes[j], writes[j + 1]);
+        }
+    }
+
+    /*
+     * The read of field a should be in the first block, the read of field b in the loop-begin block.
+     */
+    public static void testProxy1Snippet() {
+        while (container.a < container.b) {
+            container.b--;
+        }
+        container.b++;
+    }
+
+    @Test
+    public void testProxy1() {
+        SchedulePhase schedule = getFinalSchedule("testProxy1Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        assertReadWithinStartBlock(schedule, true); // read of container.a should be in start block
+        /*
+         * The read of container.b for the increment operation should be in the return block. TODO:
+         * not sure though; it could be replaced by the read of container.b from the loop header...
+         */
+        assertReadWithinReturnBlock(schedule, true);
+    }
+
+    public static void testProxy2Snippet() {
+        while (container.a < container.b) {
+            List<Container> list = new ArrayList<>(containerList);
+            while (container.c < list.size()) {
+                if (container.obj != null) {
+                    return;
+                }
+                container.c++;
+            }
+            container.a = 0;
+            container.b--;
+        }
+        container.b++;
+    }
+
+    @Test
+    public void testProxy2() {
+        SchedulePhase schedule = getFinalSchedule("testProxy2Snippet", TestMode.WITHOUT_FRAMESTATES, MemoryScheduling.OPTIMAL, false);
+        assertReadWithinStartBlock(schedule, false);
+        assertReadWithinReturnBlock(schedule, false);
+    }
+
+    private void assertReadWithinReturnBlock(SchedulePhase schedule, boolean withinReturnBlock) {
+        StructuredGraph graph = schedule.getCFG().graph;
+        assertEquals(graph.getNodes().filter(ReturnNode.class).count(), 1);
+
+        Block end = null;
+        outer: for (Block b : schedule.getCFG().getBlocks()) {
+            for (Node n : b.getNodes()) {
+                if (n instanceof ReturnNode) {
+                    end = b;
+                    break outer;
+                }
             }
         }
+        assertNotNull("no block with ReturnNode found", end);
+        boolean readEncountered = false;
+        for (Node node : schedule.getBlockToNodesMap().get(end)) {
+            if (node instanceof FloatingReadNode) {
+                readEncountered = true;
+            }
+        }
+        assertEquals(readEncountered, withinReturnBlock);
     }
 
     private void assertReadWithinStartBlock(SchedulePhase schedule, boolean withinStartBlock) {
@@ -213,7 +408,14 @@
         assertEquals(withinStartBlock, readEncountered);
     }
 
-    private SchedulePhase getFinalSchedule(final String snippet, final TestMode mode) {
+    private static void assertReadAndWriteInSameBlock(SchedulePhase schedule, boolean inSame) {
+        StructuredGraph graph = schedule.getCFG().graph;
+        FloatingReadNode read = graph.getNodes().filter(FloatingReadNode.class).first();
+        WriteNode write = graph.getNodes().filter(WriteNode.class).first();
+        assertTrue(!(inSame ^ schedule.getCFG().blockFor(read) == schedule.getCFG().blockFor(write)));
+    }
+
+    private SchedulePhase getFinalSchedule(final String snippet, final TestMode mode, final MemoryScheduling memsched, final boolean printSchedule) {
         final StructuredGraph graph = parse(snippet);
         return Debug.scope("FloatingReadTest", graph, new Callable<SchedulePhase>() {
 
@@ -237,12 +439,14 @@
                         }
                     }
                 }
+                Debug.dump(graph, "after removal of framestates");
+
                 new FloatingReadPhase().apply(graph);
-
                 new RemoveValueProxyPhase().apply(graph);
 
-                SchedulePhase schedule = new SchedulePhase();
+                SchedulePhase schedule = new SchedulePhase(SchedulingStrategy.LATEST_OUT_OF_LOOPS, memsched, printSchedule);
                 schedule.apply(graph);
+                assertEquals(1, graph.getNodes().filter(StartNode.class).count());
                 return schedule;
             }
         });
--- a/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/graph/ScheduledNodeIterator.java	Fri Jul 26 20:18:42 2013 +0200
+++ b/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/graph/ScheduledNodeIterator.java	Fri Jul 26 20:18:46 2013 +0200
@@ -60,6 +60,7 @@
             }
             processNode(node);
         }
+        assert reconnect == null;
     }
 
     protected void insert(FixedNode start, FixedWithNextNode end) {
--- a/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/schedule/SchedulePhase.java	Fri Jul 26 20:18:42 2013 +0200
+++ b/graal/com.oracle.graal.phases/src/com/oracle/graal/phases/schedule/SchedulePhase.java	Fri Jul 26 20:18:46 2013 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -173,6 +173,78 @@
         }
     }
 
+    private class NewMemoryScheduleClosure extends BlockIteratorClosure<Map<LocationIdentity, Node>> {
+
+        @Override
+        protected Map<LocationIdentity, Node> getInitialState() {
+            return cloneState(blockToKillMap.get(getCFG().getStartBlock()));
+        }
+
+        @Override
+        protected Map<LocationIdentity, Node> processBlock(Block block, Map<LocationIdentity, Node> currentState) {
+            Map<LocationIdentity, Node> initKillMap = getBlockToKillMap().get(block);
+            initKillMap.putAll(currentState);
+
+            for (Node node : block.getNodes()) {
+                if (node instanceof MemoryCheckpoint.Single) {
+                    LocationIdentity identity = ((MemoryCheckpoint.Single) node).getLocationIdentity();
+                    initKillMap.put(identity, node);
+                } else if (node instanceof MemoryCheckpoint.Multi) {
+                    for (LocationIdentity identity : ((MemoryCheckpoint.Multi) node).getLocationIdentities()) {
+                        initKillMap.put(identity, node);
+                    }
+                }
+                assert MemoryCheckpoint.TypeAssertion.correctType(node);
+            }
+
+            return cloneState(initKillMap);
+        }
+
+        @Override
+        protected Map<LocationIdentity, Node> merge(Block merge, List<Map<LocationIdentity, Node>> states) {
+            assert merge.getBeginNode() instanceof MergeNode;
+            MergeNode mergeNode = (MergeNode) merge.getBeginNode();
+
+            Map<LocationIdentity, Node> initKillMap = cloneState(getBlockToKillMap().get(merge));
+            for (Map<LocationIdentity, Node> state : states) {
+                for (LocationIdentity locid : state.keySet()) {
+                    if (initKillMap.containsKey(locid)) {
+                        if (!initKillMap.get(locid).equals(state.get(locid))) {
+                            initKillMap.put(locid, mergeNode);
+                        }
+                    } else {
+                        initKillMap.put(locid, state.get(locid));
+                    }
+                }
+            }
+
+            getMergeToKillMap().set(mergeNode, cloneState(initKillMap));
+            return initKillMap;
+        }
+
+        @Override
+        protected Map<LocationIdentity, Node> cloneState(Map<LocationIdentity, Node> state) {
+            return new HashMap<>(state);
+        }
+
+        @Override
+        protected List<Map<LocationIdentity, Node>> processLoop(Loop loop, Map<LocationIdentity, Node> state) {
+            LoopInfo<Map<LocationIdentity, Node>> info = ReentrantBlockIterator.processLoop(this, loop, new HashMap<>(state));
+
+            assert loop.header.getBeginNode() instanceof LoopBeginNode;
+            Map<LocationIdentity, Node> headerState = merge(loop.header, info.endStates);
+            getBlockToKillMap().put(loop.header, headerState);
+
+            for (Map<LocationIdentity, Node> exitState : info.exitStates) {
+                for (LocationIdentity key : headerState.keySet()) {
+                    exitState.put(key, headerState.get(key));
+                }
+            }
+
+            return info.exitStates;
+        }
+    }
+
     private ControlFlowGraph cfg;
     private NodeMap<Block> earliestCache;
 
@@ -180,10 +252,13 @@
      * Map from blocks to the nodes in each block.
      */
     private BlockMap<List<ScheduledNode>> blockToNodesMap;
+    private BlockMap<Map<LocationIdentity, Node>> blockToKillMap;
+    private NodeMap<Map<LocationIdentity, Node>> mergeToKillMap;
     private final Map<FloatingNode, List<FixedNode>> phantomUsages = new IdentityHashMap<>();
     private final Map<FixedNode, List<FloatingNode>> phantomInputs = new IdentityHashMap<>();
+    private final SchedulingStrategy selectedStrategy;
     private final MemoryScheduling memsched;
-    private final SchedulingStrategy selectedStrategy;
+    private final boolean dumpSchedule;
 
     public SchedulePhase() {
         this(OptScheduleOutOfLoops.getValue() ? SchedulingStrategy.LATEST_OUT_OF_LOOPS : SchedulingStrategy.LATEST);
@@ -201,11 +276,13 @@
             this.memsched = MemoryScheduling.NONE;
         }
         this.selectedStrategy = strategy;
+        this.dumpSchedule = false;
     }
 
-    public SchedulePhase(SchedulingStrategy strategy, MemoryScheduling memsched) {
+    public SchedulePhase(SchedulingStrategy strategy, MemoryScheduling memsched, boolean dumpSchedule) {
         this.selectedStrategy = strategy;
         this.memsched = memsched;
+        this.dumpSchedule = dumpSchedule;
     }
 
     @Override
@@ -226,14 +303,98 @@
 
             assignBlockToNodes(graph, selectedStrategy);
             sortNodesWithinBlocks(graph, selectedStrategy);
+            printSchedule("after sorting nodes within blocks");
         } else if (memsched == MemoryScheduling.OPTIMAL && selectedStrategy != SchedulingStrategy.EARLIEST && graph.getNodes(FloatingReadNode.class).isNotEmpty()) {
-            // TODO
+            mergeToKillMap = graph.createNodeMap();
+
+            blockToKillMap = new BlockMap<>(cfg);
+            for (Block b : cfg.getBlocks()) {
+                blockToKillMap.put(b, new HashMap<LocationIdentity, Node>());
+            }
+
+            // initialize killMaps with lastLocationAccess
+            for (FloatingReadNode n : graph.getNodes(FloatingReadNode.class)) {
+                if (n.location().getLocationIdentity() == FINAL_LOCATION) {
+                    continue;
+                }
+                Node first = n.lastLocationAccess();
+                assert first != null;
+
+                Map<LocationIdentity, Node> killMap = blockToKillMap.get(forKillLocation(first));
+                killMap.put(n.location().getLocationIdentity(), first);
+            }
+
+            // distribute and compute killMaps for all blocks
+            NewMemoryScheduleClosure closure = new NewMemoryScheduleClosure();
+            ReentrantBlockIterator.apply(closure, getCFG().getStartBlock());
+            printSchedule("after computing killMaps");
+
+            assignBlockToNodes(graph, selectedStrategy);
+            printSchedule("after assign nodes to blocks");
+
+            sortNodesWithinBlocks(graph, selectedStrategy);
+            printSchedule("after sorting nodes within blocks");
         } else {
             assignBlockToNodes(graph, selectedStrategy);
             sortNodesWithinBlocks(graph, selectedStrategy);
         }
     }
 
+    private Block forKillLocation(Node n) {
+        Block b = cfg.getNodeToBlock().get(n);
+        assert b != null : "all lastAccess locations should have a block assignment from CFG";
+        return b;
+    }
+
+    private void printSchedule(String desc) {
+        if (dumpSchedule) {
+            TTY.print("=== %s / %s / %s (%s) ===\n", getCFG().getStartBlock().getBeginNode().graph(), selectedStrategy, memsched, desc);
+            for (Block b : getCFG().getBlocks()) {
+                TTY.print("==== b: %s. ", b);
+                TTY.print("dom: %s. ", b.getDominator());
+                TTY.print("post-dom: %s. ", b.getPostdominator());
+                TTY.print("preds: %s. ", b.getPredecessors());
+                TTY.print("succs: %s ====\n", b.getSuccessors());
+                BlockMap<Map<LocationIdentity, Node>> killMaps = getBlockToKillMap();
+                if (killMaps != null) {
+                    TTY.print("X block kills: \n");
+                    for (LocationIdentity locId : killMaps.get(b).keySet()) {
+                        TTY.print("X %s killed by %s\n", locId, killMaps.get(b).get(locId));
+                    }
+                }
+
+                if (getBlockToNodesMap().get(b) != null) {
+                    for (Node n : nodesFor(b)) {
+                        printNode(n);
+                    }
+                } else {
+                    for (Node n : b.getNodes()) {
+                        printNode(n);
+                    }
+                }
+            }
+            TTY.print("\n\n");
+        }
+    }
+
+    private static void printNode(Node n) {
+        TTY.print("%s", n);
+        if (n instanceof MemoryCheckpoint.Single) {
+            TTY.print(" // kills %s", ((MemoryCheckpoint.Single) n).getLocationIdentity());
+        } else if (n instanceof MemoryCheckpoint.Multi) {
+            TTY.print(" // kills ");
+            for (LocationIdentity locid : ((MemoryCheckpoint.Multi) n).getLocationIdentities()) {
+                TTY.print("%s, ", locid);
+            }
+        } else if (n instanceof FloatingReadNode) {
+            FloatingReadNode frn = (FloatingReadNode) n;
+            TTY.print(" // from %s", frn.location().getLocationIdentity());
+            TTY.print(", lastAccess: %s", frn.lastLocationAccess());
+            TTY.print(", object: %s", frn.object());
+        }
+        TTY.print("\n");
+    }
+
     public ControlFlowGraph getCFG() {
         return cfg;
     }
@@ -245,6 +406,14 @@
         return blockToNodesMap;
     }
 
+    public BlockMap<Map<LocationIdentity, Node>> getBlockToKillMap() {
+        return blockToKillMap;
+    }
+
+    public NodeMap<Map<LocationIdentity, Node>> getMergeToKillMap() {
+        return mergeToKillMap;
+    }
+
     /**
      * Gets the nodes in a given block.
      */
@@ -295,16 +464,21 @@
                 break;
             case LATEST:
             case LATEST_OUT_OF_LOOPS:
-                block = latestBlock(node, strategy);
+                if (memsched == MemoryScheduling.OPTIMAL && node instanceof FloatingReadNode) {
+                    block = optimalBlock((FloatingReadNode) node, strategy);
+                } else {
+                    block = latestBlock(node, strategy);
+                }
                 if (block == null) {
                     block = earliestBlock(node);
                 } else if (strategy == SchedulingStrategy.LATEST_OUT_OF_LOOPS && !(node instanceof VirtualObjectNode)) {
                     // schedule at the latest position possible in the outermost loop possible
                     Block earliestBlock = earliestBlock(node);
+                    Block before = block;
                     block = scheduleOutOfLoops(node, block, earliestBlock);
                     if (!earliestBlock.dominates(block)) {
-                        throw new SchedulingError("%s: Graph cannot be scheduled : inconsistent for %s, %d usages, (%s needs to dominate %s)", node.graph(), node, node.usages().count(),
-                                        earliestBlock, block);
+                        throw new SchedulingError("%s: Graph cannot be scheduled : inconsistent for %s, %d usages, (%s needs to dominate %s (before %s))", node.graph(), node, node.usages().count(),
+                                        earliestBlock, block, before);
                     }
                 }
                 break;
@@ -315,6 +489,67 @@
         blockToNodesMap.get(block).add(node);
     }
 
+    // assign a floating read to a block according to the kill maps
+    private Block optimalBlock(FloatingReadNode n, SchedulingStrategy strategy) {
+        assert memsched == MemoryScheduling.OPTIMAL;
+
+        LocationIdentity locid = n.location().getLocationIdentity();
+        if (locid == FINAL_LOCATION) {
+            return latestBlock(n, strategy);
+        }
+        Node upperBound = n.lastLocationAccess();
+        Block upperBoundBlock = forKillLocation(upperBound);
+        Block earliestBlock = earliestBlock(n);
+        assert upperBoundBlock.dominates(earliestBlock) : "upper bound (" + upperBoundBlock + ") should dominate earliest (" + earliestBlock + ")";
+
+        Block currentBlock = latestBlock(n, strategy);
+        assert currentBlock != null && earliestBlock.dominates(currentBlock) : "earliest (" + earliestBlock + ") should dominate latest block (" + currentBlock + ")";
+        Block previousBlock = currentBlock;
+
+        int iterations = 0;
+        // walk up the dominator tree until the last kill matches or an upper bound is reached
+        while (true) {
+            iterations++;
+            Node lastKill = blockToKillMap.get(currentBlock).get(locid);
+
+            if (lastKill.equals(upperBound)) {
+                // assign node to the block which kills the location
+
+                if (previousBlock.getBeginNode() instanceof MergeNode) {
+                    // merges kill locations right at the beginning of a block
+                    MergeNode merge = (MergeNode) previousBlock.getBeginNode();
+                    if (getMergeToKillMap().get(merge).containsKey(locid)) {
+                        printIterations(iterations, "kill by merge");
+                        return currentBlock;
+                    }
+                }
+
+                printIterations(iterations, "regular kill");
+                return previousBlock;
+            }
+
+            if (upperBoundBlock == currentBlock) { // reached upper bound
+                printIterations(iterations, "upper bound");
+                return currentBlock;
+            }
+
+            if (earliestBlock == currentBlock) { // reached earliest block
+                printIterations(iterations, "earliest block");
+                return currentBlock;
+            }
+
+            previousBlock = currentBlock;
+            currentBlock = currentBlock.getDominator();
+            assert currentBlock != null;
+        }
+    }
+
+    private void printIterations(int iterations, String desc) {
+        if (dumpSchedule) {
+            TTY.print("\niterations: %d,  %s\n\n", iterations, desc);
+        }
+    }
+
     /**
      * Calculates the last block that the given node could be scheduled in, i.e., the common
      * dominator of all usages. To do so all usages are also assigned to blocks.
@@ -343,6 +578,7 @@
             }
         }
 
+        assert cdbc.block == null || earliestBlock(node).dominates(cdbc.block) : "failed to find correct latest schedule for " + node;
         return cdbc.block;
     }
 
@@ -516,9 +752,16 @@
         NodeBitMap visited = graph.createNodeBitMap();
         for (Block b : cfg.getBlocks()) {
             sortNodesWithinBlock(b, visited, strategy);
+            assert noDuplicatedNodesInBlock(b) : "duplicated nodes in " + b;
         }
     }
 
+    private boolean noDuplicatedNodesInBlock(Block b) {
+        List<ScheduledNode> list = blockToNodesMap.get(b);
+        HashSet<ScheduledNode> hashset = new HashSet<>(list);
+        return list.size() == hashset.size();
+    }
+
     private void sortNodesWithinBlock(Block b, NodeBitMap visited, SchedulingStrategy strategy) {
         if (visited.isMarked(b.getBeginNode()) || cfg.blockFor(b.getBeginNode()) != b) {
             throw new SchedulingError();
@@ -573,10 +816,32 @@
     private List<ScheduledNode> sortNodesWithinBlockLatest(Block b, NodeBitMap visited) {
         List<ScheduledNode> instructions = blockToNodesMap.get(b);
         List<ScheduledNode> sortedInstructions = new ArrayList<>(blockToNodesMap.get(b).size() + 2);
+        List<FloatingReadNode> reads = new ArrayList<>();
+        // TODO: need a bitmap scoped to just this block
+        NodeBitMap beforeLastLocation = cfg.graph.createNodeBitMap();
 
+        if (memsched == MemoryScheduling.OPTIMAL) {
+            /*
+             * TODO: add assert for invariant
+             * "floatingreads occur always after memory checkpoints in unsorted list"
+             */
+            for (ScheduledNode i : instructions) {
+                if (i instanceof FloatingReadNode) {
+                    FloatingReadNode frn = (FloatingReadNode) i;
+                    if (frn.location().getLocationIdentity() != FINAL_LOCATION) {
+                        reads.add(frn);
+                        if (nodesFor(b).contains(frn.lastLocationAccess())) {
+                            assert !beforeLastLocation.isMarked(frn);
+                            beforeLastLocation.mark(frn);
+                        }
+                    }
+                }
+            }
+        }
         for (ScheduledNode i : instructions) {
-            addToLatestSorting(b, i, sortedInstructions, visited);
+            addToLatestSorting(b, i, sortedInstructions, visited, reads, beforeLastLocation);
         }
+        assert reads.size() == 0 : "not all reads are scheduled";
 
         // Make sure that last node gets really last (i.e. when a frame state successor hangs off
         // it).
@@ -605,7 +870,23 @@
         return sortedInstructions;
     }
 
-    private void addUnscheduledToLatestSorting(Block b, VirtualState state, List<ScheduledNode> sortedInstructions, NodeBitMap visited) {
+    private void processKillLocation(Block b, Node node, LocationIdentity identity, List<ScheduledNode> sortedInstructions, NodeBitMap visited, List<FloatingReadNode> reads,
+                    NodeBitMap beforeLastLocation) {
+        for (FloatingReadNode frn : new ArrayList<>(reads)) { // TODO: change to iterator?
+            LocationIdentity readLocation = frn.location().getLocationIdentity();
+            assert readLocation != FINAL_LOCATION;
+            if (frn.lastLocationAccess() == node) {
+                assert identity == ANY_LOCATION || readLocation == identity : "location doesn't match: " + readLocation + ", " + identity;
+                beforeLastLocation.clear(frn);
+            } else if (!beforeLastLocation.isMarked(frn) && (readLocation == identity || (!(node instanceof StartNode) && ANY_LOCATION == identity))) {
+                // TODO: replace instanceof check with object identity check
+                reads.remove(frn);
+                addToLatestSorting(b, frn, sortedInstructions, visited, reads, beforeLastLocation);
+            }
+        }
+    }
+
+    private void addUnscheduledToLatestSorting(Block b, VirtualState state, List<ScheduledNode> sortedInstructions, NodeBitMap visited, List<FloatingReadNode> reads, NodeBitMap beforeLastLocation) {
         if (state != null) {
             // UnscheduledNodes should never be marked as visited.
             if (visited.isMarked(state)) {
@@ -614,15 +895,15 @@
 
             for (Node input : state.inputs()) {
                 if (input instanceof VirtualState) {
-                    addUnscheduledToLatestSorting(b, (VirtualState) input, sortedInstructions, visited);
+                    addUnscheduledToLatestSorting(b, (VirtualState) input, sortedInstructions, visited, reads, beforeLastLocation);
                 } else {
-                    addToLatestSorting(b, (ScheduledNode) input, sortedInstructions, visited);
+                    addToLatestSorting(b, (ScheduledNode) input, sortedInstructions, visited, reads, beforeLastLocation);
                 }
             }
         }
     }
 
-    private void addToLatestSorting(Block b, ScheduledNode i, List<ScheduledNode> sortedInstructions, NodeBitMap visited) {
+    private void addToLatestSorting(Block b, ScheduledNode i, List<ScheduledNode> sortedInstructions, NodeBitMap visited, List<FloatingReadNode> reads, NodeBitMap beforeLastLocation) {
         if (i == null || visited.isMarked(i) || cfg.getNodeToBlock().get(i) != b || i instanceof PhiNode || i instanceof LocalNode) {
             return;
         }
@@ -633,22 +914,40 @@
                 assert state == null;
                 state = (FrameState) input;
             } else {
-                addToLatestSorting(b, (ScheduledNode) input, sortedInstructions, visited);
+                addToLatestSorting(b, (ScheduledNode) input, sortedInstructions, visited, reads, beforeLastLocation);
             }
         }
         List<FloatingNode> inputs = phantomInputs.get(i);
         if (inputs != null) {
             for (FloatingNode input : inputs) {
-                addToLatestSorting(b, input, sortedInstructions, visited);
+                addToLatestSorting(b, input, sortedInstructions, visited, reads, beforeLastLocation);
             }
         }
 
-        addToLatestSorting(b, (ScheduledNode) i.predecessor(), sortedInstructions, visited);
+        if (memsched == MemoryScheduling.OPTIMAL && reads.size() != 0) {
+            if (i instanceof MemoryCheckpoint.Single) {
+                LocationIdentity identity = ((MemoryCheckpoint.Single) i).getLocationIdentity();
+                processKillLocation(b, i, identity, sortedInstructions, visited, reads, beforeLastLocation);
+            } else if (i instanceof MemoryCheckpoint.Multi) {
+                for (LocationIdentity identity : ((MemoryCheckpoint.Multi) i).getLocationIdentities()) {
+                    processKillLocation(b, i, identity, sortedInstructions, visited, reads, beforeLastLocation);
+                }
+            }
+            assert MemoryCheckpoint.TypeAssertion.correctType(i);
+        }
+
+        addToLatestSorting(b, (ScheduledNode) i.predecessor(), sortedInstructions, visited, reads, beforeLastLocation);
         visited.mark(i);
-        addUnscheduledToLatestSorting(b, state, sortedInstructions, visited);
+        addUnscheduledToLatestSorting(b, state, sortedInstructions, visited, reads, beforeLastLocation);
 
         // Now predecessors and inputs are scheduled => we can add this node.
-        sortedInstructions.add(i);
+        if (!sortedInstructions.contains(i)) {
+            sortedInstructions.add(i);
+        }
+
+        if (i instanceof FloatingReadNode) {
+            reads.remove(i);
+        }
     }
 
     /**