# HG changeset patch # User Gilles Duboscq # Date 1311950213 -7200 # Node ID 6c5242cd893025281ec1d99a04f3f8328938e517 # Parent d007292cdb703bd1c899a8cabef55d237a2ea046 Remterialization during scheduling, can take live range into account diff -r d007292cdb70 -r 6c5242cd8930 graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/GraalOptions.java --- a/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/GraalOptions.java Thu Jul 28 11:33:23 2011 +0200 +++ b/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/GraalOptions.java Fri Jul 29 16:36:53 2011 +0200 @@ -166,6 +166,7 @@ public static boolean OptReadElimination = ____; public static boolean OptGVN = ____; public static boolean Rematerialize = ____; + public static boolean SplitMaterialization = ____; public static boolean OptCanonicalizer = true; public static boolean OptLoops = ____; public static boolean OptOptimisticSchedule = ____; diff -r d007292cdb70 -r 6c5242cd8930 graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/debug/IdealGraphPrinter.java --- a/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/debug/IdealGraphPrinter.java Thu Jul 28 11:33:23 2011 +0200 +++ b/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/debug/IdealGraphPrinter.java Fri Jul 29 16:36:53 2011 +0200 @@ -121,12 +121,21 @@ stream.printf(" %n", escape(title)); noBlockNodes.clear(); IdentifyBlocksPhase schedule = null; - try { - schedule = new IdentifyBlocksPhase(true); - schedule.apply(graph, false, false); - } catch (Throwable t) { - // nothing to do here... - //t.printStackTrace(); + if (debugObjects != null) { + for (Object obj : debugObjects.values()) { + if (obj instanceof IdentifyBlocksPhase) { + schedule = (IdentifyBlocksPhase) obj; + } + } + } + if (schedule == null) { + try { + schedule = new IdentifyBlocksPhase(true, false); + schedule.apply(graph, false, false); + } catch (Throwable t) { + // nothing to do here... + //t.printStackTrace(); + } } List loops = null; try { diff -r d007292cdb70 -r 6c5242cd8930 graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/graph/IR.java --- a/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/graph/IR.java Thu Jul 28 11:33:23 2011 +0200 +++ b/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/graph/IR.java Fri Jul 29 16:36:53 2011 +0200 @@ -131,7 +131,7 @@ new GlobalValueNumberingPhase().apply(graph); if (GraalOptions.Rematerialize) { //new Rematerialization2Phase().apply(graph); - new RematerializationPhase().apply(graph); + //new RematerializationPhase().apply(graph); } graph.stopRecordModifications(); } @@ -143,7 +143,7 @@ graph.recordModifications(EdgeType.USAGES); new GlobalValueNumberingPhase().apply(graph); if (GraalOptions.Rematerialize) { - new RematerializationPhase().apply(graph); + //new RematerializationPhase().apply(graph); } graph.stopRecordModifications(); } @@ -156,6 +156,12 @@ schedule.apply(graph); compilation.stats.loopCount = schedule.loopCount(); + if (compilation.compiler.isObserved()) { + Map debug = new HashMap(); + debug.put("schedule", schedule); + compilation.compiler.fireCompilationEvent(new CompilationEvent(compilation, "After IdentifyBlocksPhase", graph, true, false, debug)); + } + List blocks = schedule.getBlocks(); List lirBlocks = new ArrayList(); diff -r d007292cdb70 -r 6c5242cd8930 graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/ir/LoopEnd.java --- a/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/ir/LoopEnd.java Thu Jul 28 11:33:23 2011 +0200 +++ b/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/ir/LoopEnd.java Fri Jul 29 16:36:53 2011 +0200 @@ -84,11 +84,6 @@ } @Override - public String toString() { - return "LoopEnd:" + super.toString(); - } - - @Override public Iterable< ? extends Node> dataInputs() { return Collections.emptyList(); } diff -r d007292cdb70 -r 6c5242cd8930 graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/schedule/Block.java --- a/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/schedule/Block.java Thu Jul 28 11:33:23 2011 +0200 +++ b/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/schedule/Block.java Fri Jul 29 16:36:53 2011 +0200 @@ -33,13 +33,15 @@ private int blockID; private final List successors = new ArrayList(); private final List predecessors = new ArrayList(); + private final List dominated = new ArrayList(); private List instructions = new ArrayList(); private Block dominator; private Block javaBlock; - private final List dominators = new ArrayList(); private Anchor anchor; + private EndNode end; private int loopDepth = 0; private int loopIndex = -1; + private double probability; private Node firstNode; private Node lastNode; @@ -53,6 +55,14 @@ this.anchor = null; } + public EndNode end() { + return end; + } + + public void setEnd(EndNode end) { + this.end = end; + } + public Block javaBlock() { return javaBlock; } @@ -81,6 +91,17 @@ loopIndex = i; } + public double probability() { + return probability; + } + + + public void setProbability(double probability) { + if (probability > this.probability) { + this.probability = probability; + } + } + public Anchor createAnchor() { if (anchor == null) { if (firstNode instanceof Anchor) { @@ -131,11 +152,11 @@ assert this.dominator == null; assert dominator != null; this.dominator = dominator; - dominator.dominators.add(this); + dominator.dominated.add(this); } - public List getDominators() { - return Collections.unmodifiableList(dominators); + public List getDominated() { + return Collections.unmodifiableList(dominated); } public List getInstructions() { diff -r d007292cdb70 -r 6c5242cd8930 graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/schedule/IdentifyBlocksPhase.java --- a/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/schedule/IdentifyBlocksPhase.java Thu Jul 28 11:33:23 2011 +0200 +++ b/graal/com.oracle.max.graal.compiler/src/com/oracle/max/graal/compiler/schedule/IdentifyBlocksPhase.java Fri Jul 29 16:36:53 2011 +0200 @@ -23,6 +23,7 @@ package com.oracle.max.graal.compiler.schedule; import java.util.*; +import java.util.Map.Entry; import com.oracle.max.graal.compiler.*; import com.oracle.max.graal.compiler.debug.*; @@ -33,15 +34,28 @@ public class IdentifyBlocksPhase extends Phase { + private static final double LIVE_RANGE_COST_FACTOR = 0.1; + private static final double MATERIALIZATION_COST_FACTOR = 1.0 - LIVE_RANGE_COST_FACTOR; + private final List blocks = new ArrayList(); private NodeMap nodeToBlock; private Graph graph; private boolean scheduleAllNodes; + private boolean splitMaterialization; private int loopCount; public IdentifyBlocksPhase(boolean scheduleAllNodes) { + this(scheduleAllNodes, GraalOptions.SplitMaterialization && filter()); + } + + public static boolean filter() { + return true; //GraalCompilation.compilation().method.name().contains("mergeOrClone"); + } + + public IdentifyBlocksPhase(boolean scheduleAllNodes, boolean splitMaterialization) { super(scheduleAllNodes ? "FullSchedule" : "PartSchedule", false); this.scheduleAllNodes = scheduleAllNodes; + this.splitMaterialization = splitMaterialization; } @@ -91,6 +105,10 @@ } } + if (n instanceof EndNode) { + assert b.end() == null || n == b.end(); + b.setEnd((EndNode) n); + } if (b.lastNode() == null) { b.setFirstNode(n); b.setLastNode(n); @@ -118,7 +136,7 @@ System.out.println(nodeToBlock); System.out.println("Blocks :"); for (Block b : blocks) { - System.out.println(b + " [S:" + b.getSuccessors() + ", P:" + b.getPredecessors() + ", D:" + b.getDominators()); + System.out.println(b + " [S:" + b.getSuccessors() + ", P:" + b.getPredecessors() + ", D:" + b.getDominated()); System.out.println(" f " + b.firstNode()); for (Node n : b.getInstructions()) { System.out.println(" - " + n); @@ -140,6 +158,9 @@ block = null; } block = assignBlockNew(currentNode, block); + if (currentNode instanceof FixedNode) { + block.setProbability(((FixedNode) currentNode).probability()); + } if (currentNode.predecessors().size() == 0) { // Either dead code or at a merge node => stop iteration. break; @@ -175,7 +196,7 @@ if (scheduleAllNodes) { computeLoopInformation(); // Will make the graph cyclic. - assignLatestPossibleBlockToNodes(); + assignBlockToNodes(); sortNodesWithinBlocks(); } else { computeJavaBlocks(); @@ -283,132 +304,344 @@ } } - private void assignLatestPossibleBlockToNodes() { + private void assignBlockToNodes() { for (Node n : graph.getNodes()) { - assignLatestPossibleBlockToNode(n); + assignBlockToNode(n); } } - private Block assignLatestPossibleBlockToNode(Node n) { + private void assignBlockToNode(Node n) { if (n == null) { - return null; + return; } assert !n.isDeleted(); Block prevBlock = nodeToBlock.get(n); if (prevBlock != null) { - return prevBlock; + return; } + Block earliest = earliestBlock(n); + //TTY.println("Earliest for " + n + " : " + earliest); + // if in CFG, schedule at the latest position possible in the outermost loop possible + // if floating, use rematerialization to place the node, it tries to compute the value only when it will be used, + // in the block with the smallest probability (outside of loops), while minimizing live ranges + int suxCount = 0; + for (Node sux : n.successors()) { + if (sux != null) { + suxCount++; + } + } + if (!splitMaterialization || noRematerialization(n) || n.predecessors().size() > 0 || suxCount > 0) { + Block latestBlock = latestBlock(n); + if (latestBlock == null) { + latestBlock = earliest; + } + //TTY.println("Latest for " + n + " : " + latestBlock); + Block block = scheduleOutOfLoops(n, latestBlock, earliest); + nodeToBlock.set(n, block); + block.getInstructions().add(n); + } else { + Map> usages = computeUsages(n, earliest); + if (usages.isEmpty()) { + nodeToBlock.set(n, earliest); + earliest.getInstructions().add(n); + } else { + maybeMaterialize(n, earliest, usages, false); + } + } + } + + private BitMap usagesBitMap(Set usages) { + BitMap usagesBitMap = new BitMap(blocks.size()); + for (Usage u : usages) { + usagesBitMap.set(u.block.blockID()); + } + return usagesBitMap; + } + + + private void maybeMaterialize(Node node, Block block, Map> usages, boolean materializedInDominator) { + Set blockUsages = usages.get(block); + if (blockUsages == null || blockUsages.isEmpty()) { + return; + } + boolean forced = false; + if (!materializedInDominator) { + for (Usage usage : blockUsages) { + if (usage.block == block) { + forced = true; + break; + } + } + } + if (forced || materializationCost(block, blockUsages) < materializationCostAtChildren(block, usages)) { + Node n; + if (nodeToBlock.get(node) == null) { + n = node; + } else { + n = node.copy(); + for (int i = 0; i < node.inputs().size(); i++) { + n.inputs().set(i, node.inputs().get(i)); + } + for (Usage usage : blockUsages) { + patch(usage, node, n); + } + TTY.println("> Rematerialized " + node + " (new node id = " + n.id() + ") in " + block); + GraalMetrics.Rematerializations++; + nodeToBlock.grow(n); + } + materializedInDominator = true; + nodeToBlock.set(n, block); + block.getInstructions().add(n); + } + if (!materializedInDominator || GraalOptions.Rematerialize) { + for (Block child : block.getDominated()) { + maybeMaterialize(node, child, usages, materializedInDominator); + } + } + } + + private double materializationCostAtChildren(Block block, Map> usages) { + double cost = 0.0; + for (Block child : block.getDominated()) { + Set blockUsages = usages.get(child); + if (blockUsages != null && !blockUsages.isEmpty()) { // XXX should never be empty if not null + cost += materializationCost(child, blockUsages); + } + } + return cost; + } + + private double materializationCost(Block block, Set usages) { + //TODO node cost + double blockProbability = block.probability(); + if (blockProbability <= 0.0) { + blockProbability = 0.01; + } + return LIVE_RANGE_COST_FACTOR * liveRange(block, usages) * MATERIALIZATION_COST_FACTOR * blockProbability * 1.0; + } + + + private Block latestBlock(Node n) { Block block = null; for (Node succ : n.successors()) { - block = getCommonDominator(block, assignLatestPossibleBlockToNode(succ)); + if (succ == null) { + continue; + } + assignBlockToNode(succ); + block = getCommonDominator(block, nodeToBlock.get(succ)); } + ensureScheduledUsages(n); for (Node usage : n.usages()) { - if (usage instanceof Phi) { - Phi phi = (Phi) usage; - Merge merge = phi.merge(); - Block mergeBlock = nodeToBlock.get(merge); - assert mergeBlock != null : "no block for merge " + merge.id(); - for (int i = 0; i < phi.valueCount(); ++i) { - if (phi.valueAt(i) == n) { - if (mergeBlock.getPredecessors().size() <= i) { - TTY.println(merge.toString()); - TTY.println(phi.toString()); - TTY.println(merge.phiPredecessors().toString()); - TTY.println(phi.inputs().toString()); - TTY.println("value count: " + phi.valueCount()); - } - block = getCommonDominator(block, mergeBlock.getPredecessors().get(i)); - } - } - } else if (usage instanceof FrameState && ((FrameState) usage).block() != null) { - Merge merge = ((FrameState) usage).block(); - for (Node pred : merge.cfgPredecessors()) { - block = getCommonDominator(block, nodeToBlock.get(pred)); - } - } else if (usage instanceof LoopCounter) { - LoopCounter counter = (LoopCounter) usage; - if (n == counter.init() || n == counter.stride()) { - LoopBegin loopBegin = counter.loopBegin(); - Block mergeBlock = nodeToBlock.get(loopBegin); - block = getCommonDominator(block, mergeBlock.dominator()); - } - } else { - block = getCommonDominator(block, assignLatestPossibleBlockToNode(usage)); + for (Block usageBlock : blocksForUsage(n, usage)) { + block = getCommonDominator(block, usageBlock); } } - - - if (block != null) { - if (GraalOptions.OptOptimisticSchedule) { - block = scheduleOutOfLoops(n, block); - } - nodeToBlock.set(n, block); - block.getInstructions().add(n); - } return block; } - private Block scheduleOutOfLoops(Node n, Block latestBlock) { - Block cur = latestBlock; - while (cur.loopDepth() != 0) { - if (cur.isLoopHeader()) { - assert cur.getPredecessors().size() == 2 : cur.getPredecessors().size(); - if (canSchedule(n, cur.getPredecessors().get(0))) { - // TTY.println("can schedule out of loop!" + n); - return scheduleOutOfLoops(n, cur.getPredecessors().get(0)); - } else { - break; - } + private Block earliestBlock(Node n) { + Block earliest = nodeToBlock.get(n); + if (earliest != null) { + return earliest; + } + BitMap bits = new BitMap(blocks.size()); + ArrayList before = new ArrayList(n.predecessors().size() + n.inputs().size()); + before.addAll(n.predecessors()); + before.addAll(n.inputs()); + for (Node pred : before) { + if (pred == null) { + continue; } - Block prev = cur; - cur = cur.dominator(); - - // This must be a loop exit. - if (cur.loopDepth() > prev.loopDepth()) { -// TTY.println("break out because of different loop depth"); - break; + Block b = earliestBlock(pred); + if (!bits.get(b.blockID())) { + earliest = b; + do { + bits.set(b.blockID()); + b = b.dominator(); + } while(b != null && !bits.get(b.blockID())); } } - return latestBlock; + if (earliest == null) { + Block start = nodeToBlock.get(graph.start()); + assert start != null; + return start; + } + return earliest; + } + + + private Block scheduleOutOfLoops(Node n, Block latestBlock, Block earliest) { + assert latestBlock != null : "no latest : " + n; + Block cur = latestBlock; + while (cur.loopDepth() != 0 && cur != earliest && cur.dominator() != null && cur.dominator().loopDepth() <= cur.loopDepth()) { + cur = cur.dominator(); + } + return cur; } - private boolean canSchedule(Node n, Block block) { - Set allowedBlocks = new HashSet(); - Block cur = block; - while (cur != null) { - allowedBlocks.add(cur); - cur = cur.dominator(); + + private Set blocksForUsage(Node node, Node usage) { + Set blocks = new HashSet(2); + if (usage instanceof Phi) { + Phi phi = (Phi) usage; + Merge merge = phi.merge(); + Block mergeBlock = nodeToBlock.get(merge); + assert mergeBlock != null : "no block for merge " + merge.id(); + for (int i = 0; i < phi.valueCount(); ++i) { + if (phi.valueAt(i) == node) { + if (mergeBlock.getPredecessors().size() <= i) { + TTY.println(merge.toString()); + TTY.println(phi.toString()); + TTY.println(merge.phiPredecessors().toString()); + TTY.println(phi.inputs().toString()); + TTY.println("value count: " + phi.valueCount()); + } + blocks.add(mergeBlock.getPredecessors().get(i)); + } + } + } else if (usage instanceof FrameState && ((FrameState) usage).block() != null) { + Merge merge = ((FrameState) usage).block(); + Block block = null; + for (Node pred : merge.cfgPredecessors()) { + block = getCommonDominator(block, nodeToBlock.get(pred)); + } + blocks.add(block); + } else if (usage instanceof LoopCounter) { + LoopCounter counter = (LoopCounter) usage; + if (node == counter.init() || node == counter.stride()) { + LoopBegin loopBegin = counter.loopBegin(); + Block mergeBlock = nodeToBlock.get(loopBegin); + blocks.add(mergeBlock.dominator()); + } + } else { + assignBlockToNode(usage); + blocks.add(nodeToBlock.get(usage)); } - // Now we know the allowed blocks for inputs and predecessors. - return checkNodesAbove(allowedBlocks, n); + return blocks; } - private boolean checkNodesAbove(Set allowedBlocks, Node n) { - if (n == null) { - return true; - } - - if (nodeToBlock.get(n) != null) { - return allowedBlocks.contains(nodeToBlock.get(n)); + private void patch(Usage usage, Node original, Node patch) { + if (usage.node instanceof Phi) { + Phi phi = (Phi) usage.node; + Node pred; + Block phiBlock = nodeToBlock.get(phi); + if (phiBlock.isLoopHeader()) { + LoopBegin loopBegin = (LoopBegin) phiBlock.firstNode(); + if (usage.block == phiBlock.dominator()) { + pred = loopBegin.forwardEdge(); + } else { + assert usage.block == nodeToBlock.get(loopBegin.loopEnd()); + pred = loopBegin.loopEnd(); + } + } else { + pred = usage.block.end(); + } + int index = phi.merge().phiPredecessorIndex(pred); + phi.setValueAt(index, (Value) patch); } else { - assert !(n instanceof Phi) : ((Phi) n).merge(); - for (Node input : n.inputs()) { - if (!checkNodesAbove(allowedBlocks, input)) { - return false; - } - } - for (Node pred : n.predecessors()) { - if (!checkNodesAbove(allowedBlocks, pred)) { - return false; - } - } - return true; + usage.node.inputs().replace(original, patch); } } + private void ensureScheduledUsages(Node node) { + // assign block to all usages to ensure that any splitting/rematerialization is done on them + List usages = new ArrayList(node.usages()); + for (Node usage : usages) { + assignBlockToNode(usage); + } + // now true usages are ready + } + + private Map> computeUsages(Node node, Block from) { //TODO use a List instead of a Set here ? + Map> blockUsages = new HashMap>(); + ensureScheduledUsages(node); + for (Node usage : node.dataUsages()) { + for (Block b : blocksForUsage(node, usage)) { + addUsageToTree(usage, b, from, blockUsages); + } + } + /*TTY.println("Usages for " + node + " from " + from); + for (Entry> entry : blockUsages.entrySet()) { + TTY.println(" - " + entry.getKey() + " :"); + for (Usage usage : entry.getValue()) { + TTY.println(" + " + usage.node + " in " + usage.block); + } + }*/ + return blockUsages; + } + + private void addUsageToTree(Node usage, Block usageBlock, Block from, Map> blockUsages) { + Block current = usageBlock; + while (current != null) { + Set usages = blockUsages.get(current); + if (usages == null) { + usages = new HashSet(); + blockUsages.put(current, usages); + } + usages.add(new Usage(usageBlock, usage)); + if (current == from) { + current = null; + } else { + current = current.dominator(); + } + } + } + + private double liveRange(Block from, Set usages) { + BitMap notInRange = new BitMap(blocks.size()); + BitMap inRange = new BitMap(blocks.size()); + DoubleHolder range = new DoubleHolder(); + markInRange(from, inRange, notInRange, usagesBitMap(usages), range); + return range.value; + } + + private boolean noRematerialization(Node n) { + return n instanceof Local || n instanceof LocationNode || n instanceof Constant || n instanceof StateSplit || n instanceof FrameState; + } + + private static class Usage { + public final Block block; + public final Node node; + public Usage(Block block, Node node) { + this.block = block; + this.node = node; + } + } + + private static class DoubleHolder{ + public double value; + } + + private static boolean markInRange(Block b, BitMap inRange, BitMap notInRange, BitMap usages, DoubleHolder range) { + int blockID = b.blockID(); + if (notInRange.get(blockID)) { + return false; + } + if (inRange.get(blockID)) { + return true; + } + boolean ret = false; + if (usages.get(blockID)) { + ret = true; + } + if (!b.isLoopEnd()) { + for (Block sux : b.getSuccessors()) { + if (markInRange(sux, inRange, notInRange, usages, range)) { + ret = true; + } + } + } + if (ret) { + inRange.set(blockID); + range.value += b.probability() * (b.getInstructions().size() + 2); + } else { + notInRange.set(blockID); + } + return ret; + } private Block getCommonDominator(Block a, Block b) { if (a == null) {