changeset 5786:f69a406355b2

new tail duplication phase
author Lukas Stadler <lukas.stadler@jku.at>
date Fri, 06 Jul 2012 16:28:12 +0200
parents e5f0cf5b5627
children 0017be5ba31e
files graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalCompiler.java graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalOptions.java graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/phases/TailDuplicationPhase.java graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/util/InliningUtil.java graal/com.oracle.graal.graph/src/com/oracle/graal/graph/NodeBitMap.java graal/com.oracle.graal.java/src/com/oracle/graal/java/GraphBuilderPhase.java graal/com.oracle.graal.lir/src/com/oracle/graal/lir/cfg/Block.java graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/PhiNode.java graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/ValueAnchorNode.java graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/util/GraphUtil.java
diffstat 10 files changed, 612 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalCompiler.java	Fri Jul 06 16:25:59 2012 +0200
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalCompiler.java	Fri Jul 06 16:28:12 2012 +0200
@@ -170,9 +170,17 @@
 
         new LoweringPhase(runtime, assumptions).apply(graph);
 
+        if (GraalOptions.OptTailDuplication) {
+            new TailDuplicationPhase().apply(graph);
+            if (GraalOptions.OptCanonicalizer) {
+                new CanonicalizerPhase(target, runtime, assumptions).apply(graph);
+            }
+        }
+
         if (GraalOptions.CullFrameStates) {
             new CullFrameStatesPhase().apply(graph);
         }
+
         new FloatingReadPhase().apply(graph);
         if (GraalOptions.OptGVN) {
             new GlobalValueNumberingPhase().apply(graph);
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalOptions.java	Fri Jul 06 16:25:59 2012 +0200
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/GraalOptions.java	Fri Jul 06 16:28:12 2012 +0200
@@ -71,6 +71,8 @@
     public static int     ForcedInlineEscapeWeight           = 10;
     public static boolean PrintEscapeAnalysis                = ____;
 
+    public static double TailDuplicationProbability          = 0.5;
+
     // absolute probability analysis
     public static boolean ProbabilityAnalysis                = true;
     public static int     LoopFrequencyPropagationPolicy     = -2;
@@ -210,6 +212,7 @@
     public static boolean OptLivenessAnalysis                = true;
     public static boolean OptLoopTransform                   = true;
     public static boolean OptSafepointElimination            = true;
+    public static boolean OptTailDuplication                 = true;
 
     /**
      * Insert a counter in the method prologue to track the most frequently called methods that were compiled by Graal.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/phases/TailDuplicationPhase.java	Fri Jul 06 16:28:12 2012 +0200
@@ -0,0 +1,521 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.compiler.phases;
+
+import java.util.*;
+
+import com.oracle.graal.compiler.*;
+import com.oracle.graal.debug.*;
+import com.oracle.graal.graph.Graph.DuplicationReplacement;
+import com.oracle.graal.graph.*;
+import com.oracle.graal.nodes.*;
+import com.oracle.graal.nodes.VirtualState.NodeClosure;
+import com.oracle.graal.nodes.extended.*;
+import com.oracle.graal.nodes.java.*;
+import com.oracle.graal.nodes.type.*;
+import com.oracle.graal.nodes.util.*;
+
+/**
+ * This class is a phase that looks for opportunities for tail duplication. The static method
+ * {@link #tailDuplicate(MergeNode, TailDuplicationDecision, List)} can also be used to drive tail duplication from
+ * other places, e.g., inlining.
+ */
+public class TailDuplicationPhase extends Phase {
+
+    /*
+     * Various metrics on the circumstances in which tail duplication was/wasn't performed.
+     */
+    private static final DebugMetric metricDuplicationMonitors = Debug.metric("DuplicationMonitors");
+    private static final DebugMetric metricDuplicationEnd = Debug.metric("DuplicationEnd");
+    private static final DebugMetric metricDuplicationEndPerformed = Debug.metric("DuplicationEndPerformed");
+    private static final DebugMetric metricDuplicationOther = Debug.metric("DuplicationOther");
+    private static final DebugMetric metricDuplicationOtherPerformed = Debug.metric("DuplicationOtherPerformed");
+
+    /**
+     * This interface is used by tail duplication to let clients decide if tail duplication should be performed.
+     */
+    public interface TailDuplicationDecision {
+
+        /**
+         * Queries if tail duplication should be performed at the given merge. If this method returns true then the tail
+         * duplication will be performed, because all other checks have happened before.
+         *
+         * @param merge The merge at which tail duplication can be performed.
+         * @param fixedNodeCount The size of the set of fixed nodes that forms the base for the duplicated set of nodes.
+         * @return true if the tail duplication should be performed, false otherwise.
+         */
+        boolean doTransform(MergeNode merge, int fixedNodeCount);
+    }
+
+    /**
+     * A tail duplication decision closure that employs the default algorithm: Check if there are any phis on the merge
+     * whose stamps improve and that have usages within the duplicated set of fixed nodes.
+     */
+    public static final TailDuplicationDecision DEFAULT_DECISION = new TailDuplicationDecision() {
+
+        public boolean doTransform(MergeNode merge, int fixedNodeCount) {
+            HashSet<PhiNode> improvements = new HashSet<>();
+            for (PhiNode phi : merge.phis()) {
+                Stamp phiStamp = phi.stamp();
+                for (ValueNode input : phi.values()) {
+                    if (!input.stamp().equals(phiStamp)) {
+                        improvements.add(phi);
+                        break;
+                    }
+                }
+            }
+            if (improvements.isEmpty()) {
+                return false;
+            }
+            FixedNode current = merge;
+            int opportunities = 0;
+            while (current instanceof FixedWithNextNode) {
+                current = ((FixedWithNextNode) current).next();
+                for (PhiNode phi : improvements) {
+                    if (current.inputs().contains(phi)) {
+                        opportunities++;
+                    }
+                }
+            }
+            return opportunities > 0;
+        }
+    };
+
+    /**
+     * A tail duplication decision closure that always returns true.
+     */
+    public static final TailDuplicationDecision TRUE_DECISION = new TailDuplicationDecision() {
+
+        @Override
+        public boolean doTransform(MergeNode merge, int fixedNodeCount) {
+            return true;
+        }
+    };
+
+    @Override
+    protected void run(StructuredGraph graph) {
+        // A snapshot is taken here, so that new MergeNode instances aren't considered for tail duplication.
+        for (MergeNode merge : graph.getNodes(MergeNode.class).snapshot()) {
+            if (!(merge instanceof LoopBeginNode) && merge.probability() >= GraalOptions.TailDuplicationProbability) {
+                tailDuplicate(merge, DEFAULT_DECISION, null);
+            }
+        }
+    }
+
+    /**
+     * This method attempts to duplicate the tail of the given merge. The merge must not be a {@link LoopBeginNode}. If
+     * the merge is eligible for duplication (at least one fixed node in its tail, no {@link MonitorEnterNode}/
+     * {@link MonitorExitNode}, non-null {@link MergeNode#stateAfter()}) then the decision callback is used to determine
+     * whether the tail duplication should actually be performed. If replacements is non-null, then this list of
+     * {@link PiNode}s is used to replace one value per merge end.
+     *
+     * @param merge The merge whose tail should be duplicated.
+     * @param decision A callback that can make the final decision if tail duplication should occur or not.
+     * @param replacements A list of {@link PiNode}s, or null. If this list is non-null then its size needs to match the
+     *            merge's end count. Each entry can either be null or a {@link PiNode}, and is used to replace
+     *            {@link PiNode#object()} with the {@link PiNode} in the duplicated branch that corresponds to the
+     *            entry.
+     */
+    public static void tailDuplicate(MergeNode merge, TailDuplicationDecision decision, List<PiNode> replacements) {
+        assert !(merge instanceof LoopBeginNode);
+        assert replacements == null || replacements.size() == merge.forwardEndCount();
+        FixedNode fixed = merge;
+        int fixedCount = 0;
+        boolean containsMonitor = false;
+        while (fixed instanceof FixedWithNextNode) {
+            if (fixed instanceof MonitorEnterNode || fixed instanceof MonitorExitNode) {
+                containsMonitor = true;
+            }
+            fixed = ((FixedWithNextNode) fixed).next();
+            fixedCount++;
+        }
+        if (containsMonitor) {
+            // cannot currently be handled
+            // TODO (ls) re-evaluate this limitation after changes to the lock representation and the LIR generator
+            metricDuplicationMonitors.increment();
+        } else if (fixedCount > 1) {
+            if (fixed instanceof EndNode && !(((EndNode) fixed).merge() instanceof LoopBeginNode)) {
+                metricDuplicationEnd.increment();
+                if (decision.doTransform(merge, fixedCount)) {
+                    metricDuplicationEndPerformed.add(fixedCount);
+                    new DuplicationOperation(merge, replacements).duplicate();
+                }
+            } else if (merge.stateAfter() != null) {
+                metricDuplicationOther.increment();
+                if (decision.doTransform(merge, fixedCount)) {
+                    metricDuplicationOtherPerformed.add(fixedCount);
+                    new DuplicationOperation(merge, replacements).duplicate();
+                }
+            }
+        }
+    }
+
+    /**
+     * This class encapsulates one tail duplication operation on a specific {@link MergeNode}.
+     */
+    private static class DuplicationOperation {
+
+        private final MergeNode merge;
+        private final StructuredGraph graph;
+
+        private final HashMap<ValueNode, PhiNode> bottomPhis = new HashMap<>();
+        private final List<PiNode> replacements;
+
+        /**
+         * Initializes the tail duplication operation without actually performing any work.
+         *
+         * @param merge The merge whose tail should be duplicated.
+         * @param replacements A list of replacement {@link PiNode}s, or null. If this is non-null, then the size of the
+         *            list needs to match the number of end nodes at the merge.
+         */
+        public DuplicationOperation(MergeNode merge, List<PiNode> replacements) {
+            this.merge = merge;
+            this.replacements = replacements;
+            this.graph = (StructuredGraph) merge.graph();
+        }
+
+        /**
+         * Performs the actual tail duplication:
+         * <ul>
+         * <li>Creates a new {@link ValueAnchorNode} at the beginning of the duplicated area, an transfers all
+         * dependencies from the merge to this anchor.</li>
+         * <li>Determines the set of fixed nodes to be duplicated.</li>
+         * <li>Creates the new merge at the bottom of the duplicated area.</li>
+         * <li>Determines the complete set of duplicated nodes.</li>
+         * <li>Performs the actual duplication.</li>
+         * </ul>
+         */
+        private void duplicate() {
+            Debug.log("tail duplication at merge %s in %s (prob %f)", merge, graph.method(), merge.probability());
+
+            ValueAnchorNode anchor = addValueAnchor();
+
+            // determine the fixed nodes that should be duplicated (currently: all nodes up until the first control
+            // split, end node, deopt or return.
+            ArrayList<FixedNode> fixedNodes = new ArrayList<>();
+            FixedNode fixed = merge.next();
+            FrameState stateAfter = merge.stateAfter();
+            while (fixed instanceof FixedWithNextNode) {
+                fixedNodes.add(fixed);
+                if (fixed instanceof StateSplit && ((StateSplit) fixed).stateAfter() != null) {
+                    stateAfter = ((StateSplit) fixed).stateAfter();
+                }
+                fixed = ((FixedWithNextNode) fixed).next();
+            }
+
+            EndNode endAfter = createNewMerge(fixed, stateAfter);
+            MergeNode mergeAfter = endAfter.merge();
+            fixedNodes.add(endAfter);
+            final HashSet<Node> duplicatedNodes = buildDuplicatedNodeSet(fixedNodes, stateAfter);
+            mergeAfter.clearEnds();
+            expandDuplicated(duplicatedNodes, mergeAfter);
+
+            List<EndNode> endSnapshot = merge.forwardEnds().snapshot();
+            List<PhiNode> phiSnapshot = merge.phis().snapshot();
+
+            int endIndex = 0;
+            for (final EndNode forwardEnd : merge.forwardEnds()) {
+                Map<Node, Node> duplicates;
+                if (replacements == null || replacements.get(endIndex) == null) {
+                    duplicates = graph.addDuplicates(duplicatedNodes, (DuplicationReplacement) null);
+                } else {
+                    HashMap<Node, Node> replace = new HashMap<>();
+                    replace.put(replacements.get(endIndex).object(), replacements.get(endIndex));
+                    duplicates = graph.addDuplicates(duplicatedNodes, replace);
+                }
+                for (Map.Entry<ValueNode, PhiNode> phi : bottomPhis.entrySet()) {
+                    phi.getValue().initializeValueAt(merge.forwardEndIndex(forwardEnd), (ValueNode) duplicates.get(phi.getKey()));
+                }
+                mergeAfter.addForwardEnd((EndNode) duplicates.get(endAfter));
+
+                // re-wire the duplicated ValueAnchorNode to the predecessor of the corresponding EndNode
+                FixedNode anchorDuplicate = (FixedNode) duplicates.get(anchor);
+                ((FixedWithNextNode) forwardEnd.predecessor()).setNext(anchorDuplicate);
+                // move dependencies on the ValueAnchorNode to the previous BeginNode
+                BeginNode prevBegin = BeginNode.prevBegin(anchorDuplicate);
+                anchorDuplicate.replaceAtUsages(prevBegin);
+
+                // re-wire the phi duplicates to the correct input
+                for (PhiNode phi : phiSnapshot) {
+                    PhiNode phiDuplicate = (PhiNode) duplicates.get(phi);
+                    for (Node usage : phiDuplicate.usages()) {
+                        if (usage instanceof ValueNode) {
+                            ((ValueNode) usage).dependencies().add(prevBegin);
+                        }
+                    }
+                    phiDuplicate.replaceAtUsages(phi.valueAt(forwardEnd));
+                    phiDuplicate.safeDelete();
+                }
+                endIndex++;
+            }
+            GraphUtil.killCFG(merge);
+            for (EndNode forwardEnd : endSnapshot) {
+                forwardEnd.safeDelete();
+            }
+            for (PhiNode phi : phiSnapshot) {
+                // these phis should go away, but they still need to be anchored to a merge to be valid...
+                if (phi.isAlive()) {
+                    phi.setMerge(mergeAfter);
+                }
+            }
+            Debug.dump(graph, "After tail duplication at %s", merge);
+        }
+
+        /**
+         * Inserts a new ValueAnchorNode after the merge and transfers all dependency-usages (not phis) to this
+         * ValueAnchorNode.
+         *
+         * @return The new {@link ValueAnchorNode} that was created.
+         */
+        private ValueAnchorNode addValueAnchor() {
+            ValueAnchorNode anchor = graph.add(new ValueAnchorNode());
+            graph.addAfterFixed(merge, anchor);
+            for (Node usage : merge.usages().snapshot()) {
+                if (usage instanceof PhiNode && ((PhiNode) usage).merge() == merge) {
+                    // nothing to do
+                } else {
+                    usage.replaceFirstInput(merge, anchor);
+                }
+            }
+            return anchor;
+        }
+
+        /**
+         * Given a set of fixed nodes, this method determines the set of fixed and floating nodes that needs to be
+         * duplicated, i.e., all nodes that due to data flow and other dependencies needs to be duplicated.
+         *
+         * @param fixedNodes The set of fixed nodes that should be duplicated.
+         * @param stateAfter The frame state of the merge that follows the set of fixed nodes. All {@link ValueNode}s
+         *            reachable from this state are considered to be reachable from within the duplicated set of nodes.
+         * @return The set of nodes that need to be duplicated.
+         */
+        private HashSet<Node> buildDuplicatedNodeSet(final ArrayList<FixedNode> fixedNodes, FrameState stateAfter) {
+            final NodeBitMap aboveBound = graph.createNodeBitMap();
+            final NodeBitMap belowBound = graph.createNodeBitMap();
+
+            final Deque<Node> worklist = new ArrayDeque<>();
+
+            // Build the set of nodes that have (transitive) usages within the duplicatedNodes.
+            // This is achieved by iterating all nodes that are reachable via inputs from the the fixed nodes.
+            aboveBound.markAll(fixedNodes);
+            worklist.addAll(fixedNodes);
+
+            // the phis at the original merge should always be duplicated
+            for (PhiNode phi : merge.phis()) {
+                aboveBound.mark(phi);
+                worklist.add(phi);
+            }
+
+            NodeClosure<Node> aboveClosure = new NodeClosure<Node>() {
+
+                @Override
+                public void apply(Node usage, Node node) {
+                    if (node instanceof PhiNode && !fixedNodes.contains(((PhiNode) node).merge())) {
+                        // stop iterating: phis belonging to outside merges are known to be outside.
+                    } else if (node instanceof FixedNode) {
+                        // stop iterating: fixed nodes within the given set are traversal roots anyway, and all other
+                        // fixed nodes are known to be outside.
+                    } else if (!aboveBound.isMarked(node)) {
+                        worklist.add(node);
+                        aboveBound.mark(node);
+                    }
+                }
+            };
+
+            if (stateAfter != null) {
+                stateAfter.applyToNonVirtual(aboveClosure);
+            }
+            while (!worklist.isEmpty()) {
+                Node current = worklist.remove();
+                for (Node input : current.inputs()) {
+                    aboveClosure.apply(current, input);
+                }
+            }
+
+            // Build the set of nodes that have (transitive) inputs within the duplicatedNodes.
+            // This is achieved by iterating all nodes that are reachable via usages from the fixed nodes.
+            belowBound.markAll(fixedNodes);
+            worklist.addAll(fixedNodes);
+
+            // the phis at the original merge should always be duplicated
+            for (PhiNode phi : merge.phis()) {
+                belowBound.mark(phi);
+                worklist.add(phi);
+            }
+
+            while (!worklist.isEmpty()) {
+                Node current = worklist.remove();
+                for (Node usage : current.usages()) {
+                    if (usage instanceof PhiNode && !fixedNodes.contains(((PhiNode) usage).merge())) {
+                        // stop iterating: phis belonging to outside merges are known to be outside.
+                    } else if (usage instanceof FixedNode) {
+                        // stop iterating: fixed nodes within the given set are traversal roots anyway, and all other
+                        // fixed nodes are known to be outside.
+                    } else if (!belowBound.isMarked(usage)) {
+                        worklist.add(usage);
+                        belowBound.mark(usage);
+                    }
+                }
+            }
+
+            // build the intersection
+            belowBound.intersect(aboveBound);
+            HashSet<Node> result = new HashSet<>();
+            for (Node node : belowBound) {
+                result.add(node);
+            }
+            return result;
+        }
+
+        /**
+         * Creates a new merge and end node construct at the end of the duplicated area. While it is useless in itself
+         * (merge with only one end) it simplifies the later duplication step.
+         *
+         * @param successor The successor of the duplicated set of nodes, i.e., the first node that should not be
+         *            duplicated.
+         * @param stateAfterMerge The frame state that should be used for the merge.
+         * @return The newly created end node.
+         */
+        private EndNode createNewMerge(FixedNode successor, FrameState stateAfterMerge) {
+            MergeNode newBottomMerge = graph.add(new MergeNode());
+            newBottomMerge.setProbability(successor.probability());
+            EndNode newBottomEnd = graph.add(new EndNode());
+            newBottomMerge.addForwardEnd(newBottomEnd);
+            newBottomMerge.setStateAfter(stateAfterMerge);
+            ((FixedWithNextNode) successor.predecessor()).setNext(newBottomEnd);
+            newBottomMerge.setNext(successor);
+            return newBottomEnd;
+        }
+
+        /**
+         * Expands the set of nodes to be duplicated by looking at a number of conditions:
+         * <ul>
+         * <li>{@link ValueNode}s that have usages on the outside need to be replaced with phis for the outside usages.</li>
+         * <li>Non-{@link ValueNode} nodes that have outside usages (frame states, virtual object states, ...) need to
+         * be cloned immediately for the outside usages.</li>
+         * <li>Nodes that have a {@link StampFactory#extension()} or {@link StampFactory#condition()} stamp need to be
+         * cloned immediately for the outside usages.</li>
+         * <li>Dependencies into the duplicated nodes will be replaced with dependencies on the merge.</li>
+         * <li>Outside non-{@link ValueNode}s with usages within the duplicated set of nodes need to also be duplicated.
+         * </li>
+         * <li>Outside {@link ValueNode}s with {@link StampFactory#extension()} or {@link StampFactory#condition()}
+         * stamps that have usages within the duplicated set of nodes need to also be duplicated.</li>
+         * </ul>
+         *
+         * @param duplicatedNodes The set of duplicated nodes that will be modified (expanded).
+         * @param newBottomMerge The merge that follows the duplicated set of nodes. It will be used for newly created
+         *            phis and to as a target for dependencies that pointed into the duplicated set of nodes.
+         */
+        private void expandDuplicated(HashSet<Node> duplicatedNodes, MergeNode newBottomMerge) {
+            Deque<Node> worklist = new ArrayDeque<>(duplicatedNodes);
+
+            while (!worklist.isEmpty()) {
+                Node duplicated = worklist.remove();
+                if (hasUsagesOutside(duplicated, duplicatedNodes)) {
+                    boolean cloneForOutsideUsages = false;
+                    if (duplicated instanceof ValueNode) {
+                        ValueNode node = (ValueNode) duplicated;
+                        if (node.stamp() == StampFactory.dependency()) {
+                            // re-route dependencies to the merge
+                            replaceUsagesOutside(node, newBottomMerge, duplicatedNodes);
+                            // TODO(ls) maybe introduce phis for dependencies
+                        } else if (node.stamp() == StampFactory.extension() || node.stamp() == StampFactory.condition()) {
+                            cloneForOutsideUsages = true;
+                        } else {
+                            // introduce a new phi
+                            PhiNode newPhi = bottomPhis.get(node);
+                            if (newPhi == null) {
+                                newPhi = graph.add(new PhiNode(node.kind(), newBottomMerge));
+                                bottomPhis.put(node, newPhi);
+                                newPhi.addInput(node);
+                            }
+                            replaceUsagesOutside(node, newPhi, duplicatedNodes);
+                        }
+                    } else {
+                        cloneForOutsideUsages = true;
+                    }
+                    if (cloneForOutsideUsages) {
+                        // clone the offending node to the outside
+                        Node newOutsideClone = duplicated.copyWithInputs();
+                        replaceUsagesOutside(duplicated, newOutsideClone, duplicatedNodes);
+                        // this might cause other nodes to have outside usages, we need to look at those as well
+                        for (Node input : newOutsideClone.inputs()) {
+                            if (duplicatedNodes.contains(input)) {
+                                worklist.add(input);
+                            }
+                        }
+                    }
+                }
+                // check if this node has an input that lies outside and cannot be shared
+                for (Node input : duplicated.inputs()) {
+                    if (!duplicatedNodes.contains(input)) {
+                        boolean duplicateInput = false;
+                        if (input instanceof VirtualState) {
+                            duplicateInput = true;
+                        } else if (input instanceof ValueNode) {
+                            Stamp inputStamp = ((ValueNode) input).stamp();
+                            if (inputStamp == StampFactory.extension() || inputStamp == StampFactory.condition()) {
+                                duplicateInput = true;
+                            }
+                        }
+                        if (duplicateInput) {
+                            duplicatedNodes.add(input);
+                            worklist.add(input);
+                        }
+                    }
+                }
+            }
+        }
+
+        /**
+         * Checks if the given node has usages that are not within the given set of nodes.
+         *
+         * @param node The node whose usages are checked.
+         * @param nodeSet The set of nodes that are considered to be "within".
+         * @return true if the given node has usages on the outside, false otherwise.
+         */
+        private static boolean hasUsagesOutside(Node node, HashSet<Node> nodeSet) {
+            for (Node usage : node.usages()) {
+                if (!nodeSet.contains(usage)) {
+                    return true;
+                }
+            }
+            return false;
+        }
+
+        /**
+         * Replaces the given node with the given replacement at all usages that are not within the given set of nodes.
+         *
+         * @param node The node to be replaced at outside usages.
+         * @param replacement The node that replaced the given node at outside usages.
+         * @param nodeSet The set of nodes that are considered to be "within".
+         */
+        private static void replaceUsagesOutside(Node node, Node replacement, HashSet<Node> nodeSet) {
+            for (Node usage : node.usages().snapshot()) {
+                if (!nodeSet.contains(usage)) {
+                    usage.replaceFirstInput(node, replacement);
+                }
+            }
+        }
+    }
+}
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/util/InliningUtil.java	Fri Jul 06 16:25:59 2012 +0200
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/util/InliningUtil.java	Fri Jul 06 16:28:12 2012 +0200
@@ -28,13 +28,13 @@
 
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
-import com.oracle.graal.api.meta.JavaTypeProfile.*;
+import com.oracle.graal.api.meta.JavaTypeProfile.ProfiledType;
 import com.oracle.graal.compiler.*;
 import com.oracle.graal.compiler.phases.*;
 import com.oracle.graal.debug.*;
 import com.oracle.graal.graph.*;
 import com.oracle.graal.nodes.*;
-import com.oracle.graal.nodes.FrameState.*;
+import com.oracle.graal.nodes.FrameState.InliningIdentifier;
 import com.oracle.graal.nodes.calc.*;
 import com.oracle.graal.nodes.extended.*;
 import com.oracle.graal.nodes.java.*;
@@ -45,6 +45,8 @@
 
 public class InliningUtil {
 
+    private static final DebugMetric metricInliningTailDuplication = Debug.metric("InliningTailDuplication");
+
     public interface InliningCallback {
         StructuredGraph buildGraph(ResolvedJavaMethod method);
         double inliningWeight(ResolvedJavaMethod caller, ResolvedJavaMethod method, Invoke invoke);
@@ -270,6 +272,7 @@
         private void inlineMultipleMethods(StructuredGraph graph, GraalCodeCacheProvider runtime, InliningCallback callback, int numberOfMethods, boolean hasReturnValue) {
             FixedNode continuation = invoke.next();
 
+            ValueNode originalReceiver = invoke.callTarget().receiver();
             // setup merge and phi nodes for results and exceptions
             MergeNode returnMerge = graph.add(new MergeNode());
             returnMerge.setProbability(invoke.probability());
@@ -329,7 +332,7 @@
                 GraphUtil.killCFG(invokeWithExceptionNode.exceptionEdge());
             }
 
-            // replace the invoke with a cascade of if nodes
+            // replace the invoke with a switch on the type of the actual receiver
             ReadHubNode objectClassNode = graph.add(new ReadHubNode(invoke.callTarget().receiver()));
             graph.addBeforeFixed(invoke.node(), objectClassNode);
             FixedNode dispatchOnType = createDispatchOnType(graph, objectClassNode, calleeEntryNodes, unknownTypeNode);
@@ -340,6 +343,8 @@
             invoke.node().replaceAtUsages(returnValuePhi);
             invoke.node().replaceAndDelete(dispatchOnType);
 
+            ArrayList<PiNode> replacements = new ArrayList<>();
+
             // do the actual inlining for every invoke
             for (int i = 0; i < calleeEntryNodes.length; i++) {
                 BeginNode node = calleeEntryNodes[i];
@@ -347,7 +352,7 @@
 
                 ResolvedJavaType commonType = getLeastCommonType(i);
                 ValueNode receiver = invokeForInlining.callTarget().receiver();
-                ValueNode anchoredReceiver = createAnchoredReceiver(graph, node, commonType, receiver);
+                PiNode anchoredReceiver = createAnchoredReceiver(graph, node, commonType, receiver);
                 invokeForInlining.callTarget().replaceFirstInput(receiver, anchoredReceiver);
 
                 ResolvedJavaMethod concrete = concretes.get(i);
@@ -355,6 +360,31 @@
                 callback.recordMethodContentsAssumption(concrete);
                 assert !IntrinsificationPhase.canIntrinsify(invokeForInlining, concrete, runtime);
                 InliningUtil.inline(invokeForInlining, calleeGraph, false);
+                replacements.add(anchoredReceiver);
+            }
+            if (shouldFallbackToInvoke()) {
+                replacements.add(null);
+            }
+            if (GraalOptions.OptTailDuplication) {
+                /*
+                 * We might want to perform tail duplication at the merge after a type switch, is there are invokes that would
+                 * benefit from the improvement in type information.
+                 */
+                FixedNode current = returnMerge;
+                int opportunities = 0;
+                while (current instanceof FixedWithNextNode) {
+                    current = ((FixedWithNextNode) current).next();
+                    if (current instanceof InvokeNode && ((InvokeNode) current).callTarget().receiver() == originalReceiver) {
+                        opportunities++;
+                    } else if (current.inputs().contains(originalReceiver)) {
+                        opportunities++;
+                    }
+                }
+                if (opportunities > 0) {
+                    metricInliningTailDuplication.increment();
+                    Debug.log("MultiTypeGuardInlineInfo starting tail duplication (%d opportunities)", opportunities);
+                    TailDuplicationPhase.tailDuplicate(returnMerge, TailDuplicationPhase.TRUE_DECISION, replacements);
+                }
             }
         }
 
@@ -679,7 +709,7 @@
         }
     }
 
-    private static ValueNode createAnchoredReceiver(StructuredGraph graph, FixedNode anchor, ResolvedJavaType commonType, ValueNode receiver) {
+    private static PiNode createAnchoredReceiver(StructuredGraph graph, FixedNode anchor, ResolvedJavaType commonType, ValueNode receiver) {
         // to avoid that floating reads on receiver fields float above the type check
         return graph.unique(new PiNode(receiver, anchor, StampFactory.declaredNonNull(commonType)));
     }
--- a/graal/com.oracle.graal.graph/src/com/oracle/graal/graph/NodeBitMap.java	Fri Jul 06 16:25:59 2012 +0200
+++ b/graal/com.oracle.graal.graph/src/com/oracle/graal/graph/NodeBitMap.java	Fri Jul 06 16:28:12 2012 +0200
@@ -26,9 +26,7 @@
 
 import com.oracle.graal.graph.iterators.*;
 
-
-
-public final class NodeBitMap extends AbstractNodeIterable<Node>{
+public final class NodeBitMap extends AbstractNodeIterable<Node> {
     private final boolean autoGrow;
     private final BitSet bitMap;
     private final Graph graph;
@@ -98,6 +96,11 @@
         bitMap.clear();
     }
 
+    public void intersect(NodeBitMap other) {
+        assert graph == other.graph;
+        bitMap.and(other.bitMap);
+    }
+
     public void grow(Node node) {
         nodeCount = Math.max(nodeCount, node.id() + 1);
     }
@@ -120,6 +123,7 @@
     }
 
     private static class MarkedNodeIterator implements Iterator<Node> {
+
         private final NodeBitMap visited;
         private Iterator<Node> nodes;
         private Node nextNode;
--- a/graal/com.oracle.graal.java/src/com/oracle/graal/java/GraphBuilderPhase.java	Fri Jul 06 16:25:59 2012 +0200
+++ b/graal/com.oracle.graal.java/src/com/oracle/graal/java/GraphBuilderPhase.java	Fri Jul 06 16:28:12 2012 +0200
@@ -1187,7 +1187,7 @@
     }
 
     private FixedNode createTarget(double probability, Block block, FrameStateBuilder stateAfter) {
-        assert probability >= 0 && probability <= 1;
+        assert probability >= 0 && probability <= 1.01 : probability;
         if (probability == 0 && optimisticOpts.removeNeverExecutedCode()) {
             return currentGraph.add(new DeoptimizeNode(DeoptimizationAction.InvalidateReprofile, DeoptimizationReason.UnreachedCode, graphId));
         } else {
--- a/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/cfg/Block.java	Fri Jul 06 16:25:59 2012 +0200
+++ b/graal/com.oracle.graal.lir/src/com/oracle/graal/lir/cfg/Block.java	Fri Jul 06 16:28:12 2012 +0200
@@ -157,6 +157,18 @@
             public Iterator<FixedNode> iterator() {
                 return new NodeIterator();
             }
+
+            @Override
+            public String toString() {
+                StringBuilder str = new StringBuilder().append('[');
+                for (FixedNode node : this) {
+                    str.append(node).append(", ");
+                }
+                if (str.length() > 1) {
+                    str.setLength(str.length() - 2);
+                }
+                return str.append(']').toString();
+            }
         };
     }
 
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/PhiNode.java	Fri Jul 06 16:25:59 2012 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/PhiNode.java	Fri Jul 06 16:28:12 2012 +0200
@@ -80,6 +80,11 @@
         return merge;
     }
 
+    public void setMerge(MergeNode x) {
+        updateUsages(merge, x);
+        merge = x;
+    }
+
     public NodeInputList<ValueNode> values() {
         return values;
     }
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/ValueAnchorNode.java	Fri Jul 06 16:25:59 2012 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/extended/ValueAnchorNode.java	Fri Jul 06 16:28:12 2012 +0200
@@ -37,7 +37,7 @@
 public final class ValueAnchorNode extends FixedWithNextNode implements Canonicalizable, LIRLowerable, Node.IterableNodeType {
 
     public ValueAnchorNode(ValueNode... values) {
-        super(StampFactory.forVoid(), values);
+        super(StampFactory.dependency(), values);
     }
 
     @Override
--- a/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/util/GraphUtil.java	Fri Jul 06 16:25:59 2012 +0200
+++ b/graal/com.oracle.graal.nodes/src/com/oracle/graal/nodes/util/GraphUtil.java	Fri Jul 06 16:28:12 2012 +0200
@@ -219,4 +219,23 @@
         }
         return v;
     }
+
+    /**
+     * Returns a string representation of the given collection of objects.
+     *
+     * @param objects The {@link Iterable} that will be used to iterate over the objects.
+     * @return A string of the format "[a, b, ...]".
+     */
+    public static String toString(Iterable< ? > objects) {
+        StringBuilder str = new StringBuilder();
+        str.append("[");
+        for (Object o : objects) {
+            str.append(o).append(", ");
+        }
+        if (str.length() > 1) {
+            str.setLength(str.length() - 2);
+        }
+        str.append("]");
+        return str.toString();
+    }
 }