changeset 17177:805a26002dc7

[SPARC] Implement stuffing of delay slots in branches and calls. Optimized the cmove for integer and fp ops
author Stefan Anzinger <stefan.anzinger@oracle.com>
date Mon, 22 Sep 2014 09:21:29 -0700
parents 65c75f0bfc7b
children bef7eac46e1e
files graal/com.oracle.graal.asm.sparc/src/com/oracle/graal/asm/sparc/SPARCAssembler.java graal/com.oracle.graal.asm.sparc/src/com/oracle/graal/asm/sparc/SPARCMacroAssembler.java graal/com.oracle.graal.compiler.sparc/src/com/oracle/graal/compiler/sparc/SPARCLIRGenerator.java graal/com.oracle.graal.compiler.sparc/src/com/oracle/graal/compiler/sparc/SPARCNodeLIRBuilder.java graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/gen/NodeLIRBuilder.java graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotBackend.java graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotCRuntimeCallEpilogueOp.java graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotCRuntimeCallPrologueOp.java graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotspotDirectStaticCallOp.java graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotspotDirectVirtualCallOp.java graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/DelaySlotHolder.java graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCArithmetic.java graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCByteSwapOp.java graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCCall.java graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCCompare.java graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCControlFlow.java graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCJumpOp.java graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCMathIntrinsicOp.java graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCMove.java graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCSaveRegistersOp.java graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/TailDelayedLIRInstruction.java
diffstat 21 files changed, 1131 insertions(+), 280 deletions(-) [+]
line wrap: on
line diff
--- a/graal/com.oracle.graal.asm.sparc/src/com/oracle/graal/asm/sparc/SPARCAssembler.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.asm.sparc/src/com/oracle/graal/asm/sparc/SPARCAssembler.java	Mon Sep 22 09:21:29 2014 -0700
@@ -29,6 +29,7 @@
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.asm.*;
 import com.oracle.graal.compiler.common.*;
+import com.oracle.graal.compiler.common.calc.*;
 import com.oracle.graal.sparc.*;
 
 /**
@@ -48,6 +49,10 @@
         super(target);
     }
 
+    public interface AssemblerEmittable { // implemented by instruction formats that emit themselves via a SPARCAssembler
+        void emit(SPARCAssembler masm); // interface methods are implicitly public
+    }
+
     // @formatter:off
     /**
      * Instruction format for Fmt00 instructions. This abstraction is needed as it
@@ -57,13 +62,15 @@
      * |31 30|29    25|24 22|21                                      0|
      */
     // @formatter:on
-    public static abstract class Fmt00 {
+    public static abstract class Fmt00 implements AssemblerEmittable {
 
         protected static final int OP_SHIFT = 30;
         protected static final int CBCOND_SHIFT = 28;
         protected static final int OP2_SHIFT = 22;
+        protected static final int A_SHIFT = 29;
 
         // @formatter:off
+        protected static final int A_MASK        = 0b0010_0000_0000_0000_0000_0000_0000_0000;
         protected static final int OP_MASK     = 0b1100_0000_0000_0000_0000_0000_0000_0000;
         protected static final int CBCOND_MASK = 0b0001_0000_0000_0000_0000_0000_0000_0000; // Used for distinguish CBcond and BPr instructions
         protected static final int OP2_MASK    = 0b0000_0001_1100_0000_0000_0000_0000_0000;
@@ -123,6 +130,19 @@
          * @param imm Displacement/imediate value. Can either be a 22 or 19 bit immediate (dependent on the instruction)
          */
         public abstract void setImm(int imm);
+
+        public abstract void emit(SPARCAssembler masm);
+
+        public boolean hasDelaySlot() { // default answer for Fmt00 instructions; subclasses may override
+            return true;
+        }
+
+        public int getA() { // base implementation: always throws; subclasses that carry an 'a' field override it
+            throw GraalInternalError.shouldNotReachHere();
+        }
+        public void setA(@SuppressWarnings("unused") int a) { // base implementation: always throws; parameter is intentionally unused
+            throw GraalInternalError.shouldNotReachHere();
+        }
     }
 
     // @formatter:off
@@ -133,7 +153,7 @@
      * |31 30|29    25|24 22|21                                      0|
      */
     // @formatter:on
-    public static class Fmt00a extends Fmt00 {
+    public static class Fmt00a extends Fmt00 implements AssemblerEmittable {
 
         private static final int RD_SHIFT = 25;
         private static final int IMM22_SHIFT = 0;
@@ -177,6 +197,7 @@
             return new Fmt00a(op2, imm22, rd);
         }
 
+        @Override
         public void emit(SPARCAssembler masm) {
             verify();
             masm.emitInt(getInstructionBits());
@@ -197,6 +218,11 @@
         public void setImm22(int imm22) {
             this.imm22 = imm22;
         }
+
+        @Override
+        public boolean hasDelaySlot() { // Fmt00a reports no delay slot, unlike the Fmt00 default of true
+            return false;
+        }
     }
 
     // @formatter:off
@@ -213,12 +239,10 @@
         private int disp22;
         private Label label;
 
-        private static final int A_SHIFT = 29;
         private static final int COND_SHIFT = 25;
         private static final int DISP22_SHIFT = 0;
 
         // @formatter:off
-        private static final int A_MASK      = 0b00100000000000000000000000000000;
         private static final int COND_MASK   = 0b00011110000000000000000000000000;
         private static final int DISP22_MASK = 0b00000000001111111111111111111111;
         // @formatter:on
@@ -251,6 +275,7 @@
             setLabel(label);
         }
 
+        @Override
         public void emit(SPARCAssembler masm) {
             if (label != null) {
                 final int pos = label.isBound() ? label.position() : patchUnbound(masm, label);
@@ -291,10 +316,12 @@
             return fmt;
         }
 
+        @Override
         public int getA() {
             return a;
         }
 
+        @Override
         public void setA(int a) {
             this.a = a;
         }
@@ -346,14 +373,12 @@
     // @formatter:on
     public static class Fmt00c extends Fmt00 {
 
-        private static final int A_SHIFT = 29;
         private static final int COND_SHIFT = 25;
         private static final int CC_SHIFT = 20;
         private static final int P_SHIFT = 19;
         private static final int DISP19_SHIFT = 0;
 
         // @formatter:off
-        private static final int A_MASK      = 0b00100000000000000000000000000000;
         private static final int COND_MASK   = 0b00011110000000000000000000000000;
         private static final int CC_MASK     = 0b00000000001100000000000000000000;
         private static final int P_MASK      = 0b00000000000010000000000000000000;
@@ -386,10 +411,12 @@
             this.label = label;
         }
 
+        @Override
         public int getA() {
             return a;
         }
 
+        @Override
         public void setA(int a) {
             this.a = a;
         }
@@ -462,6 +489,7 @@
             return fmt;
         }
 
+        @Override
         public void emit(SPARCAssembler masm) {
             if (label != null) {
                 final int pos = label.isBound() ? label.position() : patchUnbound(masm, label);
@@ -500,7 +528,6 @@
     // @formatter:on
     public static class Fmt00d extends Fmt00 {
 
-        private static final int A_SHIFT = 29;
         private static final int RCOND_SHIFT = 25;
         private static final int D16HI_SHIFT = 20;
         private static final int P_SHIFT = 19;
@@ -508,7 +535,6 @@
         private static final int D16LO_SHIFT = 0;
 
         // @formatter:off
-        private static final int A_MASK        = 0b0010_0000_0000_0000_0000_0000_0000_0000;
         private static final int RCOND_MASK    = 0b0000_1110_0000_0000_0000_0000_0000_0000;
         private static final int D16HI_MASK    = 0b0000_0000_0011_0000_0000_0000_0000_0000;
         private static final int P_MASK        = 0b0000_0000_0000_1000_0000_0000_0000_0000;
@@ -542,6 +568,12 @@
             this.disp16 = disp16 >> 2;
         }
 
+        @Override
+        public int getA() { // exposes the annul field; NOTE(review): no setA override is visible in this hunk - confirm that is intended
+            return annul;
+        }
+
+        @Override
         public void emit(SPARCAssembler masm) {
             if (label != null) {
                 final int pos = label.isBound() ? label.position() : patchUnbound(masm, label);
@@ -658,6 +690,7 @@
             assert isSimm10(this.disp10) : this.disp10;
         }
 
+        @Override
         public void emit(SPARCAssembler masm) {
             assert masm.hasFeature(CPUFeature.CBCOND);
             if (label != null) {
@@ -742,6 +775,11 @@
                 assert (regOrImmediate & ~0b1_1111) == 0 : regOrImmediate;
             }
         }
+
+        @Override
+        public boolean hasDelaySlot() { // this instruction format reports that it has no delay slot
+            return false;
+        }
     }
 
     // @formatter:off
@@ -936,7 +974,7 @@
      * |31 30|29    25|24     19|18     14|13|12|11         5|4       0|
      */
     // @formatter:on
-    public static class Fmt10 {
+    public static class Fmt10 implements AssemblerEmittable {
 
         private static final int OP_SHIFT = 30;
         private static final int RD_SHIFT = 25;
@@ -1286,7 +1324,7 @@
         /**
          * Converts regular CC codes to CC codes used by Movcc instructions.
          */
-        private static int getCC(CC cc) {
+        public static int getCC(CC cc) {
             switch (cc) {
                 case Icc:
                 case Xcc:
@@ -1346,6 +1384,74 @@
         }
     }
 
+    // @formatter:off
+    /**
+     * Instruction format for Fmovcc.
+     *
+     * | 10  |   rd   |   op3   | -|   cond  | opfcc | opf_low |   rs2   |
+     * |31 30|29    25|24     19|18|17     14|13   11|10      5|4       0|
+     */
+    // @formatter:on
+    public static class Fmt10d implements AssemblerEmittable {
+
+        private static final int OP_SHIFT = 30;
+        private static final int RD_SHIFT = 25;
+        private static final int OP3_SHIFT = 19;
+        private static final int COND_SHIFT = 14;
+        private static final int OPFCC_SHIFT = 11; // opfcc occupies bits 13..11, matching OPFCC_MASK (was 12)
+        private static final int OPF_LOW_SHIFT = 5; // opf_low occupies bits 10..5, matching OPF_LOW_MASK (was 11)
+        private static final int RS2_SHIFT = 0;
+
+        // @formatter:off
+        private static final int RD_MASK     = 0b0011_1110_0000_0000_0000_0000_0000_0000;
+        private static final int OP3_MASK    = 0b0000_0001_1111_1000_0000_0000_0000_0000;
+        private static final int COND_MASK   = 0b0000_0000_0000_0011_1100_0000_0000_0000;
+        private static final int OPFCC_MASK  = 0b0000_0000_0000_0000_0011_1000_0000_0000;
+        private static final int OPF_LOW_MASK= 0b0000_0000_0000_0000_0000_0111_1110_0000;
+        private static final int RS2_MASK    = 0b0000_0000_0000_0000_0000_0000_0001_1111;
+        // @formatter:on
+
+        private final int rd; // raw field values, unshifted; assigned once in the constructor
+        private final int op3;
+        private final int cond;
+        private final int opfcc;
+        private final int opfLow;
+        private final int rs2;
+
+        public Fmt10d(Op3s op3, Opfs opf, ConditionFlag cond, CC cc, Register rs2, Register rd) { // typed convenience constructor
+            this(rd.encoding(), op3.getValue(), cond.getValue(), Fmt10c.getCC(cc), opf.getValue(), rs2.encoding());
+        }
+
+        public Fmt10d(int rd, int op3, int cond, int opfcc, int opfLow, int rs2) {
+            // Values are stored as given; verify() range-checks them when emit() runs.
+            this.rd = rd;
+            this.op3 = op3;
+            this.cond = cond;
+            this.opfcc = opfcc;
+            this.opfLow = opfLow;
+            this.rs2 = rs2;
+        }
+
+        @Override
+        public void emit(SPARCAssembler masm) { // checks the fields, then writes the 32-bit instruction word
+            verify();
+            masm.emitInt(getInstructionBits());
+        }
+
+        private int getInstructionBits() { // ORs every field into place; op is always Ops.ArithOp
+            return Ops.ArithOp.getValue() << OP_SHIFT | rd << RD_SHIFT | op3 << OP3_SHIFT | cond << COND_SHIFT | opfcc << OPFCC_SHIFT | opfLow << OPF_LOW_SHIFT | rs2 << RS2_SHIFT;
+        }
+
+        public void verify() { // each field must fit within its mask once the mask is shifted down to bit 0
+            assert ((RD_MASK >> RD_SHIFT) & rd) == rd;
+            assert ((OP3_MASK >> OP3_SHIFT) & op3) == op3;
+            assert ((COND_MASK >> COND_SHIFT) & cond) == cond;
+            assert ((OPFCC_MASK >> OPFCC_SHIFT) & opfcc) == opfcc;
+            assert ((OPF_LOW_MASK >> OPF_LOW_SHIFT) & opfLow) == opfLow;
+            assert ((RS2_MASK >> RS2_SHIFT) & rs2) == rs2;
+        }
+    }
+
     public static class Fmt4d {
 
         public Fmt4d(SPARCAssembler masm, int op, int op3, int cond, int cc, int simm11, int rd) {
@@ -1395,6 +1501,11 @@
         public int getValue() {
             return value;
         }
+
+        public boolean appliesTo(int instructionWord) {
+            // Only bits 31..30 remain after the unsigned shift; compare them with this op's value.
+            return (instructionWord >>> 30) == value;
+        }
     }
 
     public enum Op2s {
@@ -1493,7 +1604,7 @@
         Saved(0x31, "saved"),
 
         Fpop1(0b11_0100, "fpop1"),
-        Fpop2(0x35, "fpop2"),
+        Fpop2(0b11_0101, "fpop2"),
         Impdep1(0b11_0110, "impdep1"),
         Impdep2(0b11_0111, "impdep2"),
         Jmpl(0x38, "jmpl"),
@@ -1552,6 +1663,10 @@
         public String getOperator() {
             return operator;
         }
+
+        public boolean appliesTo(int instructionWord) { // true when the word's op3 field (bits 24..19) equals this op3 value
+            return ((instructionWord >>> 19) & 0b11_1111) == value; // op3 is 6 bits wide; the former 5-bit mask could never match values >= 0b10_0000
+        }
     }
 
     public enum Op5s {
@@ -1585,6 +1700,8 @@
         Fmovs(0b0_0000_0001, "fmovs"),
         Fmovd(0b0_0000_0010, "fmovd"),
         Fmovq(0b0_0000_0011, "fmovq"),
+        Fmovscc(0b00_0001, "fmovscc"),
+        Fmovdcc(0b00_0010, "fmovdcc"),
         Fnegs(0x05, "fnegs"),
         Fnegd(0x06, "fnegd"),
         Fnegq(0x07, "fnegq"),
@@ -1874,7 +1991,6 @@
         // for FBfcc & FBPfcc instruction
         F_Never(0, "f_never"),
         F_NotEqual(1, "f_notEqual"),
-        F_NotZero(1, "f_notZero"),
         F_LessOrGreater(2, "f_lessOrGreater"),
         F_UnorderedOrLess(3, "f_unorderedOrLess"),
         F_Less(4, "f_less"),
@@ -1883,7 +1999,6 @@
         F_Unordered(7, "f_unordered"),
         F_Always(8, "f_always"),
         F_Equal(9, "f_equal"),
-        F_Zero(9, "f_zero"),
         F_UnorderedOrEqual(10, "f_unorderedOrEqual"),
         F_GreaterOrEqual(11, "f_greaterOrEqual"),
         F_UnorderedGreaterOrEqual(12, "f_unorderedGreaterOrEqual"),
@@ -1942,16 +2057,100 @@
         }
 
         public ConditionFlag negate() {
+            //@formatter:off
             switch (this) {
-                case CarrySet:
-                    return CarryClear;
-                case CarryClear:
-                    return CarrySet;
+                case F_Never                  : return F_Always;
+                case F_Always                 : return F_Never;
+                case F_NotEqual               : return F_Equal;
+                case F_Equal                  : return F_NotEqual;
+                case F_LessOrGreater          : return F_UnorderedOrEqual;
+                case F_UnorderedOrEqual       : return F_LessOrGreater;
+                case F_Less                   : return F_UnorderedGreaterOrEqual;
+                case F_UnorderedGreaterOrEqual: return F_Less;
+                case F_LessOrEqual            : return F_UnorderedOrGreater;
+                case F_UnorderedOrGreater     : return F_LessOrEqual;
+                case F_Greater                : return F_UnorderedOrLessOrEqual;
+                case F_UnorderedOrLessOrEqual : return F_Greater;
+                case F_GreaterOrEqual         : return F_UnorderedOrLess;
+                case F_UnorderedOrLess        : return F_GreaterOrEqual;
+                case F_Unordered              : return F_Ordered;
+                case F_Ordered                : return F_Unordered;
+                case Never                    : return Always;
+                case Always                   : return Never;
+                case Equal                    : return NotEqual;
+                case NotEqual                 : return Equal;
+                case Zero                     : return NotZero;
+                case NotZero                  : return Zero;
+                case LessEqual                : return Greater;
+                case Greater                  : return LessEqual;
+                case Less                     : return GreaterEqual;
+                case GreaterEqual             : return Less;
+                case LessEqualUnsigned        : return GreaterUnsigned;
+                case GreaterUnsigned          : return LessEqualUnsigned;
+                case LessUnsigned             : return GreaterEqualUnsigned;
+                case GreaterEqualUnsigned     : return LessUnsigned;
+                case CarrySet                 : return CarryClear;
+                case CarryClear               : return CarrySet;
+                case Negative                 : return Positive;
+                case Positive                 : return Negative;
+                case OverflowSet              : return OverflowClear;
+                case OverflowClear            : return OverflowSet;
                 default:
                     GraalInternalError.unimplemented();
             }
+            //@formatter:on
             return null;
         }
+
+        public static ConditionFlag fromCondtition(CC conditionFlagsRegister, Condition cond, boolean unorderedIsTrue) { // maps a Condition to the flag for the given CC register; misspelled name kept for existing callers
+            switch (conditionFlagsRegister) {
+                case Xcc:
+                case Icc: // integer condition codes: unorderedIsTrue is not consulted
+                    switch (cond) {
+                        case EQ:
+                            return ConditionFlag.Equal;
+                        case NE:
+                            return ConditionFlag.NotEqual;
+                        case BT: // BT/BE/AE/AT map to the *Unsigned flags
+                            return ConditionFlag.LessUnsigned;
+                        case LT:
+                            return ConditionFlag.Less;
+                        case BE:
+                            return ConditionFlag.LessEqualUnsigned;
+                        case LE:
+                            return ConditionFlag.LessEqual;
+                        case AE:
+                            return ConditionFlag.GreaterEqualUnsigned;
+                        case GE:
+                            return ConditionFlag.GreaterEqual;
+                        case AT:
+                            return ConditionFlag.GreaterUnsigned;
+                        case GT:
+                            return ConditionFlag.Greater;
+                    }
+                    throw GraalInternalError.shouldNotReachHere("Unimplemented for: " + cond);
+                case Fcc0:
+                case Fcc1:
+                case Fcc2:
+                case Fcc3: // floating-point condition codes: unorderedIsTrue selects the F_Unordered* variant
+                    switch (cond) {
+                        case EQ:
+                            return unorderedIsTrue ? ConditionFlag.F_UnorderedOrEqual : ConditionFlag.F_Equal;
+                        case NE: // NOTE(review): ignores unorderedIsTrue - confirm NE is never requested with unorderedIsTrue == false
+                            return ConditionFlag.F_NotEqual;
+                        case LT:
+                            return unorderedIsTrue ? ConditionFlag.F_UnorderedOrLess : ConditionFlag.F_Less;
+                        case LE:
+                            return unorderedIsTrue ? ConditionFlag.F_UnorderedOrLessOrEqual : ConditionFlag.F_LessOrEqual;
+                        case GE:
+                            return unorderedIsTrue ? ConditionFlag.F_UnorderedGreaterOrEqual : ConditionFlag.F_GreaterOrEqual;
+                        case GT:
+                            return unorderedIsTrue ? ConditionFlag.F_UnorderedOrGreater : ConditionFlag.F_Greater;
+                    }
+                    throw GraalInternalError.shouldNotReachHere("Unknown condition: " + cond); // unsigned conditions (BT/BE/AE/AT) end up here for FP registers
+            }
+            throw GraalInternalError.shouldNotReachHere("Unknown condition flag register " + conditionFlagsRegister);
+        }
     }
 
     public enum RCondition {
@@ -2069,10 +2268,14 @@
         return isSimm(imm, 10);
     }
 
-    public static boolean isSimm11(int imm) {
+    public static boolean isSimm11(long imm) {
         return isSimm(imm, 11);
     }
 
+    public static boolean isSimm11(Constant constant) { // convenience overload: tests the value of constant.asLong()
+        return isSimm11(constant.asLong());
+    }
+
     public static boolean isSimm13(int imm) {
         return isSimm(imm, 13);
     }
@@ -2331,6 +2534,10 @@
         public Bpcc(CC cc, Label label) {
             super(0, ConditionFlag.CarryClear, Op2s.Bp, cc, 1, label);
         }
+
+        public Bpcc(CC cc, boolean annul, boolean predictTaken, Label label) { // CarryClear branch with caller-chosen annul/predict flags
+            super(annul ? 1 : 0, ConditionFlag.CarryClear, Op2s.Bp, cc, predictTaken ? 1 : 0, label); // booleans are passed on as 1/0 ints
+        }
     }
 
     public static class Bpcs extends Fmt00c {
@@ -2342,6 +2549,10 @@
         public Bpcs(CC cc, Label label) {
             super(0, ConditionFlag.CarrySet, Op2s.Bp, cc, 1, label);
         }
+
+        public Bpcs(CC cc, boolean annul, boolean predictTaken, Label label) { // CarrySet branch with caller-chosen annul/predict flags
+            super(annul ? 1 : 0, ConditionFlag.CarrySet, Op2s.Bp, cc, predictTaken ? 1 : 0, label); // booleans are passed on as 1/0 ints
+        }
     }
 
     public static class Bpe extends Fmt00c {
@@ -2372,6 +2583,10 @@
         public Bpg(CC cc, Label label) {
             super(0, ConditionFlag.Greater, Op2s.Bp, cc, 1, label);
         }
+
+        public Bpg(CC cc, boolean annul, boolean predictTaken, Label label) { // Greater branch with caller-chosen annul/predict flags
+            super(annul ? 1 : 0, ConditionFlag.Greater, Op2s.Bp, cc, predictTaken ? 1 : 0, label); // booleans are passed on as 1/0 ints
+        }
     }
 
     public static class Bpge extends Fmt00c {
@@ -2383,6 +2598,10 @@
         public Bpge(CC cc, Label label) {
             super(0, ConditionFlag.GreaterEqual, Op2s.Bp, cc, 1, label);
         }
+
+        public Bpge(CC cc, boolean annul, boolean predictTaken, Label label) { // GreaterEqual branch with caller-chosen annul/predict flags
+            super(annul ? 1 : 0, ConditionFlag.GreaterEqual, Op2s.Bp, cc, predictTaken ? 1 : 0, label); // booleans are passed on as 1/0 ints
+        }
     }
 
     public static class Bpgu extends Fmt00c {
@@ -2394,6 +2613,10 @@
         public Bpgu(CC cc, Label label) {
             super(0, ConditionFlag.GreaterUnsigned, Op2s.Bp, cc, 1, label);
         }
+
+        public Bpgu(CC cc, boolean annul, boolean predictTaken, Label label) { // GreaterUnsigned branch with caller-chosen annul/predict flags
+            super(annul ? 1 : 0, ConditionFlag.GreaterUnsigned, Op2s.Bp, cc, predictTaken ? 1 : 0, label); // booleans are passed on as 1/0 ints
+        }
     }
 
     public static class Bpl extends Fmt00c {
@@ -2435,6 +2658,10 @@
         public Bpleu(CC cc, Label label) {
             super(0, ConditionFlag.LessEqualUnsigned, Op2s.Bp, cc, 1, label);
         }
+
+        public Bpleu(CC cc, boolean annul, boolean predictTaken, Label label) { // LessEqualUnsigned branch with caller-chosen annul/predict flags
+            super(annul ? 1 : 0, ConditionFlag.LessEqualUnsigned, Op2s.Bp, cc, predictTaken ? 1 : 0, label); // booleans are passed on as 1/0 ints
+        }
     }
 
     public static class Bpn extends Fmt00c {
@@ -3627,6 +3854,20 @@
         }
     }
 
+    public static class Fmovscc extends Fmt10d { // Fmt10d instruction built with op3 Fpop2 and opf Fmovscc
+
+        public Fmovscc(ConditionFlag cond, CC cca, Register src2, Register dst) {
+            super(Op3s.Fpop2, Opfs.Fmovscc, cond, cca, src2, dst); // src2 is passed as rs2, dst as rd
+        }
+    }
+
+    public static class Fmovdcc extends Fmt10d { // Fmt10d instruction built with op3 Fpop2 and opf Fmovdcc
+
+        public Fmovdcc(ConditionFlag cond, CC cca, Register src2, Register dst) {
+            super(Op3s.Fpop2, Opfs.Fmovdcc, cond, cca, src2, dst); // src2 is passed as rs2, dst as rd
+        }
+    }
+
     public static class Movcc extends Fmt10c {
 
         public Movcc(ConditionFlag cond, CC cca, Register src2, Register dst) {
--- a/graal/com.oracle.graal.asm.sparc/src/com/oracle/graal/asm/sparc/SPARCMacroAssembler.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.asm.sparc/src/com/oracle/graal/asm/sparc/SPARCMacroAssembler.java	Mon Sep 22 09:21:29 2014 -0700
@@ -96,6 +96,10 @@
         public Bpgeu(CC cc, Label label) {
             super(cc, label);
         }
+
+        public Bpgeu(CC cc, boolean annul, boolean predictTaken, Label label) { // forwards the annul/predict flags unchanged to the superclass
+            super(cc, annul, predictTaken, label);
+        }
     }
 
     public static class Bplu extends Bpcs {
@@ -107,6 +111,10 @@
         public Bplu(CC cc, Label label) {
             super(cc, label);
         }
+
+        public Bplu(CC cc, boolean annul, boolean predictTaken, Label label) { // forwards the annul/predict flags unchanged to Bpcs
+            super(cc, annul, predictTaken, label);
+        }
     }
 
     public static class Bset extends Or {
@@ -345,6 +353,14 @@
         private long value;
         private Register dst;
         private boolean forceRelocatable;
+        private boolean delayed = false;
+        private AssemblerEmittable delayedInstruction;
+
+        public Sethix(long value, Register dst, boolean forceRelocatable, boolean delayed) { // 3-arg form plus the delayed flag
+            this(value, dst, forceRelocatable);
+            assert !(forceRelocatable && delayed) : "Relocatable sethix cannot be delayed";
+            this.delayed = delayed; // when set, emitInstruction() holds back the most recent instruction
+        }
 
         public Sethix(long value, Register dst, boolean forceRelocatable) {
             this.value = value;
@@ -356,6 +372,17 @@
             this(value, dst, false);
         }
 
+        private void emitInstruction(AssemblerEmittable insn, SPARCMacroAssembler masm) {
+            if (!delayed) {
+                insn.emit(masm); // immediate mode: write straight to the assembler
+                return;
+            }
+            if (delayedInstruction != null) {
+                delayedInstruction.emit(masm); // flush the instruction held back by the previous call
+            }
+            delayedInstruction = insn; // hold this one back until the next call or emitDelayed()
+        }
+
         public void emit(SPARCMacroAssembler masm) {
             int hi = (int) (value >> 32);
             int lo = (int) (value & ~0);
@@ -364,43 +391,54 @@
             final int startPc = masm.position();
 
             if (hi == 0 && lo >= 0) {
-                new Sethi(hi22(lo), dst).emit(masm);
+                emitInstruction(new Sethi(hi22(lo), dst), masm);
             } else if (hi == -1) {
-                new Sethi(hi22(~lo), dst).emit(masm);
-                new Xor(dst, ~lo10(~0), dst).emit(masm);
+                emitInstruction(new Sethi(hi22(~lo), dst), masm);
+                emitInstruction(new Xor(dst, ~lo10(~0), dst), masm);
             } else {
                 int shiftcnt = 0;
-                new Sethi(hi22(hi), dst).emit(masm);
-                if ((hi & 0x3ff) != 0) {                                       // Any bits?
-                    new Or(dst, hi & 0x3ff, dst).emit(masm);                   // msb 32-bits are now in lsb 32
+                emitInstruction(new Sethi(hi22(hi), dst), masm);
+                if ((hi & 0x3ff) != 0) {                                  // Any bits?
+                    // msb 32-bits are now in lsb 32
+                    emitInstruction(new Or(dst, hi & 0x3ff, dst), masm);
                 }
-                if ((lo & 0xFFFFFC00) != 0) {                                  // done?
-                    if (((lo >> 20) & 0xfff) != 0) {                           // Any bits set?
-                        new Sllx(dst, 12, dst).emit(masm);                     // Make room for next 12 bits
-                        new Or(dst, (lo >> 20) & 0xfff, dst).emit(masm);       // Or in next 12
-                        shiftcnt = 0;                                          // We already shifted
+                if ((lo & 0xFFFFFC00) != 0) {                             // done?
+                    if (((lo >> 20) & 0xfff) != 0) {                      // Any bits set?
+                        // Make room for next 12 bits
+                        emitInstruction(new Sllx(dst, 12, dst), masm);
+                        // Or in next 12
+                        emitInstruction(new Or(dst, (lo >> 20) & 0xfff, dst), masm);
+                        shiftcnt = 0;                                     // We already shifted
                     } else {
                         shiftcnt = 12;
                     }
                     if (((lo >> 10) & 0x3ff) != 0) {
-                        new Sllx(dst, shiftcnt + 10, dst).emit(masm);          // Make room for last 10 bits
-                        new Or(dst, (lo >> 10) & 0x3ff, dst).emit(masm);       // Or in next 10
+                        // Make room for last 10 bits
+                        emitInstruction(new Sllx(dst, shiftcnt + 10, dst), masm);
+                        // Or in next 10
+                        emitInstruction(new Or(dst, (lo >> 10) & 0x3ff, dst), masm);
                         shiftcnt = 0;
                     } else {
                         shiftcnt = 10;
                     }
-                    new Sllx(dst, shiftcnt + 10, dst).emit(masm);              // Shift leaving disp field 0'd
+                    // Shift leaving disp field 0'd
+                    emitInstruction(new Sllx(dst, shiftcnt + 10, dst), masm);
                 } else {
-                    new Sllx(dst, 32, dst).emit(masm);
+                    emitInstruction(new Sllx(dst, 32, dst), masm);
                 }
             }
             // Pad out the instruction sequence so it can be patched later.
             if (forceRelocatable) {
                 while (masm.position() < (startPc + (INSTRUCTION_SIZE * 4))) {
-                    new Nop().emit(masm);
+                    emitInstruction(new Nop(), masm);
                 }
             }
         }
+
+        public void emitDelayed(SPARCMacroAssembler masm) { // writes the instruction still held back by emitInstruction()
+            assert delayedInstruction != null;
+            delayedInstruction.emit(masm); // NOTE(review): the field is not cleared here, so a second call would emit it again
+        }
     }
 
     public static class Setx {
@@ -408,11 +446,21 @@
         private long value;
         private Register dst;
         private boolean forceRelocatable;
+        private boolean delayed = false;
+        private boolean delayedFirstEmitted = false;
+        private Sethix sethix;
+        private AssemblerEmittable delayedAdd;
 
-        public Setx(long value, Register dst, boolean forceRelocatable) {
+        public Setx(long value, Register dst, boolean forceRelocatable, boolean delayed) {
+            assert !(forceRelocatable && delayed) : "Cannot use relocatable setx as delayable";
             this.value = value;
             this.dst = dst;
             this.forceRelocatable = forceRelocatable;
+            this.delayed = delayed;
+        }
+
+        public Setx(long value, Register dst, boolean forceRelocatable) {
+            this(value, dst, forceRelocatable, false);
         }
 
         public Setx(long value, Register dst) {
@@ -420,12 +468,46 @@
         }
 
         public void emit(SPARCMacroAssembler masm) {
-            new Sethix(value, dst, forceRelocatable).emit(masm);
+            assert !delayed;
+            doEmit(masm);
+        }
+
+        private void doEmit(SPARCMacroAssembler masm) {
+            sethix = new Sethix(value, dst, forceRelocatable, delayed);
+            sethix.emit(masm);
             int lo = (int) (value & ~0);
             if (lo10(lo) != 0 || forceRelocatable) {
-                new Add(dst, lo10(lo), dst).emit(masm);
+                Add add = new Add(dst, lo10(lo), dst);
+                if (delayed) {
+                    sethix.emitDelayed(masm);
+                    sethix = null;
+                    delayedAdd = add;
+                } else {
+                    sethix = null;
+                    add.emit(masm);
+                }
             }
         }
+
+        public void emitFirstPartOfDelayed(SPARCMacroAssembler masm) { // runs doEmit() in delayed mode, which leaves one instruction unemitted
+            assert !forceRelocatable : "Cannot use delayed mode with relocatable setx";
+            assert delayed : "Can only be used in delayed mode";
+            doEmit(masm);
+            delayedFirstEmitted = true; // checked by emitSecondPartOfDelayed()
+        }
+
+        public void emitSecondPartOfDelayed(SPARCMacroAssembler masm) { // emits the single instruction held back by the first part
+            assert !forceRelocatable : "Cannot use delayed mode with relocatable setx";
+            assert delayed : "Can only be used in delayed mode";
+            assert delayedFirstEmitted : "First part has not been emitted so far.";
+            assert delayedAdd == null && sethix != null || delayedAdd != null && sethix == null : "Either add or sethix must be set";
+            if (delayedAdd != null) { // doEmit() needed a trailing Add, so that is the pending instruction
+                delayedAdd.emit(masm);
+            } else { // otherwise the pending instruction is still inside the Sethix helper
+                sethix.emitDelayed(masm);
+            }
+
+        }
     }
 
     public static class Signx extends Sra {
--- a/graal/com.oracle.graal.compiler.sparc/src/com/oracle/graal/compiler/sparc/SPARCLIRGenerator.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.compiler.sparc/src/com/oracle/graal/compiler/sparc/SPARCLIRGenerator.java	Mon Sep 22 09:21:29 2014 -0700
@@ -32,12 +32,12 @@
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.asm.sparc.*;
+import com.oracle.graal.asm.sparc.SPARCAssembler.CC;
 import com.oracle.graal.asm.sparc.SPARCAssembler.ConditionFlag;
 import com.oracle.graal.compiler.common.*;
 import com.oracle.graal.compiler.common.calc.*;
 import com.oracle.graal.compiler.common.type.*;
 import com.oracle.graal.lir.*;
-import com.oracle.graal.lir.StandardOp.JumpOp;
 import com.oracle.graal.lir.gen.*;
 import com.oracle.graal.lir.sparc.*;
 import com.oracle.graal.lir.sparc.SPARCArithmetic.BinaryRegConst;
@@ -48,7 +48,6 @@
 import com.oracle.graal.lir.sparc.SPARCCompare.CompareOp;
 import com.oracle.graal.lir.sparc.SPARCControlFlow.BranchOp;
 import com.oracle.graal.lir.sparc.SPARCControlFlow.CondMoveOp;
-import com.oracle.graal.lir.sparc.SPARCControlFlow.FloatCondMoveOp;
 import com.oracle.graal.lir.sparc.SPARCControlFlow.ReturnOp;
 import com.oracle.graal.lir.sparc.SPARCControlFlow.StrategySwitchOp;
 import com.oracle.graal.lir.sparc.SPARCControlFlow.TableSwitchOp;
@@ -227,29 +226,49 @@
 
     @Override
     public void emitJump(LabelRef label) {
-        append(new JumpOp(label));
+        append(new SPARCJumpOp(label));
     }
 
     @Override
-    public void emitCompareBranch(PlatformKind cmpKind, Value left, Value right, Condition cond, boolean unorderedIsTrue, LabelRef trueDestination, LabelRef falseDestination,
+    public void emitCompareBranch(PlatformKind cmpKind, Value x, Value y, Condition cond, boolean unorderedIsTrue, LabelRef trueDestination, LabelRef falseDestination,
                     double trueDestinationProbability) {
-        boolean mirrored = emitCompare(cmpKind, left, right);
-        Condition finalCondition = mirrored ? cond.mirror() : cond;
-
+        Variable left;
+        Value right;
+        Condition actualCondition = null;
+        if (isConstant(x)) {
+            left = load(y);
+            right = loadNonConst(x);
+            actualCondition = cond.mirror();
+        } else {
+            left = load(x);
+            right = loadNonConst(y);
+            actualCondition = cond;
+        }
+        SPARCCompare opcode = null;
         Kind kind = left.getKind().getStackKind();
         switch (kind) {
-            case Int:
+            case Object:
+                opcode = ACMP;
+                break;
             case Long:
-            case Object:
-                append(new BranchOp(finalCondition, trueDestination, falseDestination, kind));
+                opcode = LCMP;
+                break;
+            case Int:
+            case Short:
+            case Char:
+            case Byte:
+                opcode = ICMP;
                 break;
             case Float:
+                opcode = FCMP;
+                break;
             case Double:
-                append(new BranchOp(finalCondition, trueDestination, falseDestination, kind, unorderedIsTrue));
+                opcode = DCMP;
                 break;
             default:
-                throw GraalInternalError.shouldNotReachHere("" + left.getKind());
+                GraalInternalError.shouldNotReachHere(kind.toString());
         }
+        append(new SPARCControlFlow.CompareBranchOp(opcode, left, right, actualCondition, trueDestination, falseDestination, kind, unorderedIsTrue, trueDestinationProbability));
     }
 
     @Override
@@ -272,26 +291,52 @@
         }
     }
 
+    private Value loadSimm11(Value value) {
+        if (isConstant(value)) {
+            Constant c = asConstant(value);
+            if (c.isNull() || SPARCAssembler.isSimm11(c)) {
+                return value;
+            } else {
+                return load(c);
+            }
+        }
+        return emitMove(value);
+    }
+
     @Override
     public Variable emitConditionalMove(PlatformKind cmpKind, Value left, Value right, Condition cond, boolean unorderedIsTrue, Value trueValue, Value falseValue) {
         boolean mirrored = emitCompare(cmpKind, left, right);
-        Condition finalCondition = mirrored ? cond.mirror() : cond;
-
-        Variable result = newVariable(trueValue.getLIRKind());
-        Kind kind = left.getKind().getStackKind();
-        switch (kind) {
+        CC conditionFlags;
+        Value actualTrueValue = trueValue;
+        Value actualFalseValue = falseValue;
+        switch ((Kind) left.getLIRKind().getPlatformKind()) {
+            case Byte:
+            case Short:
+            case Char:
             case Int:
-            case Long:
+                conditionFlags = CC.Icc;
+                actualTrueValue = loadSimm11(trueValue);
+                actualFalseValue = loadSimm11(falseValue);
+                break;
             case Object:
-                append(new CondMoveOp(kind, result, finalCondition, load(trueValue), loadNonConst(falseValue)));
+            case Long:
+                conditionFlags = CC.Xcc;
+                actualTrueValue = loadSimm11(trueValue);
+                actualFalseValue = loadSimm11(falseValue);
                 break;
             case Float:
             case Double:
-                append(new FloatCondMoveOp(kind, result, finalCondition, unorderedIsTrue, load(trueValue), load(falseValue)));
+                conditionFlags = CC.Fcc0;
+                actualTrueValue = load(trueValue); // Floats cannot be immediate at all
+                actualFalseValue = load(falseValue);
                 break;
             default:
-                throw GraalInternalError.shouldNotReachHere("" + left.getKind());
+                throw GraalInternalError.shouldNotReachHere();
         }
+        Variable result = newVariable(trueValue.getLIRKind());
+        ConditionFlag finalCondition = ConditionFlag.fromCondtition(conditionFlags, mirrored ? cond.mirror() : cond, unorderedIsTrue);
+        Kind kind = result.getKind().getStackKind();
+        append(new CondMoveOp(kind, result, conditionFlags, finalCondition, actualTrueValue, actualFalseValue));
         return result;
     }
 
@@ -351,7 +396,23 @@
         emitIntegerTest(left, right);
         Variable result = newVariable(trueValue.getLIRKind());
         Kind kind = left.getKind().getStackKind();
-        append(new CondMoveOp(kind, result, Condition.EQ, load(trueValue), loadNonConst(falseValue)));
+        CC conditionCode;
+        switch (kind) {
+            case Object:
+            case Long:
+                conditionCode = CC.Xcc;
+                break;
+            case Int:
+            case Short:
+            case Char:
+            case Byte:
+                conditionCode = CC.Icc;
+                break;
+            default:
+                throw GraalInternalError.shouldNotReachHere();
+        }
+        ConditionFlag flag = ConditionFlag.fromCondtition(conditionCode, Condition.EQ, false);
+        append(new CondMoveOp(kind, result, conditionCode, flag, loadSimm11(trueValue), loadSimm11(falseValue)));
         return result;
     }
 
--- a/graal/com.oracle.graal.compiler.sparc/src/com/oracle/graal/compiler/sparc/SPARCNodeLIRBuilder.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.compiler.sparc/src/com/oracle/graal/compiler/sparc/SPARCNodeLIRBuilder.java	Mon Sep 22 09:21:29 2014 -0700
@@ -26,6 +26,8 @@
 import com.oracle.graal.api.code.*;
 import com.oracle.graal.api.meta.*;
 import com.oracle.graal.compiler.gen.*;
+import com.oracle.graal.lir.*;
+import com.oracle.graal.lir.StandardOp.*;
 import com.oracle.graal.lir.gen.*;
 import com.oracle.graal.lir.sparc.*;
 import com.oracle.graal.nodes.*;
@@ -55,4 +57,9 @@
         Value[] parameters = visitInvokeArguments(gen.getResult().getFrameMap().registerConfig.getCallingConvention(CallingConvention.Type.JavaCall, null, sig, gen.target(), false), node.arguments());
         append(new SPARCBreakpointOp(parameters));
     }
+
+    @Override
+    protected JumpOp newJumpOp(LabelRef ref) {
+        return new SPARCJumpOp(ref);
+    }
 }
--- a/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/gen/NodeLIRBuilder.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.compiler/src/com/oracle/graal/compiler/gen/NodeLIRBuilder.java	Mon Sep 22 09:21:29 2014 -0700
@@ -369,7 +369,11 @@
         }
         resolver.dispose();
 
-        append(new JumpOp(getLIRBlock(merge)));
+        append(newJumpOp(getLIRBlock(merge)));
+    }
+
+    protected JumpOp newJumpOp(LabelRef ref) {
+        return new JumpOp(ref);
     }
 
     protected LIRKind getPhiKind(PhiNode phi) {
--- a/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotBackend.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotBackend.java	Mon Sep 22 09:21:29 2014 -0700
@@ -43,6 +43,7 @@
 import com.oracle.graal.asm.sparc.SPARCMacroAssembler.Nop;
 import com.oracle.graal.asm.sparc.SPARCMacroAssembler.RestoreWindow;
 import com.oracle.graal.asm.sparc.SPARCMacroAssembler.Setx;
+import com.oracle.graal.compiler.common.cfg.*;
 import com.oracle.graal.hotspot.*;
 import com.oracle.graal.hotspot.meta.HotSpotCodeCacheProvider.MarkId;
 import com.oracle.graal.hotspot.meta.*;
@@ -52,6 +53,7 @@
 import com.oracle.graal.lir.asm.*;
 import com.oracle.graal.lir.gen.*;
 import com.oracle.graal.lir.sparc.*;
+import com.oracle.graal.lir.sparc.SPARCCall.*;
 import com.oracle.graal.nodes.*;
 import com.oracle.graal.nodes.spi.*;
 import com.oracle.graal.sparc.*;
@@ -207,6 +209,7 @@
 
     @Override
     public void emitCode(CompilationResultBuilder crb, LIR lir, ResolvedJavaMethod installedCodeOwner) {
+        fixupDelayedInstructions(lir);
         SPARCMacroAssembler masm = (SPARCMacroAssembler) crb.asm;
         FrameMap frameMap = crb.frameMap;
         RegisterConfig regConfig = frameMap.registerConfig;
@@ -261,4 +264,82 @@
         }
     }
 
+    private static void fixupDelayedInstructions(LIR l) {
+        for (AbstractBlock<?> b : l.codeEmittingOrder()) {
+            fixupDelayedInstructions(l, b);
+        }
+    }
+
+    private static void fixupDelayedInstructions(LIR l, AbstractBlock<?> block) {
+        TailDelayedLIRInstruction lastDelayable = null;
+        for (LIRInstruction inst : l.getLIRforBlock(block)) {
+            if (lastDelayable != null && inst instanceof DelaySlotHolder) {
+                if (isDelayable(inst, (LIRInstruction) lastDelayable)) {
+                    lastDelayable.setDelaySlotHolder((DelaySlotHolder) inst);
+                }
+                lastDelayable = null; // We must not pull over another delay slot holder.
+            } else if (inst instanceof TailDelayedLIRInstruction) {
+                lastDelayable = (TailDelayedLIRInstruction) inst;
+            } else {
+                lastDelayable = null;
+            }
+        }
+    }
+
+    public static boolean isDelayable(final LIRInstruction delaySlotHolder, final LIRInstruction other) {
+        final Set<Value> delaySlotHolderInputs = new HashSet<>(2);
+        final Set<LIRFrameState> otherFrameStates = new HashSet<>(2);
+        other.forEachState(new InstructionStateProcedure() {
+            @Override
+            protected void doState(LIRInstruction instruction, LIRFrameState state) {
+                otherFrameStates.add(state);
+            }
+        });
+        int frameStatesBefore = otherFrameStates.size();
+        delaySlotHolder.forEachState(new InstructionStateProcedure() {
+            @Override
+            protected void doState(LIRInstruction instruction, LIRFrameState state) {
+                otherFrameStates.add(state);
+            }
+        });
+        if (frameStatesBefore != otherFrameStates.size() && otherFrameStates.size() >= 2) {
+            // both have framestates, the instruction is not delayable
+            return false;
+        }
+        // Direct calls do not have dependencies on preceding data
+        if (delaySlotHolder instanceof DirectCallOp) {
+            return true;
+        }
+        delaySlotHolder.visitEachInput(new InstructionValueConsumer() {
+            @Override
+            protected void visitValue(LIRInstruction instruction, Value value) {
+                delaySlotHolderInputs.add(value);
+            }
+        });
+        delaySlotHolder.visitEachTemp(new InstructionValueConsumer() {
+            @Override
+            protected void visitValue(LIRInstruction instruction, Value value) {
+                delaySlotHolderInputs.add(value);
+            }
+        });
+        if (delaySlotHolderInputs.size() == 0) {
+            return true;
+        }
+        final Set<Value> otherOutputs = new HashSet<>();
+        other.visitEachOutput(new InstructionValueConsumer() {
+            @Override
+            protected void visitValue(LIRInstruction instruction, Value value) {
+                otherOutputs.add(value);
+            }
+        });
+        other.visitEachTemp(new InstructionValueConsumer() {
+            @Override
+            protected void visitValue(LIRInstruction instruction, Value value) {
+                otherOutputs.add(value);
+            }
+        });
+        int sizeBefore = otherOutputs.size();
+        otherOutputs.removeAll(delaySlotHolderInputs);
+        return otherOutputs.size() == sizeBefore;
+    }
 }
--- a/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotCRuntimeCallEpilogueOp.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotCRuntimeCallEpilogueOp.java	Mon Sep 22 09:21:29 2014 -0700
@@ -55,7 +55,7 @@
     public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
 
         // Restore the thread register when coming back from the runtime.
-        SPARCMove.move(crb, masm, thread.asValue(LIRKind.value(Kind.Long)), threadTemp);
+        SPARCMove.move(crb, masm, thread.asValue(LIRKind.value(Kind.Long)), threadTemp, DelaySlotHolder.DUMMY);
 
         // Reset last Java frame, last Java PC and flags.
         new Stx(g0, new SPARCAddress(thread, threadLastJavaSpOffset)).emit(masm);
--- a/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotCRuntimeCallPrologueOp.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotSpotCRuntimeCallPrologueOp.java	Mon Sep 22 09:21:29 2014 -0700
@@ -35,12 +35,13 @@
 import com.oracle.graal.lir.sparc.*;
 
 @Opcode("CRUNTIME_CALL_PROLOGUE")
-final class SPARCHotSpotCRuntimeCallPrologueOp extends SPARCLIRInstruction {
+final class SPARCHotSpotCRuntimeCallPrologueOp extends SPARCLIRInstruction implements TailDelayedLIRInstruction {
 
     private final int threadLastJavaSpOffset;
     private final Register thread;
     private final Register stackPointer;
     @Def({REG, STACK}) protected Value threadTemp;
+    private DelaySlotHolder delayHolder = DelaySlotHolder.DUMMY;
 
     public SPARCHotSpotCRuntimeCallPrologueOp(int threadLastJavaSpOffset, Register thread, Register stackPointer, Value threadTemp) {
         this.threadLastJavaSpOffset = threadLastJavaSpOffset;
@@ -56,6 +57,10 @@
         new Stx(g4, new SPARCAddress(thread, threadLastJavaSpOffset)).emit(masm);
 
         // Save the thread register when calling out to the runtime.
-        SPARCMove.move(crb, masm, threadTemp, thread.asValue(LIRKind.value(Kind.Long)));
+        SPARCMove.move(crb, masm, threadTemp, thread.asValue(LIRKind.value(Kind.Long)), delayHolder);
+    }
+
+    public void setDelaySlotHolder(DelaySlotHolder holder) {
+        this.delayHolder = holder;
     }
 }
--- a/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotspotDirectStaticCallOp.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotspotDirectStaticCallOp.java	Mon Sep 22 09:21:29 2014 -0700
@@ -46,8 +46,7 @@
     }
 
     @Override
-    public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
+    public void emitCallPrefixCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
         MarkId.recordMark(crb, invokeKind == InvokeKind.Static ? MarkId.INVOKESTATIC : MarkId.INVOKESPECIAL);
-        super.emitCode(crb, masm);
     }
 }
--- a/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotspotDirectVirtualCallOp.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.hotspot.sparc/src/com/oracle/graal/hotspot/sparc/SPARCHotspotDirectVirtualCallOp.java	Mon Sep 22 09:21:29 2014 -0700
@@ -51,12 +51,11 @@
     }
 
     @Override
-    public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
+    public void emitCallPrefixCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
         // The mark for an invocation that uses an inline cache must be placed at the
         // instruction that loads the Klass from the inline cache.
         MarkId.recordMark(crb, invokeKind == InvokeKind.Virtual ? MarkId.INVOKEVIRTUAL : MarkId.INVOKEINTERFACE);
         Register scratchRegister = g5;
         new Setx(HotSpotGraalRuntime.runtime().getConfig().nonOopBits, scratchRegister, true).emit(masm);
-        super.emitCode(crb, masm);
     }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/DelaySlotHolder.java	Mon Sep 22 09:21:29 2014 -0700
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.lir.sparc;
+
+import com.oracle.graal.asm.sparc.*;
+import com.oracle.graal.lir.*;
+import com.oracle.graal.lir.asm.*;
+
+/**
+ * This interface can be used for {@link LIRInstruction}s which may provide a delay slot. If a delay
+ * slot for this LIRInstruction is requested, the requester just calls the method
+ * {@link #emitForDelay(CompilationResultBuilder, SPARCMacroAssembler)}.
+ *
+ * @see TailDelayedLIRInstruction
+ */
+public interface DelaySlotHolder {
+
+    DelaySlotHolder DUMMY = new DelaySlotHolder() {
+        public void emitForDelay(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
+            // do nothing
+        }
+
+        @Override
+        public String toString() {
+            return "null";
+        }
+    };
+
+    public void emitForDelay(CompilationResultBuilder crb, SPARCMacroAssembler masm);
+
+}
--- a/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCArithmetic.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCArithmetic.java	Mon Sep 22 09:21:29 2014 -0700
@@ -51,11 +51,12 @@
     /**
      * Unary operation with separate source and destination operand.
      */
-    public static class Unary2Op extends SPARCLIRInstruction {
+    public static class Unary2Op extends SPARCLIRInstruction implements TailDelayedLIRInstruction {
 
         @Opcode private final SPARCArithmetic opcode;
         @Def({REG}) protected AllocatableValue result;
         @Use({REG}) protected AllocatableValue x;
+        private DelaySlotHolder delaySlotLir = DelaySlotHolder.DUMMY;
 
         public Unary2Op(SPARCArithmetic opcode, AllocatableValue result, AllocatableValue x) {
             this.opcode = opcode;
@@ -65,7 +66,11 @@
 
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
-            emitUnary(crb, masm, opcode, result, x, null);
+            emitUnary(crb, masm, opcode, result, x, null, delaySlotLir);
+        }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotLir = holder;
         }
     }
 
@@ -73,13 +78,14 @@
      * Binary operation with two operands. The first source operand is combined with the
      * destination. The second source operand must be a register.
      */
-    public static class BinaryRegReg extends SPARCLIRInstruction {
+    public static class BinaryRegReg extends SPARCLIRInstruction implements TailDelayedLIRInstruction {
 
         @Opcode private final SPARCArithmetic opcode;
         @Def({REG}) protected Value result;
         @Use({REG}) protected Value x;
         @Alive({REG}) protected Value y;
         @State LIRFrameState state;
+        private DelaySlotHolder delaySlotLir = DelaySlotHolder.DUMMY;
 
         public BinaryRegReg(SPARCArithmetic opcode, Value result, Value x, Value y) {
             this(opcode, result, x, y, null);
@@ -95,7 +101,7 @@
 
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
-            emitRegReg(crb, masm, opcode, result, x, y, state);
+            emitRegReg(crb, masm, opcode, result, x, y, state, delaySlotLir);
         }
 
         @Override
@@ -103,18 +109,23 @@
             super.verify();
             verifyKind(opcode, result, x, y);
         }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotLir = holder;
+        }
     }
 
     /**
      * Binary operation with single source/destination operand and one constant.
      */
-    public static class BinaryRegConst extends SPARCLIRInstruction {
+    public static class BinaryRegConst extends SPARCLIRInstruction implements TailDelayedLIRInstruction {
 
         @Opcode private final SPARCArithmetic opcode;
         @Def({REG}) protected AllocatableValue result;
         @Use({REG}) protected Value x;
         @State protected LIRFrameState state;
         protected Constant y;
+        private DelaySlotHolder delaySlotLir = DelaySlotHolder.DUMMY;
 
         public BinaryRegConst(SPARCArithmetic opcode, AllocatableValue result, Value x, Constant y) {
             this(opcode, result, x, y, null);
@@ -130,7 +141,7 @@
 
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
-            emitRegConstant(crb, masm, opcode, result, x, y, null);
+            emitRegConstant(crb, masm, opcode, result, x, y, null, delaySlotLir);
         }
 
         @Override
@@ -138,12 +149,16 @@
             super.verify();
             verifyKind(opcode, result, x, y);
         }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotLir = holder;
+        }
     }
 
     /**
      * Special LIR instruction as it requires a bunch of scratch registers.
      */
-    public static class RemOp extends SPARCLIRInstruction {
+    public static class RemOp extends SPARCLIRInstruction implements TailDelayedLIRInstruction {
 
         @Opcode private final SPARCArithmetic opcode;
         @Def({REG}) protected Value result;
@@ -152,6 +167,7 @@
         @Temp({REG}) protected Value scratch1;
         @Temp({REG}) protected Value scratch2;
         @State protected LIRFrameState state;
+        private DelaySlotHolder delaySlotLir = DelaySlotHolder.DUMMY;
 
         public RemOp(SPARCArithmetic opcode, Value result, Value x, Value y, LIRFrameState state, LIRGeneratorTool gen) {
             this.opcode = opcode;
@@ -165,7 +181,7 @@
 
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
-            emitRem(crb, masm, opcode, result, x, y, scratch1, scratch2, state);
+            emitRem(crb, masm, opcode, result, x, y, scratch1, scratch2, state, delaySlotLir);
         }
 
         @Override
@@ -173,12 +189,18 @@
             super.verify();
             verifyKind(opcode, result, x, y);
         }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotLir = holder;
+        }
     }
 
-    private static void emitRegConstant(CompilationResultBuilder crb, SPARCMacroAssembler masm, SPARCArithmetic opcode, Value dst, Value src1, Constant src2, LIRFrameState info) {
+    private static void emitRegConstant(CompilationResultBuilder crb, SPARCMacroAssembler masm, SPARCArithmetic opcode, Value dst, Value src1, Constant src2, LIRFrameState info,
+                    DelaySlotHolder delaySlotLir) {
         assert isSimm13(crb.asIntConst(src2)) : src2;
         int constant = crb.asIntConst(src2);
         int exceptionOffset = -1;
+        delaySlotLir.emitForDelay(crb, masm);
         switch (opcode) {
             case IADD:
                 new Add(asIntReg(src1), constant, asIntReg(dst)).emit(masm);
@@ -264,94 +286,119 @@
         }
     }
 
-    public static void emitRegReg(CompilationResultBuilder crb, SPARCMacroAssembler masm, SPARCArithmetic opcode, Value dst, Value src1, Value src2, LIRFrameState info) {
+    public static void emitRegReg(CompilationResultBuilder crb, SPARCMacroAssembler masm, SPARCArithmetic opcode, Value dst, Value src1, Value src2, LIRFrameState info, DelaySlotHolder delaySlotLir) {
         int exceptionOffset = -1;
         assert !isConstant(src1) : src1;
         assert !isConstant(src2) : src2;
         switch (opcode) {
             case IADD:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Add(asIntReg(src1), asIntReg(src2), asIntReg(dst)).emit(masm);
                 break;
             case ISUB:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Sub(asIntReg(src1), asIntReg(src2), asIntReg(dst)).emit(masm);
                 break;
             case IMUL:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Mulx(asIntReg(src1), asIntReg(src2), asIntReg(dst)).emit(masm);
                 break;
             case IDIV:
                 new Signx(asIntReg(src1), asIntReg(src1)).emit(masm);
                 new Signx(asIntReg(src2), asIntReg(src2)).emit(masm);
+                delaySlotLir.emitForDelay(crb, masm);
                 exceptionOffset = masm.position();
                 new Sdivx(asIntReg(src1), asIntReg(src2), asIntReg(dst)).emit(masm);
                 break;
             case IUDIV:
                 new Signx(asIntReg(src1), asIntReg(src1)).emit(masm);
                 new Signx(asIntReg(src2), asIntReg(src2)).emit(masm);
+                delaySlotLir.emitForDelay(crb, masm);
                 exceptionOffset = masm.position();
                 new Udivx(asIntReg(src1), asIntReg(src2), asIntReg(dst)).emit(masm);
                 break;
             case IAND:
+                delaySlotLir.emitForDelay(crb, masm);
                 new And(asIntReg(src1), asIntReg(src2), asIntReg(dst)).emit(masm);
                 break;
             case IOR:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Or(asIntReg(src1), asIntReg(src2), asIntReg(dst)).emit(masm);
                 break;
             case IXOR:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Xor(asIntReg(src1), asIntReg(src2), asIntReg(dst)).emit(masm);
                 break;
             case ISHL:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Sll(asIntReg(src1), asIntReg(src2), asIntReg(dst)).emit(masm);
                 break;
             case ISHR:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Sra(asIntReg(src1), asIntReg(src2), asIntReg(dst)).emit(masm);
                 break;
             case IUSHR:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Srl(asIntReg(src1), asIntReg(src2), asIntReg(dst)).emit(masm);
                 break;
             case IREM:
                 throw GraalInternalError.unimplemented();
             case LADD:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Add(asLongReg(src1), asLongReg(src2), asLongReg(dst)).emit(masm);
                 break;
             case LSUB:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Sub(asLongReg(src1), asLongReg(src2), asLongReg(dst)).emit(masm);
                 break;
             case LMUL:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Mulx(asLongReg(src1), asLongReg(src2), asLongReg(dst)).emit(masm);
                 break;
             case LDIV:
+                delaySlotLir.emitForDelay(crb, masm);
                 exceptionOffset = masm.position();
                 new Sdivx(asLongReg(src1), asLongReg(src2), asLongReg(dst)).emit(masm);
                 break;
             case LUDIV:
+                delaySlotLir.emitForDelay(crb, masm);
                 exceptionOffset = masm.position();
                 new Udivx(asLongReg(src1), asLongReg(src2), asLongReg(dst)).emit(masm);
                 break;
             case LAND:
+                delaySlotLir.emitForDelay(crb, masm);
                 new And(asLongReg(src1), asLongReg(src2), asLongReg(dst)).emit(masm);
                 break;
             case LOR:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Or(asLongReg(src1), asLongReg(src2), asLongReg(dst)).emit(masm);
                 break;
             case LXOR:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Xor(asLongReg(src1), asLongReg(src2), asLongReg(dst)).emit(masm);
                 break;
             case LSHL:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Sllx(asLongReg(src1), asIntReg(src2), asLongReg(dst)).emit(masm);
                 break;
             case LSHR:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Srax(asLongReg(src1), asIntReg(src2), asLongReg(dst)).emit(masm);
                 break;
             case LUSHR:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Srlx(asLongReg(src1), asIntReg(src2), asLongReg(dst)).emit(masm);
                 break;
             case FADD:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fadds(asFloatReg(src1), asFloatReg(src2), asFloatReg(dst)).emit(masm);
                 break;
             case FSUB:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fsubs(asFloatReg(src1), asFloatReg(src2), asFloatReg(dst)).emit(masm);
                 break;
             case FMUL:
+                delaySlotLir.emitForDelay(crb, masm);
                 if (dst.getPlatformKind() == Kind.Double) {
                     new Fsmuld(asFloatReg(src1), asFloatReg(src2), asDoubleReg(dst)).emit(masm);
                 } else if (dst.getPlatformKind() == Kind.Float) {
@@ -359,27 +406,33 @@
                 }
                 break;
             case FDIV:
+                delaySlotLir.emitForDelay(crb, masm);
                 exceptionOffset = masm.position();
                 new Fdivs(asFloatReg(src1), asFloatReg(src2), asFloatReg(dst)).emit(masm);
                 break;
             case FREM:
                 throw GraalInternalError.unimplemented();
             case DADD:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Faddd(asDoubleReg(src1), asDoubleReg(src2), asDoubleReg(dst)).emit(masm);
                 break;
             case DSUB:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fsubd(asDoubleReg(src1), asDoubleReg(src2), asDoubleReg(dst)).emit(masm);
                 break;
             case DMUL:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fmuld(asDoubleReg(src1), asDoubleReg(src2), asDoubleReg(dst)).emit(masm);
                 break;
             case DDIV:
+                delaySlotLir.emitForDelay(crb, masm);
                 exceptionOffset = masm.position();
                 new Fdivd(asDoubleReg(src1), asDoubleReg(src2), asDoubleReg(dst)).emit(masm);
                 break;
             case DREM:
                 throw GraalInternalError.unimplemented();
             case DAND:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fandd(asDoubleReg(src1), asDoubleReg(src2), asDoubleReg(dst)).emit(masm);
                 break;
             default:
@@ -391,7 +444,8 @@
         }
     }
 
-    public static void emitRem(CompilationResultBuilder crb, SPARCMacroAssembler masm, SPARCArithmetic opcode, Value dst, Value src1, Value src2, Value scratch1, Value scratch2, LIRFrameState info) {
+    public static void emitRem(CompilationResultBuilder crb, SPARCMacroAssembler masm, SPARCArithmetic opcode, Value dst, Value src1, Value src2, Value scratch1, Value scratch2, LIRFrameState info,
+                    DelaySlotHolder delaySlotLir) {
         int exceptionOffset = -1;
         if (!isConstant(src1) && isConstant(src2)) {
             assert isSimm13(crb.asIntConst(src2));
@@ -404,18 +458,21 @@
                     exceptionOffset = masm.position();
                     new Sdivx(asIntReg(dst), crb.asIntConst(src2), asIntReg(scratch1)).emit(masm);
                     new Mulx(asIntReg(scratch1), crb.asIntConst(src2), asIntReg(scratch2)).emit(masm);
+                    delaySlotLir.emitForDelay(crb, masm);
                     new Sub(asIntReg(dst), asIntReg(scratch2), asIntReg(dst)).emit(masm);
                     break;
                 case LREM:
                     exceptionOffset = masm.position();
                     new Sdivx(asLongReg(src1), crb.asIntConst(src2), asLongReg(scratch1)).emit(masm);
                     new Mulx(asLongReg(scratch1), crb.asIntConst(src2), asLongReg(scratch2)).emit(masm);
+                    delaySlotLir.emitForDelay(crb, masm);
                     new Sub(asLongReg(src1), asLongReg(scratch2), asLongReg(dst)).emit(masm);
                     break;
                 case LUREM:
                     exceptionOffset = masm.position();
                     new Udivx(asLongReg(src1), crb.asIntConst(src2), asLongReg(scratch1)).emit(masm);
                     new Mulx(asLongReg(scratch1), crb.asIntConst(src2), asLongReg(scratch2)).emit(masm);
+                    delaySlotLir.emitForDelay(crb, masm);
                     new Sub(asLongReg(src1), asLongReg(scratch2), asLongReg(dst)).emit(masm);
                     break;
                 case IUREM:
@@ -438,6 +495,7 @@
                     exceptionOffset = masm.position();
                     new Sdivx(asLongReg(srcLeft), asLongReg(src2), asLongReg(scratch1)).emit(masm);
                     new Mulx(asLongReg(scratch1), asLongReg(src2), asLongReg(scratch1)).emit(masm);
+                    delaySlotLir.emitForDelay(crb, masm);
                     new Sub(asLongReg(srcLeft), asLongReg(scratch1), asLongReg(dst)).emit(masm);
                     break;
                 case LUREM:
@@ -450,6 +508,7 @@
                     exceptionOffset = masm.position();
                     new Udivx(asLongReg(srcLeft), asLongReg(src2), asLongReg(scratch1)).emit(masm);
                     new Mulx(asLongReg(scratch1), asLongReg(src2), asLongReg(scratch1)).emit(masm);
+                    delaySlotLir.emitForDelay(crb, masm);
                     new Sub(asLongReg(srcLeft), asLongReg(scratch1), asLongReg(dst)).emit(masm);
                     break;
                 case IREM:
@@ -464,6 +523,7 @@
                     exceptionOffset = masm.position();
                     new Sdivx(asIntReg(scratch1), asIntReg(scratch2), asIntReg(dst)).emit(masm);
                     new Mulx(asIntReg(dst), asIntReg(scratch2), asIntReg(dst)).emit(masm);
+                    delaySlotLir.emitForDelay(crb, masm);
                     new Sub(asIntReg(scratch1), asIntReg(dst), asIntReg(dst)).emit(masm);
                     break;
                 case IUREM:
@@ -474,6 +534,7 @@
                     exceptionOffset = masm.position();
                     new Udivx(asIntReg(scratch1), asIntReg(dst), asIntReg(scratch2)).emit(masm);
                     new Mulx(asIntReg(scratch2), asIntReg(dst), asIntReg(dst)).emit(masm);
+                    delaySlotLir.emitForDelay(crb, masm);
                     new Sub(asIntReg(scratch1), asIntReg(dst), asIntReg(dst)).emit(masm);
                     break;
                 default:
@@ -488,60 +549,76 @@
         }
     }
 
-    public static void emitUnary(CompilationResultBuilder crb, SPARCAssembler masm, SPARCArithmetic opcode, Value dst, Value src, LIRFrameState info) {
+    public static void emitUnary(CompilationResultBuilder crb, SPARCMacroAssembler masm, SPARCArithmetic opcode, Value dst, Value src, LIRFrameState info, DelaySlotHolder delaySlotLir) {
         int exceptionOffset = -1;
         Label notOrdered = new Label();
         switch (opcode) {
             case INEG:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Neg(asIntReg(src), asIntReg(dst)).emit(masm);
                 break;
             case LNEG:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Neg(asLongReg(src), asLongReg(dst)).emit(masm);
                 break;
             case INOT:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Not(asIntReg(src), asIntReg(dst)).emit(masm);
                 break;
             case LNOT:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Not(asLongReg(src), asLongReg(dst)).emit(masm);
                 break;
             case D2F:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fdtos(asDoubleReg(src), asFloatReg(dst)).emit(masm);
                 break;
             case L2D:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fxtod(asDoubleReg(src), asDoubleReg(dst)).emit(masm);
                 break;
             case L2F:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fxtos(asDoubleReg(src), asFloatReg(dst)).emit(masm);
                 break;
             case I2D:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fitod(asFloatReg(src), asDoubleReg(dst)).emit(masm);
                 break;
             case I2L:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Signx(asIntReg(src), asLongReg(dst)).emit(masm);
                 break;
             case L2I:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Signx(asLongReg(src), asIntReg(dst)).emit(masm);
                 break;
             case B2L:
                 new Sllx(asIntReg(src), 56, asLongReg(dst)).emit(masm);
+                delaySlotLir.emitForDelay(crb, masm);
                 new Srax(asLongReg(dst), 56, asLongReg(dst)).emit(masm);
                 break;
             case B2I:
                 new Sllx(asIntReg(src), 56, asIntReg(dst)).emit(masm);
+                delaySlotLir.emitForDelay(crb, masm);
                 new Srax(asIntReg(dst), 56, asIntReg(dst)).emit(masm);
                 break;
             case S2L:
                 new Sllx(asIntReg(src), 48, asLongReg(dst)).emit(masm);
+                delaySlotLir.emitForDelay(crb, masm);
                 new Srax(asLongReg(dst), 48, asLongReg(dst)).emit(masm);
                 break;
             case S2I:
                 new Sllx(asIntReg(src), 48, asIntReg(dst)).emit(masm);
+                delaySlotLir.emitForDelay(crb, masm);
                 new Srax(asIntReg(dst), 48, asIntReg(dst)).emit(masm);
                 break;
             case I2F:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fitos(asFloatReg(src), asFloatReg(dst)).emit(masm);
                 break;
             case F2D:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fstod(asFloatReg(src), asDoubleReg(dst)).emit(masm);
                 break;
             case F2L:
@@ -576,9 +653,11 @@
                 masm.bind(notOrdered);
                 break;
             case FNEG:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fnegs(asFloatReg(src), asFloatReg(dst)).emit(masm);
                 break;
             case DNEG:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Fnegd(asDoubleReg(src), asDoubleReg(dst)).emit(masm);
                 break;
             default:
--- a/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCByteSwapOp.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCByteSwapOp.java	Mon Sep 22 09:21:29 2014 -0700
@@ -35,12 +35,13 @@
 import com.oracle.graal.lir.gen.*;
 
 @Opcode("BSWAP")
-public class SPARCByteSwapOp extends SPARCLIRInstruction {
+public class SPARCByteSwapOp extends SPARCLIRInstruction implements TailDelayedLIRInstruction {
 
     @Def({REG, HINT}) protected Value result;
     @Use({REG}) protected Value input;
     @Temp({REG}) protected Value tempIndex;
     @Use({STACK}) protected StackSlot tmpSlot;
+    private DelaySlotHolder delaySlotLir = DelaySlotHolder.DUMMY;
 
     public SPARCByteSwapOp(LIRGeneratorTool tool, Value result, Value input) {
         this.result = result;
@@ -51,13 +52,14 @@
 
     @Override
     public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
-        SPARCMove.move(crb, masm, tmpSlot, input);
+        SPARCMove.move(crb, masm, tmpSlot, input, DelaySlotHolder.DUMMY);
         SPARCAddress addr = (SPARCAddress) crb.asAddress(tmpSlot);
         if (addr.getIndex().equals(Register.None)) {
             Register tempReg = ValueUtil.asLongReg(tempIndex);
             new SPARCMacroAssembler.Setx(addr.getDisplacement(), tempReg, false).emit(masm);
             addr = new SPARCAddress(addr.getBase(), tempReg);
         }
+        delaySlotLir.emitForDelay(crb, masm);
         switch (input.getKind()) {
             case Int:
                 new SPARCAssembler.Lduwa(addr.getBase(), addr.getIndex(), asIntReg(result), Asi.ASI_PRIMARY_LITTLE).emit(masm);
@@ -69,4 +71,8 @@
                 throw GraalInternalError.shouldNotReachHere();
         }
     }
+
+    public void setDelaySlotHolder(DelaySlotHolder holder) {
+        this.delaySlotLir = holder;
+    }
 }
--- a/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCCall.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCCall.java	Mon Sep 22 09:21:29 2014 -0700
@@ -34,6 +34,7 @@
 import com.oracle.graal.asm.sparc.SPARCMacroAssembler.Jmp;
 import com.oracle.graal.asm.sparc.SPARCMacroAssembler.Nop;
 import com.oracle.graal.asm.sparc.SPARCMacroAssembler.Sethix;
+import com.oracle.graal.compiler.common.*;
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.asm.*;
 
@@ -72,15 +73,46 @@
     }
 
     @Opcode("CALL_DIRECT")
-    public static class DirectCallOp extends MethodCallOp {
+    public static class DirectCallOp extends MethodCallOp implements DelaySlotHolder {
+        private boolean emitted = false;
+        private int before = -1;
 
         public DirectCallOp(ResolvedJavaMethod callTarget, Value result, Value[] parameters, Value[] temps, LIRFrameState state) {
             super(callTarget, result, parameters, temps, state);
         }
 
         @Override
-        public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
-            directCall(crb, masm, callTarget, null, true, state);
+        public final void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
+            if (!emitted) {
+                emitCallPrefixCode(crb, masm);
+                directCall(crb, masm, callTarget, null, true, state);
+            } else {
+                int after = masm.position();
+                if (after - before == 4) {
+                    new Nop().emit(masm);
+                } else if (after - before == 8) {
+                    // everything is fine;
+                } else {
+                    GraalInternalError.shouldNotReachHere("" + (after - before));
+                }
+                after = masm.position();
+                crb.recordDirectCall(before, after, callTarget, state);
+                crb.recordExceptionHandlers(after, state);
+                masm.ensureUniquePC();
+            }
+        }
+
+        @SuppressWarnings("unused")
+        public void emitCallPrefixCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
+            // Intentionally empty: by default nothing is emitted ahead of the call instruction.
+        }
+
+        public void emitForDelay(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
+            assert !emitted;
+            emitCallPrefixCode(crb, masm);
+            before = masm.position();
+            new Call(0).emit(masm);
+            emitted = true;
         }
     }
 
--- a/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCCompare.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCCompare.java	Mon Sep 22 09:21:29 2014 -0700
@@ -99,14 +99,14 @@
         } else {
             assert isConstant(y);
             switch (opcode) {
+                case LCMP:
+                    assert isSimm13(crb.asLongConst(y));
+                    new Cmp(asLongReg(x), (int) crb.asLongConst(y)).emit(masm);
+                    break;
                 case ICMP:
                     assert isSimm13(crb.asIntConst(y));
                     new Cmp(asIntReg(x), crb.asIntConst(y)).emit(masm);
                     break;
-                case LCMP:
-                    assert isSimm13(crb.asIntConst(y));
-                    new Cmp(asLongReg(x), crb.asIntConst(y)).emit(masm);
-                    break;
                 case ACMP:
                     if (((Constant) y).isNull()) {
                         new Cmp(asObjectReg(x), 0).emit(masm);
--- a/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCControlFlow.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCControlFlow.java	Mon Sep 22 09:21:29 2014 -0700
@@ -35,7 +35,7 @@
 import com.oracle.graal.compiler.common.*;
 import com.oracle.graal.compiler.common.calc.*;
 import com.oracle.graal.lir.*;
-import com.oracle.graal.lir.StandardOp.BlockEndOp;
+import com.oracle.graal.lir.StandardOp.*;
 import com.oracle.graal.lir.SwitchStrategy.BaseSwitchClosure;
 import com.oracle.graal.lir.asm.*;
 import com.oracle.graal.sparc.*;
@@ -62,6 +62,86 @@
         }
     }
 
+    public static class CompareBranchOp extends SPARCLIRInstruction implements BlockEndOp, DelaySlotHolder {
+
+        private final SPARCCompare opcode;
+        @Use({REG}) protected Value x;
+        @Use({REG, CONST}) protected Value y;
+        protected final Condition condition;
+        protected final LabelRef trueDestination;
+        protected final LabelRef falseDestination;
+        protected final Kind kind;
+        protected final boolean unorderedIsTrue;
+        private boolean emitted = false;
+        private double trueDestinationProbability;
+
+        public CompareBranchOp(SPARCCompare opcode, Value x, Value y, Condition condition, LabelRef trueDestination, LabelRef falseDestination, Kind kind, boolean unorderedIsTrue,
+                        double trueDestinationProbability) {
+            this.opcode = opcode;
+            this.x = x;
+            this.y = y;
+            this.condition = condition;
+            this.trueDestination = trueDestination;
+            this.falseDestination = falseDestination;
+            this.kind = kind;
+            this.unorderedIsTrue = unorderedIsTrue;
+            this.trueDestinationProbability = trueDestinationProbability;
+        }
+
+        @Override
+        public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
+            if (!emitted) {
+                SPARCCompare.emit(crb, masm, opcode, x, y);
+                emitted = emitBranch(crb, masm, true);
+            }
+            assert emitted;
+        }
+
+        public void emitForDelay(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
+            SPARCCompare.emit(crb, masm, opcode, x, y);
+            emitted = emitBranch(crb, masm, false);
+        }
+
+        public boolean emitBranch(CompilationResultBuilder crb, SPARCMacroAssembler masm, boolean withDelayedNop) {
+            Label actualTarget;
+            Condition actualCondition;
+            boolean branchOnUnordered;
+            boolean needJump;
+            boolean predictBranchTaken;
+            if (crb.isSuccessorEdge(trueDestination)) {
+                actualCondition = condition != null ? condition.negate() : null;
+                actualTarget = falseDestination.label();
+                predictBranchTaken = trueDestinationProbability < .5; // false branch needs jump
+                needJump = false;
+                branchOnUnordered = !unorderedIsTrue;
+            } else {
+                actualCondition = condition;
+                actualTarget = trueDestination.label();
+                needJump = !crb.isSuccessorEdge(falseDestination);
+                predictBranchTaken = trueDestinationProbability >= .5;
+                branchOnUnordered = unorderedIsTrue;
+            }
+            if (!withDelayedNop && needJump) {
+                // We cannot make use of the delay slot when we jump in true-case and false-case
+                return false;
+            }
+            if (kind == Kind.Double || kind == Kind.Float) {
+                emitFloatBranch(masm, actualTarget, actualCondition, branchOnUnordered);
+            } else {
+                CC cc = kind == Kind.Int ? CC.Icc : CC.Xcc;
+                assert actualCondition != null;
+                SPARCControlFlow.emitBranch(masm, actualTarget, actualCondition, cc, predictBranchTaken);
+            }
+            if (withDelayedNop) {
+                new Nop().emit(masm);  // delay slot
+            }
+            if (needJump) {
+                masm.jmp(falseDestination.label());
+            }
+            return true; // emitted
+        }
+    }
+
     public static class BranchOp extends SPARCLIRInstruction implements StandardOp.BranchOp {
         // TODO: Conditioncode/flag handling needs to be improved;
         protected final Condition condition;
@@ -121,25 +201,25 @@
             }
             assert kind == Kind.Int || kind == Kind.Long || kind == Kind.Object || kind == Kind.Double || kind == Kind.Float : kind;
             if (kind == Kind.Double || kind == Kind.Float) {
-                emitFloatCompare(masm, actualTarget, actualCondition, branchOnUnordered);
+                emitFloatBranch(masm, actualTarget, actualCondition, branchOnUnordered);
             } else {
                 CC cc = kind == Kind.Int ? CC.Icc : CC.Xcc;
                 if (actualCondition != null) {
-                    emitCompare(masm, actualTarget, actualCondition, cc);
+                    emitBranch(masm, actualTarget, actualCondition, cc, false);
                 } else if (actualConditionFlag != null) {
-                    emitCompare(masm, actualTarget, actualConditionFlag);
+                    emitBranch(masm, actualTarget, actualConditionFlag);
                 } else {
                     GraalInternalError.shouldNotReachHere();
                 }
-                new Nop().emit(masm);  // delay slot
             }
+            new Nop().emit(masm);  // delay slot
             if (needJump) {
                 masm.jmp(falseDestination.label());
             }
         }
     }
 
-    private static void emitFloatCompare(SPARCMacroAssembler masm, Label target, Condition actualCondition, boolean branchOnUnordered) {
+    private static void emitFloatBranch(SPARCMacroAssembler masm, Label target, Condition actualCondition, boolean branchOnUnordered) {
         switch (actualCondition) {
             case EQ:
                 if (branchOnUnordered) {
@@ -187,44 +267,44 @@
             default:
                 throw GraalInternalError.shouldNotReachHere();
         }
-        new Nop().emit(masm);
     }
 
-    private static void emitCompare(SPARCMacroAssembler masm, Label target, ConditionFlag actualCondition) {
+    private static void emitBranch(SPARCMacroAssembler masm, Label target, ConditionFlag actualCondition) {
         new Fmt00b(false, actualCondition, Op2s.Br, target).emit(masm);
     }
 
-    private static void emitCompare(SPARCMacroAssembler masm, Label target, Condition actualCondition, CC cc) {
+    private static void emitBranch(SPARCMacroAssembler masm, Label target, Condition actualCondition, CC cc, boolean predictTaken) {
+
         switch (actualCondition) {
             case EQ:
-                new Bpe(cc, target).emit(masm);
+                new Bpe(cc, false, predictTaken, target).emit(masm);
                 break;
             case NE:
-                new Bpne(cc, target).emit(masm);
+                new Bpne(cc, false, predictTaken, target).emit(masm);
                 break;
             case BT:
-                new Bplu(cc, target).emit(masm);
+                new Bplu(cc, false, predictTaken, target).emit(masm);
                 break;
             case LT:
-                new Bpl(cc, target).emit(masm);
+                new Bpl(cc, false, predictTaken, target).emit(masm);
                 break;
             case BE:
-                new Bpleu(cc, target).emit(masm);
+                new Bpleu(cc, false, predictTaken, target).emit(masm);
                 break;
             case LE:
-                new Bple(cc, target).emit(masm);
+                new Bple(cc, false, predictTaken, target).emit(masm);
                 break;
             case GE:
-                new Bpge(cc, target).emit(masm);
+                new Bpge(cc, false, predictTaken, target).emit(masm);
                 break;
             case AE:
-                new Bpgeu(cc, target).emit(masm);
+                new Bpgeu(cc, false, predictTaken, target).emit(masm);
                 break;
             case GT:
-                new Bpg(cc, target).emit(masm);
+                new Bpg(cc, false, predictTaken, target).emit(masm);
                 break;
             case AT:
-                new Bpgu(cc, target).emit(masm);
+                new Bpgu(cc, false, predictTaken, target).emit(masm);
                 break;
             default:
                 throw GraalInternalError.shouldNotReachHere();
@@ -273,18 +353,18 @@
                                 new Setx(lc, asIntReg(scratch)).emit(masm);
                                 new Cmp(keyRegister, asIntReg(scratch)).emit(masm);
                             }
-                            emitCompare(masm, target, condition, CC.Icc);
+                            emitBranch(masm, target, condition, CC.Icc, false);
                             break;
                         case Long: {
-                            SPARCMove.move(crb, masm, scratch, keyConstants[index]);
+                            SPARCMove.move(crb, masm, scratch, keyConstants[index], DelaySlotHolder.DUMMY);
                             new Cmp(keyRegister, asLongReg(scratch)).emit(masm);
-                            emitCompare(masm, target, condition, CC.Xcc);
+                            emitBranch(masm, target, condition, CC.Xcc, false);
                             break;
                         }
                         case Object: {
-                            SPARCMove.move(crb, masm, scratch, keyConstants[index]);
+                            SPARCMove.move(crb, masm, scratch, keyConstants[index], DelaySlotHolder.DUMMY);
                             new Cmp(keyRegister, asObjectReg(scratch)).emit(masm);
-                            emitCompare(masm, target, condition, CC.Ptrcc);
+                            emitBranch(masm, target, condition, CC.Ptrcc, false);
                             break;
                         }
                         default:
@@ -374,157 +454,82 @@
         private final Kind kind;
 
         @Def({REG, HINT}) protected Value result;
-        @Alive({REG}) protected Value trueValue;
-        @Use({REG, STACK, CONST}) protected Value falseValue;
+        @Use({REG, CONST}) protected Value trueValue;
+        @Use({REG, CONST}) protected Value falseValue;
 
         private final ConditionFlag condition;
+        private final CC cc;
 
-        public CondMoveOp(Kind kind, Variable result, Condition condition, Variable trueValue, Value falseValue) {
+        public CondMoveOp(Kind kind, Variable result, CC cc, ConditionFlag condition, Value trueValue, Value falseValue) {
             this.kind = kind;
             this.result = result;
-            this.condition = intCond(condition);
+            this.condition = condition;
             this.trueValue = trueValue;
             this.falseValue = falseValue;
-        }
-
-        @Override
-        public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
-            // check that we don't overwrite an input operand before it is used.
-            assert !result.equals(trueValue);
-
-            SPARCMove.move(crb, masm, result, falseValue);
-            cmove(crb, masm, kind, result, condition, trueValue);
-        }
-    }
-
-    @Opcode("CMOVE")
-    public static class FloatCondMoveOp extends SPARCLIRInstruction {
-
-        private final Kind kind;
-
-        @Def({REG}) protected Value result;
-        @Alive({REG}) protected Value trueValue;
-        @Alive({REG}) protected Value falseValue;
-
-        private final ConditionFlag condition;
-        private final boolean unorderedIsTrue;
-
-        public FloatCondMoveOp(Kind kind, Variable result, Condition condition, boolean unorderedIsTrue, Variable trueValue, Variable falseValue) {
-            this.kind = kind;
-            this.result = result;
-            this.condition = floatCond(condition);
-            this.unorderedIsTrue = unorderedIsTrue;
-            this.trueValue = trueValue;
-            this.falseValue = falseValue;
+            this.cc = cc;
         }
 
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
             // check that we don't overwrite an input operand before it is used.
-            assert !result.equals(trueValue);
-
-            SPARCMove.move(crb, masm, result, trueValue);
-            cmove(crb, masm, kind, result, condition, falseValue);
-            // TODO: This may be omitted, when doing the right check beforehand (There are
-            // instructions which control the unordered behavior as well)
-            if (!unorderedIsTrue) {
-                cmove(crb, masm, kind, result, ConditionFlag.F_Unordered, falseValue);
+            // assert !result.equals(trueValue);
+            if (result.equals(trueValue)) { // We have the true value in place, do the opposite
+                cmove(masm, cc, kind, result, condition.negate(), falseValue);
+            } else if (result.equals(falseValue)) {
+                cmove(masm, cc, kind, result, condition, trueValue);
+            } else { // We have to move one of the input values to the result
+                ConditionFlag actualCondition = condition;
+                Value actualTrueValue = trueValue;
+                Value actualFalseValue = falseValue;
+                if (isConstant(falseValue) && isSimm11(asConstant(falseValue))) {
+                    actualCondition = condition.negate();
+                    actualTrueValue = falseValue;
+                    actualFalseValue = trueValue;
+                }
+                SPARCMove.move(crb, masm, result, actualFalseValue, DelaySlotHolder.DUMMY);
+                cmove(masm, cc, kind, result, actualCondition, actualTrueValue);
             }
         }
     }
 
-    private static void cmove(CompilationResultBuilder crb, SPARCMacroAssembler masm, Kind kind, Value result, ConditionFlag cond, Value other) {
-        if (!isRegister(other)) {
-            SPARCMove.move(crb, masm, result, other);
-            throw GraalInternalError.shouldNotReachHere("result should be scratch");
-        }
-        assert !asRegister(other).equals(asRegister(result)) : "other already overwritten by previous move";
+    private static void cmove(SPARCMacroAssembler masm, CC cc, Kind kind, Value result, ConditionFlag cond, Value other) {
         switch (kind) {
             case Int:
-                new Movcc(cond, CC.Icc, asRegister(other), asRegister(result)).emit(masm);
+                if (isConstant(other)) {
+                    int constant;
+                    if (asConstant(other).isNull()) {
+                        constant = 0;
+                    } else {
+                        constant = asConstant(other).asInt();
+                    }
+                    new Movcc(cond, cc, constant, asRegister(result)).emit(masm);
+                } else {
+                    new Movcc(cond, cc, asRegister(other), asRegister(result)).emit(masm);
+                }
                 break;
             case Long:
             case Object:
-                new Movcc(cond, CC.Xcc, asRegister(other), asRegister(result)).emit(masm);
+                if (isConstant(other)) {
+                    long constant;
+                    if (asConstant(other).isNull()) {
+                        constant = 0;
+                    } else {
+                        constant = asConstant(other).asLong();
+                    }
+                    assert isSimm11(constant);
+                    new Movcc(cond, cc, (int) constant, asRegister(result)).emit(masm);
+                } else {
+                    new Movcc(cond, cc, asRegister(other), asRegister(result)).emit(masm);
+                }
                 break;
             case Float:
+                new Fmovscc(cond, cc, asFloatReg(other), asFloatReg(result)).emit(masm);
+                break;
             case Double:
-                switch (cond) {
-                    case Equal:
-                        new Fbne(true, 2 * 4).emit(masm);
-                        break;
-                    case Greater:
-                        new Fble(true, 2 * 4).emit(masm);
-                        break;
-                    case GreaterEqual:
-                        new Fbl(true, 2 * 4).emit(masm);
-                        break;
-                    case Less:
-                        new Fbge(true, 2 * 4).emit(masm);
-                        break;
-                    case LessEqual:
-                        new Fbg(true, 2 * 4).emit(masm);
-                        break;
-                    case F_Ordered:
-                        new Fbo(true, 2 * 4).emit(masm);
-                        break;
-                    case F_Unordered:
-                        new Fbu(true, 2 * 4).emit(masm);
-                        break;
-                    default:
-                        GraalInternalError.shouldNotReachHere("Unknown condition code " + cond);
-                        break;
-                }
-                SPARCMove.move(crb, masm, result, other);
+                new Fmovdcc(cond, cc, asDoubleReg(other), asDoubleReg(result)).emit(masm);
                 break;
             default:
                 throw GraalInternalError.shouldNotReachHere();
         }
     }
-
-    private static ConditionFlag intCond(Condition cond) {
-        switch (cond) {
-            case EQ:
-                return ConditionFlag.Equal;
-            case NE:
-                return ConditionFlag.NotEqual;
-            case BT:
-                return ConditionFlag.LessUnsigned;
-            case LT:
-                return ConditionFlag.Less;
-            case BE:
-                return ConditionFlag.LessEqualUnsigned;
-            case LE:
-                return ConditionFlag.LessEqual;
-            case AE:
-                return ConditionFlag.GreaterEqualUnsigned;
-            case GE:
-                return ConditionFlag.GreaterEqual;
-            case AT:
-                return ConditionFlag.GreaterUnsigned;
-            case GT:
-                return ConditionFlag.Greater;
-            default:
-                throw GraalInternalError.shouldNotReachHere("Unimplemented for: " + cond);
-        }
-    }
-
-    private static ConditionFlag floatCond(Condition cond) {
-        switch (cond) {
-            case EQ:
-                return ConditionFlag.Equal;
-            case NE:
-                return ConditionFlag.NotEqual;
-            case LT:
-                return ConditionFlag.Less;
-            case LE:
-                return ConditionFlag.LessEqual;
-            case GE:
-                return ConditionFlag.GreaterEqual;
-            case GT:
-                return ConditionFlag.Greater;
-            default:
-                throw GraalInternalError.shouldNotReachHere("Unimplemented for " + cond);
-        }
-    }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCJumpOp.java	Mon Sep 22 09:21:29 2014 -0700
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.lir.sparc;
+
+import com.oracle.graal.asm.sparc.SPARCAssembler.Bpa;
+import com.oracle.graal.asm.sparc.*;
+import com.oracle.graal.asm.sparc.SPARCMacroAssembler.Nop;
+import com.oracle.graal.lir.*;
+import com.oracle.graal.lir.StandardOp.JumpOp;
+import com.oracle.graal.lir.asm.*;
+
+public class SPARCJumpOp extends JumpOp implements DelaySlotHolder {
+    private boolean emitDone = false; // set by emitForDelay; once true, emitCode emits nothing
+
+    public SPARCJumpOp(LabelRef destination) {
+        super(destination);
+    }
+
+    public void emitForDelay(CompilationResultBuilder crb, SPARCMacroAssembler masm) { // branch only, no Nop: the caller emits its own last instruction right after
+        assert !emitDone; // may be requested at most once per jump
+        if (!crb.isSuccessorEdge(destination())) { // no branch is emitted when the destination is a successor edge
+            new Bpa(destination().label()).emit(masm);
+        }
+        emitDone = true; // recorded even when no branch was emitted, so emitCode stays silent
+    }
+
+    @Override
+    public void emitCode(CompilationResultBuilder crb) {
+        if (!emitDone) { // regular path: emitForDelay was never called for this jump
+            SPARCMacroAssembler masm = (SPARCMacroAssembler) crb.asm;
+            if (!crb.isSuccessorEdge(destination())) {
+                new Bpa(destination().label()).emit(masm);
+                new Nop().emit(masm); // nothing was stuffed behind the branch, so a Nop follows it
+            }
+        }
+    }
+}
--- a/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCMathIntrinsicOp.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCMathIntrinsicOp.java	Mon Sep 22 09:21:29 2014 -0700
@@ -31,7 +31,7 @@
 import com.oracle.graal.lir.*;
 import com.oracle.graal.lir.asm.*;
 
-public class SPARCMathIntrinsicOp extends SPARCLIRInstruction {
+public class SPARCMathIntrinsicOp extends SPARCLIRInstruction implements TailDelayedLIRInstruction {
 
     public enum IntrinsicOpcode {
         SQRT,
@@ -46,6 +46,7 @@
     @Opcode private final IntrinsicOpcode opcode;
     @Def protected Value result;
     @Use protected Value input;
+    private DelaySlotHolder delaySlotHolder = DelaySlotHolder.DUMMY;
 
     public SPARCMathIntrinsicOp(IntrinsicOpcode opcode, Value result, Value input) {
         this.opcode = opcode;
@@ -56,6 +57,7 @@
     @Override
     public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
         Kind inputKind = (Kind) input.getLIRKind().getPlatformKind();
+        delaySlotHolder.emitForDelay(crb, masm);
         switch (opcode) {
             case SQRT:
                 switch (inputKind) {
@@ -90,4 +92,9 @@
                 throw GraalInternalError.shouldNotReachHere();
         }
     }
+
+    public void setDelaySlotHolder(DelaySlotHolder holder) {
+        this.delaySlotHolder = holder;
+    }
+
 }
--- a/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCMove.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCMove.java	Mon Sep 22 09:21:29 2014 -0700
@@ -43,10 +43,11 @@
 public class SPARCMove {
 
     @Opcode("MOVE_TOREG")
-    public static class MoveToRegOp extends SPARCLIRInstruction implements MoveOp {
+    public static class MoveToRegOp extends SPARCLIRInstruction implements MoveOp, TailDelayedLIRInstruction {
 
         @Def({REG, HINT}) protected AllocatableValue result;
         @Use({REG, STACK, CONST}) protected Value input;
+        private DelaySlotHolder delaySlotLir = DelaySlotHolder.DUMMY;
 
         public MoveToRegOp(AllocatableValue result, Value input) {
             this.result = result;
@@ -55,7 +56,12 @@
 
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
-            move(crb, masm, getResult(), getInput());
+            move(crb, masm, getResult(), getInput(), delaySlotLir);
+        }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            assert delaySlotLir == DelaySlotHolder.DUMMY : "Should be set only once";
+            this.delaySlotLir = holder;
         }
 
         @Override
@@ -70,10 +76,11 @@
     }
 
     @Opcode("MOVE_FROMREG")
-    public static class MoveFromRegOp extends SPARCLIRInstruction implements MoveOp {
+    public static class MoveFromRegOp extends SPARCLIRInstruction implements MoveOp, TailDelayedLIRInstruction {
 
         @Def({REG, STACK}) protected AllocatableValue result;
         @Use({REG, CONST, HINT}) protected Value input;
+        private DelaySlotHolder delaySlotLir = DelaySlotHolder.DUMMY;
 
         public MoveFromRegOp(AllocatableValue result, Value input) {
             this.result = result;
@@ -82,7 +89,11 @@
 
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
-            move(crb, masm, getResult(), getInput());
+            move(crb, masm, getResult(), getInput(), delaySlotLir);
+        }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotLir = holder;
         }
 
         @Override
@@ -100,11 +111,12 @@
      * Move between floating-point and general purpose register domain (WITHOUT VIS3)
      */
     @Opcode("MOVE")
-    public static class MoveFpGp extends SPARCLIRInstruction implements MoveOp {
+    public static class MoveFpGp extends SPARCLIRInstruction implements MoveOp, TailDelayedLIRInstruction {
 
         @Def({REG}) protected AllocatableValue result;
         @Use({REG}) protected AllocatableValue input;
         @Use({STACK}) protected StackSlot temp;
+        private DelaySlotHolder delaySlotLir = DelaySlotHolder.DUMMY;
 
         public MoveFpGp(AllocatableValue result, AllocatableValue input, StackSlot temp) {
             super();
@@ -122,6 +134,10 @@
             return result;
         }
 
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotLir = holder;
+        }
+
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
             Kind inputKind = (Kind) input.getPlatformKind();
@@ -159,6 +175,7 @@
                     default:
                         GraalInternalError.shouldNotReachHere();
                 }
+                delaySlotLir.emitForDelay(crb, masm);
                 switch (resultKind) {
                     case Long:
                         new Ldx(tempAddress, asLongReg(result)).emit(masm);
@@ -193,10 +210,11 @@
      * Move between floating-point and general purpose register domain (WITH VIS3)
      */
     @Opcode("MOVE")
-    public static class MoveFpGpVIS3 extends SPARCLIRInstruction implements MoveOp {
+    public static class MoveFpGpVIS3 extends SPARCLIRInstruction implements MoveOp, TailDelayedLIRInstruction {
 
         @Def({REG}) protected AllocatableValue result;
         @Use({REG}) protected AllocatableValue input;
+        private DelaySlotHolder delayHolder = DelaySlotHolder.DUMMY;
 
         public MoveFpGpVIS3(AllocatableValue result, AllocatableValue input) {
             super();
@@ -216,6 +234,7 @@
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
             Kind inputKind = (Kind) input.getPlatformKind();
             Kind resultKind = (Kind) result.getPlatformKind();
+            delayHolder.emitForDelay(crb, masm);
             if (resultKind == Float) {
                 if (inputKind == Int || inputKind == Short || inputKind == Char || inputKind == Byte) {
                     new Movwtos(asIntReg(input), asFloatReg(result)).emit(masm);
@@ -242,6 +261,10 @@
                 }
             }
         }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delayHolder = holder;
+        }
     }
 
     public abstract static class MemOp extends SPARCLIRInstruction implements ImplicitNullCheck {
@@ -256,14 +279,11 @@
             this.state = state;
         }
 
-        protected abstract void emitMemAccess(SPARCMacroAssembler masm);
+        protected abstract void emitMemAccess(CompilationResultBuilder crb, SPARCMacroAssembler masm);
 
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
-            if (state != null) {
-                crb.recordImplicitException(masm.position(), state);
-            }
-            emitMemAccess(masm);
+            emitMemAccess(crb, masm);
         }
 
         public boolean makeNullCheckFor(Value value, LIRFrameState nullCheckState, int implicitNullCheckLimit) {
@@ -275,9 +295,10 @@
         }
     }
 
-    public static class LoadOp extends MemOp {
+    public static class LoadOp extends MemOp implements TailDelayedLIRInstruction {
 
         @Def({REG}) protected AllocatableValue result;
+        DelaySlotHolder delaySlotHolder = DelaySlotHolder.DUMMY;
 
         public LoadOp(Kind kind, AllocatableValue result, SPARCAddressValue address, LIRFrameState state) {
             super(kind, address, state);
@@ -285,11 +306,15 @@
         }
 
         @Override
-        public void emitMemAccess(SPARCMacroAssembler masm) {
+        public void emitMemAccess(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
             try (SPARCScratchRegister sc = SPARCScratchRegister.get()) {
                 Register scratch = sc.getRegister();
                 final SPARCAddress addr = generateSimm13OffsetLoad(address.toAddress(), masm, scratch);
                 final Register dst = asRegister(result);
+                delaySlotHolder.emitForDelay(crb, masm);
+                if (state != null) {
+                    crb.recordImplicitException(masm.position(), state);
+                }
                 switch (kind) {
                     case Boolean:
                     case Byte:
@@ -321,12 +346,17 @@
                 }
             }
         }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotHolder = holder;
+        }
     }
 
-    public static class LoadAddressOp extends SPARCLIRInstruction {
+    public static class LoadAddressOp extends SPARCLIRInstruction implements TailDelayedLIRInstruction {
 
         @Def({REG}) protected AllocatableValue result;
         @Use({COMPOSITE, UNINITIALIZED}) protected SPARCAddressValue addressValue;
+        private DelaySlotHolder delaySlotHolder = DelaySlotHolder.DUMMY;
 
         public LoadAddressOp(AllocatableValue result, SPARCAddressValue address) {
             this.result = result;
@@ -336,7 +366,11 @@
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
             SPARCAddress address = addressValue.toAddress();
-            loadEffectiveAddress(address, asLongReg(result), masm);
+            loadEffectiveAddress(crb, masm, address, asLongReg(result), delaySlotHolder);
+        }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotHolder = holder;
         }
     }
 
@@ -375,10 +409,11 @@
         }
     }
 
-    public static class NullCheckOp extends SPARCLIRInstruction implements NullCheck {
+    public static class NullCheckOp extends SPARCLIRInstruction implements NullCheck, TailDelayedLIRInstruction {
 
         @Use({REG}) protected AllocatableValue input;
         @State protected LIRFrameState state;
+        private DelaySlotHolder delaySlotHolder = DelaySlotHolder.DUMMY;
 
         public NullCheckOp(Variable input, LIRFrameState state) {
             this.input = input;
@@ -387,6 +422,7 @@
 
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
+            delaySlotHolder.emitForDelay(crb, masm);
             crb.recordImplicitException(masm.position(), state);
             new Ldx(new SPARCAddress(asRegister(input), 0), r0).emit(masm);
         }
@@ -398,6 +434,10 @@
         public LIRFrameState getState() {
             return state;
         }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotHolder = holder;
+        }
     }
 
     @Opcode("CAS")
@@ -420,10 +460,11 @@
         }
     }
 
-    public static class StackLoadAddressOp extends SPARCLIRInstruction {
+    public static class StackLoadAddressOp extends SPARCLIRInstruction implements TailDelayedLIRInstruction {
 
         @Def({REG}) protected AllocatableValue result;
         @Use({STACK, UNINITIALIZED}) protected StackSlot slot;
+        private DelaySlotHolder delaySlotHolder = DelaySlotHolder.DUMMY;
 
         public StackLoadAddressOp(AllocatableValue result, StackSlot slot) {
             this.result = result;
@@ -433,27 +474,36 @@
         @Override
         public void emitCode(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
             SPARCAddress address = (SPARCAddress) crb.asAddress(slot);
-            loadEffectiveAddress(address, asLongReg(result), masm);
+            loadEffectiveAddress(crb, masm, address, asLongReg(result), delaySlotHolder);
+        }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotHolder = holder;
         }
     }
 
-    private static void loadEffectiveAddress(SPARCAddress address, Register result, SPARCMacroAssembler masm) {
+    private static void loadEffectiveAddress(CompilationResultBuilder crb, SPARCMacroAssembler masm, SPARCAddress address, Register result, DelaySlotHolder delaySlotHolder) {
         if (address.getIndex().equals(Register.None)) {
             if (isSimm13(address.getDisplacement())) {
+                delaySlotHolder.emitForDelay(crb, masm); // holder emits first; the single Add below is the last instruction
                 new Add(address.getBase(), address.getDisplacement(), result).emit(masm);
             } else {
                 assert result.encoding() != address.getBase().encoding();
                 new Setx(address.getDisplacement(), result).emit(masm);
+                // No relocation is involved here, therefore the final Add can be delayed as well
+                delaySlotHolder.emitForDelay(crb, masm); // called after the Setx, directly before the final Add
                 new Add(address.getBase(), result, result).emit(masm);
             }
         } else {
+            delaySlotHolder.emitForDelay(crb, masm); // holder emits first; the base + index Add below is the last instruction
             new Add(address.getBase(), address.getIndex(), result).emit(masm);
         }
     }
 
-    public static class StoreOp extends MemOp {
+    public static class StoreOp extends MemOp implements TailDelayedLIRInstruction {
 
         @Use({REG}) protected AllocatableValue input;
+        DelaySlotHolder delaySlotHolder = DelaySlotHolder.DUMMY;
 
         public StoreOp(Kind kind, SPARCAddressValue address, AllocatableValue input, LIRFrameState state) {
             super(kind, address, state);
@@ -461,11 +511,15 @@
         }
 
         @Override
-        public void emitMemAccess(SPARCMacroAssembler masm) {
+        public void emitMemAccess(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
             assert isRegister(input);
             try (SPARCScratchRegister sc = SPARCScratchRegister.get()) {
                 Register scratch = sc.getRegister();
                 SPARCAddress addr = generateSimm13OffsetLoad(address.toAddress(), masm, scratch);
+                delaySlotHolder.emitForDelay(crb, masm);
+                if (state != null) {
+                    crb.recordImplicitException(masm.position(), state);
+                }
                 switch (kind) {
                     case Boolean:
                     case Byte:
@@ -495,11 +549,16 @@
                 }
             }
         }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotHolder = holder;
+        }
     }
 
-    public static class StoreConstantOp extends MemOp {
+    public static class StoreConstantOp extends MemOp implements TailDelayedLIRInstruction {
 
         protected final Constant input;
+        DelaySlotHolder delaySlotHolder = DelaySlotHolder.DUMMY;
 
         public StoreConstantOp(Kind kind, SPARCAddressValue address, Constant input, LIRFrameState state) {
             super(kind, address, state);
@@ -510,10 +569,14 @@
         }
 
         @Override
-        public void emitMemAccess(SPARCMacroAssembler masm) {
+        public void emitMemAccess(CompilationResultBuilder crb, SPARCMacroAssembler masm) {
             try (SPARCScratchRegister sc = SPARCScratchRegister.get()) {
                 Register scratch = sc.getRegister();
                 SPARCAddress addr = generateSimm13OffsetLoad(address.toAddress(), masm, scratch);
+                delaySlotHolder.emitForDelay(crb, masm);
+                if (state != null) {
+                    crb.recordImplicitException(masm.position(), state);
+                }
                 switch (kind) {
                     case Boolean:
                     case Byte:
@@ -538,30 +601,34 @@
                 }
             }
         }
+
+        public void setDelaySlotHolder(DelaySlotHolder holder) {
+            this.delaySlotHolder = holder;
+        }
     }
 
-    public static void move(CompilationResultBuilder crb, SPARCMacroAssembler masm, Value result, Value input) {
+    public static void move(CompilationResultBuilder crb, SPARCMacroAssembler masm, Value result, Value input, DelaySlotHolder delaySlotLir) {
         if (isRegister(input)) {
             if (isRegister(result)) {
-                reg2reg(masm, result, input);
+                reg2reg(crb, masm, result, input, delaySlotLir);
             } else if (isStackSlot(result)) {
-                reg2stack(crb, masm, result, input);
+                reg2stack(crb, masm, result, input, delaySlotLir);
             } else {
                 throw GraalInternalError.shouldNotReachHere();
             }
         } else if (isStackSlot(input)) {
             if (isRegister(result)) {
-                stack2reg(crb, masm, result, input);
+                stack2reg(crb, masm, result, input, delaySlotLir);
             } else {
                 throw GraalInternalError.shouldNotReachHere();
             }
         } else if (isConstant(input)) {
             Constant constant = asConstant(input);
             if (isRegister(result)) {
-                const2reg(crb, masm, result, constant);
+                const2reg(crb, masm, result, constant, delaySlotLir);
             } else if (isStackSlot(result)) {
                 if (constant.isDefaultForKind() || constant.isNull()) {
-                    reg2stack(crb, masm, result, g0.asValue(LIRKind.derive(input)));
+                    reg2stack(crb, masm, result, g0.asValue(LIRKind.derive(input)), delaySlotLir);
                 } else {
                     try (SPARCScratchRegister sc = SPARCScratchRegister.get()) {
                         Register scratch = sc.getRegister();
@@ -571,7 +638,7 @@
                         } else {
                             new Setx(value, scratch).emit(masm);
                         }
-                        reg2stack(crb, masm, result, scratch.asValue(LIRKind.derive(input)));
+                        reg2stack(crb, masm, result, scratch.asValue(LIRKind.derive(input)), delaySlotLir);
                     }
                 }
             } else {
@@ -582,7 +649,7 @@
         }
     }
 
-    private static void reg2reg(SPARCAssembler masm, Value result, Value input) {
+    private static void reg2reg(CompilationResultBuilder crb, SPARCMacroAssembler masm, Value result, Value input, DelaySlotHolder delaySlotLir) {
         final Register src = asRegister(input);
         final Register dst = asRegister(result);
         if (src.equals(dst)) {
@@ -596,6 +663,7 @@
             case Int:
             case Long:
             case Object:
+                delaySlotLir.emitForDelay(crb, masm);
                 new Mov(src, dst).emit(masm);
                 break;
             case Float:
@@ -637,12 +705,13 @@
         }
     }
 
-    private static void reg2stack(CompilationResultBuilder crb, SPARCMacroAssembler masm, Value result, Value input) {
+    private static void reg2stack(CompilationResultBuilder crb, SPARCMacroAssembler masm, Value result, Value input, DelaySlotHolder delaySlotLir) {
         SPARCAddress dst = (SPARCAddress) crb.asAddress(result);
         try (SPARCScratchRegister sc = SPARCScratchRegister.get()) {
             Register scratch = sc.getRegister();
             dst = generateSimm13OffsetLoad(dst, masm, scratch);
             Register src = asRegister(input);
+            delaySlotLir.emitForDelay(crb, masm);
             switch (input.getKind()) {
                 case Byte:
                 case Boolean:
@@ -671,12 +740,13 @@
         }
     }
 
-    private static void stack2reg(CompilationResultBuilder crb, SPARCMacroAssembler masm, Value result, Value input) {
+    private static void stack2reg(CompilationResultBuilder crb, SPARCMacroAssembler masm, Value result, Value input, DelaySlotHolder delaySlotLir) {
         SPARCAddress src = (SPARCAddress) crb.asAddress(input);
         try (SPARCScratchRegister sc = SPARCScratchRegister.get()) {
             Register scratch = sc.getRegister();
             src = generateSimm13OffsetLoad(src, masm, scratch);
             Register dst = asRegister(result);
+            delaySlotLir.emitForDelay(crb, masm);
             switch (input.getKind()) {
                 case Boolean:
                 case Byte:
@@ -707,33 +777,44 @@
         }
     }
 
-    private static void const2reg(CompilationResultBuilder crb, SPARCMacroAssembler masm, Value result, Constant input) {
+    private static void const2reg(CompilationResultBuilder crb, SPARCMacroAssembler masm, Value result, Constant input, DelaySlotHolder delaySlotLir) {
         try (SPARCScratchRegister sc = SPARCScratchRegister.get()) {
             Register scratch = sc.getRegister();
             boolean hasVIS3 = ((SPARC) masm.target.arch).getFeatures().contains(CPUFeature.VIS3);
             switch (input.getKind().getStackKind()) {
                 case Int:
                     if (input.isDefaultForKind()) {
+                        delaySlotLir.emitForDelay(crb, masm);
                         new Clr(asIntReg(result)).emit(masm);
                     } else if (isSimm13(input.asLong())) {
+                        delaySlotLir.emitForDelay(crb, masm);
                         new Or(g0, input.asInt(), asIntReg(result)).emit(masm);
                     } else {
-                        new Setx(input.asLong(), asIntReg(result)).emit(masm);
+                        Setx set = new Setx(input.asLong(), asIntReg(result), false, true);
+                        set.emitFirstPartOfDelayed(masm);
+                        delaySlotLir.emitForDelay(crb, masm);
+                        set.emitSecondPartOfDelayed(masm);
                     }
                     break;
                 case Long:
                     if (input.isDefaultForKind()) {
+                        delaySlotLir.emitForDelay(crb, masm);
                         new Clr(asLongReg(result)).emit(masm);
                     } else if (isSimm13(input.asLong())) {
+                        delaySlotLir.emitForDelay(crb, masm);
                         new Or(g0, (int) input.asLong(), asLongReg(result)).emit(masm);
                     } else {
-                        new Setx(input.asLong(), asLongReg(result)).emit(masm);
+                        Setx setx = new Setx(input.asLong(), asLongReg(result), false, true);
+                        setx.emitFirstPartOfDelayed(masm);
+                        delaySlotLir.emitForDelay(crb, masm);
+                        setx.emitSecondPartOfDelayed(masm);
                     }
                     break;
                 case Float: {
                     float constant = input.asFloat();
                     int constantBits = java.lang.Float.floatToIntBits(constant);
                     if (constantBits == 0) {
+                        delaySlotLir.emitForDelay(crb, masm);
                         new Fzeros(asFloatReg(result)).emit(masm);
                     } else {
                         if (hasVIS3) {
@@ -742,6 +823,7 @@
                             } else {
                                 new Setx(constantBits, scratch, false).emit(masm);
                             }
+                            delaySlotLir.emitForDelay(crb, masm);
                             // Now load the float value
                             new Movwtos(scratch, asFloatReg(result)).emit(masm);
                         } else {
@@ -749,6 +831,7 @@
                             // First load the address into the scratch register
                             new Setx(0, scratch, true).emit(masm);
                             // Now load the float value
+                            delaySlotLir.emitForDelay(crb, masm);
                             new Ldf(scratch, asFloatReg(result)).emit(masm);
                         }
                     }
@@ -758,6 +841,7 @@
                     double constant = input.asDouble();
                     long constantBits = java.lang.Double.doubleToLongBits(constant);
                     if (constantBits == 0) {
+                        delaySlotLir.emitForDelay(crb, masm);
                         new Fzerod(asDoubleReg(result)).emit(masm);
                     } else {
                         if (hasVIS3) {
@@ -766,12 +850,14 @@
                             } else {
                                 new Setx(constantBits, scratch, false).emit(masm);
                             }
+                            delaySlotLir.emitForDelay(crb, masm);
                             // Now load the float value
                             new Movxtod(scratch, asDoubleReg(result)).emit(masm);
                         } else {
                             crb.asDoubleConstRef(input);
                             // First load the address into the scratch register
                             new Setx(0, scratch, true).emit(masm);
+                            delaySlotLir.emitForDelay(crb, masm);
                             // Now load the float value
                             new Lddf(scratch, asDoubleReg(result)).emit(masm);
                         }
@@ -780,16 +866,13 @@
                 }
                 case Object:
                     if (input.isNull()) {
+                        delaySlotLir.emitForDelay(crb, masm);
                         new Clr(asRegister(result)).emit(masm);
                     } else if (crb.target.inlineObjects) {
-                        crb.recordInlineDataInCode(input);
+                        crb.recordInlineDataInCode(input); // relocatable cannot be delayed
                         new Setx(0xDEADDEADDEADDEADL, asRegister(result), true).emit(masm);
                     } else {
-                        Register dst = asRegister(result);
-                        new Rdpc(dst).emit(masm);
-                        crb.asObjectConstRef(input);
-                        new Ldx(new SPARCAddress(dst, 0), dst).emit(masm);
-                        throw GraalInternalError.shouldNotReachHere("the patched offset might be too big for the load");
+                        throw GraalInternalError.unimplemented();
                     }
                     break;
                 default:
--- a/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCSaveRegistersOp.java	Fri Sep 19 09:53:13 2014 -0700
+++ b/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/SPARCSaveRegistersOp.java	Mon Sep 22 09:21:29 2014 -0700
@@ -70,7 +70,7 @@
 
     private static void saveRegister(CompilationResultBuilder crb, SPARCMacroAssembler masm, StackSlot result, Register register) {
         RegisterValue input = register.asValue(result.getLIRKind());
-        SPARCMove.move(crb, masm, result, input);
+        SPARCMove.move(crb, masm, result, input, DelaySlotHolder.DUMMY);
     }
 
     @Override
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.lir.sparc/src/com/oracle/graal/lir/sparc/TailDelayedLIRInstruction.java	Mon Sep 22 09:21:29 2014 -0700
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.graal.lir.sparc;
+
+import com.oracle.graal.lir.*;
+
+/**
+ * Implementors of this interface are able to place their last instruction into the delay slot of
+ * the given {@link DelaySlotHolder}.
+ *
+ * This LIR instruction is still emitted in the usual way. But when emitting code for this LIR
+ * instruction before the last instruction, it can transfer control over to the delay slot holder
+ * LIR instruction, which then can emit code in order to get to the delay slot.
+ *
+ * Steps for emitting delayed code:
+ * <ol>
+ * <li>If this LIR instruction consists of more than one instruction, emit everything up to and
+ * including the second last instruction.</li>
+ * <li>Then call
+ * {@link DelaySlotHolder#emitForDelay(com.oracle.graal.lir.asm.CompilationResultBuilder, com.oracle.graal.asm.sparc.SPARCMacroAssembler)}
+ * to let the delay-slot holder emit its code.</li>
+ * <li>Emit the last instruction for this {@link LIRInstruction}.</li>
+ * </ol>
+ *
+ * Note: If this instruction decides not to use the delay slot, it can skip the call of
+ * {@link DelaySlotHolder#emitForDelay(com.oracle.graal.lir.asm.CompilationResultBuilder, com.oracle.graal.asm.sparc.SPARCMacroAssembler)}
+ * and the code generation will continue without using the delay slot. No other steps are
+ * required.
+ */
+public interface TailDelayedLIRInstruction {
+    public void setDelaySlotHolder(DelaySlotHolder holder); // the holder whose emitForDelay the implementor may call
+}