changeset 6497:64b7dd2075c0

renamed projects com.oracle.max.asm* to com.oracle.graal.asm*
author Doug Simon <doug.simon@oracle.com>
date Wed, 03 Oct 2012 17:42:12 +0200
parents 16d1411409b4
children 6bc8aa568cb9
files graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/AMD64.java graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/AMD64AsmOptions.java graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/AMD64Assembler.java graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/AMD64MacroAssembler.java graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/X86InstructionDecoder.java graal/com.oracle.graal.asm/overview.html graal/com.oracle.graal.asm/src/com/oracle/max/asm/AbstractAssembler.java graal/com.oracle.graal.asm/src/com/oracle/max/asm/AsmOptions.java graal/com.oracle.graal.asm/src/com/oracle/max/asm/Buffer.java graal/com.oracle.graal.asm/src/com/oracle/max/asm/Label.java graal/com.oracle.graal.asm/src/com/oracle/max/asm/NumUtil.java graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/AMD64.java graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/AMD64AsmOptions.java graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/AMD64Assembler.java graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/AMD64MacroAssembler.java graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/X86InstructionDecoder.java graal/com.oracle.max.asm/overview.html graal/com.oracle.max.asm/src/com/oracle/max/asm/AbstractAssembler.java graal/com.oracle.max.asm/src/com/oracle/max/asm/AsmOptions.java graal/com.oracle.max.asm/src/com/oracle/max/asm/Buffer.java graal/com.oracle.max.asm/src/com/oracle/max/asm/Label.java graal/com.oracle.max.asm/src/com/oracle/max/asm/NumUtil.java mx/projects src/share/vm/runtime/arguments.cpp
diffstat 24 files changed, 4682 insertions(+), 4682 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/AMD64.java	Wed Oct 03 17:42:12 2012 +0200
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.max.asm.amd64;
+
+import static com.oracle.graal.api.code.Register.RegisterFlag.*;
+import static com.oracle.graal.api.meta.Kind.*;
+import static com.oracle.max.criutils.MemoryBarriers.*;
+
+import com.oracle.graal.api.code.*;
+import com.oracle.graal.api.code.Register.*;
+
+/**
+ * Represents the AMD64 architecture.
+ */
+public class AMD64 extends Architecture {
+
+    // General purpose CPU registers
+    public static final Register rax = new Register(0, 0, 8, "rax", CPU, RegisterFlag.Byte);
+    public static final Register rcx = new Register(1, 1, 8, "rcx", CPU, RegisterFlag.Byte);
+    public static final Register rdx = new Register(2, 2, 8, "rdx", CPU, RegisterFlag.Byte);
+    public static final Register rbx = new Register(3, 3, 8, "rbx", CPU, RegisterFlag.Byte);
+    public static final Register rsp = new Register(4, 4, 8, "rsp", CPU, RegisterFlag.Byte);
+    public static final Register rbp = new Register(5, 5, 8, "rbp", CPU, RegisterFlag.Byte);
+    public static final Register rsi = new Register(6, 6, 8, "rsi", CPU, RegisterFlag.Byte);
+    public static final Register rdi = new Register(7, 7, 8, "rdi", CPU, RegisterFlag.Byte);
+
+    public static final Register r8  = new Register(8,  8,  8, "r8", CPU, RegisterFlag.Byte);
+    public static final Register r9  = new Register(9,  9,  8, "r9", CPU, RegisterFlag.Byte);
+    public static final Register r10 = new Register(10, 10, 8, "r10", CPU, RegisterFlag.Byte);
+    public static final Register r11 = new Register(11, 11, 8, "r11", CPU, RegisterFlag.Byte);
+    public static final Register r12 = new Register(12, 12, 8, "r12", CPU, RegisterFlag.Byte);
+    public static final Register r13 = new Register(13, 13, 8, "r13", CPU, RegisterFlag.Byte);
+    public static final Register r14 = new Register(14, 14, 8, "r14", CPU, RegisterFlag.Byte);
+    public static final Register r15 = new Register(15, 15, 8, "r15", CPU, RegisterFlag.Byte);
+
+    public static final Register[] cpuRegisters = {
+        rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
+        r8, r9, r10, r11, r12, r13, r14, r15
+    };
+
+    // XMM registers
+    public static final Register xmm0 = new Register(16, 0, 8, "xmm0", FPU);
+    public static final Register xmm1 = new Register(17, 1, 8, "xmm1", FPU);
+    public static final Register xmm2 = new Register(18, 2, 8, "xmm2", FPU);
+    public static final Register xmm3 = new Register(19, 3, 8, "xmm3", FPU);
+    public static final Register xmm4 = new Register(20, 4, 8, "xmm4", FPU);
+    public static final Register xmm5 = new Register(21, 5, 8, "xmm5", FPU);
+    public static final Register xmm6 = new Register(22, 6, 8, "xmm6", FPU);
+    public static final Register xmm7 = new Register(23, 7, 8, "xmm7", FPU);
+
+    public static final Register xmm8 =  new Register(24,  8, 8, "xmm8",  FPU);
+    public static final Register xmm9 =  new Register(25,  9, 8, "xmm9",  FPU);
+    public static final Register xmm10 = new Register(26, 10, 8, "xmm10", FPU);
+    public static final Register xmm11 = new Register(27, 11, 8, "xmm11", FPU);
+    public static final Register xmm12 = new Register(28, 12, 8, "xmm12", FPU);
+    public static final Register xmm13 = new Register(29, 13, 8, "xmm13", FPU);
+    public static final Register xmm14 = new Register(30, 14, 8, "xmm14", FPU);
+    public static final Register xmm15 = new Register(31, 15, 8, "xmm15", FPU);
+
+    public static final Register[] xmmRegisters = {
+        xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
+        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15
+    };
+
+    public static final Register[] cpuxmmRegisters = {
+        rax,  rcx,  rdx,   rbx,   rsp,   rbp,   rsi,   rdi,
+        r8,   r9,   r10,   r11,   r12,   r13,   r14,   r15,
+        xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
+        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15
+    };
+
+    /**
+     * Register used to construct an instruction-relative address.
+     */
+    public static final Register rip = new Register(32, -1, 0, "rip");
+
+    public static final Register[] allRegisters = {
+        rax,  rcx,  rdx,   rbx,   rsp,   rbp,   rsi,   rdi,
+        r8,   r9,   r10,   r11,   r12,   r13,   r14,   r15,
+        xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
+        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
+        rip
+    };
+
+    public static final RegisterValue RSP = rsp.asValue(Long);
+
+    public AMD64() {
+        super("AMD64",
+              8,
+              ByteOrder.LittleEndian,
+              allRegisters,
+              LOAD_STORE | STORE_STORE,
+              1,
+              r15.encoding + 1,
+              8);
+    }
+
+    @Override
+    public boolean isX86() {
+        return true;
+    }
+
+    @Override
+    public boolean twoOperandMode() {
+        return true;
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/AMD64AsmOptions.java	Wed Oct 03 17:42:12 2012 +0200
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.max.asm.amd64;
+
+public class AMD64AsmOptions {
+    public static int     Atomics                       = 0;
+    public static boolean UseNormalNop                  = true;
+    public static boolean UseAddressNop                 = true;
+    public static boolean UseIncDec                     = false;
+    public static boolean UseXmmLoadAndClearUpper       = true;
+    public static boolean UseXmmRegToRegMoveAll         = false;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/AMD64Assembler.java	Wed Oct 03 17:42:12 2012 +0200
@@ -0,0 +1,3034 @@
+/*
+ * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.max.asm.amd64;
+
+import static com.oracle.graal.api.code.ValueUtil.*;
+import static com.oracle.max.asm.NumUtil.*;
+import static com.oracle.max.asm.amd64.AMD64.*;
+import static com.oracle.max.asm.amd64.AMD64AsmOptions.*;
+import static com.oracle.max.criutils.MemoryBarriers.*;
+
+import com.oracle.graal.api.code.*;
+import com.oracle.graal.api.meta.*;
+import com.oracle.max.asm.*;
+
+/**
+ * This class implements an assembler that can encode most X86 instructions.
+ */
+public class AMD64Assembler extends AbstractAssembler {
+    /**
+     * The kind for pointers and raw registers.  Since we know we are 64 bit here, we can hardcode it.
+     */
+    private static final Kind Word = Kind.Long;
+
+    private static final int MinEncodingNeedsRex = 8;
+
+    /**
+     * The x86 condition codes used for conditional jumps/moves.
+     */
+    public enum ConditionFlag {
+        zero(0x4, "|zero|"),
+        notZero(0x5, "|nzero|"),
+        equal(0x4, "="),
+        notEqual(0x5, "!="),
+        less(0xc, "<"),
+        lessEqual(0xe, "<="),
+        greater(0xf, ">"),
+        greaterEqual(0xd, ">="),
+        below(0x2, "|<|"),
+        belowEqual(0x6, "|<=|"),
+        above(0x7, "|>|"),
+        aboveEqual(0x3, "|>=|"),
+        overflow(0x0, "|of|"),
+        noOverflow(0x1, "|nof|"),
+        carrySet(0x2, "|carry|"),
+        carryClear(0x3, "|ncarry|"),
+        negative(0x8, "|neg|"),
+        positive(0x9, "|pos|"),
+        parity(0xa, "|par|"),
+        noParity(0xb, "|npar|");
+
+        public final int value;
+        public final String operator;
+
+        private ConditionFlag(int value, String operator) {
+            this.value = value;
+            this.operator = operator;
+        }
+
+        public ConditionFlag negate() {
+            switch(this) {
+                case zero: return notZero;
+                case notZero: return zero;
+                case equal: return notEqual;
+                case notEqual: return equal;
+                case less: return greaterEqual;
+                case lessEqual: return greater;
+                case greater: return lessEqual;
+                case greaterEqual: return less;
+                case below: return aboveEqual;
+                case belowEqual: return above;
+                case above: return belowEqual;
+                case aboveEqual: return below;
+                case overflow: return noOverflow;
+                case noOverflow: return overflow;
+                case carrySet: return carryClear;
+                case carryClear: return carrySet;
+                case negative: return positive;
+                case positive: return negative;
+                case parity: return noParity;
+                case noParity: return parity;
+            }
+            throw new IllegalArgumentException();
+        }
+    }
+
+    /**
+     * Constants for X86 prefix bytes.
+     */
+    private static class Prefix {
+        private static final int REX = 0x40;
+        private static final int REXB = 0x41;
+        private static final int REXX = 0x42;
+        private static final int REXXB = 0x43;
+        private static final int REXR = 0x44;
+        private static final int REXRB = 0x45;
+        private static final int REXRX = 0x46;
+        private static final int REXRXB = 0x47;
+        private static final int REXW = 0x48;
+        private static final int REXWB = 0x49;
+        private static final int REXWX = 0x4A;
+        private static final int REXWXB = 0x4B;
+        private static final int REXWR = 0x4C;
+        private static final int REXWRB = 0x4D;
+        private static final int REXWRX = 0x4E;
+        private static final int REXWRXB = 0x4F;
+    }
+
+    /**
+     * The register to which {@link Register#Frame} and {@link Register#CallerFrame} are bound.
+     */
+    public final Register frameRegister;
+
+    /**
+     * Constructs an assembler for the AMD64 architecture.
+     *
+     * @param registerConfig the register configuration used to bind {@link Register#Frame} and
+     *            {@link Register#CallerFrame} to physical registers. This value can be null if this assembler
+     *            instance will not be used to assemble instructions using these logical registers.
+     */
+    public AMD64Assembler(TargetDescription target, RegisterConfig registerConfig) {
+        super(target);
+        this.frameRegister = registerConfig == null ? null : registerConfig.getFrameRegister();
+    }
+
+    private static int encode(Register r) {
+        assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
+        return r.encoding & 0x7;
+    }
+
+    private void emitArithB(int op1, int op2, Register dst, int imm8) {
+        assert dst.isByte() : "must have byte register";
+        assert isUByte(op1) && isUByte(op2) : "wrong opcode";
+        assert isUByte(imm8) : "not a byte";
+        assert (op1 & 0x01) == 0 : "should be 8bit operation";
+        emitByte(op1);
+        emitByte(op2 | encode(dst));
+        emitByte(imm8);
+    }
+
+    private void emitArith(int op1, int op2, Register dst, int imm32) {
+        assert isUByte(op1) && isUByte(op2) : "wrong opcode";
+        assert (op1 & 0x01) == 1 : "should be 32bit operation";
+        assert (op1 & 0x02) == 0 : "sign-extension bit should not be set";
+        if (isByte(imm32)) {
+            emitByte(op1 | 0x02); // set sign bit
+            emitByte(op2 | encode(dst));
+            emitByte(imm32 & 0xFF);
+        } else {
+            emitByte(op1);
+            emitByte(op2 | encode(dst));
+            emitInt(imm32);
+        }
+    }
+
+    // immediate-to-memory forms
+    private void emitArithOperand(int op1, Register rm, Address adr, int imm32) {
+        assert (op1 & 0x01) == 1 : "should be 32bit operation";
+        assert (op1 & 0x02) == 0 : "sign-extension bit should not be set";
+        if (isByte(imm32)) {
+            emitByte(op1 | 0x02); // set sign bit
+            emitOperandHelper(rm, adr);
+            emitByte(imm32 & 0xFF);
+        } else {
+            emitByte(op1);
+            emitOperandHelper(rm, adr);
+            emitInt(imm32);
+        }
+    }
+
+    private void emitArith(int op1, int op2, Register dst, Register src) {
+        assert isUByte(op1) && isUByte(op2) : "wrong opcode";
+        emitByte(op1);
+        emitByte(op2 | encode(dst) << 3 | encode(src));
+    }
+
+    private void emitOperandHelper(Register reg, Address addr) {
+        Register base = isLegal(addr.getBase()) ? asRegister(addr.getBase()) : Register.None;
+        Register index = isLegal(addr.getIndex()) ? asRegister(addr.getIndex()) : Register.None;
+
+        Address.Scale scale = addr.getScale();
+        int disp = addr.getDisplacement();
+
+        if (base == Register.Frame) {
+            assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration";
+            base = frameRegister;
+//        } else if (base == Register.CallerFrame) {
+//            assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration";
+//            base = frameRegister;
+//            disp += targetMethod.frameSize() + 8;
+        }
+
+        // Encode the registers as needed in the fields they are used in
+
+        assert reg != Register.None;
+        int regenc = encode(reg) << 3;
+
+        if (base == AMD64.rip) {
+            // [00 000 101] disp32
+            emitByte(0x05 | regenc);
+            emitInt(disp);
+        } else if (addr == Address.Placeholder) {
+            // [00 000 101] disp32
+            emitByte(0x05 | regenc);
+            emitInt(0);
+
+        } else if (base.isValid()) {
+            int baseenc = base.isValid() ? encode(base) : 0;
+            if (index.isValid()) {
+                int indexenc = encode(index) << 3;
+                // [base + indexscale + disp]
+                if (disp == 0 && base != rbp && (base != r13)) {
+                    // [base + indexscale]
+                    // [00 reg 100][ss index base]
+                    assert index != rsp : "illegal addressing mode";
+                    emitByte(0x04 | regenc);
+                    emitByte(scale.log2 << 6 | indexenc | baseenc);
+                } else if (isByte(disp)) {
+                    // [base + indexscale + imm8]
+                    // [01 reg 100][ss index base] imm8
+                    assert index != rsp : "illegal addressing mode";
+                    emitByte(0x44 | regenc);
+                    emitByte(scale.log2 << 6 | indexenc | baseenc);
+                    emitByte(disp & 0xFF);
+                } else {
+                    // [base + indexscale + disp32]
+                    // [10 reg 100][ss index base] disp32
+                    assert index != rsp : "illegal addressing mode";
+                    emitByte(0x84 | regenc);
+                    emitByte(scale.log2 << 6 | indexenc | baseenc);
+                    emitInt(disp);
+                }
+            } else if (base == rsp || (base == r12)) {
+                // [rsp + disp]
+                if (disp == 0) {
+                    // [rsp]
+                    // [00 reg 100][00 100 100]
+                    emitByte(0x04 | regenc);
+                    emitByte(0x24);
+                } else if (isByte(disp)) {
+                    // [rsp + imm8]
+                    // [01 reg 100][00 100 100] disp8
+                    emitByte(0x44 | regenc);
+                    emitByte(0x24);
+                    emitByte(disp & 0xFF);
+                } else {
+                    // [rsp + imm32]
+                    // [10 reg 100][00 100 100] disp32
+                    emitByte(0x84 | regenc);
+                    emitByte(0x24);
+                    emitInt(disp);
+                }
+            } else {
+                // [base + disp]
+                assert base != rsp && (base != r12) : "illegal addressing mode";
+                if (disp == 0 && base != rbp && (base != r13)) {
+                    // [base]
+                    // [00 reg base]
+                    emitByte(0x00 | regenc | baseenc);
+                } else if (isByte(disp)) {
+                    // [base + disp8]
+                    // [01 reg base] disp8
+                    emitByte(0x40 | regenc | baseenc);
+                    emitByte(disp & 0xFF);
+                } else {
+                    // [base + disp32]
+                    // [10 reg base] disp32
+                    emitByte(0x80 | regenc | baseenc);
+                    emitInt(disp);
+                }
+            }
+        } else {
+            if (index.isValid()) {
+                int indexenc = encode(index) << 3;
+                // [indexscale + disp]
+                // [00 reg 100][ss index 101] disp32
+                assert index != rsp : "illegal addressing mode";
+                emitByte(0x04 | regenc);
+                emitByte(scale.log2 << 6 | indexenc | 0x05);
+                emitInt(disp);
+            } else {
+                // [disp] ABSOLUTE
+                // [00 reg 100][00 100 101] disp32
+                emitByte(0x04 | regenc);
+                emitByte(0x25);
+                emitInt(disp);
+            }
+        }
+    }
+
+    public final void addl(Address dst, int imm32) {
+        prefix(dst);
+        emitArithOperand(0x81, rax, dst, imm32);
+    }
+
+    public final void addl(Address dst, Register src) {
+        prefix(dst, src);
+        emitByte(0x01);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void addl(Register dst, int imm32) {
+        prefix(dst);
+        emitArith(0x81, 0xC0, dst, imm32);
+    }
+
+    public final void addl(Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0x03);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void addl(Register dst, Register src) {
+        prefixAndEncode(dst.encoding, src.encoding);
+        emitArith(0x03, 0xC0, dst, src);
+    }
+
+    private void addrNop4() {
+        // 4 bytes: NOP DWORD PTR [EAX+0]
+        emitByte(0x0F);
+        emitByte(0x1F);
+        emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
+        emitByte(0); // 8-bits offset (1 byte)
+    }
+
+    private void addrNop5() {
+        // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
+        emitByte(0x0F);
+        emitByte(0x1F);
+        emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
+        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
+        emitByte(0); // 8-bits offset (1 byte)
+    }
+
+    private void addrNop7() {
+        // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
+        emitByte(0x0F);
+        emitByte(0x1F);
+        emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
+        emitInt(0); // 32-bits offset (4 bytes)
+    }
+
+    private void addrNop8() {
+        // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
+        emitByte(0x0F);
+        emitByte(0x1F);
+        emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
+        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
+        emitInt(0); // 32-bits offset (4 bytes)
+    }
+
+    public final void addsd(Register dst, Register src) {
+        assert dst.isFpu() && src.isFpu();
+        emitByte(0xF2);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x58);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void addsd(Register dst, Address src) {
+        assert dst.isFpu();
+        emitByte(0xF2);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x58);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void addss(Register dst, Register src) {
+        assert dst.isFpu() && src.isFpu();
+        emitByte(0xF3);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x58);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void addss(Register dst, Address src) {
+        assert dst.isFpu();
+        emitByte(0xF3);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x58);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void andl(Register dst, int imm32) {
+        prefix(dst);
+        emitArith(0x81, 0xE0, dst, imm32);
+    }
+
+    public final void andl(Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0x23);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void andl(Register dst, Register src) {
+        prefixAndEncode(dst.encoding, src.encoding);
+        emitArith(0x23, 0xC0, dst, src);
+    }
+
+    public final void bsfq(Register dst, Register src) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xBC);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void bsfq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0xBC);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void bsrq(Register dst, Register src) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xBD);
+        emitByte(0xC0 | encode);
+    }
+
+
+    public final void bsrq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0xBD);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void bsrl(Register dst, Register src) {
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xBD);
+        emitByte(0xC0 | encode);
+    }
+
+
+    public final void bsrl(Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0xBD);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void bswapl(Register reg) { // bswap
+        int encode = prefixAndEncode(reg.encoding);
+        emitByte(0x0F);
+        emitByte(0xC8 | encode);
+    }
+
+    public final void btli(Address src, int imm8) {
+        prefixq(src);
+        emitByte(0x0F);
+        emitByte(0xBA);
+        emitOperandHelper(rsp, src);
+        emitByte(imm8);
+    }
+
+    public final void cdql() {
+        emitByte(0x99);
+    }
+
+    public final void cmovl(ConditionFlag cc, Register dst, Register src) {
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x40 | cc.value);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cmovl(ConditionFlag cc, Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x40 | cc.value);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void cmpb(Address dst, int imm8) {
+        prefix(dst);
+        emitByte(0x80);
+        emitOperandHelper(rdi, dst);
+        emitByte(imm8);
+    }
+
+    public final void cmpl(Address dst, int imm32) {
+        prefix(dst);
+        emitByte(0x81);
+        emitOperandHelper(rdi, dst);
+        emitInt(imm32);
+    }
+
+    public final void cmpl(Register dst, int imm32) {
+        prefix(dst);
+        emitArith(0x81, 0xF8, dst, imm32);
+    }
+
+    public final void cmpl(Register dst, Register src) {
+        prefixAndEncode(dst.encoding, src.encoding);
+        emitArith(0x3B, 0xC0, dst, src);
+    }
+
+    public final void cmpl(Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0x3B);
+        emitOperandHelper(dst, src);
+    }
+
+    // The 32-bit cmpxchg compares the value at adr with the contents of X86.rax,
+    // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,.
+    // The ZF is set if the compared values were equal, and cleared otherwise.
+    public final void cmpxchgl(Register reg, Address adr) { // cmpxchg
+        if ((Atomics & 2) != 0) {
+            // caveat: no instructionmark, so this isn't relocatable.
+            // Emit a synthetic, non-atomic, CAS equivalent.
+            // Beware. The synthetic form sets all ICCs, not just ZF.
+            // cmpxchg r,[m] is equivalent to X86.rax, = CAS (m, X86.rax, r)
+            cmpl(rax, adr);
+            movl(rax, adr);
+            if (reg != rax) {
+                Label l = new Label();
+                jcc(ConditionFlag.notEqual, l);
+                movl(adr, reg);
+                bind(l);
+            }
+        } else {
+
+            prefix(adr, reg);
+            emitByte(0x0F);
+            emitByte(0xB1);
+            emitOperandHelper(reg, adr);
+        }
+    }
+
+    public final void comisd(Register dst, Address src) {
+        assert dst.isFpu();
+        // NOTE: dbx seems to decode this as comiss even though the
+        // 0x66 is there. Strangly ucomisd comes out correct
+        emitByte(0x66);
+        comiss(dst, src);
+    }
+
+    public final void comiss(Register dst, Address src) {
+        assert dst.isFpu();
+
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x2F);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void cvtdq2pd(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+
+        emitByte(0xF3);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xE6);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cvtdq2ps(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x5B);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cvtsd2ss(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        emitByte(0xF2);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x5A);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cvtsi2sdl(Register dst, Register src) {
+        assert dst.isFpu();
+        emitByte(0xF2);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x2A);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cvtsi2ssl(Register dst, Register src) {
+        assert dst.isFpu();
+        emitByte(0xF3);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x2A);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cvtss2sd(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        emitByte(0xF3);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x5A);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cvttsd2sil(Register dst, Register src) {
+        assert src.isFpu();
+        emitByte(0xF2);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x2C);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cvttss2sil(Register dst, Register src) {
+        assert src.isFpu();
+        emitByte(0xF3);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x2C);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void decl(Address dst) {
+        // Don't use it directly. Use Macrodecrement() instead.
+        prefix(dst);
+        emitByte(0xFF);
+        emitOperandHelper(rcx, dst);
+    }
+
+    public final void divsd(Register dst, Address src) {
+        assert dst.isFpu();
+        emitByte(0xF2);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x5E);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void divsd(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        emitByte(0xF2);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x5E);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void divss(Register dst, Address src) {
+        assert dst.isFpu();
+        emitByte(0xF3);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x5E);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void divss(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        emitByte(0xF3);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x5E);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void hlt() {
+        emitByte(0xF4);
+    }
+
+    public final void idivl(Register src) {
+        int encode = prefixAndEncode(src.encoding);
+        emitByte(0xF7);
+        emitByte(0xF8 | encode);
+    }
+
+    public final void divl(Register src) {
+        int encode = prefixAndEncode(src.encoding);
+        emitByte(0xF7);
+        emitByte(0xF0 | encode);
+    }
+
+    public final void imull(Register dst, Register src) {
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xAF);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void imull(Register dst, Register src, int value) {
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        if (isByte(value)) {
+            emitByte(0x6B);
+            emitByte(0xC0 | encode);
+            emitByte(value & 0xFF);
+        } else {
+            emitByte(0x69);
+            emitByte(0xC0 | encode);
+            emitInt(value);
+        }
+    }
+
+    public final void incl(Address dst) {
+        // Don't use it directly. Use Macroincrement() instead.
+        prefix(dst);
+        emitByte(0xFF);
+        emitOperandHelper(rax, dst);
+    }
+
+    public final void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
+        int shortSize = 2;
+        int longSize = 6;
+        long disp = jumpTarget - codeBuffer.position();
+        if (!forceDisp32 && isByte(disp - shortSize)) {
+            // 0111 tttn #8-bit disp
+            emitByte(0x70 | cc.value);
+            emitByte((int) ((disp - shortSize) & 0xFF));
+        } else {
+            // 0000 1111 1000 tttn #32-bit disp
+            assert isInt(disp - longSize) : "must be 32bit offset (call4)";
+            emitByte(0x0F);
+            emitByte(0x80 | cc.value);
+            emitInt((int) (disp - longSize));
+        }
+    }
+
+    public final void jcc(ConditionFlag cc, Label l) {
+        assert (0 <= cc.value) && (cc.value < 16) : "illegal cc";
+        if (l.isBound()) {
+            jcc(cc, l.position(), false);
+        } else {
+            // Note: could eliminate cond. jumps to this jump if condition
+            // is the same however, seems to be rather unlikely case.
+            // Note: use jccb() if label to be bound is very close to get
+            // an 8-bit displacement
+            l.addPatchAt(codeBuffer.position());
+            emitByte(0x0F);
+            emitByte(0x80 | cc.value);
+            emitInt(0);
+        }
+
+    }
+
+    public final void jccb(ConditionFlag cc, Label l) {
+        if (l.isBound()) {
+            int shortSize = 2;
+            int entry = l.position();
+            assert isByte(entry - (codeBuffer.position() + shortSize)) : "Dispacement too large for a short jmp";
+            long disp = entry - codeBuffer.position();
+            // 0111 tttn #8-bit disp
+            emitByte(0x70 | cc.value);
+            emitByte((int) ((disp - shortSize) & 0xFF));
+        } else {
+
+            l.addPatchAt(codeBuffer.position());
+            emitByte(0x70 | cc.value);
+            emitByte(0);
+        }
+    }
+
+    public final void jmp(Address adr) {
+        prefix(adr);
+        emitByte(0xFF);
+        emitOperandHelper(rsp, adr);
+    }
+
+    public final void jmp(int jumpTarget, boolean forceDisp32) {
+        int shortSize = 2;
+        int longSize = 5;
+        long disp = jumpTarget - codeBuffer.position();
+        if (!forceDisp32 && isByte(disp - shortSize)) {
+            emitByte(0xEB);
+            emitByte((int) ((disp - shortSize) & 0xFF));
+        } else {
+            emitByte(0xE9);
+            emitInt((int) (disp - longSize));
+        }
+    }
+
+    @Override
+    public final void jmp(Label l) {
+        if (l.isBound()) {
+            jmp(l.position(), false);
+        } else {
+            // By default, forward jumps are always 32-bit displacements, since
+            // we can't yet know where the label will be bound. If you're sure that
+            // the forward jump will not run beyond 256 bytes, use jmpb to
+            // force an 8-bit displacement.
+
+            l.addPatchAt(codeBuffer.position());
+            emitByte(0xE9);
+            emitInt(0);
+        }
+    }
+
+    public final void jmp(Register entry) {
+        int encode = prefixAndEncode(entry.encoding);
+        emitByte(0xFF);
+        emitByte(0xE0 | encode);
+    }
+
+    public final void jmpb(Label l) {
+        if (l.isBound()) {
+            int shortSize = 2;
+            int entry = l.position();
+            assert isByte((entry - codeBuffer.position()) + shortSize) : "Dispacement too large for a short jmp";
+            long offs = entry - codeBuffer.position();
+            emitByte(0xEB);
+            emitByte((int) ((offs - shortSize) & 0xFF));
+        } else {
+
+            l.addPatchAt(codeBuffer.position());
+            emitByte(0xEB);
+            emitByte(0);
+        }
+    }
+
+    public final void leaq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x8D);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void enter(int imm16, int imm8) {
+        emitByte(0xC8);
+        emitShort(imm16);
+        emitByte(imm8);
+    }
+
+    public final void leave() {
+        emitByte(0xC9);
+    }
+
+    public final void lock() {
+        if ((Atomics & 1) != 0) {
+            // Emit either nothing, a NOP, or a NOP: prefix
+            emitByte(0x90);
+        } else {
+            emitByte(0xF0);
+        }
+    }
+
+    // Emit mfence instruction
+    public final void mfence() {
+        emitByte(0x0F);
+        emitByte(0xAE);
+        emitByte(0xF0);
+    }
+
+    public final void mov(Register dst, Register src) {
+        movq(dst, src);
+    }
+
+    public final void movapd(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        int dstenc = dst.encoding;
+        int srcenc = src.encoding;
+        emitByte(0x66);
+        if (dstenc < 8) {
+            if (srcenc >= 8) {
+                emitByte(Prefix.REXB);
+                srcenc -= 8;
+            }
+        } else {
+            if (srcenc < 8) {
+                emitByte(Prefix.REXR);
+            } else {
+                emitByte(Prefix.REXRB);
+                srcenc -= 8;
+            }
+            dstenc -= 8;
+        }
+        emitByte(0x0F);
+        emitByte(0x28);
+        emitByte(0xC0 | dstenc << 3 | srcenc);
+    }
+
+    public final void movaps(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        int dstenc = dst.encoding;
+        int srcenc = src.encoding;
+        if (dstenc < 8) {
+            if (srcenc >= 8) {
+                emitByte(Prefix.REXB);
+                srcenc -= 8;
+            }
+        } else {
+            if (srcenc < 8) {
+                emitByte(Prefix.REXR);
+            } else {
+                emitByte(Prefix.REXRB);
+                srcenc -= 8;
+            }
+            dstenc -= 8;
+        }
+        emitByte(0x0F);
+        emitByte(0x28);
+        emitByte(0xC0 | dstenc << 3 | srcenc);
+    }
+
+    public final void movb(Register dst, Address src) {
+        prefix(src, dst); // , true)
+        emitByte(0x8A);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movb(Address dst, int imm8) {
+        prefix(dst);
+        emitByte(0xC6);
+        emitOperandHelper(rax, dst);
+        emitByte(imm8);
+    }
+
+    public final void movb(Address dst, Register src) {
+        assert src.isByte() : "must have byte register";
+        prefix(dst, src); // , true)
+        emitByte(0x88);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void movdl(Register dst, Register src) {
+        if (dst.isFpu()) {
+            assert !src.isFpu() : "does this hold?";
+            emitByte(0x66);
+            int encode = prefixAndEncode(dst.encoding, src.encoding);
+            emitByte(0x0F);
+            emitByte(0x6E);
+            emitByte(0xC0 | encode);
+        } else if (src.isFpu()) {
+            assert !dst.isFpu();
+            emitByte(0x66);
+            // swap src/dst to get correct prefix
+            int encode = prefixAndEncode(src.encoding, dst.encoding);
+            emitByte(0x0F);
+            emitByte(0x7E);
+            emitByte(0xC0 | encode);
+        }
+    }
+
+    public final void movdqa(Register dst, Address src) {
+        assert dst.isFpu();
+        emitByte(0x66);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x6F);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movdqa(Register dst, Register src) {
+        assert dst.isFpu();
+        emitByte(0x66);
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x6F);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movdqa(Address dst, Register src) {
+        assert src.isFpu();
+        emitByte(0x66);
+        prefix(dst, src);
+        emitByte(0x0F);
+        emitByte(0x7F);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void movdqu(Register dst, Address src) {
+        assert dst.isFpu();
+        emitByte(0xF3);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x6F);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movdqu(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+
+        emitByte(0xF3);
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x6F);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movdqu(Address dst, Register src) {
+        assert src.isFpu();
+
+        emitByte(0xF3);
+        prefix(dst, src);
+        emitByte(0x0F);
+        emitByte(0x7F);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void movl(Register dst, int imm32) {
+        int encode = prefixAndEncode(dst.encoding);
+        emitByte(0xB8 | encode);
+        emitInt(imm32);
+    }
+
+    public final void movl(Register dst, Register src) {
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x8B);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movl(Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0x8B);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movl(Address dst, int imm32) {
+        prefix(dst);
+        emitByte(0xC7);
+        emitOperandHelper(rax, dst);
+        emitInt(imm32);
+    }
+
+    public final void movl(Address dst, Register src) {
+        prefix(dst, src);
+        emitByte(0x89);
+        emitOperandHelper(src, dst);
+    }
+
+    /**
+     * New CPUs require use of movsd and movss to avoid partial register stall
+     * when loading from memory. But for old Opteron use movlpd instead of movsd.
+     * The selection is done in {@link AMD64MacroAssembler#movdbl(Register, Address)}
+     * and {@link AMD64MacroAssembler#movflt(Register, Register)}.
+     */
+    public final void movlpd(Register dst, Address src) {
+        assert dst.isFpu();
+        emitByte(0x66);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x12);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movlpd(Address dst, Register src) {
+        assert src.isFpu();
+        emitByte(0x66);
+        prefix(dst, src);
+        emitByte(0x0F);
+        emitByte(0x13);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void movq(Register dst, Address src) {
+        if (dst.isFpu()) {
+            emitByte(0xF3);
+            prefixq(src, dst);
+            emitByte(0x0F);
+            emitByte(0x7E);
+            emitOperandHelper(dst, src);
+        } else {
+            prefixq(src, dst);
+            emitByte(0x8B);
+            emitOperandHelper(dst, src);
+        }
+    }
+
+    public final void movq(Register dst, Register src) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x8B);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movq(Address dst, Register src) {
+        if (src.isFpu()) {
+            emitByte(0x66);
+            prefixq(dst, src);
+            emitByte(0x0F);
+            emitByte(0xD6);
+            emitOperandHelper(src, dst);
+        } else {
+            prefixq(dst, src);
+            emitByte(0x89);
+            emitOperandHelper(src, dst);
+        }
+    }
+
+    public final void movsxb(Register dst, Address src) { // movsxb
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0xBE);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movsxb(Register dst, Register src) { // movsxb
+        int encode = prefixAndEncode(dst.encoding, src.encoding, true);
+        emitByte(0x0F);
+        emitByte(0xBE);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movsd(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        emitByte(0xF2);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x10);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movsd(Register dst, Address src) {
+        assert dst.isFpu();
+        emitByte(0xF2);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x10);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movsd(Address dst, Register src) {
+        assert src.isFpu();
+        emitByte(0xF2);
+        prefix(dst, src);
+        emitByte(0x0F);
+        emitByte(0x11);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void movss(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        emitByte(0xF3);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x10);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movss(Register dst, Address src) {
+        assert dst.isFpu();
+        emitByte(0xF3);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x10);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movss(Address dst, Register src) {
+        assert src.isFpu();
+        emitByte(0xF3);
+        prefix(dst, src);
+        emitByte(0x0F);
+        emitByte(0x11);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void movswl(Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0xBF);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movsxw(Register dst, Register src) { // movsxw
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xBF);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movsxw(Register dst, Address src) { // movsxw
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0xBF);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movzxd(Register dst, Register src) { // movzxd
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x63);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movzxd(Register dst, Address src) { // movzxd
+        prefix(src, dst);
+        emitByte(0x63);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movw(Address dst, int imm16) {
+        emitByte(0x66); // switch to 16-bit mode
+        prefix(dst);
+        emitByte(0xC7);
+        emitOperandHelper(rax, dst);
+        emitShort(imm16);
+    }
+
+    public final void movw(Register dst, Address src) {
+        emitByte(0x66);
+        prefix(src, dst);
+        emitByte(0x8B);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movw(Address dst, Register src) {
+        emitByte(0x66);
+        prefix(dst, src);
+        emitByte(0x89);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void movzxb(Register dst, Address src) { // movzxb
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0xB6);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movzxb(Register dst, Register src) { // movzxb
+        int encode = prefixAndEncode(dst.encoding, src.encoding, true);
+        emitByte(0x0F);
+        emitByte(0xB6);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movzxl(Register dst, Address src) { // movzxw
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0xB7);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movzxl(Register dst, Register src) { // movzxw
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xB7);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void mull(Address src) {
+        prefix(src);
+        emitByte(0xF7);
+        emitOperandHelper(rsp, src);
+    }
+
+    public final void mulsd(Register dst, Address src) {
+        assert dst.isFpu();
+        emitByte(0xF2);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x59);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void mulsd(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+
+        emitByte(0xF2);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x59);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void mulss(Register dst, Address src) {
+        assert dst.isFpu();
+
+        emitByte(0xF3);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x59);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void mulss(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        emitByte(0xF3);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x59);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void negl(Register dst) {
+        int encode = prefixAndEncode(dst.encoding);
+        emitByte(0xF7);
+        emitByte(0xD8 | encode);
+    }
+
+    public final void ensureUniquePC() {
+        nop();
+    }
+
+    public final void nop() {
+        nop(1);
+    }
+
+    public void nop(int count) {
+        int i = count;
+        if (UseNormalNop) {
+            assert i > 0 : " ";
+            // The fancy nops aren't currently recognized by debuggers making it a
+            // pain to disassemble code while debugging. If assert are on clearly
+            // speed is not an issue so simply use the single byte traditional nop
+            // to do alignment.
+
+            for (; i > 0; i--) {
+                emitByte(0x90);
+            }
+            return;
+        }
+
+        if (UseAddressNop) {
+            //
+            // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD.
+            // 1: 0x90
+            // 2: 0x66 0x90
+            // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
+            // 4: 0x0F 0x1F 0x40 0x00
+            // 5: 0x0F 0x1F 0x44 0x00 0x00
+            // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
+            // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
+            // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+            // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+            // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+            // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+
+            // The rest coding is AMD specific - use consecutive Address nops
+
+            // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
+            // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
+            // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
+            // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
+            // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+            // Size prefixes (0x66) are added for larger sizes
+
+            while (i >= 22) {
+                i -= 11;
+                emitByte(0x66); // size prefix
+                emitByte(0x66); // size prefix
+                emitByte(0x66); // size prefix
+                addrNop8();
+            }
+            // Generate first nop for size between 21-12
+            switch (i) {
+                case 21:
+                    i -= 1;
+                    emitByte(0x66); // size prefix
+                    // fall through
+                case 20:
+                    // fall through
+                case 19:
+                    i -= 1;
+                    emitByte(0x66); // size prefix
+                    // fall through
+                case 18:
+                    // fall through
+                case 17:
+                    i -= 1;
+                    emitByte(0x66); // size prefix
+                    // fall through
+                case 16:
+                    // fall through
+                case 15:
+                    i -= 8;
+                    addrNop8();
+                    break;
+                case 14:
+                case 13:
+                    i -= 7;
+                    addrNop7();
+                    break;
+                case 12:
+                    i -= 6;
+                    emitByte(0x66); // size prefix
+                    addrNop5();
+                    break;
+                default:
+                    assert i < 12;
+            }
+
+            // Generate second nop for size between 11-1
+            switch (i) {
+                case 11:
+                    emitByte(0x66); // size prefix
+                    emitByte(0x66); // size prefix
+                    emitByte(0x66); // size prefix
+                    addrNop8();
+                    break;
+                case 10:
+                    emitByte(0x66); // size prefix
+                    emitByte(0x66); // size prefix
+                    addrNop8();
+                    break;
+                case 9:
+                    emitByte(0x66); // size prefix
+                    addrNop8();
+                    break;
+                case 8:
+                    addrNop8();
+                    break;
+                case 7:
+                    addrNop7();
+                    break;
+                case 6:
+                    emitByte(0x66); // size prefix
+                    addrNop5();
+                    break;
+                case 5:
+                    addrNop5();
+                    break;
+                case 4:
+                    addrNop4();
+                    break;
+                case 3:
+                    // Don't use "0x0F 0x1F 0x00" - need patching safe padding
+                    emitByte(0x66); // size prefix
+                    emitByte(0x66); // size prefix
+                    emitByte(0x90); // nop
+                    break;
+                case 2:
+                    emitByte(0x66); // size prefix
+                    emitByte(0x90); // nop
+                    break;
+                case 1:
+                    emitByte(0x90); // nop
+                    break;
+                default:
+                    assert i == 0;
+            }
+            return;
+        }
+
+        // Using nops with size prefixes "0x66 0x90".
+        // From AMD Optimization Guide:
+        // 1: 0x90
+        // 2: 0x66 0x90
+        // 3: 0x66 0x66 0x90
+        // 4: 0x66 0x66 0x66 0x90
+        // 5: 0x66 0x66 0x90 0x66 0x90
+        // 6: 0x66 0x66 0x90 0x66 0x66 0x90
+        // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
+        // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
+        // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
+        // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
+        //
+        while (i > 12) {
+            i -= 4;
+            emitByte(0x66); // size prefix
+            emitByte(0x66);
+            emitByte(0x66);
+            emitByte(0x90); // nop
+        }
+        // 1 - 12 nops
+        if (i > 8) {
+            if (i > 9) {
+                i -= 1;
+                emitByte(0x66);
+            }
+            i -= 3;
+            emitByte(0x66);
+            emitByte(0x66);
+            emitByte(0x90);
+        }
+        // 1 - 8 nops
+        if (i > 4) {
+            if (i > 6) {
+                i -= 1;
+                emitByte(0x66);
+            }
+            i -= 3;
+            emitByte(0x66);
+            emitByte(0x66);
+            emitByte(0x90);
+        }
+        switch (i) {
+            case 4:
+                emitByte(0x66);
+                emitByte(0x66);
+                emitByte(0x66);
+                emitByte(0x90);
+                break;
+            case 3:
+                emitByte(0x66);
+                emitByte(0x66);
+                emitByte(0x90);
+                break;
+            case 2:
+                emitByte(0x66);
+                emitByte(0x90);
+                break;
+            case 1:
+                emitByte(0x90);
+                break;
+            default:
+                assert i == 0;
+        }
+    }
+
+    public final void notl(Register dst) {
+        int encode = prefixAndEncode(dst.encoding);
+        emitByte(0xF7);
+        emitByte(0xD0 | encode);
+    }
+
+    public final void orl(Address dst, int imm32) {
+        prefix(dst);
+        emitByte(0x81);
+        emitOperandHelper(rcx, dst);
+        emitInt(imm32);
+    }
+
+    public final void orl(Register dst, int imm32) {
+        prefix(dst);
+        emitArith(0x81, 0xC8, dst, imm32);
+    }
+
+    public final void orl(Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0x0B);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void orl(Register dst, Register src) {
+        prefixAndEncode(dst.encoding, src.encoding);
+        emitArith(0x0B, 0xC0, dst, src);
+    }
+
+    // generic
+    public final void pop(Register dst) {
+        int encode = prefixAndEncode(dst.encoding);
+        emitByte(0x58 | encode);
+    }
+
+    public final void prefetchPrefix(Address src) {
+        prefix(src);
+        emitByte(0x0F);
+    }
+
+    public final void prefetchnta(Address src) {
+        prefetchPrefix(src);
+        emitByte(0x18);
+        emitOperandHelper(rax, src); // 0, src
+    }
+
+    public final void prefetchr(Address src) {
+        prefetchPrefix(src);
+        emitByte(0x0D);
+        emitOperandHelper(rax, src); // 0, src
+    }
+
+    public final void prefetcht0(Address src) {
+        prefetchPrefix(src);
+        emitByte(0x18);
+        emitOperandHelper(rcx, src); // 1, src
+
+    }
+
+    public final void prefetcht1(Address src) {
+        prefetchPrefix(src);
+        emitByte(0x18);
+        emitOperandHelper(rdx, src); // 2, src
+    }
+
+    public final void prefetcht2(Address src) {
+        prefetchPrefix(src);
+        emitByte(0x18);
+        emitOperandHelper(rbx, src); // 3, src
+    }
+
+    public final void prefetchw(Address src) {
+        prefetchPrefix(src);
+        emitByte(0x0D);
+        emitOperandHelper(rcx, src); // 1, src
+    }
+
+    public final void pshufd(Register dst, Register src, int mode) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        assert isUByte(mode) : "invalid value";
+
+        emitByte(0x66);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x70);
+        emitByte(0xC0 | encode);
+        emitByte(mode & 0xFF);
+    }
+
+    public final void pshufd(Register dst, Address src, int mode) {
+        assert dst.isFpu();
+        assert isUByte(mode) : "invalid value";
+
+        emitByte(0x66);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x70);
+        emitOperandHelper(dst, src);
+        emitByte(mode & 0xFF);
+
+    }
+
+    public final void pshuflw(Register dst, Register src, int mode) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        assert isUByte(mode) : "invalid value";
+
+        emitByte(0xF2);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x70);
+        emitByte(0xC0 | encode);
+        emitByte(mode & 0xFF);
+    }
+
+    public final void pshuflw(Register dst, Address src, int mode) {
+        assert dst.isFpu();
+        assert isUByte(mode) : "invalid value";
+
+        emitByte(0xF2);
+        prefix(src, dst); // QQ new
+        emitByte(0x0F);
+        emitByte(0x70);
+        emitOperandHelper(dst, src);
+        emitByte(mode & 0xFF);
+    }
+
+    public final void psrlq(Register dst, int shift) {
+        assert dst.isFpu();
+        // HMM Table D-1 says sse2 or mmx
+
+        int encode = prefixqAndEncode(xmm2.encoding, dst.encoding);
+        emitByte(0x66);
+        emitByte(0x0F);
+        emitByte(0x73);
+        emitByte(0xC0 | encode);
+        emitByte(shift);
+    }
+
+    public final void punpcklbw(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        emitByte(0x66);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x60);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void push(int imm32) {
+        // in 64bits we push 64bits onto the stack but only
+        // take a 32bit immediate
+        emitByte(0x68);
+        emitInt(imm32);
+    }
+
+    public final void push(Register src) {
+        int encode = prefixAndEncode(src.encoding);
+        emitByte(0x50 | encode);
+    }
+
+    public final void pushf() {
+        emitByte(0x9C);
+    }
+
+    public final void pxor(Register dst, Address src) {
+        assert dst.isFpu();
+
+        emitByte(0x66);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0xEF);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void pxor(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+
+        emitByte(0x66);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xEF);
+        emitByte(0xC0 | encode);
+
+    }
+
+    public final void rcll(Register dst, int imm8) {
+        assert isShiftCount(imm8) : "illegal shift count";
+        int encode = prefixAndEncode(dst.encoding);
+        if (imm8 == 1) {
+            emitByte(0xD1);
+            emitByte(0xD0 | encode);
+        } else {
+            emitByte(0xC1);
+            emitByte(0xD0 | encode);
+            emitByte(imm8);
+        }
+    }
+
+    public final void pause() {
+        emitByte(0xF3);
+        emitByte(0x90);
+    }
+
+    // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx heap words.
+    public final void repeatMoveWords() {
+        emitByte(0xF3);
+        emitByte(Prefix.REXW);
+        emitByte(0xA5);
+    }
+
+    // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx bytes.
+    public final void repeatMoveBytes() {
+        emitByte(0xF3);
+        emitByte(Prefix.REXW);
+        emitByte(0xA4);
+    }
+
+    // sets X86.rcx pointer sized words with X86.rax, value at [edi]
+    // generic
+    public final void repSet() { // repSet
+        emitByte(0xF3);
+        // STOSQ
+        emitByte(Prefix.REXW);
+        emitByte(0xAB);
+    }
+
+    // scans X86.rcx pointer sized words at [edi] for occurance of X86.rax,
+    // generic
+    public final void repneScan() { // repneScan
+        emitByte(0xF2);
+        // SCASQ
+        emitByte(Prefix.REXW);
+        emitByte(0xAF);
+    }
+
+    // scans X86.rcx 4 byte words at [edi] for occurance of X86.rax,
+    // generic
+    public final void repneScanl() { // repneScan
+        emitByte(0xF2);
+        // SCASL
+        emitByte(0xAF);
+    }
+
+    public final void ret(int imm16) {
+        if (imm16 == 0) {
+            emitByte(0xC3);
+        } else {
+            emitByte(0xC2);
+            emitShort(imm16);
+        }
+    }
+
+    public final void sarl(Register dst, int imm8) {
+        int encode = prefixAndEncode(dst.encoding);
+        assert isShiftCount(imm8) : "illegal shift count";
+        if (imm8 == 1) {
+            emitByte(0xD1);
+            emitByte(0xF8 | encode);
+        } else {
+            emitByte(0xC1);
+            emitByte(0xF8 | encode);
+            emitByte(imm8);
+        }
+    }
+
+    public final void sarl(Register dst) {
+        int encode = prefixAndEncode(dst.encoding);
+        emitByte(0xD3);
+        emitByte(0xF8 | encode);
+    }
+
+    public final void sbbl(Address dst, int imm32) {
+        prefix(dst);
+        emitArithOperand(0x81, rbx, dst, imm32);
+    }
+
+    public final void sbbl(Register dst, int imm32) {
+        prefix(dst);
+        emitArith(0x81, 0xD8, dst, imm32);
+    }
+
+    public final void sbbl(Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0x1B);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void sbbl(Register dst, Register src) {
+        prefixAndEncode(dst.encoding, src.encoding);
+        emitArith(0x1B, 0xC0, dst, src);
+    }
+
+    public final void setb(ConditionFlag cc, Register dst) {
+        assert 0 <= cc.value && cc.value < 16 : "illegal cc";
+        int encode = prefixAndEncode(dst.encoding, true);
+        emitByte(0x0F);
+        emitByte(0x90 | cc.value);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void shll(Register dst, int imm8) {
+        assert isShiftCount(imm8) : "illegal shift count";
+        int encode = prefixAndEncode(dst.encoding);
+        if (imm8 == 1) {
+            emitByte(0xD1);
+            emitByte(0xE0 | encode);
+        } else {
+            emitByte(0xC1);
+            emitByte(0xE0 | encode);
+            emitByte(imm8);
+        }
+    }
+
+    public final void shll(Register dst) {
+        int encode = prefixAndEncode(dst.encoding);
+        emitByte(0xD3);
+        emitByte(0xE0 | encode);
+    }
+
+    public final void shrl(Register dst, int imm8) {
+        assert isShiftCount(imm8) : "illegal shift count";
+        int encode = prefixAndEncode(dst.encoding);
+        emitByte(0xC1);
+        emitByte(0xE8 | encode);
+        emitByte(imm8);
+    }
+
+    public final void shrl(Register dst) {
+        int encode = prefixAndEncode(dst.encoding);
+        emitByte(0xD3);
+        emitByte(0xE8 | encode);
+    }
+
+    // copies a single word from [esi] to [edi]
+    public final void smovl() {
+        emitByte(0xA5);
+    }
+
+    public final void sqrtsd(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        // HMM Table D-1 says sse2
+        // assert is64 || target.supportsSSE();
+        emitByte(0xF2);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x51);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void subl(Address dst, int imm32) {
+        prefix(dst);
+        if (isByte(imm32)) {
+            emitByte(0x83);
+            emitOperandHelper(rbp, dst);
+            emitByte(imm32 & 0xFF);
+        } else {
+            emitByte(0x81);
+            emitOperandHelper(rbp, dst);
+            emitInt(imm32);
+        }
+    }
+
+    public final void subl(Register dst, int imm32) {
+        prefix(dst);
+        emitArith(0x81, 0xE8, dst, imm32);
+    }
+
+    public final void subl(Address dst, Register src) {
+        prefix(dst, src);
+        emitByte(0x29);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void subl(Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0x2B);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void subl(Register dst, Register src) {
+        prefixAndEncode(dst.encoding, src.encoding);
+        emitArith(0x2B, 0xC0, dst, src);
+    }
+
+    public final void subsd(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        emitByte(0xF2);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x5C);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void subsd(Register dst, Address src) {
+        assert dst.isFpu();
+
+        emitByte(0xF2);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x5C);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void subss(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        emitByte(0xF3);
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x5C);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void subss(Register dst, Address src) {
+        assert dst.isFpu();
+
+        emitByte(0xF3);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x5C);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void testb(Register dst, int imm8) {
+        prefixAndEncode(dst.encoding, true);
+        emitArithB(0xF6, 0xC0, dst, imm8);
+    }
+
+    public final void testl(Register dst, int imm32) {
+        // not using emitArith because test
+        // doesn't support sign-extension of
+        // 8bit operands
+        int encode = dst.encoding;
+        if (encode == 0) {
+            emitByte(0xA9);
+        } else {
+            encode = prefixAndEncode(encode);
+            emitByte(0xF7);
+            emitByte(0xC0 | encode);
+        }
+        emitInt(imm32);
+    }
+
+    public final void testl(Register dst, Register src) {
+        prefixAndEncode(dst.encoding, src.encoding);
+        emitArith(0x85, 0xC0, dst, src);
+    }
+
+    public final void testl(Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0x85);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void ucomisd(Register dst, Address src) {
+        assert dst.isFpu();
+        emitByte(0x66);
+        ucomiss(dst, src);
+    }
+
+    public final void ucomisd(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        emitByte(0x66);
+        ucomiss(dst, src);
+    }
+
+    public final void ucomiss(Register dst, Address src) {
+        assert dst.isFpu();
+
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x2E);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void ucomiss(Register dst, Register src) {
+        assert dst.isFpu();
+        assert src.isFpu();
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x2E);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void xaddl(Address dst, Register src) {
+        assert src.isFpu();
+
+        prefix(dst, src);
+        emitByte(0x0F);
+        emitByte(0xC1);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void xchgl(Register dst, Address src) { // xchg
+        prefix(src, dst);
+        emitByte(0x87);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void xchgl(Register dst, Register src) {
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x87);
+        emitByte(0xc0 | encode);
+    }
+
+    public final void xorl(Register dst, int imm32) {
+        prefix(dst);
+        emitArith(0x81, 0xF0, dst, imm32);
+    }
+
+    public final void xorl(Register dst, Address src) {
+        prefix(src, dst);
+        emitByte(0x33);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void xorl(Register dst, Register src) {
+        prefixAndEncode(dst.encoding, src.encoding);
+        emitArith(0x33, 0xC0, dst, src);
+    }
+
+    public final void andpd(Register dst, Register src) {
+        emitByte(0x66);
+        andps(dst, src);
+    }
+
+    public final void andpd(Register dst, Address src) {
+        emitByte(0x66);
+        andps(dst, src);
+    }
+
+    public final void andps(Register dst, Register src) {
+        assert dst.isFpu() && src.isFpu();
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x54);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void andps(Register dst, Address src) {
+        assert dst.isFpu();
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x54);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void orpd(Register dst, Register src) {
+        emitByte(0x66);
+        orps(dst, src);
+    }
+
+    public final void orpd(Register dst, Address src) {
+        emitByte(0x66);
+        orps(dst, src);
+    }
+
+    public final void orps(Register dst, Register src) {
+        assert dst.isFpu() && src.isFpu();
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x56);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void orps(Register dst, Address src) {
+        assert dst.isFpu();
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x56);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void xorpd(Register dst, Register src) {
+        emitByte(0x66);
+        xorps(dst, src);
+    }
+
+    public final void xorpd(Register dst, Address src) {
+        emitByte(0x66);
+        xorps(dst, src);
+    }
+
+    public final void xorps(Register dst, Register src) {
+        assert dst.isFpu() && src.isFpu();
+        int encode = prefixAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x57);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void xorps(Register dst, Address src) {
+        assert dst.isFpu();
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x57);
+        emitOperandHelper(dst, src);
+    }
+
+    // 32bit only pieces of the assembler
+
+    public final void decl(Register dst) {
+        // Don't use it directly. Use Macrodecrementl() instead.
+        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
+        int encode = prefixAndEncode(dst.encoding);
+        emitByte(0xFF);
+        emitByte(0xC8 | encode);
+    }
+
+    public final void incl(Register dst) {
+        // Don't use it directly. Use Macroincrementl() instead.
+        // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
+        int encode = prefixAndEncode(dst.encoding);
+        emitByte(0xFF);
+        emitByte(0xC0 | encode);
+    }
+
+    int prefixAndEncode(int regEnc) {
+        return prefixAndEncode(regEnc, false);
+    }
+
+    int prefixAndEncode(int regEnc, boolean byteinst) {
+        if (regEnc >= 8) {
+            emitByte(Prefix.REXB);
+            return regEnc - 8;
+        } else if (byteinst && regEnc >= 4) {
+            emitByte(Prefix.REX);
+        }
+        return regEnc;
+    }
+
+    int prefixqAndEncode(int regEnc) {
+        if (regEnc < 8) {
+            emitByte(Prefix.REXW);
+            return regEnc;
+        } else {
+            emitByte(Prefix.REXWB);
+            return regEnc - 8;
+        }
+    }
+
+    int prefixAndEncode(int dstEnc, int srcEnc) {
+        return prefixAndEncode(dstEnc, srcEnc, false);
+    }
+
+    int prefixAndEncode(int dstEncoding, int srcEncoding, boolean byteinst) {
+        int srcEnc = srcEncoding;
+        int dstEnc = dstEncoding;
+        if (dstEnc < 8) {
+            if (srcEnc >= 8) {
+                emitByte(Prefix.REXB);
+                srcEnc -= 8;
+            } else if (byteinst && srcEnc >= 4) {
+                emitByte(Prefix.REX);
+            }
+        } else {
+            if (srcEnc < 8) {
+                emitByte(Prefix.REXR);
+            } else {
+                emitByte(Prefix.REXRB);
+                srcEnc -= 8;
+            }
+            dstEnc -= 8;
+        }
+        return dstEnc << 3 | srcEnc;
+    }
+
+    /**
+     * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand prefix. If the given
+     * operands exceed 3 bits, the 4th bit is encoded in the prefix.
+     *
+     * @param regEncoding the encoding of the register part of the ModRM-Byte
+     * @param rmEncoding the encoding of the r/m part of the ModRM-Byte
+     * @return the lower 6 bits of the ModRM-Byte that should be emitted
+     */
+    private int prefixqAndEncode(int regEncoding, int rmEncoding) {
+        int rmEnc = rmEncoding;
+        int regEnc = regEncoding;
+        if (regEnc < 8) {
+            if (rmEnc < 8) {
+                emitByte(Prefix.REXW);
+            } else {
+                emitByte(Prefix.REXWB);
+                rmEnc -= 8;
+            }
+        } else {
+            if (rmEnc < 8) {
+                emitByte(Prefix.REXWR);
+            } else {
+                emitByte(Prefix.REXWRB);
+                rmEnc -= 8;
+            }
+            regEnc -= 8;
+        }
+        return regEnc << 3 | rmEnc;
+    }
+
+    private void prefix(Register reg) {
+        if (reg.encoding >= 8) {
+            emitByte(Prefix.REXB);
+        }
+    }
+
+    private static boolean needsRex(Value value) {
+        return isRegister(value) && asRegister(value).encoding >= MinEncodingNeedsRex;
+    }
+
+
+    private void prefix(Address adr) {
+        if (needsRex(adr.getBase())) {
+            if (needsRex(adr.getIndex())) {
+                emitByte(Prefix.REXXB);
+            } else {
+                emitByte(Prefix.REXB);
+            }
+        } else {
+            if (needsRex(adr.getIndex())) {
+                emitByte(Prefix.REXX);
+            }
+        }
+    }
+
+    private void prefixq(Address adr) {
+        if (needsRex(adr.getBase())) {
+            if (needsRex(adr.getIndex())) {
+                emitByte(Prefix.REXWXB);
+            } else {
+                emitByte(Prefix.REXWB);
+            }
+        } else {
+            if (needsRex(adr.getIndex())) {
+                emitByte(Prefix.REXWX);
+            } else {
+                emitByte(Prefix.REXW);
+            }
+        }
+    }
+
+    private void prefix(Address adr, Register reg) {
+        if (reg.encoding < 8) {
+            if (needsRex(adr.getBase())) {
+                if (needsRex(adr.getIndex())) {
+                    emitByte(Prefix.REXXB);
+                } else {
+                    emitByte(Prefix.REXB);
+                }
+            } else {
+                if (needsRex(adr.getIndex())) {
+                    emitByte(Prefix.REXX);
+                } else if (reg.encoding >= 4) {
+                    emitByte(Prefix.REX);
+                }
+            }
+        } else {
+            if (needsRex(adr.getBase())) {
+                if (needsRex(adr.getIndex())) {
+                    emitByte(Prefix.REXRXB);
+                } else {
+                    emitByte(Prefix.REXRB);
+                }
+            } else {
+                if (needsRex(adr.getIndex())) {
+                    emitByte(Prefix.REXRX);
+                } else {
+                    emitByte(Prefix.REXR);
+                }
+            }
+        }
+    }
+
+    private void prefixq(Address adr, Register src) {
+        if (src.encoding < 8) {
+            if (needsRex(adr.getBase())) {
+                if (needsRex(adr.getIndex())) {
+                    emitByte(Prefix.REXWXB);
+                } else {
+                    emitByte(Prefix.REXWB);
+                }
+            } else {
+                if (needsRex(adr.getIndex())) {
+                    emitByte(Prefix.REXWX);
+                } else {
+                    emitByte(Prefix.REXW);
+                }
+            }
+        } else {
+            if (needsRex(adr.getBase())) {
+                if (needsRex(adr.getIndex())) {
+                    emitByte(Prefix.REXWRXB);
+                } else {
+                    emitByte(Prefix.REXWRB);
+                }
+            } else {
+                if (needsRex(adr.getIndex())) {
+                    emitByte(Prefix.REXWRX);
+                } else {
+                    emitByte(Prefix.REXWR);
+                }
+            }
+        }
+    }
+
+    public final void addq(Address dst, int imm32) {
+        prefixq(dst);
+        emitArithOperand(0x81, rax, dst, imm32);
+    }
+
+    public final void addq(Address dst, Register src) {
+        prefixq(dst, src);
+        emitByte(0x01);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void addq(Register dst, int imm32) {
+        prefixqAndEncode(dst.encoding);
+        emitArith(0x81, 0xC0, dst, imm32);
+    }
+
+    public final void addq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x03);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void addq(Register dst, Register src) {
+        prefixqAndEncode(dst.encoding, src.encoding);
+        emitArith(0x03, 0xC0, dst, src);
+    }
+
+    public final void andq(Register dst, int imm32) {
+        prefixqAndEncode(dst.encoding);
+        emitArith(0x81, 0xE0, dst, imm32);
+    }
+
+    public final void andq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x23);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void andq(Register dst, Register src) {
+        prefixqAndEncode(dst.encoding, src.encoding);
+        emitArith(0x23, 0xC0, dst, src);
+    }
+
+    public final void bswapq(Register reg) {
+        int encode = prefixqAndEncode(reg.encoding);
+        emitByte(0x0F);
+        emitByte(0xC8 | encode);
+    }
+
+    public final void cdqq() {
+        emitByte(Prefix.REXW);
+        emitByte(0x99);
+    }
+
+    public final void cmovq(ConditionFlag cc, Register dst, Register src) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x40 | cc.value);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cmovq(ConditionFlag cc, Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x0F);
+        emitByte(0x40 | cc.value);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void cmpq(Address dst, int imm32) {
+        prefixq(dst);
+        emitByte(0x81);
+        emitOperandHelper(rdi, dst);
+        emitInt(imm32);
+    }
+
+    public final void cmpq(Register dst, int imm32) {
+        prefixqAndEncode(dst.encoding);
+        emitArith(0x81, 0xF8, dst, imm32);
+    }
+
+    public final void cmpq(Address dst, Register src) {
+        prefixq(dst, src);
+        emitByte(0x3B);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void cmpq(Register dst, Register src) {
+        prefixqAndEncode(dst.encoding, src.encoding);
+        emitArith(0x3B, 0xC0, dst, src);
+    }
+
+    public final void cmpq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x3B);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void cmpxchgq(Register reg, Address adr) {
+        prefixq(adr, reg);
+        emitByte(0x0F);
+        emitByte(0xB1);
+        emitOperandHelper(reg, adr);
+    }
+
+    public final void cvtsi2sdq(Register dst, Register src) {
+        assert dst.isFpu();
+        emitByte(0xF2);
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x2A);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cvtsi2ssq(Register dst, Register src) {
+        assert dst.isFpu();
+        emitByte(0xF3);
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x2A);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cvttsd2siq(Register dst, Register src) {
+        assert src.isFpu();
+        emitByte(0xF2);
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x2C);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void cvttss2siq(Register dst, Register src) {
+        assert src.isFpu();
+        emitByte(0xF3);
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0x2C);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void decq(Register dst) {
+        // Don't use it directly. Use Macrodecrementq() instead.
+        // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
+        int encode = prefixqAndEncode(dst.encoding);
+        emitByte(0xFF);
+        emitByte(0xC8 | encode);
+    }
+
+    public final void decq(Address dst) {
+        // Don't use it directly. Use Macrodecrementq() instead.
+        prefixq(dst);
+        emitByte(0xFF);
+        emitOperandHelper(rcx, dst);
+    }
+
+    public final void divq(Register src) {
+        int encode = prefixqAndEncode(src.encoding);
+        emitByte(0xF7);
+        emitByte(0xF0 | encode);
+    }
+
+    public final void idivq(Register src) {
+        int encode = prefixqAndEncode(src.encoding);
+        emitByte(0xF7);
+        emitByte(0xF8 | encode);
+    }
+
+    public final void imulq(Register dst, Register src) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xAF);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void imulq(Register dst, Register src, int value) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        if (isByte(value)) {
+            emitByte(0x6B);
+            emitByte(0xC0 | encode);
+            emitByte(value);
+        } else {
+            emitByte(0x69);
+            emitByte(0xC0 | encode);
+            emitInt(value);
+        }
+    }
+
+    public final void incq(Register dst) {
+        // Don't use it directly. Use Macroincrementq() instead.
+        // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
+        int encode = prefixqAndEncode(dst.encoding);
+        emitByte(0xFF);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void incq(Address dst) {
+        // Don't use it directly. Use Macroincrementq() instead.
+        prefixq(dst);
+        emitByte(0xFF);
+        emitOperandHelper(rax, dst);
+    }
+
+    public final void movq(Register dst, long imm64) {
+        int encode = prefixqAndEncode(dst.encoding);
+        emitByte(0xB8 | encode);
+        emitLong(imm64);
+    }
+
+    public final void movdq(Register dst, Register src) {
+
+        // table D-1 says MMX/SSE2
+        emitByte(0x66);
+
+        if (dst.isFpu()) {
+            assert dst.isFpu();
+            int encode = prefixqAndEncode(dst.encoding, src.encoding);
+            emitByte(0x0F);
+            emitByte(0x6E);
+            emitByte(0xC0 | encode);
+        } else if (src.isFpu()) {
+
+            // swap src/dst to get correct prefix
+            int encode = prefixqAndEncode(src.encoding, dst.encoding);
+            emitByte(0x0F);
+            emitByte(0x7E);
+            emitByte(0xC0 | encode);
+        } else {
+            throw new InternalError("should not reach here");
+        }
+    }
+
+    public final void movsbq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x0F);
+        emitByte(0xBE);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movsbq(Register dst, Register src) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xBE);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movslq(Register dst, int imm32) {
+        int encode = prefixqAndEncode(dst.encoding);
+        emitByte(0xC7 | encode);
+        emitInt(imm32);
+        // dbx shows movslq(X86.rcx, 3) as movq $0x0000000049000000,(%X86.rbx)
+        // and movslq(X86.r8, 3); as movl $0x0000000048000000,(%X86.rbx)
+        // as a result we shouldn't use until tested at runtime...
+        throw new InternalError("untested");
+    }
+
+    public final void movslq(Address dst, int imm32) {
+        prefixq(dst);
+        emitByte(0xC7);
+        emitOperandHelper(rax, dst);
+        emitInt(imm32);
+    }
+
+    public final void movslq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x63);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movslq(Register dst, Register src) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x63);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movswq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x0F);
+        emitByte(0xBF);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movswq(Register dst, Register src) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xBF);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movzbq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x0F);
+        emitByte(0xB6);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movzbq(Register dst, Register src) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xB6);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void movzwq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x0F);
+        emitByte(0xB7);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void movzwq(Register dst, Register src) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x0F);
+        emitByte(0xB7);
+        emitByte(0xC0 | encode);
+    }
+
+    public final void negq(Register dst) {
+        int encode = prefixqAndEncode(dst.encoding);
+        emitByte(0xF7);
+        emitByte(0xD8 | encode);
+    }
+
+    public final void notq(Register dst) {
+        int encode = prefixqAndEncode(dst.encoding);
+        emitByte(0xF7);
+        emitByte(0xD0 | encode);
+    }
+
+    public final void orq(Address dst, int imm32) {
+        prefixq(dst);
+        emitByte(0x81);
+        emitOperandHelper(rcx, dst);
+        emitInt(imm32);
+    }
+
+    public final void orq(Register dst, int imm32) {
+        prefixqAndEncode(dst.encoding);
+        emitArith(0x81, 0xC8, dst, imm32);
+    }
+
+    public final void orq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x0B);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void orq(Register dst, Register src) {
+        prefixqAndEncode(dst.encoding, src.encoding);
+        emitArith(0x0B, 0xC0, dst, src);
+    }
+
+    public final void popq(Address dst) {
+        prefixq(dst);
+        emitByte(0x8F);
+        emitOperandHelper(rax, dst);
+    }
+
+    public final void pushq(Address src) {
+        prefixq(src);
+        emitByte(0xFF);
+        emitOperandHelper(rsi, src);
+    }
+
+    public final void rclq(Register dst, int imm8) {
+        assert isShiftCount(imm8 >> 1) : "illegal shift count";
+        int encode = prefixqAndEncode(dst.encoding);
+        if (imm8 == 1) {
+            emitByte(0xD1);
+            emitByte(0xD0 | encode);
+        } else {
+            emitByte(0xC1);
+            emitByte(0xD0 | encode);
+            emitByte(imm8);
+        }
+    }
+
+    public final void sarq(Register dst, int imm8) {
+        assert isShiftCount(imm8 >> 1) : "illegal shift count";
+        int encode = prefixqAndEncode(dst.encoding);
+        if (imm8 == 1) {
+            emitByte(0xD1);
+            emitByte(0xF8 | encode);
+        } else {
+            emitByte(0xC1);
+            emitByte(0xF8 | encode);
+            emitByte(imm8);
+        }
+    }
+
+    public final void sarq(Register dst) {
+        int encode = prefixqAndEncode(dst.encoding);
+        emitByte(0xD3);
+        emitByte(0xF8 | encode);
+    }
+
+    public final void shlq(Register dst, int imm8) {
+        assert isShiftCount(imm8 >> 1) : "illegal shift count";
+        int encode = prefixqAndEncode(dst.encoding);
+        if (imm8 == 1) {
+            emitByte(0xD1);
+            emitByte(0xE0 | encode);
+        } else {
+            emitByte(0xC1);
+            emitByte(0xE0 | encode);
+            emitByte(imm8);
+        }
+    }
+
+    public final void shlq(Register dst) {
+        int encode = prefixqAndEncode(dst.encoding);
+        emitByte(0xD3);
+        emitByte(0xE0 | encode);
+    }
+
+    public final void shrq(Register dst, int imm8) {
+        assert isShiftCount(imm8 >> 1) : "illegal shift count";
+        int encode = prefixqAndEncode(dst.encoding);
+        emitByte(0xC1);
+        emitByte(0xE8 | encode);
+        emitByte(imm8);
+    }
+
+    public final void shrq(Register dst) {
+        int encode = prefixqAndEncode(dst.encoding);
+        emitByte(0xD3);
+        emitByte(0xE8 | encode);
+    }
+
+    public final void sqrtsd(Register dst, Address src) {
+        assert dst.isFpu();
+
+        emitByte(0xF2);
+        prefix(src, dst);
+        emitByte(0x0F);
+        emitByte(0x51);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void subq(Address dst, int imm32) {
+        prefixq(dst);
+        if (isByte(imm32)) {
+            emitByte(0x83);
+            emitOperandHelper(rbp, dst);
+            emitByte(imm32 & 0xFF);
+        } else {
+            emitByte(0x81);
+            emitOperandHelper(rbp, dst);
+            emitInt(imm32);
+        }
+    }
+
+    public final void subq(Register dst, int imm32) {
+        prefixqAndEncode(dst.encoding);
+        emitArith(0x81, 0xE8, dst, imm32);
+    }
+
+    public final void subq(Address dst, Register src) {
+        prefixq(dst, src);
+        emitByte(0x29);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void subq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x2B);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void subq(Register dst, Register src) {
+        prefixqAndEncode(dst.encoding, src.encoding);
+        emitArith(0x2B, 0xC0, dst, src);
+    }
+
+    public final void testq(Register dst, int imm32) {
+        // not using emitArith because test
+        // doesn't support sign-extension of
+        // 8bit operands
+        int encode = dst.encoding;
+        if (encode == 0) {
+            emitByte(Prefix.REXW);
+            emitByte(0xA9);
+        } else {
+            encode = prefixqAndEncode(encode);
+            emitByte(0xF7);
+            emitByte(0xC0 | encode);
+        }
+        emitInt(imm32);
+    }
+
+    public final void testq(Register dst, Register src) {
+        prefixqAndEncode(dst.encoding, src.encoding);
+        emitArith(0x85, 0xC0, dst, src);
+    }
+
+    public final void xaddq(Address dst, Register src) {
+        prefixq(dst, src);
+        emitByte(0x0F);
+        emitByte(0xC1);
+        emitOperandHelper(src, dst);
+    }
+
+    public final void xchgq(Register dst, Address src) {
+        prefixq(src, dst);
+        emitByte(0x87);
+        emitOperandHelper(dst, src);
+    }
+
+    public final void xchgq(Register dst, Register src) {
+        int encode = prefixqAndEncode(dst.encoding, src.encoding);
+        emitByte(0x87);
+        emitByte(0xc0 | encode);
+    }
+
+    public final void xorq(Register dst, int imm32) {
+        prefixqAndEncode(dst.encoding);
+        emitArith(0x81, 0xF0, dst, imm32);
+    }
+
+    public final void xorq(Register dst, Register src) {
+        prefixqAndEncode(dst.encoding, src.encoding);
+        emitArith(0x33, 0xC0, dst, src);
+    }
+
+    public final void xorq(Register dst, Address src) {
+
+        prefixq(src, dst);
+        emitByte(0x33);
+        emitOperandHelper(dst, src);
+
+    }
+
+    public final void membar(int barriers) {
+        if (target.isMP) {
+            // We only have to handle StoreLoad
+            if ((barriers & STORE_LOAD) != 0) {
+                // All usable chips support "locked" instructions which suffice
+                // as barriers, and are much faster than the alternative of
+                // using cpuid instruction. We use here a locked add [rsp],0.
+                // This is conveniently otherwise a no-op except for blowing
+                // flags.
+                // Any change to this code may need to revisit other places in
+                // the code where this idiom is used, in particular the
+                // orderAccess code.
+                lock();
+                addl(new Address(Word, RSP, 0), 0); // Assert the lock# signal here
+            }
+        }
+    }
+
+    @Override
+    protected final void patchJumpTarget(int branch, int branchTarget) {
+        int op = codeBuffer.getByte(branch);
+        assert op == 0xE8 // call
+            || op == 0x00 // jump table entry
+            || op == 0xE9 // jmp
+            || op == 0xEB // short jmp
+            || (op & 0xF0) == 0x70 // short jcc
+            || op == 0x0F && (codeBuffer.getByte(branch + 1) & 0xF0) == 0x80 // jcc
+        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
+
+        if (op == 0x00) {
+            int offsetToJumpTableBase = codeBuffer.getShort(branch + 1);
+            int jumpTableBase = branch - offsetToJumpTableBase;
+            int imm32 = branchTarget - jumpTableBase;
+            codeBuffer.emitInt(imm32, branch);
+        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
+
+            // short offset operators (jmp and jcc)
+            int imm8 = branchTarget - (branch + 2);
+            codeBuffer.emitByte(imm8, branch + 1);
+
+        } else {
+
+            int off = 1;
+            if (op == 0x0F) {
+                off = 2;
+            }
+
+            int imm32 = branchTarget - (branch + 4 + off);
+            codeBuffer.emitInt(imm32, branch + off);
+        }
+    }
+
+    public void nullCheck(Register r) {
+        testl(AMD64.rax, new Address(Word, r.asValue(Word), 0));
+    }
+
+    @Override
+    public void align(int modulus) {
+        if (codeBuffer.position() % modulus != 0) {
+            nop(modulus - (codeBuffer.position() % modulus));
+        }
+    }
+
+    public void pushfq() {
+        emitByte(0x9c);
+    }
+
+    public void popfq() {
+        emitByte(0x9D);
+    }
+
+    /**
+     * Makes sure that a subsequent {@linkplain #call} does not fail the alignment check.
+     */
+    public final void alignForPatchableDirectCall() {
+        int dispStart = codeBuffer.position() + 1;
+        int mask = target.wordSize - 1;
+        if ((dispStart & ~mask) != ((dispStart + 3) & ~mask)) {
+            nop(target.wordSize - (dispStart & mask));
+            assert ((codeBuffer.position() + 1) & mask) == 0;
+        }
+    }
+
+    /**
+     * Emits a direct call instruction. Note that the actual call target is not specified, because all calls
+     * need patching anyway. Therefore, 0 is emitted as the call target, and the user is responsible
+     * to add the call address to the appropriate patching tables.
+     */
+    public final void call() {
+        emitByte(0xE8);
+        emitInt(0);
+    }
+
+    public final void call(Register src) {
+        int encode = prefixAndEncode(src.encoding);
+        emitByte(0xFF);
+        emitByte(0xD0 | encode);
+    }
+
+    public void int3() {
+        emitByte(0xCC);
+    }
+
+    public void enter(short imm16, byte imm8) {
+        emitByte(0xC8);
+        // appended:
+        emitByte(imm16 & 0xff);
+        emitByte((imm16 >> 8) & 0xff);
+        emitByte(imm8);
+    }
+
+    private void emitx87(int b1, int b2, int i) {
+        assert 0 <= i && i < 8 : "illegal stack offset";
+        emitByte(b1);
+        emitByte(b2 + i);
+    }
+
+    public void fld(Address src) {
+        emitByte(0xDD);
+        emitOperandHelper(rax, src);
+    }
+
+    public void fld(int i) {
+        emitx87(0xD9, 0xC0, i);
+    }
+
+    public void fldln2() {
+        emitByte(0xD9);
+        emitByte(0xED);
+    }
+
+    public void fldlg2() {
+        emitByte(0xD9);
+        emitByte(0xEC);
+    }
+
+    public void fyl2x() {
+        emitByte(0xD9);
+        emitByte(0xF1);
+    }
+
+    public void fstp(Address src) {
+        emitByte(0xDD);
+        emitOperandHelper(rbx, src);
+    }
+
+    public void fsin() {
+        emitByte(0xD9);
+        emitByte(0xFE);
+    }
+
+    public void fcos() {
+        emitByte(0xD9);
+        emitByte(0xFF);
+    }
+
+    public void fptan() {
+        emitByte(0xD9);
+        emitByte(0xF2);
+    }
+
+    public void fstp(int i) {
+        emitx87(0xDD, 0xD8, i);
+    }
+
+    @Override
+    public void bangStack(int disp) {
+        movq(new Address(target.wordKind, AMD64.RSP, -disp), AMD64.rax);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/AMD64MacroAssembler.java	Wed Oct 03 17:42:12 2012 +0200
@@ -0,0 +1,392 @@
+/*
+ * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.max.asm.amd64;
+
+import static com.oracle.max.asm.amd64.AMD64AsmOptions.*;
+
+import com.oracle.graal.api.code.*;
+import com.oracle.graal.api.meta.*;
+import com.oracle.max.asm.*;
+
+/**
+ * This class implements commonly used X86 code patterns.
+ */
+public class AMD64MacroAssembler extends AMD64Assembler {
+
+    public AMD64MacroAssembler(TargetDescription target, RegisterConfig registerConfig) {
+        super(target, registerConfig);
+    }
+
+    public void pushptr(Address src) {
+        pushq(src);
+    }
+
+    public void popptr(Address src) {
+        popq(src);
+    }
+
+    public void xorptr(Register dst, Register src) {
+        xorq(dst, src);
+    }
+
+    public void xorptr(Register dst, Address src) {
+        xorq(dst, src);
+    }
+
+    // 64 bit versions
+
+
+    public void decrementq(Register reg, int value) {
+        if (value == Integer.MIN_VALUE) {
+            subq(reg, value);
+            return;
+        }
+        if (value < 0) {
+            incrementq(reg, -value);
+            return;
+        }
+        if (value == 0) {
+            return;
+        }
+        if (value == 1 && UseIncDec) {
+            decq(reg);
+        } else {
+            subq(reg, value);
+        }
+    }
+
+    public void incrementq(Register reg, int value) {
+        if (value == Integer.MIN_VALUE) {
+            addq(reg, value);
+            return;
+        }
+        if (value < 0) {
+            decrementq(reg, -value);
+            return;
+        }
+        if (value == 0) {
+            return;
+        }
+        if (value == 1 && UseIncDec) {
+            incq(reg);
+        } else {
+            addq(reg, value);
+        }
+    }
+
+    // These are mostly for initializing null
+    public void movptr(Address dst, int src) {
+        movslq(dst, src);
+    }
+
+    public final void cmp32(Register src1, int imm) {
+        cmpl(src1, imm);
+    }
+
+    public final void cmp32(Register src1, Address src2) {
+        cmpl(src1, src2);
+    }
+
+    public void cmpsd2int(Register opr1, Register opr2, Register dst, boolean unorderedIsLess) {
+        assert opr1.isFpu() && opr2.isFpu();
+        ucomisd(opr1, opr2);
+
+        Label l = new Label();
+        if (unorderedIsLess) {
+            movl(dst, -1);
+            jcc(AMD64Assembler.ConditionFlag.parity, l);
+            jcc(AMD64Assembler.ConditionFlag.below, l);
+            movl(dst, 0);
+            jcc(AMD64Assembler.ConditionFlag.equal, l);
+            incrementl(dst, 1);
+        } else { // unordered is greater
+            movl(dst, 1);
+            jcc(AMD64Assembler.ConditionFlag.parity, l);
+            jcc(AMD64Assembler.ConditionFlag.above, l);
+            movl(dst, 0);
+            jcc(AMD64Assembler.ConditionFlag.equal, l);
+            decrementl(dst, 1);
+        }
+        bind(l);
+    }
+
+    public void cmpss2int(Register opr1, Register opr2, Register dst, boolean unorderedIsLess) {
+        assert opr1.isFpu();
+        assert opr2.isFpu();
+        ucomiss(opr1, opr2);
+
+        Label l = new Label();
+        if (unorderedIsLess) {
+            movl(dst, -1);
+            jcc(AMD64Assembler.ConditionFlag.parity, l);
+            jcc(AMD64Assembler.ConditionFlag.below, l);
+            movl(dst, 0);
+            jcc(AMD64Assembler.ConditionFlag.equal, l);
+            incrementl(dst, 1);
+        } else { // unordered is greater
+            movl(dst, 1);
+            jcc(AMD64Assembler.ConditionFlag.parity, l);
+            jcc(AMD64Assembler.ConditionFlag.above, l);
+            movl(dst, 0);
+            jcc(AMD64Assembler.ConditionFlag.equal, l);
+            decrementl(dst, 1);
+        }
+        bind(l);
+    }
+
+    public void cmpptr(Register src1, Register src2) {
+        cmpq(src1, src2);
+    }
+
+    public void cmpptr(Register src1, Address src2) {
+        cmpq(src1, src2);
+    }
+
+    public void cmpptr(Register src1, int src2) {
+        cmpq(src1, src2);
+    }
+
+    public void cmpptr(Address src1, int src2) {
+        cmpq(src1, src2);
+    }
+
+    public void decrementl(Register reg, int value) {
+        if (value == Integer.MIN_VALUE) {
+            subl(reg, value);
+            return;
+        }
+        if (value < 0) {
+            incrementl(reg, -value);
+            return;
+        }
+        if (value == 0) {
+            return;
+        }
+        if (value == 1 && UseIncDec) {
+            decl(reg);
+        } else {
+            subl(reg, value);
+        }
+    }
+
+    public void decrementl(Address dst, int value) {
+        if (value == Integer.MIN_VALUE) {
+            subl(dst, value);
+            return;
+        }
+        if (value < 0) {
+            incrementl(dst, -value);
+            return;
+        }
+        if (value == 0) {
+            return;
+        }
+        if (value == 1 && UseIncDec) {
+            decl(dst);
+        } else {
+            subl(dst, value);
+        }
+    }
+
+    public void incrementl(Register reg, int value) {
+        if (value == Integer.MIN_VALUE) {
+            addl(reg, value);
+            return;
+        }
+        if (value < 0) {
+            decrementl(reg, -value);
+            return;
+        }
+        if (value == 0) {
+            return;
+        }
+        if (value == 1 && UseIncDec) {
+            incl(reg);
+        } else {
+            addl(reg, value);
+        }
+    }
+
+    public void incrementl(Address dst, int value) {
+        if (value == Integer.MIN_VALUE) {
+            addl(dst, value);
+            return;
+        }
+        if (value < 0) {
+            decrementl(dst, -value);
+            return;
+        }
+        if (value == 0) {
+            return;
+        }
+        if (value == 1 && UseIncDec) {
+            incl(dst);
+        } else {
+            addl(dst, value);
+        }
+    }
+
+    public void signExtendByte(Register reg) {
+        if (reg.isByte()) {
+            movsxb(reg, reg); // movsxb
+        } else {
+            shll(reg, 24);
+            sarl(reg, 24);
+        }
+    }
+
+    public void signExtendShort(Register reg) {
+        movsxw(reg, reg); // movsxw
+    }
+
+    // Support optimal SSE move instructions.
+    public void movflt(Register dst, Register src) {
+        assert dst.isFpu() && src.isFpu();
+        if (UseXmmRegToRegMoveAll) {
+            movaps(dst, src);
+        } else {
+            movss(dst, src);
+        }
+    }
+
+    public void movflt(Register dst, Address src) {
+        assert dst.isFpu();
+        movss(dst, src);
+    }
+
+    public void movflt(Address dst, Register src) {
+        assert src.isFpu();
+        movss(dst, src);
+    }
+
+    public void movdbl(Register dst, Register src) {
+        assert dst.isFpu() && src.isFpu();
+        if (UseXmmRegToRegMoveAll) {
+            movapd(dst, src);
+        } else {
+            movsd(dst, src);
+        }
+    }
+
+    public void movdbl(Register dst, Address src) {
+        assert dst.isFpu();
+        if (UseXmmLoadAndClearUpper) {
+            movsd(dst, src);
+        } else {
+            movlpd(dst, src);
+        }
+    }
+
+    public void movdbl(Address dst, Register src) {
+        assert src.isFpu();
+        movsd(dst, src);
+    }
+
+    /**
+     * Non-atomic write of a 64-bit constant to memory. Do not use
+     * if the address might be a volatile field!
+     */
+    public void movlong(Address dst, long src) {
+        Address high = new Address(dst.getKind(), dst.getBase(), dst.getIndex(), dst.getScale(), dst.getDisplacement() + 4);
+        movl(dst, (int) (src & 0xFFFFFFFF));
+        movl(high, (int) (src >> 32));
+    }
+
+    public void xchgptr(Register src1, Register src2) {
+        xchgq(src1, src2);
+    }
+
+    public void flog(Register dest, Register value, boolean base10) {
+        assert value.spillSlotSize == dest.spillSlotSize;
+
+        Address tmp = new Address(Kind.Double, AMD64.RSP);
+        if (base10) {
+            fldlg2();
+        } else {
+            fldln2();
+        }
+        subq(AMD64.rsp, value.spillSlotSize);
+        movsd(tmp, value);
+        fld(tmp);
+        fyl2x();
+        fstp(tmp);
+        movsd(dest, tmp);
+        addq(AMD64.rsp, dest.spillSlotSize);
+    }
+
+    public void fsin(Register dest, Register value) {
+        ftrig(dest, value, 's');
+    }
+
+    public void fcos(Register dest, Register value) {
+        ftrig(dest, value, 'c');
+    }
+
+    public void ftan(Register dest, Register value) {
+        ftrig(dest, value, 't');
+    }
+
+    private void ftrig(Register dest, Register value, char op) {
+        assert value.spillSlotSize == dest.spillSlotSize;
+
+        Address tmp = new Address(Kind.Double, AMD64.RSP);
+        subq(AMD64.rsp, value.spillSlotSize);
+        movsd(tmp, value);
+        fld(tmp);
+        if (op == 's') {
+            fsin();
+        } else if (op == 'c') {
+            fcos();
+        } else if (op == 't') {
+            fptan();
+            fstp(0); // ftan pushes 1.0 in addition to the actual result, pop
+        } else {
+            throw new InternalError("should not reach here");
+        }
+        fstp(tmp);
+        movsd(dest, tmp);
+        addq(AMD64.rsp, dest.spillSlotSize);
+    }
+
+    /**
+     * Emit code to save a given set of callee save registers in the
+     * {@linkplain CalleeSaveLayout CSA} within the frame.
+     * @param csl the description of the CSA
+     * @param frameToCSA offset from the frame pointer to the CSA
+     */
+    public void save(CalleeSaveLayout csl, int frameToCSA) {
+        RegisterValue frame = frameRegister.asValue();
+        for (Register r : csl.registers) {
+            int offset = csl.offsetOf(r);
+            movq(new Address(target.wordKind, frame, frameToCSA + offset), r);
+        }
+    }
+
+    public void restore(CalleeSaveLayout csl, int frameToCSA) {
+        RegisterValue frame = frameRegister.asValue();
+        for (Register r : csl.registers) {
+            int offset = csl.offsetOf(r);
+            movq(r, new Address(target.wordKind, frame, frameToCSA + offset));
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/X86InstructionDecoder.java	Wed Oct 03 17:42:12 2012 +0200
@@ -0,0 +1,507 @@
+/*
+ * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.max.asm.amd64;
+
+
+public final class X86InstructionDecoder {
+
+    private boolean targetIs64Bit;
+    private byte[] code;
+    private int currentEndOfInstruction;
+    private int currentDisplacementPosition;
+
+    private static class Prefix {
+
+        // segment overrides
+        public static final int CSSegment = 0x2e;
+        public static final int SSSegment = 0x36;
+        public static final int DSSegment = 0x3e;
+        public static final int ESSegment = 0x26;
+        public static final int FSSegment = 0x64;
+        public static final int GSSegment = 0x65;
+        public static final int REX = 0x40;
+        public static final int REXB = 0x41;
+        public static final int REXX = 0x42;
+        public static final int REXXB = 0x43;
+        public static final int REXR = 0x44;
+        public static final int REXRB = 0x45;
+        public static final int REXRX = 0x46;
+        public static final int REXRXB = 0x47;
+        public static final int REXW = 0x48;
+        public static final int REXWB = 0x49;
+        public static final int REXWX = 0x4A;
+        public static final int REXWXB = 0x4B;
+        public static final int REXWR = 0x4C;
+        public static final int REXWRB = 0x4D;
+        public static final int REXWRX = 0x4E;
+        public static final int REXWRXB = 0x4F;
+    }
+
+    private X86InstructionDecoder(byte[] code, boolean targetIs64Bit) {
+        this.code = code;
+        this.targetIs64Bit = targetIs64Bit;
+    }
+
+    public int currentEndOfInstruction() {
+        return currentEndOfInstruction;
+    }
+
+    public int currentDisplacementPosition() {
+        return currentDisplacementPosition;
+    }
+
+    public void decodePosition(int inst) {
+
+        assert inst >= 0 && inst < code.length;
+
+        // Decode the given instruction, and return the Pointer of
+        // an embedded 32-bit operand word.
+
+        // If "which" is WhichOperand.disp32operand, selects the displacement portion
+        // of an effective Pointer specifier.
+        // If "which" is imm64Operand, selects the trailing immediate constant.
+        // If "which" is WhichOperand.call32operand, selects the displacement of a call or jump.
+        // Caller is responsible for ensuring that there is such an operand,
+        // and that it is 32/64 bits wide.
+
+        // If "which" is endPcOperand, find the end of the instruction.
+
+        int ip = inst;
+        boolean is64bit = false;
+
+        boolean hasDisp32 = false;
+        int tailSize = 0; // other random bytes (#32, #16, etc.) at end of insn
+
+        boolean againAfterPrefix = true;
+
+        while (againAfterPrefix) {
+            againAfterPrefix = false;
+            switch (0xFF & code[ip++]) {
+
+                // These convenience macros generate groups of "case" labels for the switch.
+
+                case Prefix.CSSegment:
+                case Prefix.SSSegment:
+                case Prefix.DSSegment:
+                case Prefix.ESSegment:
+                case Prefix.FSSegment:
+                case Prefix.GSSegment:
+                    // Seems dubious
+                    assert !targetIs64Bit : "shouldn't have that prefix";
+                    assert ip == inst + 1 : "only one prefix allowed";
+                    againAfterPrefix = true;
+                    break;
+
+                case 0x67:
+                case Prefix.REX:
+                case Prefix.REXB:
+                case Prefix.REXX:
+                case Prefix.REXXB:
+                case Prefix.REXR:
+                case Prefix.REXRB:
+                case Prefix.REXRX:
+                case Prefix.REXRXB:
+                    assert targetIs64Bit : "64bit prefixes";
+                    againAfterPrefix = true;
+                    break;
+
+                case Prefix.REXW:
+                case Prefix.REXWB:
+                case Prefix.REXWX:
+                case Prefix.REXWXB:
+                case Prefix.REXWR:
+                case Prefix.REXWRB:
+                case Prefix.REXWRX:
+                case Prefix.REXWRXB:
+                    assert targetIs64Bit : "64bit prefixes";
+                    is64bit = true;
+                    againAfterPrefix = true;
+                    break;
+
+                case 0xFF: // pushq a; decl a; incl a; call a; jmp a
+                case 0x88: // movb a, r
+                case 0x89: // movl a, r
+                case 0x8A: // movb r, a
+                case 0x8B: // movl r, a
+                case 0x8F: // popl a
+                    hasDisp32 = true;
+                    break;
+
+                case 0x68: // pushq #32
+                    currentEndOfInstruction = ip + 4;
+                    currentDisplacementPosition = ip;
+                    return; // not produced by emitOperand
+
+                case 0x66: // movw ... (size prefix)
+                    boolean againAfterSizePrefix2 = true;
+                    while (againAfterSizePrefix2) {
+                        againAfterSizePrefix2 = false;
+                        switch (0xFF & code[ip++]) {
+                            case Prefix.REX:
+                            case Prefix.REXB:
+                            case Prefix.REXX:
+                            case Prefix.REXXB:
+                            case Prefix.REXR:
+                            case Prefix.REXRB:
+                            case Prefix.REXRX:
+                            case Prefix.REXRXB:
+                            case Prefix.REXW:
+                            case Prefix.REXWB:
+                            case Prefix.REXWX:
+                            case Prefix.REXWXB:
+                            case Prefix.REXWR:
+                            case Prefix.REXWRB:
+                            case Prefix.REXWRX:
+                            case Prefix.REXWRXB:
+                                assert targetIs64Bit : "64bit prefix found";
+                                againAfterSizePrefix2 = true;
+                                break;
+                            case 0x8B: // movw r, a
+                            case 0x89: // movw a, r
+                                hasDisp32 = true;
+                                break;
+                            case 0xC7: // movw a, #16
+                                hasDisp32 = true;
+                                tailSize = 2; // the imm16
+                                break;
+                            case 0x0F: // several SSE/SSE2 variants
+                                ip--; // reparse the 0x0F
+                                againAfterPrefix = true;
+                                break;
+                            default:
+                                throw new InternalError("should not reach here");
+                        }
+                    }
+                    break;
+
+                case 0xB8: // movl/q r, #32/#64(oop?)
+                case 0xB9:
+                case 0xBA:
+                case 0xBB:
+                case 0xBC:
+                case 0xBD:
+                case 0xBE:
+                case 0xBF:
+                    currentEndOfInstruction = ip + (is64bit ? 8 : 4);
+                    currentDisplacementPosition = ip;
+                    return;
+
+                case 0x69: // imul r, a, #32
+                case 0xC7: // movl a, #32(oop?)
+                    tailSize = 4;
+                    hasDisp32 = true; // has both kinds of operands!
+                    break;
+
+                case 0x0F: // movx..., etc.
+                    switch (0xFF & code[ip++]) {
+                        case 0x12: // movlps
+                        case 0x28: // movaps
+                        case 0x2E: // ucomiss
+                        case 0x2F: // comiss
+                        case 0x54: // andps
+                        case 0x55: // andnps
+                        case 0x56: // orps
+                        case 0x57: // xorps
+                        case 0x6E: // movd
+                        case 0x7E: // movd
+                        case 0xAE: // ldmxcsr a
+                            // 64bit side says it these have both operands but that doesn't
+                            // appear to be true
+                            hasDisp32 = true;
+                            break;
+
+                        case 0xAD: // shrd r, a, %cl
+                        case 0xAF: // imul r, a
+                        case 0xBE: // movsbl r, a (movsxb)
+                        case 0xBF: // movswl r, a (movsxw)
+                        case 0xB6: // movzbl r, a (movzxb)
+                        case 0xB7: // movzwl r, a (movzxw)
+                        case 0x40: // cmovl cc, r, a
+                        case 0x41:
+                        case 0x42:
+                        case 0x43:
+                        case 0x44:
+                        case 0x45:
+                        case 0x46:
+                        case 0x47:
+                        case 0x48:
+                        case 0x49:
+                        case 0x4A:
+                        case 0x4B:
+                        case 0x4C:
+                        case 0x4D:
+                        case 0x4E:
+                        case 0x4F:
+                        case 0xB0: // cmpxchgb
+                        case 0xB1: // cmpxchg
+                        case 0xC1: // xaddl
+                        case 0xC7: // cmpxchg8
+                        case 0x90: // setcc a
+                        case 0x91:
+                        case 0x92:
+                        case 0x93:
+                        case 0x94:
+                        case 0x95:
+                        case 0x96:
+                        case 0x97:
+                        case 0x98:
+                        case 0x99:
+                        case 0x9A:
+                        case 0x9B:
+                        case 0x9C:
+                        case 0x9D:
+                        case 0x9E:
+                        case 0x9F:
+                            hasDisp32 = true;
+                            // fall out of the switch to decode the Pointer
+                            break;
+
+                        case 0xAC: // shrd r, a, #8
+                            hasDisp32 = true;
+                            tailSize = 1; // the imm8
+                            break;
+
+                        case 0x80: // jcc rdisp32
+                        case 0x81:
+                        case 0x82:
+                        case 0x83:
+                        case 0x84:
+                        case 0x85:
+                        case 0x86:
+                        case 0x87:
+                        case 0x88:
+                        case 0x89:
+                        case 0x8A:
+                        case 0x8B:
+                        case 0x8C:
+                        case 0x8D:
+                        case 0x8E:
+                        case 0x8F:
+                            currentEndOfInstruction = ip + 4;
+                            currentDisplacementPosition = ip;
+                            return;
+                        default:
+                            throw new InternalError("should not reach here");
+                    }
+                    break;
+
+                case 0x81: // addl a, #32; addl r, #32
+                    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
+                    // on 32bit in the case of cmpl, the imm might be an oop
+                    tailSize = 4;
+                    hasDisp32 = true; // has both kinds of operands!
+                    break;
+
+                case 0x83: // addl a, #8; addl r, #8
+                    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
+                    hasDisp32 = true; // has both kinds of operands!
+                    tailSize = 1;
+                    break;
+
+                case 0x9B:
+                    switch (0xFF & code[ip++]) {
+                        case 0xD9: // fnstcw a
+                            hasDisp32 = true;
+                            break;
+                        default:
+                            throw new InternalError("should not reach here");
+                    }
+                    break;
+
+                case 0x00: // addb a, r; addl a, r; addb r, a; addl r, a
+                case 0x01:
+                case 0x02:
+                case 0x03:
+                case 0x10: // adc...
+                case 0x11:
+                case 0x12:
+                case 0x13:
+                case 0x20: // and...
+                case 0x21:
+                case 0x22:
+                case 0x23:
+                case 0x30: // xor...
+                case 0x31:
+                case 0x32:
+                case 0x33:
+                case 0x08: // or...
+                case 0x09:
+                case 0x0a:
+                case 0x0b:
+                case 0x18: // sbb...
+                case 0x19:
+                case 0x1a:
+                case 0x1b:
+                case 0x28: // sub...
+                case 0x29:
+                case 0x2a:
+                case 0x2b:
+                case 0xF7: // mull a
+                case 0x8D: // lea r, a
+                case 0x87: // xchg r, a
+                case 0x38: // cmp...
+                case 0x39:
+                case 0x3a:
+                case 0x3b:
+                case 0x85: // test r, a
+                    hasDisp32 = true; // has both kinds of operands!
+                    break;
+
+                case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
+                case 0xC6: // movb a, #8
+                case 0x80: // cmpb a, #8
+                case 0x6B: // imul r, a, #8
+                    hasDisp32 = true; // has both kinds of operands!
+                    tailSize = 1; // the imm8
+                    break;
+
+                case 0xE8: // call rdisp32
+                case 0xE9: // jmp rdisp32
+                    currentEndOfInstruction = ip + 4;
+                    currentDisplacementPosition = ip;
+                    return;
+
+                case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
+                case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
+                case 0xD9: // fldS a; fstS a; fstpS a; fldcw a
+                case 0xDD: // fldD a; fstD a; fstpD a
+                case 0xDB: // fildS a; fistpS a; fldX a; fstpX a
+                case 0xDF: // fildD a; fistpD a
+                case 0xD8: // faddS a; fsubrS a; fmulS a; fdivrS a; fcompS a
+                case 0xDC: // faddD a; fsubrD a; fmulD a; fdivrD a; fcompD a
+                case 0xDE: // faddpD a; fsubrpD a; fmulpD a; fdivrpD a; fcomppD a
+                    hasDisp32 = true;
+                    break;
+
+                case 0xF0: // Lock
+                    againAfterPrefix = true;
+                    break;
+
+                case 0xF3: // For SSE
+                case 0xF2: // For SSE2
+                    switch (0xFF & code[ip++]) {
+                        case Prefix.REX:
+                        case Prefix.REXB:
+                        case Prefix.REXX:
+                        case Prefix.REXXB:
+                        case Prefix.REXR:
+                        case Prefix.REXRB:
+                        case Prefix.REXRX:
+                        case Prefix.REXRXB:
+                        case Prefix.REXW:
+                        case Prefix.REXWB:
+                        case Prefix.REXWX:
+                        case Prefix.REXWXB:
+                        case Prefix.REXWR:
+                        case Prefix.REXWRB:
+                        case Prefix.REXWRX:
+                        case Prefix.REXWRXB:
+                            assert targetIs64Bit : "found 64bit prefix";
+                            ip++;
+                            // fall through
+                        default:
+                            ip++;
+                    }
+                    hasDisp32 = true; // has both kinds of operands!
+                    break;
+
+                default:
+                    throw new InternalError("should not reach here");
+            }
+        }
+
+        assert hasDisp32 : "(thomaswue) not sure if this holds: instruction has no disp32 field";
+
+        // parse the output of emitOperand
+        int op2 = 0xFF & code[ip++];
+        int base = op2 & 0x07;
+        int op3 = -1;
+        int b100 = 4;
+        int b101 = 5;
+        if (base == b100 && (op2 >> 6) != 3) {
+            op3 = 0xFF & code[ip++];
+            base = op3 & 0x07; // refetch the base
+        }
+        // now ip points at the disp (if any)
+
+        switch (op2 >> 6) {
+            case 0:
+                // [00 reg 100][ss index base]
+                // [00 reg 100][00 100 esp]
+                // [00 reg base]
+                // [00 reg 100][ss index 101][disp32]
+                // [00 reg 101] [disp32]
+
+                if (base == b101) {
+
+                    currentDisplacementPosition = ip;
+                    ip += 4; // skip the disp32
+                }
+                break;
+
+            case 1:
+                // [01 reg 100][ss index base][disp8]
+                // [01 reg 100][00 100 esp][disp8]
+                // [01 reg base] [disp8]
+                ip += 1; // skip the disp8
+                break;
+
+            case 2:
+                // [10 reg 100][ss index base][disp32]
+                // [10 reg 100][00 100 esp][disp32]
+                // [10 reg base] [disp32]
+                currentDisplacementPosition = ip;
+                ip += 4; // skip the disp32
+                break;
+
+            case 3:
+                // [11 reg base] (not a memory addressing mode)
+                break;
+        }
+
+        currentEndOfInstruction = ip + tailSize;
+    }
+
+    public static void patchRelativeInstruction(byte[] code, int codePos, int relative) {
+        X86InstructionDecoder decoder = new X86InstructionDecoder(code, true);
+        decoder.decodePosition(codePos);
+        int patchPos = decoder.currentDisplacementPosition();
+        int endOfInstruction = decoder.currentEndOfInstruction();
+        int offset = relative - endOfInstruction + codePos;
+        patchDisp32(code, patchPos, offset);
+    }
+
+    private static void patchDisp32(byte[] code, int pos, int offset) {
+        assert pos + 4 <= code.length;
+
+        assert code[pos] == 0;
+        assert code[pos + 1] == 0;
+        assert code[pos + 2] == 0;
+        assert code[pos + 3] == 0;
+
+        code[pos] = (byte) (offset & 0xFF);
+        code[pos + 1] = (byte) ((offset >> 8) & 0xFF);
+        code[pos + 2] = (byte) ((offset >> 16) & 0xFF);
+        code[pos + 3] = (byte) ((offset >> 24) & 0xFF);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.asm/overview.html	Wed Oct 03 17:42:12 2012 +0200
@@ -0,0 +1,36 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head>
+<!--
+
+Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License version 2 only, as
+published by the Free Software Foundation.  Oracle designates this
+particular file as subject to the "Classpath" exception as provided
+by Oracle in the LICENSE file that accompanied this code.
+
+This code is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+version 2 for more details (a copy is included in the LICENSE file that
+accompanied this code).
+
+You should have received a copy of the GNU General Public License version
+2 along with this work; if not, write to the Free Software Foundation,
+Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+or visit www.oracle.com if you need additional information or have any
+questions.
+-->
+
+</head>
+<body>
+
+Documentation for the <code>com.oracle.max.asm</code> project.
+
+</body>
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.asm/src/com/oracle/max/asm/AbstractAssembler.java	Wed Oct 03 17:42:12 2012 +0200
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.max.asm;
+
+import com.oracle.graal.api.code.*;
+import com.oracle.graal.api.code.Architecture.*;
+
+/**
+ * The platform-independent base class for the assembler.
+ */
+public abstract class AbstractAssembler {
+    public final TargetDescription target;
+    public final Buffer codeBuffer;
+
+    public AbstractAssembler(TargetDescription target) {
+        this.target = target;
+
+        if (target.arch.byteOrder == ByteOrder.BigEndian) {
+            this.codeBuffer = new Buffer.BigEndian();
+        } else {
+            this.codeBuffer = new Buffer.LittleEndian();
+        }
+    }
+
+    public final void bind(Label l) {
+        assert !l.isBound() : "can bind label only once";
+        l.bind(codeBuffer.position());
+        l.patchInstructions(this);
+    }
+
+    public abstract void align(int modulus);
+
+    public abstract void jmp(Label l);
+
+    protected abstract void patchJumpTarget(int branch, int jumpTarget);
+
+    /**
+     * Emits instruction(s) that access an address specified by a given displacement from the stack pointer
+     * in the direction that the stack grows (which is down on most architectures).
+     *
+     * @param disp the displacement from the stack pointer at which the stack should be accessed
+     */
+    public abstract void bangStack(int disp);
+
+    protected final void emitByte(int x) {
+        codeBuffer.emitByte(x);
+    }
+
+    protected final void emitShort(int x) {
+        codeBuffer.emitShort(x);
+    }
+
+    protected final void emitInt(int x) {
+        codeBuffer.emitInt(x);
+    }
+
+    protected final void emitLong(long x) {
+        codeBuffer.emitLong(x);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.asm/src/com/oracle/max/asm/AsmOptions.java	Wed Oct 03 17:42:12 2012 +0200
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2011, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.max.asm;
+
+public class AsmOptions {
+    public static int     InitialCodeBufferSize         = 232;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.asm/src/com/oracle/max/asm/Buffer.java	Wed Oct 03 17:42:12 2012 +0200
@@ -0,0 +1,242 @@
+/*
+ * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.max.asm;
+
+import java.util.*;
+
+/**
+ * Code buffer management for the assembler. Support for little endian and big endian architectures is implemented using subclasses.
+ */
+public abstract class Buffer {
+    protected byte[] data;
+    protected int position;
+
+    public Buffer() {
+        data = new byte[AsmOptions.InitialCodeBufferSize];
+    }
+
+    public void reset() {
+        position = 0;
+    }
+
+    public int position() {
+        return position;
+    }
+
+    public void setPosition(int position) {
+        assert position >= 0 && position <= data.length;
+        this.position = position;
+    }
+
+    /**
+     * Closes this buffer. No extra data can be written to this buffer after
+     * this call.
+     *
+     * @param trimmedCopy
+     *            if {@code true}, then a copy of the underlying byte array up
+     *            to (but not including) {@code position()} is returned
+     * @return the data in this buffer or a trimmed copy if {@code trimmedCopy}
+     *         is {@code true}
+     */
+    public byte[] close(boolean trimmedCopy) {
+        byte[] result = trimmedCopy ? Arrays.copyOf(data, position()) : data;
+        data = null;
+        return result;
+    }
+
+    public byte[] copyData(int start, int end) {
+        return Arrays.copyOfRange(data, start, end);
+    }
+
+    /**
+     * Copies the data from this buffer into a given array.
+     *
+     * @param dst
+     *            the destination array
+     * @param off
+     *            starting position in {@code dst}
+     * @param len
+     *            number of bytes to copy
+     */
+    public void copyInto(byte[] dst, int off, int len) {
+        System.arraycopy(data, 0, dst, off, len);
+    }
+
+    protected void ensureSize(int length) {
+        if (length >= data.length) {
+            data = Arrays.copyOf(data, length * 4);
+        }
+    }
+
+    public void emitBytes(byte[] arr, int off, int len) {
+        ensureSize(position + len);
+        System.arraycopy(arr, off, data, position, len);
+        position += len;
+    }
+
+    public void emitByte(int b) {
+        position = emitByte(b, position);
+    }
+
+    public void emitShort(int b) {
+        position = emitShort(b, position);
+    }
+
+    public void emitInt(int b) {
+        position = emitInt(b, position);
+    }
+
+    public void emitLong(long b) {
+        position = emitLong(b, position);
+    }
+
+    public int emitByte(int b, int pos) {
+        assert NumUtil.isUByte(b);
+        int newPos = pos + 1;
+        ensureSize(newPos);
+        data[pos] = (byte) (b & 0xFF);
+        return newPos;
+    }
+
+    public abstract int emitShort(int b, int pos);
+
+    public abstract int emitInt(int b, int pos);
+
+    public abstract int emitLong(long b, int pos);
+
+    public int getByte(int pos) {
+        return data[pos] & 0xff;
+    }
+
+    public abstract int getShort(int pos);
+
+    public abstract int getInt(int pos);
+
+    public static final class BigEndian extends Buffer {
+        @Override
+        public int emitShort(int b, int pos) {
+            assert NumUtil.isUShort(b);
+            int newPos = pos + 2;
+            ensureSize(pos + 2);
+            data[pos] = (byte) ((b >> 8) & 0xFF);
+            data[pos + 1] = (byte) (b & 0xFF);
+            return newPos;
+        }
+
+        @Override
+        public int emitInt(int b, int pos) {
+            int newPos = pos + 4;
+            ensureSize(newPos);
+            data[pos] = (byte) ((b >> 24) & 0xFF);
+            data[pos + 1] = (byte) ((b >> 16) & 0xFF);
+            data[pos + 2] = (byte) ((b >> 8) & 0xFF);
+            data[pos + 3] = (byte) (b & 0xFF);
+            return newPos;
+        }
+
+        @Override
+        public int emitLong(long b, int pos) {
+            int newPos = pos + 8;
+            ensureSize(newPos);
+            data[pos] = (byte) ((b >> 56) & 0xFF);
+            data[pos + 1] = (byte) ((b >> 48) & 0xFF);
+            data[pos + 2] = (byte) ((b >> 40) & 0xFF);
+            data[pos + 3] = (byte) ((b >> 32) & 0xFF);
+            data[pos + 4] = (byte) ((b >> 24) & 0xFF);
+            data[pos + 5] = (byte) ((b >> 16) & 0xFF);
+            data[pos + 6] = (byte) ((b >> 8) & 0xFF);
+            data[pos + 7] = (byte) (b & 0xFF);
+            return newPos;
+        }
+
+        @Override
+        public int getShort(int pos) {
+            return
+                (data[pos + 0] & 0xff) << 8 |
+                (data[pos + 1] & 0xff) << 0;
+        }
+
+        @Override
+        public int getInt(int pos) {
+            return
+                (data[pos + 0] & 0xff) << 24 |
+                (data[pos + 1] & 0xff) << 16 |
+                (data[pos + 2] & 0xff) << 8 |
+                (data[pos + 3] & 0xff) << 0;
+        }
+    }
+
+    public static final class LittleEndian extends Buffer {
+        @Override
+        public int emitShort(int b, int pos) {
+            assert NumUtil.isUShort(b);
+            int newPos = pos + 2;
+            ensureSize(newPos);
+            data[pos] = (byte) (b & 0xFF);
+            data[pos + 1] = (byte) ((b >> 8) & 0xFF);
+            return newPos;
+        }
+
+        @Override
+        public int emitInt(int b, int pos) {
+            int newPos = pos + 4;
+            ensureSize(newPos);
+            data[pos] = (byte) (b & 0xFF);
+            data[pos + 1] = (byte) ((b >> 8) & 0xFF);
+            data[pos + 2] = (byte) ((b >> 16) & 0xFF);
+            data[pos + 3] = (byte) ((b >> 24) & 0xFF);
+            return newPos;
+        }
+
+        @Override
+        public int emitLong(long b, int pos) {
+            int newPos = pos + 8;
+            ensureSize(newPos);
+            data[pos] = (byte) (b & 0xFF);
+            data[pos + 1] = (byte) ((b >> 8) & 0xFF);
+            data[pos + 2] = (byte) ((b >> 16) & 0xFF);
+            data[pos + 3] = (byte) ((b >> 24) & 0xFF);
+            data[pos + 4] = (byte) ((b >> 32) & 0xFF);
+            data[pos + 5] = (byte) ((b >> 40) & 0xFF);
+            data[pos + 6] = (byte) ((b >> 48) & 0xFF);
+            data[pos + 7] = (byte) ((b >> 56) & 0xFF);
+            return newPos;
+        }
+
+        @Override
+        public int getShort(int pos) {
+            return
+                (data[pos + 1] & 0xff) << 8 |
+                (data[pos + 0] & 0xff) << 0;
+        }
+
+        @Override
+        public int getInt(int pos) {
+            return
+                (data[pos + 3] & 0xff) << 24 |
+                (data[pos + 2] & 0xff) << 16 |
+                (data[pos + 1] & 0xff) << 8 |
+                (data[pos + 0] & 0xff) << 0;
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.asm/src/com/oracle/max/asm/Label.java	Wed Oct 03 17:42:12 2012 +0200
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.max.asm;
+
+import java.util.ArrayList;
+
+/**
+ * This class represents a label within assembly code.
+ */
+public final class Label {
+    private int position = -1;
+
+    /**
+     * References to instructions that jump to this unresolved label.
+     * These instructions need to be patched when the label is bound
+     * using the {@link #patchInstructions(AbstractAssembler)} method.
+     */
+    private ArrayList<Integer> patchPositions = new ArrayList<>(4);
+
+    /**
+     * Returns the position of this label in the code buffer.
+     * @return the position
+     */
+    public int position() {
+        assert position >= 0 : "Unbound label is being referenced";
+        return position;
+    }
+
+    public Label() {
+    }
+
+    /**
+     * Binds the label to the specified position.
+     * @param pos the position
+     */
+    protected void bind(int pos) {
+        this.position = pos;
+        assert isBound();
+    }
+
+    public boolean isBound() {
+        return position >= 0;
+    }
+
+    public void addPatchAt(int branchLocation) {
+        assert !isBound() : "Label is already bound";
+        patchPositions.add(branchLocation);
+    }
+
+    protected void patchInstructions(AbstractAssembler masm) {
+        assert isBound() : "Label should be bound";
+        int target = position;
+        for (int i = 0; i < patchPositions.size(); ++i) {
+            int pos = patchPositions.get(i);
+            masm.patchJumpTarget(pos, target);
+        }
+    }
+
+    @Override
+    public String toString() {
+        return isBound() ? String.valueOf(position()) : "?";
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graal/com.oracle.graal.asm/src/com/oracle/max/asm/NumUtil.java	Wed Oct 03 17:42:12 2012 +0200
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2011, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package com.oracle.max.asm;
+
+/**
+ * A collection of static utility functions that check ranges of numbers.
+ */
+public class NumUtil {
+    public static boolean isShiftCount(int x) {
+        return 0 <= x && x < 32;
+    }
+
+    /**
+     * Determines if a given {@code int} value is the range of unsigned byte
+     * values.
+     */
+    public static boolean isUByte(int x) {
+        return (x & 0xff) == x;
+    }
+
+    /**
+     * Determines if a given {@code int} value is the range of signed byte
+     * values.
+     */
+    public static boolean isByte(int x) {
+        return (byte) x == x;
+    }
+
+    /**
+     * Determines if a given {@code long} value is the range of unsigned byte
+     * values.
+     */
+    public static boolean isUByte(long x) {
+        return (x & 0xffL) == x;
+    }
+
+    /**
+     * Determines if a given {@code long} value is the range of signed byte
+     * values.
+     */
+    public static boolean isByte(long l) {
+        return (byte) l == l;
+    }
+
+    /**
+     * Determines if a given {@code long} value is the range of unsigned int
+     * values.
+     */
+    public static boolean isUInt(long x) {
+        return (x & 0xffffffffL) == x;
+    }
+
+    /**
+     * Determines if a given {@code long} value is the range of signed int
+     * values.
+     */
+    public static boolean isInt(long l) {
+        return (int) l == l;
+    }
+
+    /**
+     * Determines if a given {@code int} value is the range of signed short
+     * values.
+     */
+    public static boolean isShort(int x) {
+        return (short) x == x;
+    }
+
+    public static boolean isUShort(int s) {
+        return s == (s & 0xFFFF);
+    }
+
+    public static boolean is32bit(long x) {
+        return -0x80000000L <= x && x < 0x80000000L;
+    }
+
+    public static short safeToShort(int v) {
+        assert isShort(v);
+        return (short) v;
+    }
+
+    public static int roundUp(int number, int mod) {
+        return ((number + mod - 1) / mod) * mod;
+    }
+}
--- a/graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/AMD64.java	Wed Oct 03 16:49:51 2012 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,128 +0,0 @@
-/*
- * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.max.asm.amd64;
-
-import static com.oracle.graal.api.code.Register.RegisterFlag.*;
-import static com.oracle.graal.api.meta.Kind.*;
-import static com.oracle.max.criutils.MemoryBarriers.*;
-
-import com.oracle.graal.api.code.*;
-import com.oracle.graal.api.code.Register.*;
-
-/**
- * Represents the AMD64 architecture.
- */
-public class AMD64 extends Architecture {
-
-    // General purpose CPU registers
-    public static final Register rax = new Register(0, 0, 8, "rax", CPU, RegisterFlag.Byte);
-    public static final Register rcx = new Register(1, 1, 8, "rcx", CPU, RegisterFlag.Byte);
-    public static final Register rdx = new Register(2, 2, 8, "rdx", CPU, RegisterFlag.Byte);
-    public static final Register rbx = new Register(3, 3, 8, "rbx", CPU, RegisterFlag.Byte);
-    public static final Register rsp = new Register(4, 4, 8, "rsp", CPU, RegisterFlag.Byte);
-    public static final Register rbp = new Register(5, 5, 8, "rbp", CPU, RegisterFlag.Byte);
-    public static final Register rsi = new Register(6, 6, 8, "rsi", CPU, RegisterFlag.Byte);
-    public static final Register rdi = new Register(7, 7, 8, "rdi", CPU, RegisterFlag.Byte);
-
-    public static final Register r8  = new Register(8,  8,  8, "r8", CPU, RegisterFlag.Byte);
-    public static final Register r9  = new Register(9,  9,  8, "r9", CPU, RegisterFlag.Byte);
-    public static final Register r10 = new Register(10, 10, 8, "r10", CPU, RegisterFlag.Byte);
-    public static final Register r11 = new Register(11, 11, 8, "r11", CPU, RegisterFlag.Byte);
-    public static final Register r12 = new Register(12, 12, 8, "r12", CPU, RegisterFlag.Byte);
-    public static final Register r13 = new Register(13, 13, 8, "r13", CPU, RegisterFlag.Byte);
-    public static final Register r14 = new Register(14, 14, 8, "r14", CPU, RegisterFlag.Byte);
-    public static final Register r15 = new Register(15, 15, 8, "r15", CPU, RegisterFlag.Byte);
-
-    public static final Register[] cpuRegisters = {
-        rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi,
-        r8, r9, r10, r11, r12, r13, r14, r15
-    };
-
-    // XMM registers
-    public static final Register xmm0 = new Register(16, 0, 8, "xmm0", FPU);
-    public static final Register xmm1 = new Register(17, 1, 8, "xmm1", FPU);
-    public static final Register xmm2 = new Register(18, 2, 8, "xmm2", FPU);
-    public static final Register xmm3 = new Register(19, 3, 8, "xmm3", FPU);
-    public static final Register xmm4 = new Register(20, 4, 8, "xmm4", FPU);
-    public static final Register xmm5 = new Register(21, 5, 8, "xmm5", FPU);
-    public static final Register xmm6 = new Register(22, 6, 8, "xmm6", FPU);
-    public static final Register xmm7 = new Register(23, 7, 8, "xmm7", FPU);
-
-    public static final Register xmm8 =  new Register(24,  8, 8, "xmm8",  FPU);
-    public static final Register xmm9 =  new Register(25,  9, 8, "xmm9",  FPU);
-    public static final Register xmm10 = new Register(26, 10, 8, "xmm10", FPU);
-    public static final Register xmm11 = new Register(27, 11, 8, "xmm11", FPU);
-    public static final Register xmm12 = new Register(28, 12, 8, "xmm12", FPU);
-    public static final Register xmm13 = new Register(29, 13, 8, "xmm13", FPU);
-    public static final Register xmm14 = new Register(30, 14, 8, "xmm14", FPU);
-    public static final Register xmm15 = new Register(31, 15, 8, "xmm15", FPU);
-
-    public static final Register[] xmmRegisters = {
-        xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
-        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15
-    };
-
-    public static final Register[] cpuxmmRegisters = {
-        rax,  rcx,  rdx,   rbx,   rsp,   rbp,   rsi,   rdi,
-        r8,   r9,   r10,   r11,   r12,   r13,   r14,   r15,
-        xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
-        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15
-    };
-
-    /**
-     * Register used to construct an instruction-relative address.
-     */
-    public static final Register rip = new Register(32, -1, 0, "rip");
-
-    public static final Register[] allRegisters = {
-        rax,  rcx,  rdx,   rbx,   rsp,   rbp,   rsi,   rdi,
-        r8,   r9,   r10,   r11,   r12,   r13,   r14,   r15,
-        xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
-        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
-        rip
-    };
-
-    public static final RegisterValue RSP = rsp.asValue(Long);
-
-    public AMD64() {
-        super("AMD64",
-              8,
-              ByteOrder.LittleEndian,
-              allRegisters,
-              LOAD_STORE | STORE_STORE,
-              1,
-              r15.encoding + 1,
-              8);
-    }
-
-    @Override
-    public boolean isX86() {
-        return true;
-    }
-
-    @Override
-    public boolean twoOperandMode() {
-        return true;
-    }
-
-}
--- a/graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/AMD64AsmOptions.java	Wed Oct 03 16:49:51 2012 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.max.asm.amd64;
-
-public class AMD64AsmOptions {
-    public static int     Atomics                       = 0;
-    public static boolean UseNormalNop                  = true;
-    public static boolean UseAddressNop                 = true;
-    public static boolean UseIncDec                     = false;
-    public static boolean UseXmmLoadAndClearUpper       = true;
-    public static boolean UseXmmRegToRegMoveAll         = false;
-}
--- a/graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/AMD64Assembler.java	Wed Oct 03 16:49:51 2012 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3034 +0,0 @@
-/*
- * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.max.asm.amd64;
-
-import static com.oracle.graal.api.code.ValueUtil.*;
-import static com.oracle.max.asm.NumUtil.*;
-import static com.oracle.max.asm.amd64.AMD64.*;
-import static com.oracle.max.asm.amd64.AMD64AsmOptions.*;
-import static com.oracle.max.criutils.MemoryBarriers.*;
-
-import com.oracle.graal.api.code.*;
-import com.oracle.graal.api.meta.*;
-import com.oracle.max.asm.*;
-
-/**
- * This class implements an assembler that can encode most X86 instructions.
- */
-public class AMD64Assembler extends AbstractAssembler {
-    /**
-     * The kind for pointers and raw registers.  Since we know we are 64 bit here, we can hardcode it.
-     */
-    private static final Kind Word = Kind.Long;
-
-    private static final int MinEncodingNeedsRex = 8;
-
-    /**
-     * The x86 condition codes used for conditional jumps/moves.
-     */
-    public enum ConditionFlag {
-        zero(0x4, "|zero|"),
-        notZero(0x5, "|nzero|"),
-        equal(0x4, "="),
-        notEqual(0x5, "!="),
-        less(0xc, "<"),
-        lessEqual(0xe, "<="),
-        greater(0xf, ">"),
-        greaterEqual(0xd, ">="),
-        below(0x2, "|<|"),
-        belowEqual(0x6, "|<=|"),
-        above(0x7, "|>|"),
-        aboveEqual(0x3, "|>=|"),
-        overflow(0x0, "|of|"),
-        noOverflow(0x1, "|nof|"),
-        carrySet(0x2, "|carry|"),
-        carryClear(0x3, "|ncarry|"),
-        negative(0x8, "|neg|"),
-        positive(0x9, "|pos|"),
-        parity(0xa, "|par|"),
-        noParity(0xb, "|npar|");
-
-        public final int value;
-        public final String operator;
-
-        private ConditionFlag(int value, String operator) {
-            this.value = value;
-            this.operator = operator;
-        }
-
-        public ConditionFlag negate() {
-            switch(this) {
-                case zero: return notZero;
-                case notZero: return zero;
-                case equal: return notEqual;
-                case notEqual: return equal;
-                case less: return greaterEqual;
-                case lessEqual: return greater;
-                case greater: return lessEqual;
-                case greaterEqual: return less;
-                case below: return aboveEqual;
-                case belowEqual: return above;
-                case above: return belowEqual;
-                case aboveEqual: return below;
-                case overflow: return noOverflow;
-                case noOverflow: return overflow;
-                case carrySet: return carryClear;
-                case carryClear: return carrySet;
-                case negative: return positive;
-                case positive: return negative;
-                case parity: return noParity;
-                case noParity: return parity;
-            }
-            throw new IllegalArgumentException();
-        }
-    }
-
-    /**
-     * Constants for X86 prefix bytes.
-     */
-    private static class Prefix {
-        private static final int REX = 0x40;
-        private static final int REXB = 0x41;
-        private static final int REXX = 0x42;
-        private static final int REXXB = 0x43;
-        private static final int REXR = 0x44;
-        private static final int REXRB = 0x45;
-        private static final int REXRX = 0x46;
-        private static final int REXRXB = 0x47;
-        private static final int REXW = 0x48;
-        private static final int REXWB = 0x49;
-        private static final int REXWX = 0x4A;
-        private static final int REXWXB = 0x4B;
-        private static final int REXWR = 0x4C;
-        private static final int REXWRB = 0x4D;
-        private static final int REXWRX = 0x4E;
-        private static final int REXWRXB = 0x4F;
-    }
-
-    /**
-     * The register to which {@link Register#Frame} and {@link Register#CallerFrame} are bound.
-     */
-    public final Register frameRegister;
-
-    /**
-     * Constructs an assembler for the AMD64 architecture.
-     *
-     * @param registerConfig the register configuration used to bind {@link Register#Frame} and
-     *            {@link Register#CallerFrame} to physical registers. This value can be null if this assembler
-     *            instance will not be used to assemble instructions using these logical registers.
-     */
-    public AMD64Assembler(TargetDescription target, RegisterConfig registerConfig) {
-        super(target);
-        this.frameRegister = registerConfig == null ? null : registerConfig.getFrameRegister();
-    }
-
-    private static int encode(Register r) {
-        assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
-        return r.encoding & 0x7;
-    }
-
-    private void emitArithB(int op1, int op2, Register dst, int imm8) {
-        assert dst.isByte() : "must have byte register";
-        assert isUByte(op1) && isUByte(op2) : "wrong opcode";
-        assert isUByte(imm8) : "not a byte";
-        assert (op1 & 0x01) == 0 : "should be 8bit operation";
-        emitByte(op1);
-        emitByte(op2 | encode(dst));
-        emitByte(imm8);
-    }
-
-    private void emitArith(int op1, int op2, Register dst, int imm32) {
-        assert isUByte(op1) && isUByte(op2) : "wrong opcode";
-        assert (op1 & 0x01) == 1 : "should be 32bit operation";
-        assert (op1 & 0x02) == 0 : "sign-extension bit should not be set";
-        if (isByte(imm32)) {
-            emitByte(op1 | 0x02); // set sign bit
-            emitByte(op2 | encode(dst));
-            emitByte(imm32 & 0xFF);
-        } else {
-            emitByte(op1);
-            emitByte(op2 | encode(dst));
-            emitInt(imm32);
-        }
-    }
-
-    // immediate-to-memory forms
-    private void emitArithOperand(int op1, Register rm, Address adr, int imm32) {
-        assert (op1 & 0x01) == 1 : "should be 32bit operation";
-        assert (op1 & 0x02) == 0 : "sign-extension bit should not be set";
-        if (isByte(imm32)) {
-            emitByte(op1 | 0x02); // set sign bit
-            emitOperandHelper(rm, adr);
-            emitByte(imm32 & 0xFF);
-        } else {
-            emitByte(op1);
-            emitOperandHelper(rm, adr);
-            emitInt(imm32);
-        }
-    }
-
-    private void emitArith(int op1, int op2, Register dst, Register src) {
-        assert isUByte(op1) && isUByte(op2) : "wrong opcode";
-        emitByte(op1);
-        emitByte(op2 | encode(dst) << 3 | encode(src));
-    }
-
-    private void emitOperandHelper(Register reg, Address addr) {
-        Register base = isLegal(addr.getBase()) ? asRegister(addr.getBase()) : Register.None;
-        Register index = isLegal(addr.getIndex()) ? asRegister(addr.getIndex()) : Register.None;
-
-        Address.Scale scale = addr.getScale();
-        int disp = addr.getDisplacement();
-
-        if (base == Register.Frame) {
-            assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration";
-            base = frameRegister;
-//        } else if (base == Register.CallerFrame) {
-//            assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration";
-//            base = frameRegister;
-//            disp += targetMethod.frameSize() + 8;
-        }
-
-        // Encode the registers as needed in the fields they are used in
-
-        assert reg != Register.None;
-        int regenc = encode(reg) << 3;
-
-        if (base == AMD64.rip) {
-            // [00 000 101] disp32
-            emitByte(0x05 | regenc);
-            emitInt(disp);
-        } else if (addr == Address.Placeholder) {
-            // [00 000 101] disp32
-            emitByte(0x05 | regenc);
-            emitInt(0);
-
-        } else if (base.isValid()) {
-            int baseenc = base.isValid() ? encode(base) : 0;
-            if (index.isValid()) {
-                int indexenc = encode(index) << 3;
-                // [base + indexscale + disp]
-                if (disp == 0 && base != rbp && (base != r13)) {
-                    // [base + indexscale]
-                    // [00 reg 100][ss index base]
-                    assert index != rsp : "illegal addressing mode";
-                    emitByte(0x04 | regenc);
-                    emitByte(scale.log2 << 6 | indexenc | baseenc);
-                } else if (isByte(disp)) {
-                    // [base + indexscale + imm8]
-                    // [01 reg 100][ss index base] imm8
-                    assert index != rsp : "illegal addressing mode";
-                    emitByte(0x44 | regenc);
-                    emitByte(scale.log2 << 6 | indexenc | baseenc);
-                    emitByte(disp & 0xFF);
-                } else {
-                    // [base + indexscale + disp32]
-                    // [10 reg 100][ss index base] disp32
-                    assert index != rsp : "illegal addressing mode";
-                    emitByte(0x84 | regenc);
-                    emitByte(scale.log2 << 6 | indexenc | baseenc);
-                    emitInt(disp);
-                }
-            } else if (base == rsp || (base == r12)) {
-                // [rsp + disp]
-                if (disp == 0) {
-                    // [rsp]
-                    // [00 reg 100][00 100 100]
-                    emitByte(0x04 | regenc);
-                    emitByte(0x24);
-                } else if (isByte(disp)) {
-                    // [rsp + imm8]
-                    // [01 reg 100][00 100 100] disp8
-                    emitByte(0x44 | regenc);
-                    emitByte(0x24);
-                    emitByte(disp & 0xFF);
-                } else {
-                    // [rsp + imm32]
-                    // [10 reg 100][00 100 100] disp32
-                    emitByte(0x84 | regenc);
-                    emitByte(0x24);
-                    emitInt(disp);
-                }
-            } else {
-                // [base + disp]
-                assert base != rsp && (base != r12) : "illegal addressing mode";
-                if (disp == 0 && base != rbp && (base != r13)) {
-                    // [base]
-                    // [00 reg base]
-                    emitByte(0x00 | regenc | baseenc);
-                } else if (isByte(disp)) {
-                    // [base + disp8]
-                    // [01 reg base] disp8
-                    emitByte(0x40 | regenc | baseenc);
-                    emitByte(disp & 0xFF);
-                } else {
-                    // [base + disp32]
-                    // [10 reg base] disp32
-                    emitByte(0x80 | regenc | baseenc);
-                    emitInt(disp);
-                }
-            }
-        } else {
-            if (index.isValid()) {
-                int indexenc = encode(index) << 3;
-                // [indexscale + disp]
-                // [00 reg 100][ss index 101] disp32
-                assert index != rsp : "illegal addressing mode";
-                emitByte(0x04 | regenc);
-                emitByte(scale.log2 << 6 | indexenc | 0x05);
-                emitInt(disp);
-            } else {
-                // [disp] ABSOLUTE
-                // [00 reg 100][00 100 101] disp32
-                emitByte(0x04 | regenc);
-                emitByte(0x25);
-                emitInt(disp);
-            }
-        }
-    }
-
-    public final void addl(Address dst, int imm32) {
-        prefix(dst);
-        emitArithOperand(0x81, rax, dst, imm32);
-    }
-
-    public final void addl(Address dst, Register src) {
-        prefix(dst, src);
-        emitByte(0x01);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void addl(Register dst, int imm32) {
-        prefix(dst);
-        emitArith(0x81, 0xC0, dst, imm32);
-    }
-
-    public final void addl(Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0x03);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void addl(Register dst, Register src) {
-        prefixAndEncode(dst.encoding, src.encoding);
-        emitArith(0x03, 0xC0, dst, src);
-    }
-
-    private void addrNop4() {
-        // 4 bytes: NOP DWORD PTR [EAX+0]
-        emitByte(0x0F);
-        emitByte(0x1F);
-        emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
-        emitByte(0); // 8-bits offset (1 byte)
-    }
-
-    private void addrNop5() {
-        // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
-        emitByte(0x0F);
-        emitByte(0x1F);
-        emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
-        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
-        emitByte(0); // 8-bits offset (1 byte)
-    }
-
-    private void addrNop7() {
-        // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
-        emitByte(0x0F);
-        emitByte(0x1F);
-        emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
-        emitInt(0); // 32-bits offset (4 bytes)
-    }
-
-    private void addrNop8() {
-        // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
-        emitByte(0x0F);
-        emitByte(0x1F);
-        emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
-        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
-        emitInt(0); // 32-bits offset (4 bytes)
-    }
-
-    public final void addsd(Register dst, Register src) {
-        assert dst.isFpu() && src.isFpu();
-        emitByte(0xF2);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x58);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void addsd(Register dst, Address src) {
-        assert dst.isFpu();
-        emitByte(0xF2);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x58);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void addss(Register dst, Register src) {
-        assert dst.isFpu() && src.isFpu();
-        emitByte(0xF3);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x58);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void addss(Register dst, Address src) {
-        assert dst.isFpu();
-        emitByte(0xF3);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x58);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void andl(Register dst, int imm32) {
-        prefix(dst);
-        emitArith(0x81, 0xE0, dst, imm32);
-    }
-
-    public final void andl(Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0x23);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void andl(Register dst, Register src) {
-        prefixAndEncode(dst.encoding, src.encoding);
-        emitArith(0x23, 0xC0, dst, src);
-    }
-
-    public final void bsfq(Register dst, Register src) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xBC);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void bsfq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0xBC);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void bsrq(Register dst, Register src) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xBD);
-        emitByte(0xC0 | encode);
-    }
-
-
-    public final void bsrq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0xBD);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void bsrl(Register dst, Register src) {
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xBD);
-        emitByte(0xC0 | encode);
-    }
-
-
-    public final void bsrl(Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0xBD);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void bswapl(Register reg) { // bswap
-        int encode = prefixAndEncode(reg.encoding);
-        emitByte(0x0F);
-        emitByte(0xC8 | encode);
-    }
-
-    public final void btli(Address src, int imm8) {
-        prefixq(src);
-        emitByte(0x0F);
-        emitByte(0xBA);
-        emitOperandHelper(rsp, src);
-        emitByte(imm8);
-    }
-
-    public final void cdql() {
-        emitByte(0x99);
-    }
-
-    public final void cmovl(ConditionFlag cc, Register dst, Register src) {
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x40 | cc.value);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cmovl(ConditionFlag cc, Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x40 | cc.value);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void cmpb(Address dst, int imm8) {
-        prefix(dst);
-        emitByte(0x80);
-        emitOperandHelper(rdi, dst);
-        emitByte(imm8);
-    }
-
-    public final void cmpl(Address dst, int imm32) {
-        prefix(dst);
-        emitByte(0x81);
-        emitOperandHelper(rdi, dst);
-        emitInt(imm32);
-    }
-
-    public final void cmpl(Register dst, int imm32) {
-        prefix(dst);
-        emitArith(0x81, 0xF8, dst, imm32);
-    }
-
-    public final void cmpl(Register dst, Register src) {
-        prefixAndEncode(dst.encoding, src.encoding);
-        emitArith(0x3B, 0xC0, dst, src);
-    }
-
-    public final void cmpl(Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0x3B);
-        emitOperandHelper(dst, src);
-    }
-
-    // The 32-bit cmpxchg compares the value at adr with the contents of X86.rax,
-    // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,.
-    // The ZF is set if the compared values were equal, and cleared otherwise.
-    public final void cmpxchgl(Register reg, Address adr) { // cmpxchg
-        if ((Atomics & 2) != 0) {
-            // caveat: no instructionmark, so this isn't relocatable.
-            // Emit a synthetic, non-atomic, CAS equivalent.
-            // Beware. The synthetic form sets all ICCs, not just ZF.
-            // cmpxchg r,[m] is equivalent to X86.rax, = CAS (m, X86.rax, r)
-            cmpl(rax, adr);
-            movl(rax, adr);
-            if (reg != rax) {
-                Label l = new Label();
-                jcc(ConditionFlag.notEqual, l);
-                movl(adr, reg);
-                bind(l);
-            }
-        } else {
-
-            prefix(adr, reg);
-            emitByte(0x0F);
-            emitByte(0xB1);
-            emitOperandHelper(reg, adr);
-        }
-    }
-
-    public final void comisd(Register dst, Address src) {
-        assert dst.isFpu();
-        // NOTE: dbx seems to decode this as comiss even though the
-        // 0x66 is there. Strangly ucomisd comes out correct
-        emitByte(0x66);
-        comiss(dst, src);
-    }
-
-    public final void comiss(Register dst, Address src) {
-        assert dst.isFpu();
-
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x2F);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void cvtdq2pd(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-
-        emitByte(0xF3);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xE6);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cvtdq2ps(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x5B);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cvtsd2ss(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        emitByte(0xF2);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x5A);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cvtsi2sdl(Register dst, Register src) {
-        assert dst.isFpu();
-        emitByte(0xF2);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x2A);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cvtsi2ssl(Register dst, Register src) {
-        assert dst.isFpu();
-        emitByte(0xF3);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x2A);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cvtss2sd(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        emitByte(0xF3);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x5A);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cvttsd2sil(Register dst, Register src) {
-        assert src.isFpu();
-        emitByte(0xF2);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x2C);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cvttss2sil(Register dst, Register src) {
-        assert src.isFpu();
-        emitByte(0xF3);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x2C);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void decl(Address dst) {
-        // Don't use it directly. Use Macrodecrement() instead.
-        prefix(dst);
-        emitByte(0xFF);
-        emitOperandHelper(rcx, dst);
-    }
-
-    public final void divsd(Register dst, Address src) {
-        assert dst.isFpu();
-        emitByte(0xF2);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x5E);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void divsd(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        emitByte(0xF2);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x5E);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void divss(Register dst, Address src) {
-        assert dst.isFpu();
-        emitByte(0xF3);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x5E);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void divss(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        emitByte(0xF3);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x5E);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void hlt() {
-        emitByte(0xF4);
-    }
-
-    public final void idivl(Register src) {
-        int encode = prefixAndEncode(src.encoding);
-        emitByte(0xF7);
-        emitByte(0xF8 | encode);
-    }
-
-    public final void divl(Register src) {
-        int encode = prefixAndEncode(src.encoding);
-        emitByte(0xF7);
-        emitByte(0xF0 | encode);
-    }
-
-    public final void imull(Register dst, Register src) {
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xAF);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void imull(Register dst, Register src, int value) {
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        if (isByte(value)) {
-            emitByte(0x6B);
-            emitByte(0xC0 | encode);
-            emitByte(value & 0xFF);
-        } else {
-            emitByte(0x69);
-            emitByte(0xC0 | encode);
-            emitInt(value);
-        }
-    }
-
-    public final void incl(Address dst) {
-        // Don't use it directly. Use Macroincrement() instead.
-        prefix(dst);
-        emitByte(0xFF);
-        emitOperandHelper(rax, dst);
-    }
-
-    public final void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
-        int shortSize = 2;
-        int longSize = 6;
-        long disp = jumpTarget - codeBuffer.position();
-        if (!forceDisp32 && isByte(disp - shortSize)) {
-            // 0111 tttn #8-bit disp
-            emitByte(0x70 | cc.value);
-            emitByte((int) ((disp - shortSize) & 0xFF));
-        } else {
-            // 0000 1111 1000 tttn #32-bit disp
-            assert isInt(disp - longSize) : "must be 32bit offset (call4)";
-            emitByte(0x0F);
-            emitByte(0x80 | cc.value);
-            emitInt((int) (disp - longSize));
-        }
-    }
-
-    public final void jcc(ConditionFlag cc, Label l) {
-        assert (0 <= cc.value) && (cc.value < 16) : "illegal cc";
-        if (l.isBound()) {
-            jcc(cc, l.position(), false);
-        } else {
-            // Note: could eliminate cond. jumps to this jump if condition
-            // is the same however, seems to be rather unlikely case.
-            // Note: use jccb() if label to be bound is very close to get
-            // an 8-bit displacement
-            l.addPatchAt(codeBuffer.position());
-            emitByte(0x0F);
-            emitByte(0x80 | cc.value);
-            emitInt(0);
-        }
-
-    }
-
-    public final void jccb(ConditionFlag cc, Label l) {
-        if (l.isBound()) {
-            int shortSize = 2;
-            int entry = l.position();
-            assert isByte(entry - (codeBuffer.position() + shortSize)) : "Dispacement too large for a short jmp";
-            long disp = entry - codeBuffer.position();
-            // 0111 tttn #8-bit disp
-            emitByte(0x70 | cc.value);
-            emitByte((int) ((disp - shortSize) & 0xFF));
-        } else {
-
-            l.addPatchAt(codeBuffer.position());
-            emitByte(0x70 | cc.value);
-            emitByte(0);
-        }
-    }
-
-    public final void jmp(Address adr) {
-        prefix(adr);
-        emitByte(0xFF);
-        emitOperandHelper(rsp, adr);
-    }
-
-    public final void jmp(int jumpTarget, boolean forceDisp32) {
-        int shortSize = 2;
-        int longSize = 5;
-        long disp = jumpTarget - codeBuffer.position();
-        if (!forceDisp32 && isByte(disp - shortSize)) {
-            emitByte(0xEB);
-            emitByte((int) ((disp - shortSize) & 0xFF));
-        } else {
-            emitByte(0xE9);
-            emitInt((int) (disp - longSize));
-        }
-    }
-
-    @Override
-    public final void jmp(Label l) {
-        if (l.isBound()) {
-            jmp(l.position(), false);
-        } else {
-            // By default, forward jumps are always 32-bit displacements, since
-            // we can't yet know where the label will be bound. If you're sure that
-            // the forward jump will not run beyond 256 bytes, use jmpb to
-            // force an 8-bit displacement.
-
-            l.addPatchAt(codeBuffer.position());
-            emitByte(0xE9);
-            emitInt(0);
-        }
-    }
-
-    public final void jmp(Register entry) {
-        int encode = prefixAndEncode(entry.encoding);
-        emitByte(0xFF);
-        emitByte(0xE0 | encode);
-    }
-
-    public final void jmpb(Label l) {
-        if (l.isBound()) {
-            int shortSize = 2;
-            int entry = l.position();
-            assert isByte((entry - codeBuffer.position()) + shortSize) : "Dispacement too large for a short jmp";
-            long offs = entry - codeBuffer.position();
-            emitByte(0xEB);
-            emitByte((int) ((offs - shortSize) & 0xFF));
-        } else {
-
-            l.addPatchAt(codeBuffer.position());
-            emitByte(0xEB);
-            emitByte(0);
-        }
-    }
-
-    public final void leaq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x8D);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void enter(int imm16, int imm8) {
-        emitByte(0xC8);
-        emitShort(imm16);
-        emitByte(imm8);
-    }
-
-    public final void leave() {
-        emitByte(0xC9);
-    }
-
-    public final void lock() {
-        if ((Atomics & 1) != 0) {
-            // Emit either nothing, a NOP, or a NOP: prefix
-            emitByte(0x90);
-        } else {
-            emitByte(0xF0);
-        }
-    }
-
-    // Emit mfence instruction
-    public final void mfence() {
-        emitByte(0x0F);
-        emitByte(0xAE);
-        emitByte(0xF0);
-    }
-
-    public final void mov(Register dst, Register src) {
-        movq(dst, src);
-    }
-
-    public final void movapd(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        int dstenc = dst.encoding;
-        int srcenc = src.encoding;
-        emitByte(0x66);
-        if (dstenc < 8) {
-            if (srcenc >= 8) {
-                emitByte(Prefix.REXB);
-                srcenc -= 8;
-            }
-        } else {
-            if (srcenc < 8) {
-                emitByte(Prefix.REXR);
-            } else {
-                emitByte(Prefix.REXRB);
-                srcenc -= 8;
-            }
-            dstenc -= 8;
-        }
-        emitByte(0x0F);
-        emitByte(0x28);
-        emitByte(0xC0 | dstenc << 3 | srcenc);
-    }
-
-    public final void movaps(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        int dstenc = dst.encoding;
-        int srcenc = src.encoding;
-        if (dstenc < 8) {
-            if (srcenc >= 8) {
-                emitByte(Prefix.REXB);
-                srcenc -= 8;
-            }
-        } else {
-            if (srcenc < 8) {
-                emitByte(Prefix.REXR);
-            } else {
-                emitByte(Prefix.REXRB);
-                srcenc -= 8;
-            }
-            dstenc -= 8;
-        }
-        emitByte(0x0F);
-        emitByte(0x28);
-        emitByte(0xC0 | dstenc << 3 | srcenc);
-    }
-
-    public final void movb(Register dst, Address src) {
-        prefix(src, dst); // , true)
-        emitByte(0x8A);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movb(Address dst, int imm8) {
-        prefix(dst);
-        emitByte(0xC6);
-        emitOperandHelper(rax, dst);
-        emitByte(imm8);
-    }
-
-    public final void movb(Address dst, Register src) {
-        assert src.isByte() : "must have byte register";
-        prefix(dst, src); // , true)
-        emitByte(0x88);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void movdl(Register dst, Register src) {
-        if (dst.isFpu()) {
-            assert !src.isFpu() : "does this hold?";
-            emitByte(0x66);
-            int encode = prefixAndEncode(dst.encoding, src.encoding);
-            emitByte(0x0F);
-            emitByte(0x6E);
-            emitByte(0xC0 | encode);
-        } else if (src.isFpu()) {
-            assert !dst.isFpu();
-            emitByte(0x66);
-            // swap src/dst to get correct prefix
-            int encode = prefixAndEncode(src.encoding, dst.encoding);
-            emitByte(0x0F);
-            emitByte(0x7E);
-            emitByte(0xC0 | encode);
-        }
-    }
-
-    public final void movdqa(Register dst, Address src) {
-        assert dst.isFpu();
-        emitByte(0x66);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x6F);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movdqa(Register dst, Register src) {
-        assert dst.isFpu();
-        emitByte(0x66);
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x6F);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movdqa(Address dst, Register src) {
-        assert src.isFpu();
-        emitByte(0x66);
-        prefix(dst, src);
-        emitByte(0x0F);
-        emitByte(0x7F);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void movdqu(Register dst, Address src) {
-        assert dst.isFpu();
-        emitByte(0xF3);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x6F);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movdqu(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-
-        emitByte(0xF3);
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x6F);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movdqu(Address dst, Register src) {
-        assert src.isFpu();
-
-        emitByte(0xF3);
-        prefix(dst, src);
-        emitByte(0x0F);
-        emitByte(0x7F);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void movl(Register dst, int imm32) {
-        int encode = prefixAndEncode(dst.encoding);
-        emitByte(0xB8 | encode);
-        emitInt(imm32);
-    }
-
-    public final void movl(Register dst, Register src) {
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x8B);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movl(Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0x8B);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movl(Address dst, int imm32) {
-        prefix(dst);
-        emitByte(0xC7);
-        emitOperandHelper(rax, dst);
-        emitInt(imm32);
-    }
-
-    public final void movl(Address dst, Register src) {
-        prefix(dst, src);
-        emitByte(0x89);
-        emitOperandHelper(src, dst);
-    }
-
-    /**
-     * New CPUs require use of movsd and movss to avoid partial register stall
-     * when loading from memory. But for old Opteron use movlpd instead of movsd.
-     * The selection is done in {@link AMD64MacroAssembler#movdbl(Register, Address)}
-     * and {@link AMD64MacroAssembler#movflt(Register, Register)}.
-     */
-    public final void movlpd(Register dst, Address src) {
-        assert dst.isFpu();
-        emitByte(0x66);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x12);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movlpd(Address dst, Register src) {
-        assert src.isFpu();
-        emitByte(0x66);
-        prefix(dst, src);
-        emitByte(0x0F);
-        emitByte(0x13);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void movq(Register dst, Address src) {
-        if (dst.isFpu()) {
-            emitByte(0xF3);
-            prefixq(src, dst);
-            emitByte(0x0F);
-            emitByte(0x7E);
-            emitOperandHelper(dst, src);
-        } else {
-            prefixq(src, dst);
-            emitByte(0x8B);
-            emitOperandHelper(dst, src);
-        }
-    }
-
-    public final void movq(Register dst, Register src) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x8B);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movq(Address dst, Register src) {
-        if (src.isFpu()) {
-            emitByte(0x66);
-            prefixq(dst, src);
-            emitByte(0x0F);
-            emitByte(0xD6);
-            emitOperandHelper(src, dst);
-        } else {
-            prefixq(dst, src);
-            emitByte(0x89);
-            emitOperandHelper(src, dst);
-        }
-    }
-
-    public final void movsxb(Register dst, Address src) { // movsxb
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0xBE);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movsxb(Register dst, Register src) { // movsxb
-        int encode = prefixAndEncode(dst.encoding, src.encoding, true);
-        emitByte(0x0F);
-        emitByte(0xBE);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movsd(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        emitByte(0xF2);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x10);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movsd(Register dst, Address src) {
-        assert dst.isFpu();
-        emitByte(0xF2);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x10);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movsd(Address dst, Register src) {
-        assert src.isFpu();
-        emitByte(0xF2);
-        prefix(dst, src);
-        emitByte(0x0F);
-        emitByte(0x11);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void movss(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        emitByte(0xF3);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x10);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movss(Register dst, Address src) {
-        assert dst.isFpu();
-        emitByte(0xF3);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x10);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movss(Address dst, Register src) {
-        assert src.isFpu();
-        emitByte(0xF3);
-        prefix(dst, src);
-        emitByte(0x0F);
-        emitByte(0x11);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void movswl(Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0xBF);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movsxw(Register dst, Register src) { // movsxw
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xBF);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movsxw(Register dst, Address src) { // movsxw
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0xBF);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movzxd(Register dst, Register src) { // movzxd
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x63);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movzxd(Register dst, Address src) { // movzxd
-        prefix(src, dst);
-        emitByte(0x63);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movw(Address dst, int imm16) {
-        emitByte(0x66); // switch to 16-bit mode
-        prefix(dst);
-        emitByte(0xC7);
-        emitOperandHelper(rax, dst);
-        emitShort(imm16);
-    }
-
-    public final void movw(Register dst, Address src) {
-        emitByte(0x66);
-        prefix(src, dst);
-        emitByte(0x8B);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movw(Address dst, Register src) {
-        emitByte(0x66);
-        prefix(dst, src);
-        emitByte(0x89);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void movzxb(Register dst, Address src) { // movzxb
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0xB6);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movzxb(Register dst, Register src) { // movzxb
-        int encode = prefixAndEncode(dst.encoding, src.encoding, true);
-        emitByte(0x0F);
-        emitByte(0xB6);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movzxl(Register dst, Address src) { // movzxw
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0xB7);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movzxl(Register dst, Register src) { // movzxw
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xB7);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void mull(Address src) {
-        prefix(src);
-        emitByte(0xF7);
-        emitOperandHelper(rsp, src);
-    }
-
-    public final void mulsd(Register dst, Address src) {
-        assert dst.isFpu();
-        emitByte(0xF2);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x59);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void mulsd(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-
-        emitByte(0xF2);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x59);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void mulss(Register dst, Address src) {
-        assert dst.isFpu();
-
-        emitByte(0xF3);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x59);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void mulss(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        emitByte(0xF3);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x59);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void negl(Register dst) {
-        int encode = prefixAndEncode(dst.encoding);
-        emitByte(0xF7);
-        emitByte(0xD8 | encode);
-    }
-
-    public final void ensureUniquePC() {
-        nop();
-    }
-
-    public final void nop() {
-        nop(1);
-    }
-
-    public void nop(int count) {
-        int i = count;
-        if (UseNormalNop) {
-            assert i > 0 : " ";
-            // The fancy nops aren't currently recognized by debuggers making it a
-            // pain to disassemble code while debugging. If assert are on clearly
-            // speed is not an issue so simply use the single byte traditional nop
-            // to do alignment.
-
-            for (; i > 0; i--) {
-                emitByte(0x90);
-            }
-            return;
-        }
-
-        if (UseAddressNop) {
-            //
-            // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD.
-            // 1: 0x90
-            // 2: 0x66 0x90
-            // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
-            // 4: 0x0F 0x1F 0x40 0x00
-            // 5: 0x0F 0x1F 0x44 0x00 0x00
-            // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
-            // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
-            // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
-            // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
-            // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
-            // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
-
-            // The rest coding is AMD specific - use consecutive Address nops
-
-            // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
-            // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
-            // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
-            // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
-            // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
-            // Size prefixes (0x66) are added for larger sizes
-
-            while (i >= 22) {
-                i -= 11;
-                emitByte(0x66); // size prefix
-                emitByte(0x66); // size prefix
-                emitByte(0x66); // size prefix
-                addrNop8();
-            }
-            // Generate first nop for size between 21-12
-            switch (i) {
-                case 21:
-                    i -= 1;
-                    emitByte(0x66); // size prefix
-                    // fall through
-                case 20:
-                    // fall through
-                case 19:
-                    i -= 1;
-                    emitByte(0x66); // size prefix
-                    // fall through
-                case 18:
-                    // fall through
-                case 17:
-                    i -= 1;
-                    emitByte(0x66); // size prefix
-                    // fall through
-                case 16:
-                    // fall through
-                case 15:
-                    i -= 8;
-                    addrNop8();
-                    break;
-                case 14:
-                case 13:
-                    i -= 7;
-                    addrNop7();
-                    break;
-                case 12:
-                    i -= 6;
-                    emitByte(0x66); // size prefix
-                    addrNop5();
-                    break;
-                default:
-                    assert i < 12;
-            }
-
-            // Generate second nop for size between 11-1
-            switch (i) {
-                case 11:
-                    emitByte(0x66); // size prefix
-                    emitByte(0x66); // size prefix
-                    emitByte(0x66); // size prefix
-                    addrNop8();
-                    break;
-                case 10:
-                    emitByte(0x66); // size prefix
-                    emitByte(0x66); // size prefix
-                    addrNop8();
-                    break;
-                case 9:
-                    emitByte(0x66); // size prefix
-                    addrNop8();
-                    break;
-                case 8:
-                    addrNop8();
-                    break;
-                case 7:
-                    addrNop7();
-                    break;
-                case 6:
-                    emitByte(0x66); // size prefix
-                    addrNop5();
-                    break;
-                case 5:
-                    addrNop5();
-                    break;
-                case 4:
-                    addrNop4();
-                    break;
-                case 3:
-                    // Don't use "0x0F 0x1F 0x00" - need patching safe padding
-                    emitByte(0x66); // size prefix
-                    emitByte(0x66); // size prefix
-                    emitByte(0x90); // nop
-                    break;
-                case 2:
-                    emitByte(0x66); // size prefix
-                    emitByte(0x90); // nop
-                    break;
-                case 1:
-                    emitByte(0x90); // nop
-                    break;
-                default:
-                    assert i == 0;
-            }
-            return;
-        }
-
-        // Using nops with size prefixes "0x66 0x90".
-        // From AMD Optimization Guide:
-        // 1: 0x90
-        // 2: 0x66 0x90
-        // 3: 0x66 0x66 0x90
-        // 4: 0x66 0x66 0x66 0x90
-        // 5: 0x66 0x66 0x90 0x66 0x90
-        // 6: 0x66 0x66 0x90 0x66 0x66 0x90
-        // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
-        // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
-        // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
-        // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
-        //
-        while (i > 12) {
-            i -= 4;
-            emitByte(0x66); // size prefix
-            emitByte(0x66);
-            emitByte(0x66);
-            emitByte(0x90); // nop
-        }
-        // 1 - 12 nops
-        if (i > 8) {
-            if (i > 9) {
-                i -= 1;
-                emitByte(0x66);
-            }
-            i -= 3;
-            emitByte(0x66);
-            emitByte(0x66);
-            emitByte(0x90);
-        }
-        // 1 - 8 nops
-        if (i > 4) {
-            if (i > 6) {
-                i -= 1;
-                emitByte(0x66);
-            }
-            i -= 3;
-            emitByte(0x66);
-            emitByte(0x66);
-            emitByte(0x90);
-        }
-        switch (i) {
-            case 4:
-                emitByte(0x66);
-                emitByte(0x66);
-                emitByte(0x66);
-                emitByte(0x90);
-                break;
-            case 3:
-                emitByte(0x66);
-                emitByte(0x66);
-                emitByte(0x90);
-                break;
-            case 2:
-                emitByte(0x66);
-                emitByte(0x90);
-                break;
-            case 1:
-                emitByte(0x90);
-                break;
-            default:
-                assert i == 0;
-        }
-    }
-
-    public final void notl(Register dst) {
-        int encode = prefixAndEncode(dst.encoding);
-        emitByte(0xF7);
-        emitByte(0xD0 | encode);
-    }
-
-    public final void orl(Address dst, int imm32) {
-        prefix(dst);
-        emitByte(0x81);
-        emitOperandHelper(rcx, dst);
-        emitInt(imm32);
-    }
-
-    public final void orl(Register dst, int imm32) {
-        prefix(dst);
-        emitArith(0x81, 0xC8, dst, imm32);
-    }
-
-    public final void orl(Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0x0B);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void orl(Register dst, Register src) {
-        prefixAndEncode(dst.encoding, src.encoding);
-        emitArith(0x0B, 0xC0, dst, src);
-    }
-
-    // generic
-    public final void pop(Register dst) {
-        int encode = prefixAndEncode(dst.encoding);
-        emitByte(0x58 | encode);
-    }
-
-    public final void prefetchPrefix(Address src) {
-        prefix(src);
-        emitByte(0x0F);
-    }
-
-    public final void prefetchnta(Address src) {
-        prefetchPrefix(src);
-        emitByte(0x18);
-        emitOperandHelper(rax, src); // 0, src
-    }
-
-    public final void prefetchr(Address src) {
-        prefetchPrefix(src);
-        emitByte(0x0D);
-        emitOperandHelper(rax, src); // 0, src
-    }
-
-    public final void prefetcht0(Address src) {
-        prefetchPrefix(src);
-        emitByte(0x18);
-        emitOperandHelper(rcx, src); // 1, src
-
-    }
-
-    public final void prefetcht1(Address src) {
-        prefetchPrefix(src);
-        emitByte(0x18);
-        emitOperandHelper(rdx, src); // 2, src
-    }
-
-    public final void prefetcht2(Address src) {
-        prefetchPrefix(src);
-        emitByte(0x18);
-        emitOperandHelper(rbx, src); // 3, src
-    }
-
-    public final void prefetchw(Address src) {
-        prefetchPrefix(src);
-        emitByte(0x0D);
-        emitOperandHelper(rcx, src); // 1, src
-    }
-
-    public final void pshufd(Register dst, Register src, int mode) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        assert isUByte(mode) : "invalid value";
-
-        emitByte(0x66);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x70);
-        emitByte(0xC0 | encode);
-        emitByte(mode & 0xFF);
-    }
-
-    public final void pshufd(Register dst, Address src, int mode) {
-        assert dst.isFpu();
-        assert isUByte(mode) : "invalid value";
-
-        emitByte(0x66);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x70);
-        emitOperandHelper(dst, src);
-        emitByte(mode & 0xFF);
-
-    }
-
-    public final void pshuflw(Register dst, Register src, int mode) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        assert isUByte(mode) : "invalid value";
-
-        emitByte(0xF2);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x70);
-        emitByte(0xC0 | encode);
-        emitByte(mode & 0xFF);
-    }
-
-    public final void pshuflw(Register dst, Address src, int mode) {
-        assert dst.isFpu();
-        assert isUByte(mode) : "invalid value";
-
-        emitByte(0xF2);
-        prefix(src, dst); // QQ new
-        emitByte(0x0F);
-        emitByte(0x70);
-        emitOperandHelper(dst, src);
-        emitByte(mode & 0xFF);
-    }
-
-    public final void psrlq(Register dst, int shift) {
-        assert dst.isFpu();
-        // HMM Table D-1 says sse2 or mmx
-
-        int encode = prefixqAndEncode(xmm2.encoding, dst.encoding);
-        emitByte(0x66);
-        emitByte(0x0F);
-        emitByte(0x73);
-        emitByte(0xC0 | encode);
-        emitByte(shift);
-    }
-
-    public final void punpcklbw(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        emitByte(0x66);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x60);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void push(int imm32) {
-        // in 64bits we push 64bits onto the stack but only
-        // take a 32bit immediate
-        emitByte(0x68);
-        emitInt(imm32);
-    }
-
-    public final void push(Register src) {
-        int encode = prefixAndEncode(src.encoding);
-        emitByte(0x50 | encode);
-    }
-
-    public final void pushf() {
-        emitByte(0x9C);
-    }
-
-    public final void pxor(Register dst, Address src) {
-        assert dst.isFpu();
-
-        emitByte(0x66);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0xEF);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void pxor(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-
-        emitByte(0x66);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xEF);
-        emitByte(0xC0 | encode);
-
-    }
-
-    public final void rcll(Register dst, int imm8) {
-        assert isShiftCount(imm8) : "illegal shift count";
-        int encode = prefixAndEncode(dst.encoding);
-        if (imm8 == 1) {
-            emitByte(0xD1);
-            emitByte(0xD0 | encode);
-        } else {
-            emitByte(0xC1);
-            emitByte(0xD0 | encode);
-            emitByte(imm8);
-        }
-    }
-
-    public final void pause() {
-        emitByte(0xF3);
-        emitByte(0x90);
-    }
-
-    // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx heap words.
-    public final void repeatMoveWords() {
-        emitByte(0xF3);
-        emitByte(Prefix.REXW);
-        emitByte(0xA5);
-    }
-
-    // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx bytes.
-    public final void repeatMoveBytes() {
-        emitByte(0xF3);
-        emitByte(Prefix.REXW);
-        emitByte(0xA4);
-    }
-
-    // sets X86.rcx pointer sized words with X86.rax, value at [edi]
-    // generic
-    public final void repSet() { // repSet
-        emitByte(0xF3);
-        // STOSQ
-        emitByte(Prefix.REXW);
-        emitByte(0xAB);
-    }
-
-    // scans X86.rcx pointer sized words at [edi] for occurance of X86.rax,
-    // generic
-    public final void repneScan() { // repneScan
-        emitByte(0xF2);
-        // SCASQ
-        emitByte(Prefix.REXW);
-        emitByte(0xAF);
-    }
-
-    // scans X86.rcx 4 byte words at [edi] for occurance of X86.rax,
-    // generic
-    public final void repneScanl() { // repneScan
-        emitByte(0xF2);
-        // SCASL
-        emitByte(0xAF);
-    }
-
-    public final void ret(int imm16) {
-        if (imm16 == 0) {
-            emitByte(0xC3);
-        } else {
-            emitByte(0xC2);
-            emitShort(imm16);
-        }
-    }
-
-    public final void sarl(Register dst, int imm8) {
-        int encode = prefixAndEncode(dst.encoding);
-        assert isShiftCount(imm8) : "illegal shift count";
-        if (imm8 == 1) {
-            emitByte(0xD1);
-            emitByte(0xF8 | encode);
-        } else {
-            emitByte(0xC1);
-            emitByte(0xF8 | encode);
-            emitByte(imm8);
-        }
-    }
-
-    public final void sarl(Register dst) {
-        int encode = prefixAndEncode(dst.encoding);
-        emitByte(0xD3);
-        emitByte(0xF8 | encode);
-    }
-
-    public final void sbbl(Address dst, int imm32) {
-        prefix(dst);
-        emitArithOperand(0x81, rbx, dst, imm32);
-    }
-
-    public final void sbbl(Register dst, int imm32) {
-        prefix(dst);
-        emitArith(0x81, 0xD8, dst, imm32);
-    }
-
-    public final void sbbl(Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0x1B);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void sbbl(Register dst, Register src) {
-        prefixAndEncode(dst.encoding, src.encoding);
-        emitArith(0x1B, 0xC0, dst, src);
-    }
-
-    public final void setb(ConditionFlag cc, Register dst) {
-        assert 0 <= cc.value && cc.value < 16 : "illegal cc";
-        int encode = prefixAndEncode(dst.encoding, true);
-        emitByte(0x0F);
-        emitByte(0x90 | cc.value);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void shll(Register dst, int imm8) {
-        assert isShiftCount(imm8) : "illegal shift count";
-        int encode = prefixAndEncode(dst.encoding);
-        if (imm8 == 1) {
-            emitByte(0xD1);
-            emitByte(0xE0 | encode);
-        } else {
-            emitByte(0xC1);
-            emitByte(0xE0 | encode);
-            emitByte(imm8);
-        }
-    }
-
-    public final void shll(Register dst) {
-        int encode = prefixAndEncode(dst.encoding);
-        emitByte(0xD3);
-        emitByte(0xE0 | encode);
-    }
-
-    public final void shrl(Register dst, int imm8) {
-        assert isShiftCount(imm8) : "illegal shift count";
-        int encode = prefixAndEncode(dst.encoding);
-        emitByte(0xC1);
-        emitByte(0xE8 | encode);
-        emitByte(imm8);
-    }
-
-    public final void shrl(Register dst) {
-        int encode = prefixAndEncode(dst.encoding);
-        emitByte(0xD3);
-        emitByte(0xE8 | encode);
-    }
-
-    // copies a single word from [esi] to [edi]
-    public final void smovl() {
-        emitByte(0xA5);
-    }
-
-    public final void sqrtsd(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        // HMM Table D-1 says sse2
-        // assert is64 || target.supportsSSE();
-        emitByte(0xF2);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x51);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void subl(Address dst, int imm32) {
-        prefix(dst);
-        if (isByte(imm32)) {
-            emitByte(0x83);
-            emitOperandHelper(rbp, dst);
-            emitByte(imm32 & 0xFF);
-        } else {
-            emitByte(0x81);
-            emitOperandHelper(rbp, dst);
-            emitInt(imm32);
-        }
-    }
-
-    public final void subl(Register dst, int imm32) {
-        prefix(dst);
-        emitArith(0x81, 0xE8, dst, imm32);
-    }
-
-    public final void subl(Address dst, Register src) {
-        prefix(dst, src);
-        emitByte(0x29);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void subl(Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0x2B);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void subl(Register dst, Register src) {
-        prefixAndEncode(dst.encoding, src.encoding);
-        emitArith(0x2B, 0xC0, dst, src);
-    }
-
-    public final void subsd(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        emitByte(0xF2);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x5C);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void subsd(Register dst, Address src) {
-        assert dst.isFpu();
-
-        emitByte(0xF2);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x5C);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void subss(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        emitByte(0xF3);
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x5C);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void subss(Register dst, Address src) {
-        assert dst.isFpu();
-
-        emitByte(0xF3);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x5C);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void testb(Register dst, int imm8) {
-        prefixAndEncode(dst.encoding, true);
-        emitArithB(0xF6, 0xC0, dst, imm8);
-    }
-
-    public final void testl(Register dst, int imm32) {
-        // not using emitArith because test
-        // doesn't support sign-extension of
-        // 8bit operands
-        int encode = dst.encoding;
-        if (encode == 0) {
-            emitByte(0xA9);
-        } else {
-            encode = prefixAndEncode(encode);
-            emitByte(0xF7);
-            emitByte(0xC0 | encode);
-        }
-        emitInt(imm32);
-    }
-
-    public final void testl(Register dst, Register src) {
-        prefixAndEncode(dst.encoding, src.encoding);
-        emitArith(0x85, 0xC0, dst, src);
-    }
-
-    public final void testl(Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0x85);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void ucomisd(Register dst, Address src) {
-        assert dst.isFpu();
-        emitByte(0x66);
-        ucomiss(dst, src);
-    }
-
-    public final void ucomisd(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        emitByte(0x66);
-        ucomiss(dst, src);
-    }
-
-    public final void ucomiss(Register dst, Address src) {
-        assert dst.isFpu();
-
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x2E);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void ucomiss(Register dst, Register src) {
-        assert dst.isFpu();
-        assert src.isFpu();
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x2E);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void xaddl(Address dst, Register src) {
-        assert src.isFpu();
-
-        prefix(dst, src);
-        emitByte(0x0F);
-        emitByte(0xC1);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void xchgl(Register dst, Address src) { // xchg
-        prefix(src, dst);
-        emitByte(0x87);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void xchgl(Register dst, Register src) {
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x87);
-        emitByte(0xc0 | encode);
-    }
-
-    public final void xorl(Register dst, int imm32) {
-        prefix(dst);
-        emitArith(0x81, 0xF0, dst, imm32);
-    }
-
-    public final void xorl(Register dst, Address src) {
-        prefix(src, dst);
-        emitByte(0x33);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void xorl(Register dst, Register src) {
-        prefixAndEncode(dst.encoding, src.encoding);
-        emitArith(0x33, 0xC0, dst, src);
-    }
-
-    public final void andpd(Register dst, Register src) {
-        emitByte(0x66);
-        andps(dst, src);
-    }
-
-    public final void andpd(Register dst, Address src) {
-        emitByte(0x66);
-        andps(dst, src);
-    }
-
-    public final void andps(Register dst, Register src) {
-        assert dst.isFpu() && src.isFpu();
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x54);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void andps(Register dst, Address src) {
-        assert dst.isFpu();
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x54);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void orpd(Register dst, Register src) {
-        emitByte(0x66);
-        orps(dst, src);
-    }
-
-    public final void orpd(Register dst, Address src) {
-        emitByte(0x66);
-        orps(dst, src);
-    }
-
-    public final void orps(Register dst, Register src) {
-        assert dst.isFpu() && src.isFpu();
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x56);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void orps(Register dst, Address src) {
-        assert dst.isFpu();
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x56);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void xorpd(Register dst, Register src) {
-        emitByte(0x66);
-        xorps(dst, src);
-    }
-
-    public final void xorpd(Register dst, Address src) {
-        emitByte(0x66);
-        xorps(dst, src);
-    }
-
-    public final void xorps(Register dst, Register src) {
-        assert dst.isFpu() && src.isFpu();
-        int encode = prefixAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x57);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void xorps(Register dst, Address src) {
-        assert dst.isFpu();
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x57);
-        emitOperandHelper(dst, src);
-    }
-
-    // 32bit only pieces of the assembler
-
-    public final void decl(Register dst) {
-        // Don't use it directly. Use Macrodecrementl() instead.
-        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
-        int encode = prefixAndEncode(dst.encoding);
-        emitByte(0xFF);
-        emitByte(0xC8 | encode);
-    }
-
-    public final void incl(Register dst) {
-        // Don't use it directly. Use Macroincrementl() instead.
-        // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
-        int encode = prefixAndEncode(dst.encoding);
-        emitByte(0xFF);
-        emitByte(0xC0 | encode);
-    }
-
-    int prefixAndEncode(int regEnc) {
-        return prefixAndEncode(regEnc, false);
-    }
-
-    int prefixAndEncode(int regEnc, boolean byteinst) {
-        if (regEnc >= 8) {
-            emitByte(Prefix.REXB);
-            return regEnc - 8;
-        } else if (byteinst && regEnc >= 4) {
-            emitByte(Prefix.REX);
-        }
-        return regEnc;
-    }
-
-    int prefixqAndEncode(int regEnc) {
-        if (regEnc < 8) {
-            emitByte(Prefix.REXW);
-            return regEnc;
-        } else {
-            emitByte(Prefix.REXWB);
-            return regEnc - 8;
-        }
-    }
-
-    int prefixAndEncode(int dstEnc, int srcEnc) {
-        return prefixAndEncode(dstEnc, srcEnc, false);
-    }
-
-    int prefixAndEncode(int dstEncoding, int srcEncoding, boolean byteinst) {
-        int srcEnc = srcEncoding;
-        int dstEnc = dstEncoding;
-        if (dstEnc < 8) {
-            if (srcEnc >= 8) {
-                emitByte(Prefix.REXB);
-                srcEnc -= 8;
-            } else if (byteinst && srcEnc >= 4) {
-                emitByte(Prefix.REX);
-            }
-        } else {
-            if (srcEnc < 8) {
-                emitByte(Prefix.REXR);
-            } else {
-                emitByte(Prefix.REXRB);
-                srcEnc -= 8;
-            }
-            dstEnc -= 8;
-        }
-        return dstEnc << 3 | srcEnc;
-    }
-
-    /**
-     * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand prefix. If the given
-     * operands exceed 3 bits, the 4th bit is encoded in the prefix.
-     *
-     * @param regEncoding the encoding of the register part of the ModRM-Byte
-     * @param rmEncoding the encoding of the r/m part of the ModRM-Byte
-     * @return the lower 6 bits of the ModRM-Byte that should be emitted
-     */
-    private int prefixqAndEncode(int regEncoding, int rmEncoding) {
-        int rmEnc = rmEncoding;
-        int regEnc = regEncoding;
-        if (regEnc < 8) {
-            if (rmEnc < 8) {
-                emitByte(Prefix.REXW);
-            } else {
-                emitByte(Prefix.REXWB);
-                rmEnc -= 8;
-            }
-        } else {
-            if (rmEnc < 8) {
-                emitByte(Prefix.REXWR);
-            } else {
-                emitByte(Prefix.REXWRB);
-                rmEnc -= 8;
-            }
-            regEnc -= 8;
-        }
-        return regEnc << 3 | rmEnc;
-    }
-
-    private void prefix(Register reg) {
-        if (reg.encoding >= 8) {
-            emitByte(Prefix.REXB);
-        }
-    }
-
-    private static boolean needsRex(Value value) {
-        return isRegister(value) && asRegister(value).encoding >= MinEncodingNeedsRex;
-    }
-
-
-    private void prefix(Address adr) {
-        if (needsRex(adr.getBase())) {
-            if (needsRex(adr.getIndex())) {
-                emitByte(Prefix.REXXB);
-            } else {
-                emitByte(Prefix.REXB);
-            }
-        } else {
-            if (needsRex(adr.getIndex())) {
-                emitByte(Prefix.REXX);
-            }
-        }
-    }
-
-    private void prefixq(Address adr) {
-        if (needsRex(adr.getBase())) {
-            if (needsRex(adr.getIndex())) {
-                emitByte(Prefix.REXWXB);
-            } else {
-                emitByte(Prefix.REXWB);
-            }
-        } else {
-            if (needsRex(adr.getIndex())) {
-                emitByte(Prefix.REXWX);
-            } else {
-                emitByte(Prefix.REXW);
-            }
-        }
-    }
-
-    private void prefix(Address adr, Register reg) {
-        if (reg.encoding < 8) {
-            if (needsRex(adr.getBase())) {
-                if (needsRex(adr.getIndex())) {
-                    emitByte(Prefix.REXXB);
-                } else {
-                    emitByte(Prefix.REXB);
-                }
-            } else {
-                if (needsRex(adr.getIndex())) {
-                    emitByte(Prefix.REXX);
-                } else if (reg.encoding >= 4) {
-                    emitByte(Prefix.REX);
-                }
-            }
-        } else {
-            if (needsRex(adr.getBase())) {
-                if (needsRex(adr.getIndex())) {
-                    emitByte(Prefix.REXRXB);
-                } else {
-                    emitByte(Prefix.REXRB);
-                }
-            } else {
-                if (needsRex(adr.getIndex())) {
-                    emitByte(Prefix.REXRX);
-                } else {
-                    emitByte(Prefix.REXR);
-                }
-            }
-        }
-    }
-
-    private void prefixq(Address adr, Register src) {
-        if (src.encoding < 8) {
-            if (needsRex(adr.getBase())) {
-                if (needsRex(adr.getIndex())) {
-                    emitByte(Prefix.REXWXB);
-                } else {
-                    emitByte(Prefix.REXWB);
-                }
-            } else {
-                if (needsRex(adr.getIndex())) {
-                    emitByte(Prefix.REXWX);
-                } else {
-                    emitByte(Prefix.REXW);
-                }
-            }
-        } else {
-            if (needsRex(adr.getBase())) {
-                if (needsRex(adr.getIndex())) {
-                    emitByte(Prefix.REXWRXB);
-                } else {
-                    emitByte(Prefix.REXWRB);
-                }
-            } else {
-                if (needsRex(adr.getIndex())) {
-                    emitByte(Prefix.REXWRX);
-                } else {
-                    emitByte(Prefix.REXWR);
-                }
-            }
-        }
-    }
-
-    public final void addq(Address dst, int imm32) {
-        prefixq(dst);
-        emitArithOperand(0x81, rax, dst, imm32);
-    }
-
-    public final void addq(Address dst, Register src) {
-        prefixq(dst, src);
-        emitByte(0x01);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void addq(Register dst, int imm32) {
-        prefixqAndEncode(dst.encoding);
-        emitArith(0x81, 0xC0, dst, imm32);
-    }
-
-    public final void addq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x03);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void addq(Register dst, Register src) {
-        prefixqAndEncode(dst.encoding, src.encoding);
-        emitArith(0x03, 0xC0, dst, src);
-    }
-
-    public final void andq(Register dst, int imm32) {
-        prefixqAndEncode(dst.encoding);
-        emitArith(0x81, 0xE0, dst, imm32);
-    }
-
-    public final void andq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x23);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void andq(Register dst, Register src) {
-        prefixqAndEncode(dst.encoding, src.encoding);
-        emitArith(0x23, 0xC0, dst, src);
-    }
-
-    public final void bswapq(Register reg) {
-        int encode = prefixqAndEncode(reg.encoding);
-        emitByte(0x0F);
-        emitByte(0xC8 | encode);
-    }
-
-    public final void cdqq() {
-        emitByte(Prefix.REXW);
-        emitByte(0x99);
-    }
-
-    public final void cmovq(ConditionFlag cc, Register dst, Register src) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x40 | cc.value);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cmovq(ConditionFlag cc, Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x0F);
-        emitByte(0x40 | cc.value);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void cmpq(Address dst, int imm32) {
-        prefixq(dst);
-        emitByte(0x81);
-        emitOperandHelper(rdi, dst);
-        emitInt(imm32);
-    }
-
-    public final void cmpq(Register dst, int imm32) {
-        prefixqAndEncode(dst.encoding);
-        emitArith(0x81, 0xF8, dst, imm32);
-    }
-
-    public final void cmpq(Address dst, Register src) {
-        prefixq(dst, src);
-        emitByte(0x3B);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void cmpq(Register dst, Register src) {
-        prefixqAndEncode(dst.encoding, src.encoding);
-        emitArith(0x3B, 0xC0, dst, src);
-    }
-
-    public final void cmpq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x3B);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void cmpxchgq(Register reg, Address adr) {
-        prefixq(adr, reg);
-        emitByte(0x0F);
-        emitByte(0xB1);
-        emitOperandHelper(reg, adr);
-    }
-
-    public final void cvtsi2sdq(Register dst, Register src) {
-        assert dst.isFpu();
-        emitByte(0xF2);
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x2A);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cvtsi2ssq(Register dst, Register src) {
-        assert dst.isFpu();
-        emitByte(0xF3);
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x2A);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cvttsd2siq(Register dst, Register src) {
-        assert src.isFpu();
-        emitByte(0xF2);
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x2C);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void cvttss2siq(Register dst, Register src) {
-        assert src.isFpu();
-        emitByte(0xF3);
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0x2C);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void decq(Register dst) {
-        // Don't use it directly. Use Macrodecrementq() instead.
-        // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
-        int encode = prefixqAndEncode(dst.encoding);
-        emitByte(0xFF);
-        emitByte(0xC8 | encode);
-    }
-
-    public final void decq(Address dst) {
-        // Don't use it directly. Use Macrodecrementq() instead.
-        prefixq(dst);
-        emitByte(0xFF);
-        emitOperandHelper(rcx, dst);
-    }
-
-    public final void divq(Register src) {
-        int encode = prefixqAndEncode(src.encoding);
-        emitByte(0xF7);
-        emitByte(0xF0 | encode);
-    }
-
-    public final void idivq(Register src) {
-        int encode = prefixqAndEncode(src.encoding);
-        emitByte(0xF7);
-        emitByte(0xF8 | encode);
-    }
-
-    public final void imulq(Register dst, Register src) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xAF);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void imulq(Register dst, Register src, int value) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        if (isByte(value)) {
-            emitByte(0x6B);
-            emitByte(0xC0 | encode);
-            emitByte(value);
-        } else {
-            emitByte(0x69);
-            emitByte(0xC0 | encode);
-            emitInt(value);
-        }
-    }
-
-    public final void incq(Register dst) {
-        // Don't use it directly. Use Macroincrementq() instead.
-        // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
-        int encode = prefixqAndEncode(dst.encoding);
-        emitByte(0xFF);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void incq(Address dst) {
-        // Don't use it directly. Use Macroincrementq() instead.
-        prefixq(dst);
-        emitByte(0xFF);
-        emitOperandHelper(rax, dst);
-    }
-
-    public final void movq(Register dst, long imm64) {
-        int encode = prefixqAndEncode(dst.encoding);
-        emitByte(0xB8 | encode);
-        emitLong(imm64);
-    }
-
-    public final void movdq(Register dst, Register src) {
-
-        // table D-1 says MMX/SSE2
-        emitByte(0x66);
-
-        if (dst.isFpu()) {
-            assert dst.isFpu();
-            int encode = prefixqAndEncode(dst.encoding, src.encoding);
-            emitByte(0x0F);
-            emitByte(0x6E);
-            emitByte(0xC0 | encode);
-        } else if (src.isFpu()) {
-
-            // swap src/dst to get correct prefix
-            int encode = prefixqAndEncode(src.encoding, dst.encoding);
-            emitByte(0x0F);
-            emitByte(0x7E);
-            emitByte(0xC0 | encode);
-        } else {
-            throw new InternalError("should not reach here");
-        }
-    }
-
-    public final void movsbq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x0F);
-        emitByte(0xBE);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movsbq(Register dst, Register src) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xBE);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movslq(Register dst, int imm32) {
-        int encode = prefixqAndEncode(dst.encoding);
-        emitByte(0xC7 | encode);
-        emitInt(imm32);
-        // dbx shows movslq(X86.rcx, 3) as movq $0x0000000049000000,(%X86.rbx)
-        // and movslq(X86.r8, 3); as movl $0x0000000048000000,(%X86.rbx)
-        // as a result we shouldn't use until tested at runtime...
-        throw new InternalError("untested");
-    }
-
-    public final void movslq(Address dst, int imm32) {
-        prefixq(dst);
-        emitByte(0xC7);
-        emitOperandHelper(rax, dst);
-        emitInt(imm32);
-    }
-
-    public final void movslq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x63);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movslq(Register dst, Register src) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x63);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movswq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x0F);
-        emitByte(0xBF);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movswq(Register dst, Register src) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xBF);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movzbq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x0F);
-        emitByte(0xB6);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movzbq(Register dst, Register src) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xB6);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void movzwq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x0F);
-        emitByte(0xB7);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void movzwq(Register dst, Register src) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x0F);
-        emitByte(0xB7);
-        emitByte(0xC0 | encode);
-    }
-
-    public final void negq(Register dst) {
-        int encode = prefixqAndEncode(dst.encoding);
-        emitByte(0xF7);
-        emitByte(0xD8 | encode);
-    }
-
-    public final void notq(Register dst) {
-        int encode = prefixqAndEncode(dst.encoding);
-        emitByte(0xF7);
-        emitByte(0xD0 | encode);
-    }
-
-    public final void orq(Address dst, int imm32) {
-        prefixq(dst);
-        emitByte(0x81);
-        emitOperandHelper(rcx, dst);
-        emitInt(imm32);
-    }
-
-    public final void orq(Register dst, int imm32) {
-        prefixqAndEncode(dst.encoding);
-        emitArith(0x81, 0xC8, dst, imm32);
-    }
-
-    public final void orq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x0B);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void orq(Register dst, Register src) {
-        prefixqAndEncode(dst.encoding, src.encoding);
-        emitArith(0x0B, 0xC0, dst, src);
-    }
-
-    public final void popq(Address dst) {
-        prefixq(dst);
-        emitByte(0x8F);
-        emitOperandHelper(rax, dst);
-    }
-
-    public final void pushq(Address src) {
-        prefixq(src);
-        emitByte(0xFF);
-        emitOperandHelper(rsi, src);
-    }
-
-    public final void rclq(Register dst, int imm8) {
-        assert isShiftCount(imm8 >> 1) : "illegal shift count";
-        int encode = prefixqAndEncode(dst.encoding);
-        if (imm8 == 1) {
-            emitByte(0xD1);
-            emitByte(0xD0 | encode);
-        } else {
-            emitByte(0xC1);
-            emitByte(0xD0 | encode);
-            emitByte(imm8);
-        }
-    }
-
-    public final void sarq(Register dst, int imm8) {
-        assert isShiftCount(imm8 >> 1) : "illegal shift count";
-        int encode = prefixqAndEncode(dst.encoding);
-        if (imm8 == 1) {
-            emitByte(0xD1);
-            emitByte(0xF8 | encode);
-        } else {
-            emitByte(0xC1);
-            emitByte(0xF8 | encode);
-            emitByte(imm8);
-        }
-    }
-
-    public final void sarq(Register dst) {
-        int encode = prefixqAndEncode(dst.encoding);
-        emitByte(0xD3);
-        emitByte(0xF8 | encode);
-    }
-
-    public final void shlq(Register dst, int imm8) {
-        assert isShiftCount(imm8 >> 1) : "illegal shift count";
-        int encode = prefixqAndEncode(dst.encoding);
-        if (imm8 == 1) {
-            emitByte(0xD1);
-            emitByte(0xE0 | encode);
-        } else {
-            emitByte(0xC1);
-            emitByte(0xE0 | encode);
-            emitByte(imm8);
-        }
-    }
-
-    public final void shlq(Register dst) {
-        int encode = prefixqAndEncode(dst.encoding);
-        emitByte(0xD3);
-        emitByte(0xE0 | encode);
-    }
-
-    public final void shrq(Register dst, int imm8) {
-        assert isShiftCount(imm8 >> 1) : "illegal shift count";
-        int encode = prefixqAndEncode(dst.encoding);
-        emitByte(0xC1);
-        emitByte(0xE8 | encode);
-        emitByte(imm8);
-    }
-
-    public final void shrq(Register dst) {
-        int encode = prefixqAndEncode(dst.encoding);
-        emitByte(0xD3);
-        emitByte(0xE8 | encode);
-    }
-
-    public final void sqrtsd(Register dst, Address src) {
-        assert dst.isFpu();
-
-        emitByte(0xF2);
-        prefix(src, dst);
-        emitByte(0x0F);
-        emitByte(0x51);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void subq(Address dst, int imm32) {
-        prefixq(dst);
-        if (isByte(imm32)) {
-            emitByte(0x83);
-            emitOperandHelper(rbp, dst);
-            emitByte(imm32 & 0xFF);
-        } else {
-            emitByte(0x81);
-            emitOperandHelper(rbp, dst);
-            emitInt(imm32);
-        }
-    }
-
-    public final void subq(Register dst, int imm32) {
-        prefixqAndEncode(dst.encoding);
-        emitArith(0x81, 0xE8, dst, imm32);
-    }
-
-    public final void subq(Address dst, Register src) {
-        prefixq(dst, src);
-        emitByte(0x29);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void subq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x2B);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void subq(Register dst, Register src) {
-        prefixqAndEncode(dst.encoding, src.encoding);
-        emitArith(0x2B, 0xC0, dst, src);
-    }
-
-    public final void testq(Register dst, int imm32) {
-        // not using emitArith because test
-        // doesn't support sign-extension of
-        // 8bit operands
-        int encode = dst.encoding;
-        if (encode == 0) {
-            emitByte(Prefix.REXW);
-            emitByte(0xA9);
-        } else {
-            encode = prefixqAndEncode(encode);
-            emitByte(0xF7);
-            emitByte(0xC0 | encode);
-        }
-        emitInt(imm32);
-    }
-
-    public final void testq(Register dst, Register src) {
-        prefixqAndEncode(dst.encoding, src.encoding);
-        emitArith(0x85, 0xC0, dst, src);
-    }
-
-    public final void xaddq(Address dst, Register src) {
-        prefixq(dst, src);
-        emitByte(0x0F);
-        emitByte(0xC1);
-        emitOperandHelper(src, dst);
-    }
-
-    public final void xchgq(Register dst, Address src) {
-        prefixq(src, dst);
-        emitByte(0x87);
-        emitOperandHelper(dst, src);
-    }
-
-    public final void xchgq(Register dst, Register src) {
-        int encode = prefixqAndEncode(dst.encoding, src.encoding);
-        emitByte(0x87);
-        emitByte(0xc0 | encode);
-    }
-
-    public final void xorq(Register dst, int imm32) {
-        prefixqAndEncode(dst.encoding);
-        emitArith(0x81, 0xF0, dst, imm32);
-    }
-
-    public final void xorq(Register dst, Register src) {
-        prefixqAndEncode(dst.encoding, src.encoding);
-        emitArith(0x33, 0xC0, dst, src);
-    }
-
-    public final void xorq(Register dst, Address src) {
-
-        prefixq(src, dst);
-        emitByte(0x33);
-        emitOperandHelper(dst, src);
-
-    }
-
-    public final void membar(int barriers) {
-        if (target.isMP) {
-            // We only have to handle StoreLoad
-            if ((barriers & STORE_LOAD) != 0) {
-                // All usable chips support "locked" instructions which suffice
-                // as barriers, and are much faster than the alternative of
-                // using cpuid instruction. We use here a locked add [rsp],0.
-                // This is conveniently otherwise a no-op except for blowing
-                // flags.
-                // Any change to this code may need to revisit other places in
-                // the code where this idiom is used, in particular the
-                // orderAccess code.
-                lock();
-                addl(new Address(Word, RSP, 0), 0); // Assert the lock# signal here
-            }
-        }
-    }
-
-    @Override
-    protected final void patchJumpTarget(int branch, int branchTarget) {
-        int op = codeBuffer.getByte(branch);
-        assert op == 0xE8 // call
-            || op == 0x00 // jump table entry
-            || op == 0xE9 // jmp
-            || op == 0xEB // short jmp
-            || (op & 0xF0) == 0x70 // short jcc
-            || op == 0x0F && (codeBuffer.getByte(branch + 1) & 0xF0) == 0x80 // jcc
-        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
-
-        if (op == 0x00) {
-            int offsetToJumpTableBase = codeBuffer.getShort(branch + 1);
-            int jumpTableBase = branch - offsetToJumpTableBase;
-            int imm32 = branchTarget - jumpTableBase;
-            codeBuffer.emitInt(imm32, branch);
-        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
-
-            // short offset operators (jmp and jcc)
-            int imm8 = branchTarget - (branch + 2);
-            codeBuffer.emitByte(imm8, branch + 1);
-
-        } else {
-
-            int off = 1;
-            if (op == 0x0F) {
-                off = 2;
-            }
-
-            int imm32 = branchTarget - (branch + 4 + off);
-            codeBuffer.emitInt(imm32, branch + off);
-        }
-    }
-
-    public void nullCheck(Register r) {
-        testl(AMD64.rax, new Address(Word, r.asValue(Word), 0));
-    }
-
-    @Override
-    public void align(int modulus) {
-        if (codeBuffer.position() % modulus != 0) {
-            nop(modulus - (codeBuffer.position() % modulus));
-        }
-    }
-
-    public void pushfq() {
-        emitByte(0x9c);
-    }
-
-    public void popfq() {
-        emitByte(0x9D);
-    }
-
-    /**
-     * Makes sure that a subsequent {@linkplain #call} does not fail the alignment check.
-     */
-    public final void alignForPatchableDirectCall() {
-        int dispStart = codeBuffer.position() + 1;
-        int mask = target.wordSize - 1;
-        if ((dispStart & ~mask) != ((dispStart + 3) & ~mask)) {
-            nop(target.wordSize - (dispStart & mask));
-            assert ((codeBuffer.position() + 1) & mask) == 0;
-        }
-    }
-
-    /**
-     * Emits a direct call instruction. Note that the actual call target is not specified, because all calls
-     * need patching anyway. Therefore, 0 is emitted as the call target, and the user is responsible
-     * to add the call address to the appropriate patching tables.
-     */
-    public final void call() {
-        emitByte(0xE8);
-        emitInt(0);
-    }
-
-    public final void call(Register src) {
-        int encode = prefixAndEncode(src.encoding);
-        emitByte(0xFF);
-        emitByte(0xD0 | encode);
-    }
-
-    public void int3() {
-        emitByte(0xCC);
-    }
-
-    public void enter(short imm16, byte imm8) {
-        emitByte(0xC8);
-        // appended:
-        emitByte(imm16 & 0xff);
-        emitByte((imm16 >> 8) & 0xff);
-        emitByte(imm8);
-    }
-
-    private void emitx87(int b1, int b2, int i) {
-        assert 0 <= i && i < 8 : "illegal stack offset";
-        emitByte(b1);
-        emitByte(b2 + i);
-    }
-
-    public void fld(Address src) {
-        emitByte(0xDD);
-        emitOperandHelper(rax, src);
-    }
-
-    public void fld(int i) {
-        emitx87(0xD9, 0xC0, i);
-    }
-
-    public void fldln2() {
-        emitByte(0xD9);
-        emitByte(0xED);
-    }
-
-    public void fldlg2() {
-        emitByte(0xD9);
-        emitByte(0xEC);
-    }
-
-    public void fyl2x() {
-        emitByte(0xD9);
-        emitByte(0xF1);
-    }
-
-    public void fstp(Address src) {
-        emitByte(0xDD);
-        emitOperandHelper(rbx, src);
-    }
-
-    public void fsin() {
-        emitByte(0xD9);
-        emitByte(0xFE);
-    }
-
-    public void fcos() {
-        emitByte(0xD9);
-        emitByte(0xFF);
-    }
-
-    public void fptan() {
-        emitByte(0xD9);
-        emitByte(0xF2);
-    }
-
-    public void fstp(int i) {
-        emitx87(0xDD, 0xD8, i);
-    }
-
-    @Override
-    public void bangStack(int disp) {
-        movq(new Address(target.wordKind, AMD64.RSP, -disp), AMD64.rax);
-    }
-}
--- a/graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/AMD64MacroAssembler.java	Wed Oct 03 16:49:51 2012 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,392 +0,0 @@
-/*
- * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.max.asm.amd64;
-
-import static com.oracle.max.asm.amd64.AMD64AsmOptions.*;
-
-import com.oracle.graal.api.code.*;
-import com.oracle.graal.api.meta.*;
-import com.oracle.max.asm.*;
-
-/**
- * This class implements commonly used X86 code patterns.
- */
-public class AMD64MacroAssembler extends AMD64Assembler {
-
-    public AMD64MacroAssembler(TargetDescription target, RegisterConfig registerConfig) {
-        super(target, registerConfig);
-    }
-
-    public void pushptr(Address src) {
-        pushq(src);
-    }
-
-    public void popptr(Address src) {
-        popq(src);
-    }
-
-    public void xorptr(Register dst, Register src) {
-        xorq(dst, src);
-    }
-
-    public void xorptr(Register dst, Address src) {
-        xorq(dst, src);
-    }
-
-    // 64 bit versions
-
-
-    public void decrementq(Register reg, int value) {
-        if (value == Integer.MIN_VALUE) {
-            subq(reg, value);
-            return;
-        }
-        if (value < 0) {
-            incrementq(reg, -value);
-            return;
-        }
-        if (value == 0) {
-            return;
-        }
-        if (value == 1 && UseIncDec) {
-            decq(reg);
-        } else {
-            subq(reg, value);
-        }
-    }
-
-    public void incrementq(Register reg, int value) {
-        if (value == Integer.MIN_VALUE) {
-            addq(reg, value);
-            return;
-        }
-        if (value < 0) {
-            decrementq(reg, -value);
-            return;
-        }
-        if (value == 0) {
-            return;
-        }
-        if (value == 1 && UseIncDec) {
-            incq(reg);
-        } else {
-            addq(reg, value);
-        }
-    }
-
-    // These are mostly for initializing null
-    public void movptr(Address dst, int src) {
-        movslq(dst, src);
-    }
-
-    public final void cmp32(Register src1, int imm) {
-        cmpl(src1, imm);
-    }
-
-    public final void cmp32(Register src1, Address src2) {
-        cmpl(src1, src2);
-    }
-
-    public void cmpsd2int(Register opr1, Register opr2, Register dst, boolean unorderedIsLess) {
-        assert opr1.isFpu() && opr2.isFpu();
-        ucomisd(opr1, opr2);
-
-        Label l = new Label();
-        if (unorderedIsLess) {
-            movl(dst, -1);
-            jcc(AMD64Assembler.ConditionFlag.parity, l);
-            jcc(AMD64Assembler.ConditionFlag.below, l);
-            movl(dst, 0);
-            jcc(AMD64Assembler.ConditionFlag.equal, l);
-            incrementl(dst, 1);
-        } else { // unordered is greater
-            movl(dst, 1);
-            jcc(AMD64Assembler.ConditionFlag.parity, l);
-            jcc(AMD64Assembler.ConditionFlag.above, l);
-            movl(dst, 0);
-            jcc(AMD64Assembler.ConditionFlag.equal, l);
-            decrementl(dst, 1);
-        }
-        bind(l);
-    }
-
-    public void cmpss2int(Register opr1, Register opr2, Register dst, boolean unorderedIsLess) {
-        assert opr1.isFpu();
-        assert opr2.isFpu();
-        ucomiss(opr1, opr2);
-
-        Label l = new Label();
-        if (unorderedIsLess) {
-            movl(dst, -1);
-            jcc(AMD64Assembler.ConditionFlag.parity, l);
-            jcc(AMD64Assembler.ConditionFlag.below, l);
-            movl(dst, 0);
-            jcc(AMD64Assembler.ConditionFlag.equal, l);
-            incrementl(dst, 1);
-        } else { // unordered is greater
-            movl(dst, 1);
-            jcc(AMD64Assembler.ConditionFlag.parity, l);
-            jcc(AMD64Assembler.ConditionFlag.above, l);
-            movl(dst, 0);
-            jcc(AMD64Assembler.ConditionFlag.equal, l);
-            decrementl(dst, 1);
-        }
-        bind(l);
-    }
-
-    public void cmpptr(Register src1, Register src2) {
-        cmpq(src1, src2);
-    }
-
-    public void cmpptr(Register src1, Address src2) {
-        cmpq(src1, src2);
-    }
-
-    public void cmpptr(Register src1, int src2) {
-        cmpq(src1, src2);
-    }
-
-    public void cmpptr(Address src1, int src2) {
-        cmpq(src1, src2);
-    }
-
-    public void decrementl(Register reg, int value) {
-        if (value == Integer.MIN_VALUE) {
-            subl(reg, value);
-            return;
-        }
-        if (value < 0) {
-            incrementl(reg, -value);
-            return;
-        }
-        if (value == 0) {
-            return;
-        }
-        if (value == 1 && UseIncDec) {
-            decl(reg);
-        } else {
-            subl(reg, value);
-        }
-    }
-
-    public void decrementl(Address dst, int value) {
-        if (value == Integer.MIN_VALUE) {
-            subl(dst, value);
-            return;
-        }
-        if (value < 0) {
-            incrementl(dst, -value);
-            return;
-        }
-        if (value == 0) {
-            return;
-        }
-        if (value == 1 && UseIncDec) {
-            decl(dst);
-        } else {
-            subl(dst, value);
-        }
-    }
-
-    public void incrementl(Register reg, int value) {
-        if (value == Integer.MIN_VALUE) {
-            addl(reg, value);
-            return;
-        }
-        if (value < 0) {
-            decrementl(reg, -value);
-            return;
-        }
-        if (value == 0) {
-            return;
-        }
-        if (value == 1 && UseIncDec) {
-            incl(reg);
-        } else {
-            addl(reg, value);
-        }
-    }
-
-    public void incrementl(Address dst, int value) {
-        if (value == Integer.MIN_VALUE) {
-            addl(dst, value);
-            return;
-        }
-        if (value < 0) {
-            decrementl(dst, -value);
-            return;
-        }
-        if (value == 0) {
-            return;
-        }
-        if (value == 1 && UseIncDec) {
-            incl(dst);
-        } else {
-            addl(dst, value);
-        }
-    }
-
-    public void signExtendByte(Register reg) {
-        if (reg.isByte()) {
-            movsxb(reg, reg); // movsxb
-        } else {
-            shll(reg, 24);
-            sarl(reg, 24);
-        }
-    }
-
-    public void signExtendShort(Register reg) {
-        movsxw(reg, reg); // movsxw
-    }
-
-    // Support optimal SSE move instructions.
-    public void movflt(Register dst, Register src) {
-        assert dst.isFpu() && src.isFpu();
-        if (UseXmmRegToRegMoveAll) {
-            movaps(dst, src);
-        } else {
-            movss(dst, src);
-        }
-    }
-
-    public void movflt(Register dst, Address src) {
-        assert dst.isFpu();
-        movss(dst, src);
-    }
-
-    public void movflt(Address dst, Register src) {
-        assert src.isFpu();
-        movss(dst, src);
-    }
-
-    public void movdbl(Register dst, Register src) {
-        assert dst.isFpu() && src.isFpu();
-        if (UseXmmRegToRegMoveAll) {
-            movapd(dst, src);
-        } else {
-            movsd(dst, src);
-        }
-    }
-
-    public void movdbl(Register dst, Address src) {
-        assert dst.isFpu();
-        if (UseXmmLoadAndClearUpper) {
-            movsd(dst, src);
-        } else {
-            movlpd(dst, src);
-        }
-    }
-
-    public void movdbl(Address dst, Register src) {
-        assert src.isFpu();
-        movsd(dst, src);
-    }
-
-    /**
-     * Non-atomic write of a 64-bit constant to memory. Do not use
-     * if the address might be a volatile field!
-     */
-    public void movlong(Address dst, long src) {
-        Address high = new Address(dst.getKind(), dst.getBase(), dst.getIndex(), dst.getScale(), dst.getDisplacement() + 4);
-        movl(dst, (int) (src & 0xFFFFFFFF));
-        movl(high, (int) (src >> 32));
-    }
-
-    public void xchgptr(Register src1, Register src2) {
-        xchgq(src1, src2);
-    }
-
-    public void flog(Register dest, Register value, boolean base10) {
-        assert value.spillSlotSize == dest.spillSlotSize;
-
-        Address tmp = new Address(Kind.Double, AMD64.RSP);
-        if (base10) {
-            fldlg2();
-        } else {
-            fldln2();
-        }
-        subq(AMD64.rsp, value.spillSlotSize);
-        movsd(tmp, value);
-        fld(tmp);
-        fyl2x();
-        fstp(tmp);
-        movsd(dest, tmp);
-        addq(AMD64.rsp, dest.spillSlotSize);
-    }
-
-    public void fsin(Register dest, Register value) {
-        ftrig(dest, value, 's');
-    }
-
-    public void fcos(Register dest, Register value) {
-        ftrig(dest, value, 'c');
-    }
-
-    public void ftan(Register dest, Register value) {
-        ftrig(dest, value, 't');
-    }
-
-    private void ftrig(Register dest, Register value, char op) {
-        assert value.spillSlotSize == dest.spillSlotSize;
-
-        Address tmp = new Address(Kind.Double, AMD64.RSP);
-        subq(AMD64.rsp, value.spillSlotSize);
-        movsd(tmp, value);
-        fld(tmp);
-        if (op == 's') {
-            fsin();
-        } else if (op == 'c') {
-            fcos();
-        } else if (op == 't') {
-            fptan();
-            fstp(0); // ftan pushes 1.0 in addition to the actual result, pop
-        } else {
-            throw new InternalError("should not reach here");
-        }
-        fstp(tmp);
-        movsd(dest, tmp);
-        addq(AMD64.rsp, dest.spillSlotSize);
-    }
-
-    /**
-     * Emit code to save a given set of callee save registers in the
-     * {@linkplain CalleeSaveLayout CSA} within the frame.
-     * @param csl the description of the CSA
-     * @param frameToCSA offset from the frame pointer to the CSA
-     */
-    public void save(CalleeSaveLayout csl, int frameToCSA) {
-        RegisterValue frame = frameRegister.asValue();
-        for (Register r : csl.registers) {
-            int offset = csl.offsetOf(r);
-            movq(new Address(target.wordKind, frame, frameToCSA + offset), r);
-        }
-    }
-
-    public void restore(CalleeSaveLayout csl, int frameToCSA) {
-        RegisterValue frame = frameRegister.asValue();
-        for (Register r : csl.registers) {
-            int offset = csl.offsetOf(r);
-            movq(r, new Address(target.wordKind, frame, frameToCSA + offset));
-        }
-    }
-}
--- a/graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/X86InstructionDecoder.java	Wed Oct 03 16:49:51 2012 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,507 +0,0 @@
-/*
- * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.max.asm.amd64;
-
-
-public final class X86InstructionDecoder {
-
-    private boolean targetIs64Bit;
-    private byte[] code;
-    private int currentEndOfInstruction;
-    private int currentDisplacementPosition;
-
-    private static class Prefix {
-
-        // segment overrides
-        public static final int CSSegment = 0x2e;
-        public static final int SSSegment = 0x36;
-        public static final int DSSegment = 0x3e;
-        public static final int ESSegment = 0x26;
-        public static final int FSSegment = 0x64;
-        public static final int GSSegment = 0x65;
-        public static final int REX = 0x40;
-        public static final int REXB = 0x41;
-        public static final int REXX = 0x42;
-        public static final int REXXB = 0x43;
-        public static final int REXR = 0x44;
-        public static final int REXRB = 0x45;
-        public static final int REXRX = 0x46;
-        public static final int REXRXB = 0x47;
-        public static final int REXW = 0x48;
-        public static final int REXWB = 0x49;
-        public static final int REXWX = 0x4A;
-        public static final int REXWXB = 0x4B;
-        public static final int REXWR = 0x4C;
-        public static final int REXWRB = 0x4D;
-        public static final int REXWRX = 0x4E;
-        public static final int REXWRXB = 0x4F;
-    }
-
-    private X86InstructionDecoder(byte[] code, boolean targetIs64Bit) {
-        this.code = code;
-        this.targetIs64Bit = targetIs64Bit;
-    }
-
-    public int currentEndOfInstruction() {
-        return currentEndOfInstruction;
-    }
-
-    public int currentDisplacementPosition() {
-        return currentDisplacementPosition;
-    }
-
-    public void decodePosition(int inst) {
-
-        assert inst >= 0 && inst < code.length;
-
-        // Decode the given instruction, and return the Pointer of
-        // an embedded 32-bit operand word.
-
-        // If "which" is WhichOperand.disp32operand, selects the displacement portion
-        // of an effective Pointer specifier.
-        // If "which" is imm64Operand, selects the trailing immediate constant.
-        // If "which" is WhichOperand.call32operand, selects the displacement of a call or jump.
-        // Caller is responsible for ensuring that there is such an operand,
-        // and that it is 32/64 bits wide.
-
-        // If "which" is endPcOperand, find the end of the instruction.
-
-        int ip = inst;
-        boolean is64bit = false;
-
-        boolean hasDisp32 = false;
-        int tailSize = 0; // other random bytes (#32, #16, etc.) at end of insn
-
-        boolean againAfterPrefix = true;
-
-        while (againAfterPrefix) {
-            againAfterPrefix = false;
-            switch (0xFF & code[ip++]) {
-
-                // These convenience macros generate groups of "case" labels for the switch.
-
-                case Prefix.CSSegment:
-                case Prefix.SSSegment:
-                case Prefix.DSSegment:
-                case Prefix.ESSegment:
-                case Prefix.FSSegment:
-                case Prefix.GSSegment:
-                    // Seems dubious
-                    assert !targetIs64Bit : "shouldn't have that prefix";
-                    assert ip == inst + 1 : "only one prefix allowed";
-                    againAfterPrefix = true;
-                    break;
-
-                case 0x67:
-                case Prefix.REX:
-                case Prefix.REXB:
-                case Prefix.REXX:
-                case Prefix.REXXB:
-                case Prefix.REXR:
-                case Prefix.REXRB:
-                case Prefix.REXRX:
-                case Prefix.REXRXB:
-                    assert targetIs64Bit : "64bit prefixes";
-                    againAfterPrefix = true;
-                    break;
-
-                case Prefix.REXW:
-                case Prefix.REXWB:
-                case Prefix.REXWX:
-                case Prefix.REXWXB:
-                case Prefix.REXWR:
-                case Prefix.REXWRB:
-                case Prefix.REXWRX:
-                case Prefix.REXWRXB:
-                    assert targetIs64Bit : "64bit prefixes";
-                    is64bit = true;
-                    againAfterPrefix = true;
-                    break;
-
-                case 0xFF: // pushq a; decl a; incl a; call a; jmp a
-                case 0x88: // movb a, r
-                case 0x89: // movl a, r
-                case 0x8A: // movb r, a
-                case 0x8B: // movl r, a
-                case 0x8F: // popl a
-                    hasDisp32 = true;
-                    break;
-
-                case 0x68: // pushq #32
-                    currentEndOfInstruction = ip + 4;
-                    currentDisplacementPosition = ip;
-                    return; // not produced by emitOperand
-
-                case 0x66: // movw ... (size prefix)
-                    boolean againAfterSizePrefix2 = true;
-                    while (againAfterSizePrefix2) {
-                        againAfterSizePrefix2 = false;
-                        switch (0xFF & code[ip++]) {
-                            case Prefix.REX:
-                            case Prefix.REXB:
-                            case Prefix.REXX:
-                            case Prefix.REXXB:
-                            case Prefix.REXR:
-                            case Prefix.REXRB:
-                            case Prefix.REXRX:
-                            case Prefix.REXRXB:
-                            case Prefix.REXW:
-                            case Prefix.REXWB:
-                            case Prefix.REXWX:
-                            case Prefix.REXWXB:
-                            case Prefix.REXWR:
-                            case Prefix.REXWRB:
-                            case Prefix.REXWRX:
-                            case Prefix.REXWRXB:
-                                assert targetIs64Bit : "64bit prefix found";
-                                againAfterSizePrefix2 = true;
-                                break;
-                            case 0x8B: // movw r, a
-                            case 0x89: // movw a, r
-                                hasDisp32 = true;
-                                break;
-                            case 0xC7: // movw a, #16
-                                hasDisp32 = true;
-                                tailSize = 2; // the imm16
-                                break;
-                            case 0x0F: // several SSE/SSE2 variants
-                                ip--; // reparse the 0x0F
-                                againAfterPrefix = true;
-                                break;
-                            default:
-                                throw new InternalError("should not reach here");
-                        }
-                    }
-                    break;
-
-                case 0xB8: // movl/q r, #32/#64(oop?)
-                case 0xB9:
-                case 0xBA:
-                case 0xBB:
-                case 0xBC:
-                case 0xBD:
-                case 0xBE:
-                case 0xBF:
-                    currentEndOfInstruction = ip + (is64bit ? 8 : 4);
-                    currentDisplacementPosition = ip;
-                    return;
-
-                case 0x69: // imul r, a, #32
-                case 0xC7: // movl a, #32(oop?)
-                    tailSize = 4;
-                    hasDisp32 = true; // has both kinds of operands!
-                    break;
-
-                case 0x0F: // movx..., etc.
-                    switch (0xFF & code[ip++]) {
-                        case 0x12: // movlps
-                        case 0x28: // movaps
-                        case 0x2E: // ucomiss
-                        case 0x2F: // comiss
-                        case 0x54: // andps
-                        case 0x55: // andnps
-                        case 0x56: // orps
-                        case 0x57: // xorps
-                        case 0x6E: // movd
-                        case 0x7E: // movd
-                        case 0xAE: // ldmxcsr a
-                            // 64bit side says it these have both operands but that doesn't
-                            // appear to be true
-                            hasDisp32 = true;
-                            break;
-
-                        case 0xAD: // shrd r, a, %cl
-                        case 0xAF: // imul r, a
-                        case 0xBE: // movsbl r, a (movsxb)
-                        case 0xBF: // movswl r, a (movsxw)
-                        case 0xB6: // movzbl r, a (movzxb)
-                        case 0xB7: // movzwl r, a (movzxw)
-                        case 0x40: // cmovl cc, r, a
-                        case 0x41:
-                        case 0x42:
-                        case 0x43:
-                        case 0x44:
-                        case 0x45:
-                        case 0x46:
-                        case 0x47:
-                        case 0x48:
-                        case 0x49:
-                        case 0x4A:
-                        case 0x4B:
-                        case 0x4C:
-                        case 0x4D:
-                        case 0x4E:
-                        case 0x4F:
-                        case 0xB0: // cmpxchgb
-                        case 0xB1: // cmpxchg
-                        case 0xC1: // xaddl
-                        case 0xC7: // cmpxchg8
-                        case 0x90: // setcc a
-                        case 0x91:
-                        case 0x92:
-                        case 0x93:
-                        case 0x94:
-                        case 0x95:
-                        case 0x96:
-                        case 0x97:
-                        case 0x98:
-                        case 0x99:
-                        case 0x9A:
-                        case 0x9B:
-                        case 0x9C:
-                        case 0x9D:
-                        case 0x9E:
-                        case 0x9F:
-                            hasDisp32 = true;
-                            // fall out of the switch to decode the Pointer
-                            break;
-
-                        case 0xAC: // shrd r, a, #8
-                            hasDisp32 = true;
-                            tailSize = 1; // the imm8
-                            break;
-
-                        case 0x80: // jcc rdisp32
-                        case 0x81:
-                        case 0x82:
-                        case 0x83:
-                        case 0x84:
-                        case 0x85:
-                        case 0x86:
-                        case 0x87:
-                        case 0x88:
-                        case 0x89:
-                        case 0x8A:
-                        case 0x8B:
-                        case 0x8C:
-                        case 0x8D:
-                        case 0x8E:
-                        case 0x8F:
-                            currentEndOfInstruction = ip + 4;
-                            currentDisplacementPosition = ip;
-                            return;
-                        default:
-                            throw new InternalError("should not reach here");
-                    }
-                    break;
-
-                case 0x81: // addl a, #32; addl r, #32
-                    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
-                    // on 32bit in the case of cmpl, the imm might be an oop
-                    tailSize = 4;
-                    hasDisp32 = true; // has both kinds of operands!
-                    break;
-
-                case 0x83: // addl a, #8; addl r, #8
-                    // also: orl, adcl, sbbl, andl, subl, xorl, cmpl
-                    hasDisp32 = true; // has both kinds of operands!
-                    tailSize = 1;
-                    break;
-
-                case 0x9B:
-                    switch (0xFF & code[ip++]) {
-                        case 0xD9: // fnstcw a
-                            hasDisp32 = true;
-                            break;
-                        default:
-                            throw new InternalError("should not reach here");
-                    }
-                    break;
-
-                case 0x00: // addb a, r; addl a, r; addb r, a; addl r, a
-                case 0x01:
-                case 0x02:
-                case 0x03:
-                case 0x10: // adc...
-                case 0x11:
-                case 0x12:
-                case 0x13:
-                case 0x20: // and...
-                case 0x21:
-                case 0x22:
-                case 0x23:
-                case 0x30: // xor...
-                case 0x31:
-                case 0x32:
-                case 0x33:
-                case 0x08: // or...
-                case 0x09:
-                case 0x0a:
-                case 0x0b:
-                case 0x18: // sbb...
-                case 0x19:
-                case 0x1a:
-                case 0x1b:
-                case 0x28: // sub...
-                case 0x29:
-                case 0x2a:
-                case 0x2b:
-                case 0xF7: // mull a
-                case 0x8D: // lea r, a
-                case 0x87: // xchg r, a
-                case 0x38: // cmp...
-                case 0x39:
-                case 0x3a:
-                case 0x3b:
-                case 0x85: // test r, a
-                    hasDisp32 = true; // has both kinds of operands!
-                    break;
-
-                case 0xC1: // sal a, #8; sar a, #8; shl a, #8; shr a, #8
-                case 0xC6: // movb a, #8
-                case 0x80: // cmpb a, #8
-                case 0x6B: // imul r, a, #8
-                    hasDisp32 = true; // has both kinds of operands!
-                    tailSize = 1; // the imm8
-                    break;
-
-                case 0xE8: // call rdisp32
-                case 0xE9: // jmp rdisp32
-                    currentEndOfInstruction = ip + 4;
-                    currentDisplacementPosition = ip;
-                    return;
-
-                case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
-                case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
-                case 0xD9: // fldS a; fstS a; fstpS a; fldcw a
-                case 0xDD: // fldD a; fstD a; fstpD a
-                case 0xDB: // fildS a; fistpS a; fldX a; fstpX a
-                case 0xDF: // fildD a; fistpD a
-                case 0xD8: // faddS a; fsubrS a; fmulS a; fdivrS a; fcompS a
-                case 0xDC: // faddD a; fsubrD a; fmulD a; fdivrD a; fcompD a
-                case 0xDE: // faddpD a; fsubrpD a; fmulpD a; fdivrpD a; fcomppD a
-                    hasDisp32 = true;
-                    break;
-
-                case 0xF0: // Lock
-                    againAfterPrefix = true;
-                    break;
-
-                case 0xF3: // For SSE
-                case 0xF2: // For SSE2
-                    switch (0xFF & code[ip++]) {
-                        case Prefix.REX:
-                        case Prefix.REXB:
-                        case Prefix.REXX:
-                        case Prefix.REXXB:
-                        case Prefix.REXR:
-                        case Prefix.REXRB:
-                        case Prefix.REXRX:
-                        case Prefix.REXRXB:
-                        case Prefix.REXW:
-                        case Prefix.REXWB:
-                        case Prefix.REXWX:
-                        case Prefix.REXWXB:
-                        case Prefix.REXWR:
-                        case Prefix.REXWRB:
-                        case Prefix.REXWRX:
-                        case Prefix.REXWRXB:
-                            assert targetIs64Bit : "found 64bit prefix";
-                            ip++;
-                            // fall through
-                        default:
-                            ip++;
-                    }
-                    hasDisp32 = true; // has both kinds of operands!
-                    break;
-
-                default:
-                    throw new InternalError("should not reach here");
-            }
-        }
-
-        assert hasDisp32 : "(thomaswue) not sure if this holds: instruction has no disp32 field";
-
-        // parse the output of emitOperand
-        int op2 = 0xFF & code[ip++];
-        int base = op2 & 0x07;
-        int op3 = -1;
-        int b100 = 4;
-        int b101 = 5;
-        if (base == b100 && (op2 >> 6) != 3) {
-            op3 = 0xFF & code[ip++];
-            base = op3 & 0x07; // refetch the base
-        }
-        // now ip points at the disp (if any)
-
-        switch (op2 >> 6) {
-            case 0:
-                // [00 reg 100][ss index base]
-                // [00 reg 100][00 100 esp]
-                // [00 reg base]
-                // [00 reg 100][ss index 101][disp32]
-                // [00 reg 101] [disp32]
-
-                if (base == b101) {
-
-                    currentDisplacementPosition = ip;
-                    ip += 4; // skip the disp32
-                }
-                break;
-
-            case 1:
-                // [01 reg 100][ss index base][disp8]
-                // [01 reg 100][00 100 esp][disp8]
-                // [01 reg base] [disp8]
-                ip += 1; // skip the disp8
-                break;
-
-            case 2:
-                // [10 reg 100][ss index base][disp32]
-                // [10 reg 100][00 100 esp][disp32]
-                // [10 reg base] [disp32]
-                currentDisplacementPosition = ip;
-                ip += 4; // skip the disp32
-                break;
-
-            case 3:
-                // [11 reg base] (not a memory addressing mode)
-                break;
-        }
-
-        currentEndOfInstruction = ip + tailSize;
-    }
-
-    public static void patchRelativeInstruction(byte[] code, int codePos, int relative) {
-        X86InstructionDecoder decoder = new X86InstructionDecoder(code, true);
-        decoder.decodePosition(codePos);
-        int patchPos = decoder.currentDisplacementPosition();
-        int endOfInstruction = decoder.currentEndOfInstruction();
-        int offset = relative - endOfInstruction + codePos;
-        patchDisp32(code, patchPos, offset);
-    }
-
-    private static void patchDisp32(byte[] code, int pos, int offset) {
-        assert pos + 4 <= code.length;
-
-        assert code[pos] == 0;
-        assert code[pos + 1] == 0;
-        assert code[pos + 2] == 0;
-        assert code[pos + 3] == 0;
-
-        code[pos] = (byte) (offset & 0xFF);
-        code[pos + 1] = (byte) ((offset >> 8) & 0xFF);
-        code[pos + 2] = (byte) ((offset >> 16) & 0xFF);
-        code[pos + 3] = (byte) ((offset >> 24) & 0xFF);
-    }
-}
--- a/graal/com.oracle.max.asm/overview.html	Wed Oct 03 16:49:51 2012 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head>
-<!--
-
-Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
-DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License version 2 only, as
-published by the Free Software Foundation.  Oracle designates this
-particular file as subject to the "Classpath" exception as provided
-by Oracle in the LICENSE file that accompanied this code.
-
-This code is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-version 2 for more details (a copy is included in the LICENSE file that
-accompanied this code).
-
-You should have received a copy of the GNU General Public License version
-2 along with this work; if not, write to the Free Software Foundation,
-Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-
-Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-or visit www.oracle.com if you need additional information or have any
-questions.
--->
-
-</head>
-<body>
-
-Documentation for the <code>com.oracle.max.asm</code> project.
-
-</body>
-</html>
--- a/graal/com.oracle.max.asm/src/com/oracle/max/asm/AbstractAssembler.java	Wed Oct 03 16:49:51 2012 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.max.asm;
-
-import com.oracle.graal.api.code.*;
-import com.oracle.graal.api.code.Architecture.*;
-
-/**
- * The platform-independent base class for the assembler.
- */
-public abstract class AbstractAssembler {
-    public final TargetDescription target;
-    public final Buffer codeBuffer;
-
-    public AbstractAssembler(TargetDescription target) {
-        this.target = target;
-
-        if (target.arch.byteOrder == ByteOrder.BigEndian) {
-            this.codeBuffer = new Buffer.BigEndian();
-        } else {
-            this.codeBuffer = new Buffer.LittleEndian();
-        }
-    }
-
-    public final void bind(Label l) {
-        assert !l.isBound() : "can bind label only once";
-        l.bind(codeBuffer.position());
-        l.patchInstructions(this);
-    }
-
-    public abstract void align(int modulus);
-
-    public abstract void jmp(Label l);
-
-    protected abstract void patchJumpTarget(int branch, int jumpTarget);
-
-    /**
-     * Emits instruction(s) that access an address specified by a given displacement from the stack pointer
-     * in the direction that the stack grows (which is down on most architectures).
-     *
-     * @param disp the displacement from the stack pointer at which the stack should be accessed
-     */
-    public abstract void bangStack(int disp);
-
-    protected final void emitByte(int x) {
-        codeBuffer.emitByte(x);
-    }
-
-    protected final void emitShort(int x) {
-        codeBuffer.emitShort(x);
-    }
-
-    protected final void emitInt(int x) {
-        codeBuffer.emitInt(x);
-    }
-
-    protected final void emitLong(long x) {
-        codeBuffer.emitLong(x);
-    }
-}
--- a/graal/com.oracle.max.asm/src/com/oracle/max/asm/AsmOptions.java	Wed Oct 03 16:49:51 2012 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2011, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.max.asm;
-
-public class AsmOptions {
-    public static int     InitialCodeBufferSize         = 232;
-}
--- a/graal/com.oracle.max.asm/src/com/oracle/max/asm/Buffer.java	Wed Oct 03 16:49:51 2012 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,242 +0,0 @@
-/*
- * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.max.asm;
-
-import java.util.*;
-
-/**
- * Code buffer management for the assembler. Support for little endian and big endian architectures is implemented using subclasses.
- */
-public abstract class Buffer {
-    protected byte[] data;
-    protected int position;
-
-    public Buffer() {
-        data = new byte[AsmOptions.InitialCodeBufferSize];
-    }
-
-    public void reset() {
-        position = 0;
-    }
-
-    public int position() {
-        return position;
-    }
-
-    public void setPosition(int position) {
-        assert position >= 0 && position <= data.length;
-        this.position = position;
-    }
-
-    /**
-     * Closes this buffer. No extra data can be written to this buffer after
-     * this call.
-     *
-     * @param trimmedCopy
-     *            if {@code true}, then a copy of the underlying byte array up
-     *            to (but not including) {@code position()} is returned
-     * @return the data in this buffer or a trimmed copy if {@code trimmedCopy}
-     *         is {@code true}
-     */
-    public byte[] close(boolean trimmedCopy) {
-        byte[] result = trimmedCopy ? Arrays.copyOf(data, position()) : data;
-        data = null;
-        return result;
-    }
-
-    public byte[] copyData(int start, int end) {
-        return Arrays.copyOfRange(data, start, end);
-    }
-
-    /**
-     * Copies the data from this buffer into a given array.
-     *
-     * @param dst
-     *            the destination array
-     * @param off
-     *            starting position in {@code dst}
-     * @param len
-     *            number of bytes to copy
-     */
-    public void copyInto(byte[] dst, int off, int len) {
-        System.arraycopy(data, 0, dst, off, len);
-    }
-
-    protected void ensureSize(int length) {
-        if (length >= data.length) {
-            data = Arrays.copyOf(data, length * 4);
-        }
-    }
-
-    public void emitBytes(byte[] arr, int off, int len) {
-        ensureSize(position + len);
-        System.arraycopy(arr, off, data, position, len);
-        position += len;
-    }
-
-    public void emitByte(int b) {
-        position = emitByte(b, position);
-    }
-
-    public void emitShort(int b) {
-        position = emitShort(b, position);
-    }
-
-    public void emitInt(int b) {
-        position = emitInt(b, position);
-    }
-
-    public void emitLong(long b) {
-        position = emitLong(b, position);
-    }
-
-    public int emitByte(int b, int pos) {
-        assert NumUtil.isUByte(b);
-        int newPos = pos + 1;
-        ensureSize(newPos);
-        data[pos] = (byte) (b & 0xFF);
-        return newPos;
-    }
-
-    public abstract int emitShort(int b, int pos);
-
-    public abstract int emitInt(int b, int pos);
-
-    public abstract int emitLong(long b, int pos);
-
-    public int getByte(int pos) {
-        return data[pos] & 0xff;
-    }
-
-    public abstract int getShort(int pos);
-
-    public abstract int getInt(int pos);
-
-    public static final class BigEndian extends Buffer {
-        @Override
-        public int emitShort(int b, int pos) {
-            assert NumUtil.isUShort(b);
-            int newPos = pos + 2;
-            ensureSize(pos + 2);
-            data[pos] = (byte) ((b >> 8) & 0xFF);
-            data[pos + 1] = (byte) (b & 0xFF);
-            return newPos;
-        }
-
-        @Override
-        public int emitInt(int b, int pos) {
-            int newPos = pos + 4;
-            ensureSize(newPos);
-            data[pos] = (byte) ((b >> 24) & 0xFF);
-            data[pos + 1] = (byte) ((b >> 16) & 0xFF);
-            data[pos + 2] = (byte) ((b >> 8) & 0xFF);
-            data[pos + 3] = (byte) (b & 0xFF);
-            return newPos;
-        }
-
-        @Override
-        public int emitLong(long b, int pos) {
-            int newPos = pos + 8;
-            ensureSize(newPos);
-            data[pos] = (byte) ((b >> 56) & 0xFF);
-            data[pos + 1] = (byte) ((b >> 48) & 0xFF);
-            data[pos + 2] = (byte) ((b >> 40) & 0xFF);
-            data[pos + 3] = (byte) ((b >> 32) & 0xFF);
-            data[pos + 4] = (byte) ((b >> 24) & 0xFF);
-            data[pos + 5] = (byte) ((b >> 16) & 0xFF);
-            data[pos + 6] = (byte) ((b >> 8) & 0xFF);
-            data[pos + 7] = (byte) (b & 0xFF);
-            return newPos;
-        }
-
-        @Override
-        public int getShort(int pos) {
-            return
-                (data[pos + 0] & 0xff) << 8 |
-                (data[pos + 1] & 0xff) << 0;
-        }
-
-        @Override
-        public int getInt(int pos) {
-            return
-                (data[pos + 0] & 0xff) << 24 |
-                (data[pos + 1] & 0xff) << 16 |
-                (data[pos + 2] & 0xff) << 8 |
-                (data[pos + 3] & 0xff) << 0;
-        }
-    }
-
-    public static final class LittleEndian extends Buffer {
-        @Override
-        public int emitShort(int b, int pos) {
-            assert NumUtil.isUShort(b);
-            int newPos = pos + 2;
-            ensureSize(newPos);
-            data[pos] = (byte) (b & 0xFF);
-            data[pos + 1] = (byte) ((b >> 8) & 0xFF);
-            return newPos;
-        }
-
-        @Override
-        public int emitInt(int b, int pos) {
-            int newPos = pos + 4;
-            ensureSize(newPos);
-            data[pos] = (byte) (b & 0xFF);
-            data[pos + 1] = (byte) ((b >> 8) & 0xFF);
-            data[pos + 2] = (byte) ((b >> 16) & 0xFF);
-            data[pos + 3] = (byte) ((b >> 24) & 0xFF);
-            return newPos;
-        }
-
-        @Override
-        public int emitLong(long b, int pos) {
-            int newPos = pos + 8;
-            ensureSize(newPos);
-            data[pos] = (byte) (b & 0xFF);
-            data[pos + 1] = (byte) ((b >> 8) & 0xFF);
-            data[pos + 2] = (byte) ((b >> 16) & 0xFF);
-            data[pos + 3] = (byte) ((b >> 24) & 0xFF);
-            data[pos + 4] = (byte) ((b >> 32) & 0xFF);
-            data[pos + 5] = (byte) ((b >> 40) & 0xFF);
-            data[pos + 6] = (byte) ((b >> 48) & 0xFF);
-            data[pos + 7] = (byte) ((b >> 56) & 0xFF);
-            return newPos;
-        }
-
-        @Override
-        public int getShort(int pos) {
-            return
-                (data[pos + 1] & 0xff) << 8 |
-                (data[pos + 0] & 0xff) << 0;
-        }
-
-        @Override
-        public int getInt(int pos) {
-            return
-                (data[pos + 3] & 0xff) << 24 |
-                (data[pos + 2] & 0xff) << 16 |
-                (data[pos + 1] & 0xff) << 8 |
-                (data[pos + 0] & 0xff) << 0;
-        }
-    }
-}
--- a/graal/com.oracle.max.asm/src/com/oracle/max/asm/Label.java	Wed Oct 03 16:49:51 2012 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.max.asm;
-
-import java.util.ArrayList;
-
-/**
- * This class represents a label within assembly code.
- */
-public final class Label {
-    private int position = -1;
-
-    /**
-     * References to instructions that jump to this unresolved label.
-     * These instructions need to be patched when the label is bound
-     * using the {@link #patchInstructions(AbstractAssembler)} method.
-     */
-    private ArrayList<Integer> patchPositions = new ArrayList<>(4);
-
-    /**
-     * Returns the position of this label in the code buffer.
-     * @return the position
-     */
-    public int position() {
-        assert position >= 0 : "Unbound label is being referenced";
-        return position;
-    }
-
-    public Label() {
-    }
-
-    /**
-     * Binds the label to the specified position.
-     * @param pos the position
-     */
-    protected void bind(int pos) {
-        this.position = pos;
-        assert isBound();
-    }
-
-    public boolean isBound() {
-        return position >= 0;
-    }
-
-    public void addPatchAt(int branchLocation) {
-        assert !isBound() : "Label is already bound";
-        patchPositions.add(branchLocation);
-    }
-
-    protected void patchInstructions(AbstractAssembler masm) {
-        assert isBound() : "Label should be bound";
-        int target = position;
-        for (int i = 0; i < patchPositions.size(); ++i) {
-            int pos = patchPositions.get(i);
-            masm.patchJumpTarget(pos, target);
-        }
-    }
-
-    @Override
-    public String toString() {
-        return isBound() ? String.valueOf(position()) : "?";
-    }
-}
--- a/graal/com.oracle.max.asm/src/com/oracle/max/asm/NumUtil.java	Wed Oct 03 16:49:51 2012 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2011, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package com.oracle.max.asm;
-
-/**
- * A collection of static utility functions that check ranges of numbers.
- */
-public class NumUtil {
-    public static boolean isShiftCount(int x) {
-        return 0 <= x && x < 32;
-    }
-
-    /**
-     * Determines if a given {@code int} value is the range of unsigned byte
-     * values.
-     */
-    public static boolean isUByte(int x) {
-        return (x & 0xff) == x;
-    }
-
-    /**
-     * Determines if a given {@code int} value is the range of signed byte
-     * values.
-     */
-    public static boolean isByte(int x) {
-        return (byte) x == x;
-    }
-
-    /**
-     * Determines if a given {@code long} value is the range of unsigned byte
-     * values.
-     */
-    public static boolean isUByte(long x) {
-        return (x & 0xffL) == x;
-    }
-
-    /**
-     * Determines if a given {@code long} value is the range of signed byte
-     * values.
-     */
-    public static boolean isByte(long l) {
-        return (byte) l == l;
-    }
-
-    /**
-     * Determines if a given {@code long} value is the range of unsigned int
-     * values.
-     */
-    public static boolean isUInt(long x) {
-        return (x & 0xffffffffL) == x;
-    }
-
-    /**
-     * Determines if a given {@code long} value is the range of signed int
-     * values.
-     */
-    public static boolean isInt(long l) {
-        return (int) l == l;
-    }
-
-    /**
-     * Determines if a given {@code int} value is the range of signed short
-     * values.
-     */
-    public static boolean isShort(int x) {
-        return (short) x == x;
-    }
-
-    public static boolean isUShort(int s) {
-        return s == (s & 0xFFFF);
-    }
-
-    public static boolean is32bit(long x) {
-        return -0x80000000L <= x && x < 0x80000000L;
-    }
-
-    public static short safeToShort(int v) {
-        assert isShort(v);
-        return (short) v;
-    }
-
-    public static int roundUp(int number, int mod) {
-        return ((number + mod - 1) / mod) * mod;
-    }
-}
--- a/mx/projects	Wed Oct 03 16:49:51 2012 +0200
+++ b/mx/projects	Wed Oct 03 17:42:12 2012 +0200
@@ -98,14 +98,14 @@
 # graal.lir
 project@com.oracle.graal.lir@subDir=graal
 project@com.oracle.graal.lir@sourceDirs=src
-project@com.oracle.graal.lir@dependencies=com.oracle.max.asm,com.oracle.graal.nodes
+project@com.oracle.graal.lir@dependencies=com.oracle.graal.asm,com.oracle.graal.nodes
 project@com.oracle.graal.lir@checkstyle=com.oracle.graal.graph
 project@com.oracle.graal.lir@javaCompliance=1.7
 
 # graal.lir.amd64
 project@com.oracle.graal.lir.amd64@subDir=graal
 project@com.oracle.graal.lir.amd64@sourceDirs=src
-project@com.oracle.graal.lir.amd64@dependencies=com.oracle.graal.lir,com.oracle.max.asm.amd64
+project@com.oracle.graal.lir.amd64@dependencies=com.oracle.graal.lir,com.oracle.graal.asm.amd64
 project@com.oracle.graal.lir.amd64@checkstyle=com.oracle.graal.graph
 project@com.oracle.graal.lir.amd64@javaCompliance=1.7
 
@@ -227,19 +227,19 @@
 project@com.oracle.graal.jtt@checkstyle=com.oracle.graal.graph
 project@com.oracle.graal.jtt@javaCompliance=1.7
 
-# max.asm
-project@com.oracle.max.asm@subDir=graal
-project@com.oracle.max.asm@sourceDirs=src
-project@com.oracle.max.asm@dependencies=com.oracle.max.criutils
-project@com.oracle.max.asm@checkstyle=com.oracle.graal.graph
-project@com.oracle.max.asm@javaCompliance=1.7
+# graal.asm
+project@com.oracle.graal.asm@subDir=graal
+project@com.oracle.graal.asm@sourceDirs=src
+project@com.oracle.graal.asm@dependencies=com.oracle.max.criutils
+project@com.oracle.graal.asm@checkstyle=com.oracle.graal.graph
+project@com.oracle.graal.asm@javaCompliance=1.7
 
-# max.asm.amd64
-project@com.oracle.max.asm.amd64@subDir=graal
-project@com.oracle.max.asm.amd64@sourceDirs=src
-project@com.oracle.max.asm.amd64@dependencies=com.oracle.max.asm
-project@com.oracle.max.asm.amd64@checkstyle=com.oracle.graal.graph
-project@com.oracle.max.asm.amd64@javaCompliance=1.7
+# graal.asm.amd64
+project@com.oracle.graal.asm.amd64@subDir=graal
+project@com.oracle.graal.asm.amd64@sourceDirs=src
+project@com.oracle.graal.asm.amd64@dependencies=com.oracle.graal.asm
+project@com.oracle.graal.asm.amd64@checkstyle=com.oracle.graal.graph
+project@com.oracle.graal.asm.amd64@javaCompliance=1.7
 
 # max.criutils
 project@com.oracle.max.criutils@subDir=graal
--- a/src/share/vm/runtime/arguments.cpp	Wed Oct 03 16:49:51 2012 +0200
+++ b/src/share/vm/runtime/arguments.cpp	Wed Oct 03 17:42:12 2012 +0200
@@ -2148,7 +2148,7 @@
     // modify its entries, not its name or shape
     const char* graal_projects[] = {
 #ifdef AMD64
-        "com.oracle.max.asm.amd64",
+        "com.oracle.graal.asm.amd64",
         "com.oracle.graal.lir.amd64",
         "com.oracle.graal.compiler.amd64",
         "com.oracle.graal.hotspot.amd64",
@@ -2159,7 +2159,7 @@
         "com.oracle.graal.api.interpreter",
         "com.oracle.max.criutils",
         "com.oracle.graal.hotspot",
-        "com.oracle.max.asm",
+        "com.oracle.graal.asm",
         "com.oracle.graal.alloc",
         "com.oracle.graal.snippets",
         "com.oracle.graal.compiler",