comparison graal/com.oracle.jvmci.asm.amd64/src/com/oracle/jvmci/asm/amd64/AMD64Assembler.java @ 21708:6df25b1418be

moved com.oracle.asm.** to jvmci-util.jar (JBS:GRAAL-53)
author Doug Simon <doug.simon@oracle.com>
date Wed, 03 Jun 2015 18:06:44 +0200
parents graal/com.oracle.graal.asm.amd64/src/com/oracle/graal/asm/amd64/AMD64Assembler.java@5024c80224c7
children
comparing revisions 21707:e0f311284930 and 21708:6df25b1418be
1 /*
2 * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23 package com.oracle.jvmci.asm.amd64;
24
25 import com.oracle.jvmci.amd64.*;
26 import com.oracle.jvmci.amd64.AMD64.*;
27 import com.oracle.jvmci.asm.*;
28 import com.oracle.jvmci.code.Register;
29 import com.oracle.jvmci.code.TargetDescription;
30 import com.oracle.jvmci.code.RegisterConfig;
31
32 import static com.oracle.jvmci.amd64.AMD64.*;
33 import static com.oracle.jvmci.asm.NumUtil.*;
34 import static com.oracle.jvmci.asm.amd64.AMD64AsmOptions.*;
35 import static com.oracle.jvmci.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.*;
36 import static com.oracle.jvmci.asm.amd64.AMD64Assembler.AMD64MOp.*;
37 import static com.oracle.jvmci.asm.amd64.AMD64Assembler.OperandSize.*;
38 import static com.oracle.jvmci.code.MemoryBarriers.*;
39
40 import com.oracle.jvmci.code.Register.RegisterCategory;
41
42 /**
43 * This class implements an assembler that can encode most X86 instructions.
44 */
45 public class AMD64Assembler extends Assembler {
46
47 private static final int MinEncodingNeedsRex = 8;
48
49 /**
50 * A sentinel value used as a placeholder in an instruction stream for an address that will be
51 * patched.
52 */
53 private static final AMD64Address Placeholder = new AMD64Address(rip);
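// Illustrative note (an assumption, not part of the original file): Placeholder is
// rip-relative with displacement 0, so emitOperandHelper below encodes it as
// [00 reg 101] followed by a 32-bit displacement of 0, which the patching code
// later overwrites once the real address is known.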
54
55 /**
56 * The x86 condition codes used for conditional jumps/moves.
57 */
58 public enum ConditionFlag {
59 Zero(0x4, "|zero|"),
60 NotZero(0x5, "|nzero|"),
61 Equal(0x4, "="),
62 NotEqual(0x5, "!="),
63 Less(0xc, "<"),
64 LessEqual(0xe, "<="),
65 Greater(0xf, ">"),
66 GreaterEqual(0xd, ">="),
67 Below(0x2, "|<|"),
68 BelowEqual(0x6, "|<=|"),
69 Above(0x7, "|>|"),
70 AboveEqual(0x3, "|>=|"),
71 Overflow(0x0, "|of|"),
72 NoOverflow(0x1, "|nof|"),
73 CarrySet(0x2, "|carry|"),
74 CarryClear(0x3, "|ncarry|"),
75 Negative(0x8, "|neg|"),
76 Positive(0x9, "|pos|"),
77 Parity(0xa, "|par|"),
78 NoParity(0xb, "|npar|");
79
80 private final int value;
81 private final String operator;
82
83 private ConditionFlag(int value, String operator) {
84 this.value = value;
85 this.operator = operator;
86 }
87
88 public ConditionFlag negate() {
89 switch (this) {
90 case Zero:
91 return NotZero;
92 case NotZero:
93 return Zero;
94 case Equal:
95 return NotEqual;
96 case NotEqual:
97 return Equal;
98 case Less:
99 return GreaterEqual;
100 case LessEqual:
101 return Greater;
102 case Greater:
103 return LessEqual;
104 case GreaterEqual:
105 return Less;
106 case Below:
107 return AboveEqual;
108 case BelowEqual:
109 return Above;
110 case Above:
111 return BelowEqual;
112 case AboveEqual:
113 return Below;
114 case Overflow:
115 return NoOverflow;
116 case NoOverflow:
117 return Overflow;
118 case CarrySet:
119 return CarryClear;
120 case CarryClear:
121 return CarrySet;
122 case Negative:
123 return Positive;
124 case Positive:
125 return Negative;
126 case Parity:
127 return NoParity;
128 case NoParity:
129 return Parity;
130 }
131 throw new IllegalArgumentException();
132 }
133
134 public int getValue() {
135 return value;
136 }
137
138 @Override
139 public String toString() {
140 return operator;
141 }
142 }
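// Illustrative sketch (not part of the original file): x86 encodes negated
// conditions by flipping the low bit of the 4-bit condition code, so the
// switch in negate() above is equivalent to toggling bit 0 of the encoding.
// A hypothetical self-check using only the methods defined above:
private static boolean conditionFlagsAreConsistent() {
    for (ConditionFlag cc : ConditionFlag.values()) {
        assert cc.negate().negate() == cc : "negate must be an involution";
        assert cc.negate().getValue() == (cc.getValue() ^ 1) : "negation flips the low bit";
    }
    return true;
}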
143
144 /**
145 * Constants for X86 prefix bytes.
146 */
147 private static class Prefix {
148
149 private static final int REX = 0x40;
150 private static final int REXB = 0x41;
151 private static final int REXX = 0x42;
152 private static final int REXXB = 0x43;
153 private static final int REXR = 0x44;
154 private static final int REXRB = 0x45;
155 private static final int REXRX = 0x46;
156 private static final int REXRXB = 0x47;
157 private static final int REXW = 0x48;
158 private static final int REXWB = 0x49;
159 private static final int REXWX = 0x4A;
160 private static final int REXWXB = 0x4B;
161 private static final int REXWR = 0x4C;
162 private static final int REXWRB = 0x4D;
163 private static final int REXWRX = 0x4E;
164 private static final int REXWRXB = 0x4F;
165 }
166
167 /**
168 * The x86 operand sizes.
169 */
170 public static enum OperandSize {
171 BYTE(1) {
172 @Override
173 protected void emitImmediate(AMD64Assembler asm, int imm) {
174 assert imm == (byte) imm;
175 asm.emitByte(imm);
176 }
177 },
178
179 WORD(2, 0x66) {
180 @Override
181 protected void emitImmediate(AMD64Assembler asm, int imm) {
182 assert imm == (short) imm;
183 asm.emitShort(imm);
184 }
185 },
186
187 DWORD(4) {
188 @Override
189 protected void emitImmediate(AMD64Assembler asm, int imm) {
190 asm.emitInt(imm);
191 }
192 },
193
194 QWORD(8) {
195 @Override
196 protected void emitImmediate(AMD64Assembler asm, int imm) {
197 asm.emitInt(imm);
198 }
199 },
200
201 SS(4, 0xF3, true),
202
203 SD(8, 0xF2, true),
204
205 PS(16, true),
206
207 PD(16, 0x66, true);
208
209 private final int sizePrefix;
210
211 private final int bytes;
212 private final boolean xmm;
213
214 private OperandSize(int bytes) {
215 this(bytes, 0);
216 }
217
218 private OperandSize(int bytes, int sizePrefix) {
219 this(bytes, sizePrefix, false);
220 }
221
222 private OperandSize(int bytes, boolean xmm) {
223 this(bytes, 0, xmm);
224 }
225
226 private OperandSize(int bytes, int sizePrefix, boolean xmm) {
227 this.sizePrefix = sizePrefix;
228 this.bytes = bytes;
229 this.xmm = xmm;
230 }
231
232 public int getBytes() {
233 return bytes;
234 }
235
236 public boolean isXmmType() {
237 return xmm;
238 }
239
240 /**
241 * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
242 * as sign-extended 32-bit values.
243 *
244 * @param asm the assembler used to emit the immediate
245 * @param imm the immediate value to emit
246 */
247 protected void emitImmediate(AMD64Assembler asm, int imm) {
248 assert false;
249 }
250 }
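// Illustrative sketch (an assumption, not part of the original file): immediates
// are emitted little-endian at the operand size, except QWORD, which is encoded
// as a sign-extended 32-bit value as documented above. A hypothetical helper
// demonstrating the rule:
private static void emitExampleImmediates(AMD64Assembler asm) {
    DWORD.emitImmediate(asm, -1);       // emits FF FF FF FF
    WORD.emitImmediate(asm, 0x1234);    // emits 34 12 (little-endian)
    QWORD.emitImmediate(asm, 42);       // still four bytes: 2A 00 00 00
}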
251
252 /**
253 * Operand size and register type constraints.
254 */
255 private static enum OpAssertion {
256 ByteAssertion(CPU, CPU, BYTE),
257 IntegerAssertion(CPU, CPU, WORD, DWORD, QWORD),
258 No16BitAssertion(CPU, CPU, DWORD, QWORD),
259 QwordOnlyAssertion(CPU, CPU, QWORD),
260 FloatingAssertion(XMM, XMM, SS, SD, PS, PD),
261 PackedFloatingAssertion(XMM, XMM, PS, PD),
262 SingleAssertion(XMM, XMM, SS),
263 DoubleAssertion(XMM, XMM, SD),
264 IntToFloatingAssertion(XMM, CPU, DWORD, QWORD),
265 FloatingToIntAssertion(CPU, XMM, DWORD, QWORD);
266
267 private final RegisterCategory resultCategory;
268 private final RegisterCategory inputCategory;
269 private final OperandSize[] allowedSizes;
270
271 private OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
272 this.resultCategory = resultCategory;
273 this.inputCategory = inputCategory;
274 this.allowedSizes = allowedSizes;
275 }
276
277 protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
278 assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
279 assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;
280
281 for (OperandSize s : allowedSizes) {
282 if (size == s) {
283 return true;
284 }
285 }
286
287 assert false : "invalid operand size " + size + " used in " + op;
288 return false;
289 }
290 }
291
292 /**
293 * The register to which {@link Register#Frame} and {@link Register#CallerFrame} are bound.
294 */
295 public final Register frameRegister;
296
297 /**
298 * Constructs an assembler for the AMD64 architecture.
299 *
300 * @param registerConfig the register configuration used to bind {@link Register#Frame} and
301 * {@link Register#CallerFrame} to physical registers. This value can be null if this
302 * assembler instance will not be used to assemble instructions using these logical
303 * registers.
304 */
305 public AMD64Assembler(TargetDescription target, RegisterConfig registerConfig) {
306 super(target);
307 this.frameRegister = registerConfig == null ? null : registerConfig.getFrameRegister();
308 }
309
310 private boolean supports(CPUFeature feature) {
311 return ((AMD64) target.arch).getFeatures().contains(feature);
312 }
313
314 private static int encode(Register r) {
315 assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
316 return r.encoding & 0x7;
317 }
318
319 /**
320 * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
321 * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
322 * field. The X bit must be 0.
323 */
324 protected static int getRXB(Register reg, Register rm) {
325 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
326 rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
327 return rxb;
328 }
329
330 /**
331 * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
332 * are two cases for the memory operand:<br>
333 * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.<br>
334 * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
335 */
336 protected static int getRXB(Register reg, AMD64Address rm) {
337 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
338 if (!rm.getIndex().equals(Register.None)) {
339 rxb |= (rm.getIndex().encoding & 0x08) >> 2;
340 }
341 if (!rm.getBase().equals(Register.None)) {
342 rxb |= (rm.getBase().encoding & 0x08) >> 3;
343 }
344 return rxb;
345 }
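// Worked example (illustrative assumption, not in the original file): for
// reg = r9 (encoding 9 = 0b1001) and rm = rax (encoding 0), getRXB returns
// (9 & 0x08) >> 1 = 0b100, i.e. only the R bit is set, so emitOpcode below
// turns it into the REX.R prefix 0x40 | 0b100 = 0x44.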
346
347 /**
348 * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
349 * <p>
350 * Format: [ 11 reg r/m ]
351 */
352 protected void emitModRM(int reg, Register rm) {
353 assert (reg & 0x07) == reg;
354 emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
355 }
356
357 /**
358 * Emit the ModR/M byte for two register operands.
359 * <p>
360 * Format: [ 11 reg r/m ]
361 */
362 protected void emitModRM(Register reg, Register rm) {
363 emitModRM(reg.encoding & 0x07, rm);
364 }
365
366 /**
367 * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
368 */
369 protected void emitOperandHelper(Register reg, AMD64Address addr) {
370 assert !reg.equals(Register.None);
371 emitOperandHelper(encode(reg), addr);
372 }
373
374 /**
375 * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
376 * extension in the R field.
377 */
378 protected void emitOperandHelper(int reg, AMD64Address addr) {
379 assert (reg & 0x07) == reg;
380 int regenc = reg << 3;
381
382 Register base = addr.getBase();
383 Register index = addr.getIndex();
384
385 AMD64Address.Scale scale = addr.getScale();
386 int disp = addr.getDisplacement();
387
388 if (base.equals(Register.Frame)) {
389 assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration";
390 base = frameRegister;
391 }
392
393 if (base.equals(AMD64.rip)) { // also matches Placeholder
394 // [00 000 101] disp32
395 assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
396 emitByte(0x05 | regenc);
397 emitInt(disp);
398 } else if (base.isValid()) {
399 int baseenc = base.isValid() ? encode(base) : 0;
400 if (index.isValid()) {
401 int indexenc = encode(index) << 3;
402 // [base + indexscale + disp]
403 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
404 // [base + indexscale]
405 // [00 reg 100][ss index base]
406 assert !index.equals(rsp) : "illegal addressing mode";
407 emitByte(0x04 | regenc);
408 emitByte(scale.log2 << 6 | indexenc | baseenc);
409 } else if (isByte(disp)) {
410 // [base + indexscale + imm8]
411 // [01 reg 100][ss index base] imm8
412 assert !index.equals(rsp) : "illegal addressing mode";
413 emitByte(0x44 | regenc);
414 emitByte(scale.log2 << 6 | indexenc | baseenc);
415 emitByte(disp & 0xFF);
416 } else {
417 // [base + indexscale + disp32]
418 // [10 reg 100][ss index base] disp32
419 assert !index.equals(rsp) : "illegal addressing mode";
420 emitByte(0x84 | regenc);
421 emitByte(scale.log2 << 6 | indexenc | baseenc);
422 emitInt(disp);
423 }
424 } else if (base.equals(rsp) || base.equals(r12)) {
425 // [rsp + disp]
426 if (disp == 0) {
427 // [rsp]
428 // [00 reg 100][00 100 100]
429 emitByte(0x04 | regenc);
430 emitByte(0x24);
431 } else if (isByte(disp)) {
432 // [rsp + imm8]
433 // [01 reg 100][00 100 100] disp8
434 emitByte(0x44 | regenc);
435 emitByte(0x24);
436 emitByte(disp & 0xFF);
437 } else {
438 // [rsp + imm32]
439 // [10 reg 100][00 100 100] disp32
440 emitByte(0x84 | regenc);
441 emitByte(0x24);
442 emitInt(disp);
443 }
444 } else {
445 // [base + disp]
446 assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
447 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
448 // [base]
449 // [00 reg base]
450 emitByte(0x00 | regenc | baseenc);
451 } else if (isByte(disp)) {
452 // [base + disp8]
453 // [01 reg base] disp8
454 emitByte(0x40 | regenc | baseenc);
455 emitByte(disp & 0xFF);
456 } else {
457 // [base + disp32]
458 // [10 reg base] disp32
459 emitByte(0x80 | regenc | baseenc);
460 emitInt(disp);
461 }
462 }
463 } else {
464 if (index.isValid()) {
465 int indexenc = encode(index) << 3;
466 // [indexscale + disp]
467 // [00 reg 100][ss index 101] disp32
468 assert !index.equals(rsp) : "illegal addressing mode";
469 emitByte(0x04 | regenc);
470 emitByte(scale.log2 << 6 | indexenc | 0x05);
471 emitInt(disp);
472 } else {
473 // [disp] ABSOLUTE
474 // [00 reg 100][00 100 101] disp32
475 emitByte(0x04 | regenc);
476 emitByte(0x25);
477 emitInt(disp);
478 }
479 }
480 }
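// Worked example (illustrative assumption, not part of the original file):
// emitOperandHelper(rdx, [rbx + rcx*4 + 16]) takes the [base + indexscale + imm8]
// path above and emits three bytes:
//   0x54  ModRM [01 010 100]: mod=disp8, reg=rdx(2), rm=100 (SIB follows)
//   0x8B  SIB   [10 001 011]: scale=*4, index=rcx(1), base=rbx(3)
//   0x10  disp8 = 16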
481
482 /**
483 * Base class for AMD64 opcodes.
484 */
485 public static class AMD64Op {
486
487 protected static final int P_0F = 0x0F;
488 protected static final int P_0F38 = 0x380F;
489 protected static final int P_0F3A = 0x3A0F;
490
491 private final String opcode;
492
493 private final int prefix1;
494 private final int prefix2;
495 private final int op;
496
497 private final boolean dstIsByte;
498 private final boolean srcIsByte;
499
500 private final OpAssertion assertion;
501 private final CPUFeature feature;
502
503 protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
504 this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
505 }
506
507 protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
508 this.opcode = opcode;
509 this.prefix1 = prefix1;
510 this.prefix2 = prefix2;
511 this.op = op;
512
513 this.dstIsByte = dstIsByte;
514 this.srcIsByte = srcIsByte;
515
516 this.assertion = assertion;
517 this.feature = feature;
518 }
519
520 protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
521 if (prefix1 != 0) {
522 asm.emitByte(prefix1);
523 }
524 if (size.sizePrefix != 0) {
525 asm.emitByte(size.sizePrefix);
526 }
527 int rexPrefix = 0x40 | rxb;
528 if (size == QWORD) {
529 rexPrefix |= 0x08;
530 }
531 if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
532 asm.emitByte(rexPrefix);
533 }
534 if (prefix2 > 0xFF) {
535 asm.emitShort(prefix2);
536 } else if (prefix2 > 0) {
537 asm.emitByte(prefix2);
538 }
539 asm.emitByte(op);
540 }
541
542 protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
543 assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
544 assert assertion.checkOperands(this, size, resultReg, inputReg);
545 return true;
546 }
547
548 @Override
549 public String toString() {
550 return opcode;
551 }
552 }
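// Worked example (illustrative assumption, not in the original file): emitting
// MOV r9, rax at QWORD size via the RM form (opcode 0x8B) computes rxb = 0b100
// for r9, so emitOpcode produces 0x4C (REX.WR) followed by 0x8B, and the ModRM
// byte 0xC8 follows - three bytes in total: 4C 8B C8.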
553
554 /**
555 * Base class for AMD64 opcodes with immediate operands.
556 */
557 public static class AMD64ImmOp extends AMD64Op {
558
559 private final boolean immIsByte;
560
561 protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
562 super(opcode, 0, prefix, op, assertion, null);
563 this.immIsByte = immIsByte;
564 }
565
566 protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
567 if (immIsByte) {
568 assert imm == (byte) imm;
569 asm.emitByte(imm);
570 } else {
571 size.emitImmediate(asm, imm);
572 }
573 }
574 }
575
576 /**
577 * Opcode with operand order of either RM or MR.
578 */
579 public abstract static class AMD64RROp extends AMD64Op {
580
581 protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
582 super(opcode, prefix1, prefix2, op, assertion, feature);
583 }
584
585 protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
586 super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
587 }
588
589 public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
590 }
591
592 /**
593 * Opcode with operand order of RM.
594 */
595 public static class AMD64RMOp extends AMD64RROp {
596 // @formatter:off
597 public static final AMD64RMOp IMUL = new AMD64RMOp("IMUL", P_0F, 0xAF);
598 public static final AMD64RMOp BSF = new AMD64RMOp("BSF", P_0F, 0xBC);
599 public static final AMD64RMOp BSR = new AMD64RMOp("BSR", P_0F, 0xBD);
600 public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
601 public static final AMD64RMOp TZCNT = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, CPUFeature.BMI1);
602 public static final AMD64RMOp LZCNT = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
603 public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.IntegerAssertion);
604 public static final AMD64RMOp MOVZX = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.No16BitAssertion);
605 public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.IntegerAssertion);
606 public static final AMD64RMOp MOVSX = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.No16BitAssertion);
607 public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordOnlyAssertion);
608 public static final AMD64RMOp MOVB = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion);
609 public static final AMD64RMOp MOV = new AMD64RMOp("MOV", 0x8B);
610
611 // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
612 public static final AMD64RMOp MOVD = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
613 public static final AMD64RMOp MOVQ = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
614 public static final AMD64RMOp MOVSS = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
615 public static final AMD64RMOp MOVSD = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
616
617 // TEST is documented as an MR operation, but it is symmetric, and using it as an RM operation is more convenient.
618 public static final AMD64RMOp TESTB = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion);
619 public static final AMD64RMOp TEST = new AMD64RMOp("TEST", 0x85);
620 // @formatter:on
621
622 protected AMD64RMOp(String opcode, int op) {
623 this(opcode, 0, op);
624 }
625
626 protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
627 this(opcode, 0, op, assertion);
628 }
629
630 protected AMD64RMOp(String opcode, int prefix, int op) {
631 this(opcode, 0, prefix, op, null);
632 }
633
634 protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
635 this(opcode, 0, prefix, op, assertion, null);
636 }
637
638 protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
639 this(opcode, 0, prefix, op, assertion, feature);
640 }
641
642 protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
643 super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
644 }
645
646 protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
647 this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
648 }
649
650 protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
651 super(opcode, prefix1, prefix2, op, assertion, feature);
652 }
653
654 @Override
655 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
656 assert verify(asm, size, dst, src);
657 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
658 asm.emitModRM(dst, src);
659 }
660
661 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
662 assert verify(asm, size, dst, null);
663 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
664 asm.emitOperandHelper(dst, src);
665 }
666 }
667
668 /**
669 * Opcode with operand order of MR.
670 */
671 public static class AMD64MROp extends AMD64RROp {
672 // @formatter:off
673 public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
674 public static final AMD64MROp MOV = new AMD64MROp("MOV", 0x89);
675
676 // MOVD and MOVQ are the same opcode, just with different operand size prefix
677 // Note that as MR opcodes, they have reverse operand order, so the IntToFloatingAssertion must be used.
678 public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
679 public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
680
681 // MOVSS and MOVSD are the same opcode, just with different operand size prefix
682 public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
683 public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
684 // @formatter:on
685
686 protected AMD64MROp(String opcode, int op) {
687 this(opcode, 0, op);
688 }
689
690 protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
691 this(opcode, 0, op, assertion);
692 }
693
694 protected AMD64MROp(String opcode, int prefix, int op) {
695 this(opcode, prefix, op, OpAssertion.IntegerAssertion);
696 }
697
698 protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
699 this(opcode, prefix, op, assertion, null);
700 }
701
702 protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
703 this(opcode, 0, prefix, op, assertion, feature);
704 }
705
706 protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
707 super(opcode, prefix1, prefix2, op, assertion, feature);
708 }
709
710 @Override
711 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
712 assert verify(asm, size, src, dst);
713 emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
714 asm.emitModRM(src, dst);
715 }
716
717 public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
718 assert verify(asm, size, null, src);
719 emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
720 asm.emitOperandHelper(src, dst);
721 }
722 }
723
724 /**
725 * Opcodes with operand order of M.
726 */
727 public static class AMD64MOp extends AMD64Op {
728 // @formatter:off
729 public static final AMD64MOp NOT = new AMD64MOp("NOT", 0xF7, 2);
730 public static final AMD64MOp NEG = new AMD64MOp("NEG", 0xF7, 3);
731 public static final AMD64MOp MUL = new AMD64MOp("MUL", 0xF7, 4);
732 public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
733 public static final AMD64MOp DIV = new AMD64MOp("DIV", 0xF7, 6);
734 public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
735 public static final AMD64MOp INC = new AMD64MOp("INC", 0xFF, 0);
736 public static final AMD64MOp DEC = new AMD64MOp("DEC", 0xFF, 1);
737 // @formatter:on
738
739 private final int ext;
740
741 protected AMD64MOp(String opcode, int op, int ext) {
742 this(opcode, 0, op, ext);
743 }
744
745 protected AMD64MOp(String opcode, int prefix, int op, int ext) {
746 this(opcode, prefix, op, ext, OpAssertion.IntegerAssertion);
747 }
748
749 protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
750 super(opcode, 0, prefix, op, assertion, null);
751 this.ext = ext;
752 }
753
754 public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
755 assert verify(asm, size, dst, null);
756 emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
757 asm.emitModRM(ext, dst);
758 }
759
760 public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
761 assert verify(asm, size, null, null);
762 emitOpcode(asm, size, getRXB(null, dst), 0, 0);
763 asm.emitOperandHelper(ext, dst);
764 }
765 }
766
767 /**
768 * Opcodes with operand order of MI.
769 */
770 public static class AMD64MIOp extends AMD64ImmOp {
771 // @formatter:off
772 public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion);
773 public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0);
774 public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
775 // @formatter:on
776
777 private final int ext;
778
779 protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
780 this(opcode, immIsByte, op, ext, OpAssertion.IntegerAssertion);
781 }
782
783 protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
784 this(opcode, immIsByte, 0, op, ext, assertion);
785 }
786
787 protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
788 super(opcode, immIsByte, prefix, op, assertion);
789 this.ext = ext;
790 }
791
792 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
793 assert verify(asm, size, dst, null);
794 emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
795 asm.emitModRM(ext, dst);
796 emitImmediate(asm, size, imm);
797 }
798
799 public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
800 assert verify(asm, size, null, null);
801 emitOpcode(asm, size, getRXB(null, dst), 0, 0);
802 asm.emitOperandHelper(ext, dst);
803 emitImmediate(asm, size, imm);
804 }
805 }
806
807 /**
808 * Opcodes with operand order of RMI.
809 */
810 public static class AMD64RMIOp extends AMD64ImmOp {
811 // @formatter:off
812 public static final AMD64RMIOp IMUL = new AMD64RMIOp("IMUL", false, 0x69);
813 public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B);
814 // @formatter:on
815
816 protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
817 this(opcode, immIsByte, 0, op, OpAssertion.IntegerAssertion);
818 }
819
820 protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
821 super(opcode, immIsByte, prefix, op, assertion);
822 }
823
824 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
825 assert verify(asm, size, dst, src);
826 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
827 asm.emitModRM(dst, src);
828 emitImmediate(asm, size, imm);
829 }
830
831 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
832 assert verify(asm, size, dst, null);
833 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
834 asm.emitOperandHelper(dst, src);
835 emitImmediate(asm, size, imm);
836 }
837 }
838
839 public static class SSEOp extends AMD64RMOp {
840 // @formatter:off
841 public static final SSEOp CVTSI2SS = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
842 public static final SSEOp CVTSI2SD = new SSEOp("CVTSI2SD", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
843 public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
844 public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
845 public static final SSEOp UCOMIS = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatingAssertion);
846 public static final SSEOp SQRT = new SSEOp("SQRT", P_0F, 0x51);
847 public static final SSEOp AND = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
848 public static final SSEOp ANDN = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
849 public static final SSEOp OR = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
850 public static final SSEOp XOR = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
851 public static final SSEOp ADD = new SSEOp("ADD", P_0F, 0x58);
852 public static final SSEOp MUL = new SSEOp("MUL", P_0F, 0x59);
853 public static final SSEOp CVTSS2SD = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion);
854 public static final SSEOp CVTSD2SS = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion);
855 public static final SSEOp SUB = new SSEOp("SUB", P_0F, 0x5C);
856 public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D);
857 public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E);
858 public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F);
859 // @formatter:on
860
861 protected SSEOp(String opcode, int prefix, int op) {
862 this(opcode, prefix, op, OpAssertion.FloatingAssertion);
863 }
864
865 protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
866 this(opcode, 0, prefix, op, assertion);
867 }
868
869 protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
870 super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
871 }
872 }
873
874 /**
875 * Arithmetic operation with operand order of RM, MR or MI.
876 */
877 public static final class AMD64BinaryArithmetic {
878 // @formatter:off
879 public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
880 public static final AMD64BinaryArithmetic OR = new AMD64BinaryArithmetic("OR", 1);
881 public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
882 public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
883 public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
884 public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
885 public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
886 public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
887 // @formatter:on
888
889 private final AMD64MIOp byteImmOp;
890 private final AMD64MROp byteMrOp;
891 private final AMD64RMOp byteRmOp;
892
893 private final AMD64MIOp immOp;
894 private final AMD64MIOp immSxOp;
895 private final AMD64MROp mrOp;
896 private final AMD64RMOp rmOp;
897
898 private AMD64BinaryArithmetic(String opcode, int code) {
899 int baseOp = code << 3;
900
901 byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
902 byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
903 byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);
904
905 immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.IntegerAssertion);
906 immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.IntegerAssertion);
907 mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.IntegerAssertion);
908 rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.IntegerAssertion);
909 }
910
911 public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
912 if (size == BYTE) {
913 return byteImmOp;
914 } else if (sx) {
915 return immSxOp;
916 } else {
917 return immOp;
918 }
919 }
920
921 public AMD64MROp getMROpcode(OperandSize size) {
922 if (size == BYTE) {
923 return byteMrOp;
924 } else {
925 return mrOp;
926 }
927 }
928
929 public AMD64RMOp getRMOpcode(OperandSize size) {
930 if (size == BYTE) {
931 return byteRmOp;
932 } else {
933 return rmOp;
934 }
935 }
936 }
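// Worked example (illustrative, not part of the original file): SUB has
// code 5, so baseOp = 5 << 3 = 0x28. That yields byte-MR 0x28, MR 0x29,
// byte-RM 0x2A and RM 0x2B, while the immediate forms use 0x80/0x81/0x83
// with 5 in the ModRM reg field - matching the x86 manual's SUB encodings.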
937
938 /**
939 * Shift operation with operand order of M1, MC or MI.
940 */
941 public static final class AMD64Shift {
942 // @formatter:off
943 public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
944 public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
945 public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
946 public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
947 public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
948 public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
949 public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
950 // @formatter:on
951
952 public final AMD64MOp m1Op;
953 public final AMD64MOp mcOp;
954 public final AMD64MIOp miOp;
955
956 private AMD64Shift(String opcode, int code) {
957 m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.IntegerAssertion);
958 mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.IntegerAssertion);
959 miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.IntegerAssertion);
960 }
961 }
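// Worked example (illustrative assumption): SHL has extension code 4, so
// "shl eax, 3" goes through SHL.miOp (0xC1 /4 ib) and emits C1 E0 03,
// where ModRM 0xE0 = [11 100 000] carries ext=4 in the reg field and
// rm = rax.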
962
963 public final void addl(AMD64Address dst, int imm32) {
964 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
965 }
966
967 public final void addl(Register dst, int imm32) {
968 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
969 }
970
971 private void addrNop4() {
972 // 4 bytes: NOP DWORD PTR [EAX+0]
973 emitByte(0x0F);
974 emitByte(0x1F);
975 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
976 emitByte(0); // 8-bits offset (1 byte)
977 }
978
979 private void addrNop5() {
980 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
981 emitByte(0x0F);
982 emitByte(0x1F);
983 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
984 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
985 emitByte(0); // 8-bits offset (1 byte)
986 }
987
988 private void addrNop7() {
989 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
990 emitByte(0x0F);
991 emitByte(0x1F);
992 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
993 emitInt(0); // 32-bits offset (4 bytes)
994 }
995
996 private void addrNop8() {
997 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
998 emitByte(0x0F);
999 emitByte(0x1F);
1000 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
1001 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1002 emitInt(0); // 32-bits offset (4 bytes)
1003 }
1004
1005 public final void andl(Register dst, int imm32) {
1006 AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1007 }
1008
1009 public final void bswapl(Register reg) {
1010 int encode = prefixAndEncode(reg.encoding);
1011 emitByte(0x0F);
1012 emitByte(0xC8 | encode);
1013 }
1014
1015 public final void cdql() {
1016 emitByte(0x99);
1017 }
1018
1019 public final void cmovl(ConditionFlag cc, Register dst, Register src) {
1020 int encode = prefixAndEncode(dst.encoding, src.encoding);
1021 emitByte(0x0F);
1022 emitByte(0x40 | cc.getValue());
1023 emitByte(0xC0 | encode);
1024 }
1025
1026 public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
1027 prefix(src, dst);
1028 emitByte(0x0F);
1029 emitByte(0x40 | cc.getValue());
1030 emitOperandHelper(dst, src);
1031 }
1032
1033 public final void cmpl(Register dst, int imm32) {
1034 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1035 }
1036
1037 public final void cmpl(Register dst, Register src) {
1038 CMP.rmOp.emit(this, DWORD, dst, src);
1039 }
1040
1041 public final void cmpl(Register dst, AMD64Address src) {
1042 CMP.rmOp.emit(this, DWORD, dst, src);
1043 }
1044
1045 public final void cmpl(AMD64Address dst, int imm32) {
1046 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1047 }
1048
1049 // The 32-bit cmpxchg compares the value at adr with the contents of rax; if they are equal,
1050 // reg is stored into adr, otherwise the value at adr is loaded into rax.
1051 // The ZF is set if the compared values were equal, and cleared otherwise.
1052 public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
1053 prefix(adr, reg);
1054 emitByte(0x0F);
1055 emitByte(0xB1);
1056 emitOperandHelper(reg, adr);
1057 }
1058
1059 protected final void decl(AMD64Address dst) {
1060 prefix(dst);
1061 emitByte(0xFF);
1062 emitOperandHelper(1, dst);
1063 }
1064
1065 public final void hlt() {
1066 emitByte(0xF4);
1067 }
1068
1069 public final void imull(Register dst, Register src, int value) {
1070 if (isByte(value)) {
1071 AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
1072 } else {
1073 AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
1074 }
1075 }
1076
1077 protected final void incl(AMD64Address dst) {
1078 prefix(dst);
1079 emitByte(0xFF);
1080 emitOperandHelper(0, dst);
1081 }
1082
1083 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
1084 int shortSize = 2;
1085 int longSize = 6;
1086 long disp = jumpTarget - position();
1087 if (!forceDisp32 && isByte(disp - shortSize)) {
1088 // 0111 tttn #8-bit disp
1089 emitByte(0x70 | cc.getValue());
1090 emitByte((int) ((disp - shortSize) & 0xFF));
1091 } else {
1092 // 0000 1111 1000 tttn #32-bit disp
1093 assert isInt(disp - longSize) : "must be 32bit offset (call4)";
1094 emitByte(0x0F);
1095 emitByte(0x80 | cc.getValue());
1096 emitInt((int) (disp - longSize));
1097 }
1098 }
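// Worked example (illustrative assumption, not in the original file): a
// backward branch to a target 10 bytes before the current position has
// disp = -10; disp - shortSize = -12 fits in a byte, so the two-byte form
// is chosen and jcc(Equal, ...) emits 74 F4.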
1099
1100 public final void jcc(ConditionFlag cc, Label l) {
1101 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
1102 if (l.isBound()) {
1103 jcc(cc, l.position(), false);
1104 } else {
1105 // Note: conditional jumps to this jump could be eliminated if the condition
1106 // is the same; however, that seems to be a rather unlikely case.
1107 // Note: use jccb() if the label to be bound is very close, to get
1108 // an 8-bit displacement.
1109 l.addPatchAt(position());
1110 emitByte(0x0F);
1111 emitByte(0x80 | cc.getValue());
1112 emitInt(0);
1113 }
1114
1115 }
1116
1117 public final void jccb(ConditionFlag cc, Label l) {
1118 if (l.isBound()) {
1119 int shortSize = 2;
1120 int entry = l.position();
1121 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
1122 long disp = entry - position();
1123 // 0111 tttn #8-bit disp
1124 emitByte(0x70 | cc.getValue());
1125 emitByte((int) ((disp - shortSize) & 0xFF));
1126 } else {
1127 l.addPatchAt(position());
1128 emitByte(0x70 | cc.getValue());
1129 emitByte(0);
1130 }
1131 }
1132
1133 public final void jmp(int jumpTarget, boolean forceDisp32) {
1134 int shortSize = 2;
1135 int longSize = 5;
1136 long disp = jumpTarget - position();
1137 if (!forceDisp32 && isByte(disp - shortSize)) {
1138 emitByte(0xEB);
1139 emitByte((int) ((disp - shortSize) & 0xFF));
1140 } else {
1141 emitByte(0xE9);
1142 emitInt((int) (disp - longSize));
1143 }
1144 }
1145
1146 @Override
1147 public final void jmp(Label l) {
1148 if (l.isBound()) {
1149 jmp(l.position(), false);
1150 } else {
1151 // By default, forward jumps are always 32-bit displacements, since
1152 // we can't yet know where the label will be bound. If you're sure that
1153 // the forward jump will not run beyond 127 bytes, use jmpb to
1154 // force an 8-bit displacement.
1155
1156 l.addPatchAt(position());
1157 emitByte(0xE9);
1158 emitInt(0);
1159 }
1160 }
1161
1162 public final void jmp(Register entry) {
1163 int encode = prefixAndEncode(entry.encoding);
1164 emitByte(0xFF);
1165 emitByte(0xE0 | encode);
1166 }
1167
1168 public final void jmpb(Label l) {
1169 if (l.isBound()) {
1170 int shortSize = 2;
1171 int entry = l.position();
1172 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
1173 long offs = entry - position();
1174 emitByte(0xEB);
1175 emitByte((int) ((offs - shortSize) & 0xFF));
1176 } else {
1177
1178 l.addPatchAt(position());
1179 emitByte(0xEB);
1180 emitByte(0);
1181 }
1182 }
1183
1184 public final void leaq(Register dst, AMD64Address src) {
1185 prefixq(src, dst);
1186 emitByte(0x8D);
1187 emitOperandHelper(dst, src);
1188 }
1189
1190 public final void leave() {
1191 emitByte(0xC9);
1192 }
1193
1194 public final void lock() {
1195 emitByte(0xF0);
1196 }
1197
1198 public final void movapd(Register dst, Register src) {
1199 assert dst.getRegisterCategory().equals(AMD64.XMM);
1200 assert src.getRegisterCategory().equals(AMD64.XMM);
1201 int dstenc = dst.encoding;
1202 int srcenc = src.encoding;
1203 emitByte(0x66);
1204 if (dstenc < 8) {
1205 if (srcenc >= 8) {
1206 emitByte(Prefix.REXB);
1207 srcenc -= 8;
1208 }
1209 } else {
1210 if (srcenc < 8) {
1211 emitByte(Prefix.REXR);
1212 } else {
1213 emitByte(Prefix.REXRB);
1214 srcenc -= 8;
1215 }
1216 dstenc -= 8;
1217 }
1218 emitByte(0x0F);
1219 emitByte(0x28);
1220 emitByte(0xC0 | dstenc << 3 | srcenc);
1221 }
1222
1223 public final void movaps(Register dst, Register src) {
1224 assert dst.getRegisterCategory().equals(AMD64.XMM);
1225 assert src.getRegisterCategory().equals(AMD64.XMM);
1226 int dstenc = dst.encoding;
1227 int srcenc = src.encoding;
1228 if (dstenc < 8) {
1229 if (srcenc >= 8) {
1230 emitByte(Prefix.REXB);
1231 srcenc -= 8;
1232 }
1233 } else {
1234 if (srcenc < 8) {
1235 emitByte(Prefix.REXR);
1236 } else {
1237 emitByte(Prefix.REXRB);
1238 srcenc -= 8;
1239 }
1240 dstenc -= 8;
1241 }
1242 emitByte(0x0F);
1243 emitByte(0x28);
1244 emitByte(0xC0 | dstenc << 3 | srcenc);
1245 }
1246
1247 public final void movb(AMD64Address dst, int imm8) {
1248 prefix(dst);
1249 emitByte(0xC6);
1250 emitOperandHelper(0, dst);
1251 emitByte(imm8);
1252 }
1253
1254 public final void movb(AMD64Address dst, Register src) {
1255 assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
1256 prefix(dst, src, true);
1257 emitByte(0x88);
1258 emitOperandHelper(src, dst);
1259 }
1260
1261 public final void movl(Register dst, int imm32) {
1262 int encode = prefixAndEncode(dst.encoding);
1263 emitByte(0xB8 | encode);
1264 emitInt(imm32);
1265 }
1266
1267 public final void movl(Register dst, Register src) {
1268 int encode = prefixAndEncode(dst.encoding, src.encoding);
1269 emitByte(0x8B);
1270 emitByte(0xC0 | encode);
1271 }
1272
1273 public final void movl(Register dst, AMD64Address src) {
1274 prefix(src, dst);
1275 emitByte(0x8B);
1276 emitOperandHelper(dst, src);
1277 }
1278
1279 public final void movl(AMD64Address dst, int imm32) {
1280 prefix(dst);
1281 emitByte(0xC7);
1282 emitOperandHelper(0, dst);
1283 emitInt(imm32);
1284 }
1285
1286 public final void movl(AMD64Address dst, Register src) {
1287 prefix(dst, src);
1288 emitByte(0x89);
1289 emitOperandHelper(src, dst);
1290 }
1291
1292 /**
1293 * Newer CPUs require the use of movsd and movss to avoid a partial register stall when loading
1294 * from memory. But for old Opteron, movlpd is used instead of movsd. The selection is done in
1295 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
1296 * {@link AMD64MacroAssembler#movflt(Register, Register)}.
1297 */
1298 public final void movlpd(Register dst, AMD64Address src) {
1299 assert dst.getRegisterCategory().equals(AMD64.XMM);
1300 emitByte(0x66);
1301 prefix(src, dst);
1302 emitByte(0x0F);
1303 emitByte(0x12);
1304 emitOperandHelper(dst, src);
1305 }
1306
1307 public final void movq(Register dst, AMD64Address src) {
1308 if (dst.getRegisterCategory().equals(AMD64.XMM)) {
1309 emitByte(0xF3);
1310 prefixq(src, dst);
1311 emitByte(0x0F);
1312 emitByte(0x7E);
1313 emitOperandHelper(dst, src);
1314 } else {
1315 prefixq(src, dst);
1316 emitByte(0x8B);
1317 emitOperandHelper(dst, src);
1318 }
1319 }
1320
1321 public final void movq(Register dst, Register src) {
1322 int encode = prefixqAndEncode(dst.encoding, src.encoding);
1323 emitByte(0x8B);
1324 emitByte(0xC0 | encode);
1325 }
1326
1327 public final void movq(AMD64Address dst, Register src) {
1328 if (src.getRegisterCategory().equals(AMD64.XMM)) {
1329 emitByte(0x66);
1330 prefixq(dst, src);
1331 emitByte(0x0F);
1332 emitByte(0xD6);
1333 emitOperandHelper(src, dst);
1334 } else {
1335 prefixq(dst, src);
1336 emitByte(0x89);
1337 emitOperandHelper(src, dst);
1338 }
1339 }
1340
1341 public final void movsbl(Register dst, AMD64Address src) {
1342 prefix(src, dst);
1343 emitByte(0x0F);
1344 emitByte(0xBE);
1345 emitOperandHelper(dst, src);
1346 }
1347
1348 public final void movsbl(Register dst, Register src) {
1349 int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
1350 emitByte(0x0F);
1351 emitByte(0xBE);
1352 emitByte(0xC0 | encode);
1353 }
1354
1355 public final void movsbq(Register dst, AMD64Address src) {
1356 prefixq(src, dst);
1357 emitByte(0x0F);
1358 emitByte(0xBE);
1359 emitOperandHelper(dst, src);
1360 }
1361
1362 public final void movsbq(Register dst, Register src) {
1363 int encode = prefixqAndEncode(dst.encoding, src.encoding);
1364 emitByte(0x0F);
1365 emitByte(0xBE);
1366 emitByte(0xC0 | encode);
1367 }
1368
1369 public final void movsd(Register dst, Register src) {
1370 assert dst.getRegisterCategory().equals(AMD64.XMM);
1371 assert src.getRegisterCategory().equals(AMD64.XMM);
1372 emitByte(0xF2);
1373 int encode = prefixAndEncode(dst.encoding, src.encoding);
1374 emitByte(0x0F);
1375 emitByte(0x10);
1376 emitByte(0xC0 | encode);
1377 }
1378
1379 public final void movsd(Register dst, AMD64Address src) {
1380 assert dst.getRegisterCategory().equals(AMD64.XMM);
1381 emitByte(0xF2);
1382 prefix(src, dst);
1383 emitByte(0x0F);
1384 emitByte(0x10);
1385 emitOperandHelper(dst, src);
1386 }
1387
1388 public final void movsd(AMD64Address dst, Register src) {
1389 assert src.getRegisterCategory().equals(AMD64.XMM);
1390 emitByte(0xF2);
1391 prefix(dst, src);
1392 emitByte(0x0F);
1393 emitByte(0x11);
1394 emitOperandHelper(src, dst);
1395 }
1396
1397 public final void movss(Register dst, Register src) {
1398 assert dst.getRegisterCategory().equals(AMD64.XMM);
1399 assert src.getRegisterCategory().equals(AMD64.XMM);
1400 emitByte(0xF3);
1401 int encode = prefixAndEncode(dst.encoding, src.encoding);
1402 emitByte(0x0F);
1403 emitByte(0x10);
1404 emitByte(0xC0 | encode);
1405 }
1406
1407 public final void movss(Register dst, AMD64Address src) {
1408 assert dst.getRegisterCategory().equals(AMD64.XMM);
1409 emitByte(0xF3);
1410 prefix(src, dst);
1411 emitByte(0x0F);
1412 emitByte(0x10);
1413 emitOperandHelper(dst, src);
1414 }
1415
1416 public final void movss(AMD64Address dst, Register src) {
1417 assert src.getRegisterCategory().equals(AMD64.XMM);
1418 emitByte(0xF3);
1419 prefix(dst, src);
1420 emitByte(0x0F);
1421 emitByte(0x11);
1422 emitOperandHelper(src, dst);
1423 }
1424
1425 public final void movswl(Register dst, AMD64Address src) {
1426 prefix(src, dst);
1427 emitByte(0x0F);
1428 emitByte(0xBF);
1429 emitOperandHelper(dst, src);
1430 }
1431
1432 public final void movw(AMD64Address dst, int imm16) {
1433 emitByte(0x66); // operand-size override prefix (16-bit operand)
1434 prefix(dst);
1435 emitByte(0xC7);
1436 emitOperandHelper(0, dst);
1437 emitShort(imm16);
1438 }
1439
1440 public final void movw(AMD64Address dst, Register src) {
1441 emitByte(0x66);
1442 prefix(dst, src);
1443 emitByte(0x89);
1444 emitOperandHelper(src, dst);
1445 }
1446
1447 public final void movzbl(Register dst, AMD64Address src) {
1448 prefix(src, dst);
1449 emitByte(0x0F);
1450 emitByte(0xB6);
1451 emitOperandHelper(dst, src);
1452 }
1453
1454 public final void movzwl(Register dst, AMD64Address src) {
1455 prefix(src, dst);
1456 emitByte(0x0F);
1457 emitByte(0xB7);
1458 emitOperandHelper(dst, src);
1459 }
1460
1461 @Override
1462 public final void ensureUniquePC() {
1463 nop();
1464 }
1465
1466 public final void nop() {
1467 nop(1);
1468 }
1469
1470 public void nop(int count) {
1471 int i = count;
1472 if (UseNormalNop) {
1473 assert i > 0 : " ";
1474 // The fancy nops aren't currently recognized by debuggers, making it a
1475 // pain to disassemble code while debugging. If asserts are on, speed is
1476 // clearly not an issue, so simply use the traditional single-byte nop
1477 // to do alignment.
1478
1479 for (; i > 0; i--) {
1480 emitByte(0x90);
1481 }
1482 return;
1483 }
1484
1485 if (UseAddressNop) {
1486 //
1487 // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
1488 // 1: 0x90
1489 // 2: 0x66 0x90
1490 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1491 // 4: 0x0F 0x1F 0x40 0x00
1492 // 5: 0x0F 0x1F 0x44 0x00 0x00
1493 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1494 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1495 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1496 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1497 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1498 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1499
1500 // The rest of the encoding is AMD-specific - use consecutive address nops
1501
1502 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
1503 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
1504 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1505 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1506 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1507 // Size prefixes (0x66) are added for larger sizes
1508
1509 while (i >= 22) {
1510 i -= 11;
1511 emitByte(0x66); // size prefix
1512 emitByte(0x66); // size prefix
1513 emitByte(0x66); // size prefix
1514 addrNop8();
1515 }
1516 // Generate the first nop for sizes between 12 and 21
1517 switch (i) {
1518 case 21:
1519 i -= 1;
1520 emitByte(0x66); // size prefix
1521 // fall through
1522 case 20:
1523 // fall through
1524 case 19:
1525 i -= 1;
1526 emitByte(0x66); // size prefix
1527 // fall through
1528 case 18:
1529 // fall through
1530 case 17:
1531 i -= 1;
1532 emitByte(0x66); // size prefix
1533 // fall through
1534 case 16:
1535 // fall through
1536 case 15:
1537 i -= 8;
1538 addrNop8();
1539 break;
1540 case 14:
1541 case 13:
1542 i -= 7;
1543 addrNop7();
1544 break;
1545 case 12:
1546 i -= 6;
1547 emitByte(0x66); // size prefix
1548 addrNop5();
1549 break;
1550 default:
1551 assert i < 12;
1552 }
1553
1554 // Generate the second nop for sizes between 1 and 11
1555 switch (i) {
1556 case 11:
1557 emitByte(0x66); // size prefix
1558 emitByte(0x66); // size prefix
1559 emitByte(0x66); // size prefix
1560 addrNop8();
1561 break;
1562 case 10:
1563 emitByte(0x66); // size prefix
1564 emitByte(0x66); // size prefix
1565 addrNop8();
1566 break;
1567 case 9:
1568 emitByte(0x66); // size prefix
1569 addrNop8();
1570 break;
1571 case 8:
1572 addrNop8();
1573 break;
1574 case 7:
1575 addrNop7();
1576 break;
1577 case 6:
1578 emitByte(0x66); // size prefix
1579 addrNop5();
1580 break;
1581 case 5:
1582 addrNop5();
1583 break;
1584 case 4:
1585 addrNop4();
1586 break;
1587 case 3:
1588 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
1589 emitByte(0x66); // size prefix
1590 emitByte(0x66); // size prefix
1591 emitByte(0x90); // nop
1592 break;
1593 case 2:
1594 emitByte(0x66); // size prefix
1595 emitByte(0x90); // nop
1596 break;
1597 case 1:
1598 emitByte(0x90); // nop
1599 break;
1600 default:
1601 assert i == 0;
1602 }
1603 return;
1604 }
1605
1606 // Using nops with size prefixes "0x66 0x90".
1607 // From AMD Optimization Guide:
1608 // 1: 0x90
1609 // 2: 0x66 0x90
1610 // 3: 0x66 0x66 0x90
1611 // 4: 0x66 0x66 0x66 0x90
1612 // 5: 0x66 0x66 0x90 0x66 0x90
1613 // 6: 0x66 0x66 0x90 0x66 0x66 0x90
1614 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
1615 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
1616 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
1617 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
1618 //
1619 while (i > 12) {
1620 i -= 4;
1621 emitByte(0x66); // size prefix
1622 emitByte(0x66);
1623 emitByte(0x66);
1624 emitByte(0x90); // nop
1625 }
1626 // 1 - 12 nops
1627 if (i > 8) {
1628 if (i > 9) {
1629 i -= 1;
1630 emitByte(0x66);
1631 }
1632 i -= 3;
1633 emitByte(0x66);
1634 emitByte(0x66);
1635 emitByte(0x90);
1636 }
1637 // 1 - 8 nops
1638 if (i > 4) {
1639 if (i > 6) {
1640 i -= 1;
1641 emitByte(0x66);
1642 }
1643 i -= 3;
1644 emitByte(0x66);
1645 emitByte(0x66);
1646 emitByte(0x90);
1647 }
1648 switch (i) {
1649 case 4:
1650 emitByte(0x66);
1651 emitByte(0x66);
1652 emitByte(0x66);
1653 emitByte(0x90);
1654 break;
1655 case 3:
1656 emitByte(0x66);
1657 emitByte(0x66);
1658 emitByte(0x90);
1659 break;
1660 case 2:
1661 emitByte(0x66);
1662 emitByte(0x90);
1663 break;
1664 case 1:
1665 emitByte(0x90);
1666 break;
1667 default:
1668 assert i == 0;
1669 }
1670 }
1671
1672 public final void pop(Register dst) {
1673 int encode = prefixAndEncode(dst.encoding);
1674 emitByte(0x58 | encode);
1675 }
1676
1677 public void popfq() {
1678 emitByte(0x9D);
1679 }
1680
1681 public final void ptest(Register dst, Register src) {
1682 assert supports(CPUFeature.SSE4_1);
1683 emitByte(0x66);
1684 int encode = prefixAndEncode(dst.encoding, src.encoding);
1685 emitByte(0x0F);
1686 emitByte(0x38);
1687 emitByte(0x17);
1688 emitByte(0xC0 | encode);
1689 }
1690
1691 public final void push(Register src) {
1692 int encode = prefixAndEncode(src.encoding);
1693 emitByte(0x50 | encode);
1694 }
1695
1696 public void pushfq() {
1697 emitByte(0x9c);
1698 }
1699
1700 public final void pxor(Register dst, Register src) {
1701 emitByte(0x66);
1702 int encode = prefixAndEncode(dst.encoding, src.encoding);
1703 emitByte(0x0F);
1704 emitByte(0xEF);
1705 emitByte(0xC0 | encode);
1706 }
1707
1708 public final void ret(int imm16) {
1709 if (imm16 == 0) {
1710 emitByte(0xC3);
1711 } else {
1712 emitByte(0xC2);
1713 emitShort(imm16);
1714 }
1715 }
1716
1717 public final void subl(AMD64Address dst, int imm32) {
1718 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1719 }
1720
1721 public final void subl(Register dst, int imm32) {
1722 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1723 }
1724
1725 public final void testl(Register dst, int imm32) {
1726 // Not using emitArith because test
1727 // does not support sign-extension of
1728 // 8-bit operands.
1729 int encode = dst.encoding;
1730 if (encode == 0) {
1731 emitByte(0xA9);
1732 } else {
1733 encode = prefixAndEncode(encode);
1734 emitByte(0xF7);
1735 emitByte(0xC0 | encode);
1736 }
1737 emitInt(imm32);
1738 }
1739
1740 public final void testl(Register dst, Register src) {
1741 int encode = prefixAndEncode(dst.encoding, src.encoding);
1742 emitByte(0x85);
1743 emitByte(0xC0 | encode);
1744 }
1745
1746 public final void testl(Register dst, AMD64Address src) {
1747 prefix(src, dst);
1748 emitByte(0x85);
1749 emitOperandHelper(dst, src);
1750 }
1751
1752 public final void xorl(Register dst, Register src) {
1753 XOR.rmOp.emit(this, DWORD, dst, src);
1754 }
1755
1756 public final void xorpd(Register dst, Register src) {
1757 emitByte(0x66);
1758 xorps(dst, src);
1759 }
1760
1761 public final void xorps(Register dst, Register src) {
1762 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1763 int encode = prefixAndEncode(dst.encoding, src.encoding);
1764 emitByte(0x0F);
1765 emitByte(0x57);
1766 emitByte(0xC0 | encode);
1767 }
1768
1769 protected final void decl(Register dst) {
1770 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
1771 int encode = prefixAndEncode(dst.encoding);
1772 emitByte(0xFF);
1773 emitByte(0xC8 | encode);
1774 }
1775
1776 protected final void incl(Register dst) {
1777 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
1778 int encode = prefixAndEncode(dst.encoding);
1779 emitByte(0xFF);
1780 emitByte(0xC0 | encode);
1781 }
1782
1783 private int prefixAndEncode(int regEnc) {
1784 return prefixAndEncode(regEnc, false);
1785 }
1786
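// Note on byteinst below: without a REX prefix, byte-register encodings 4-7
// address AH, CH, DH and BH; emitting a plain REX prefix (0x40) makes the same
// encodings address SPL, BPL, SIL and DIL instead.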
1787 private int prefixAndEncode(int regEnc, boolean byteinst) {
1788 if (regEnc >= 8) {
1789 emitByte(Prefix.REXB);
1790 return regEnc - 8;
1791 } else if (byteinst && regEnc >= 4) {
1792 emitByte(Prefix.REX);
1793 }
1794 return regEnc;
1795 }
1796
1797 private int prefixqAndEncode(int regEnc) {
1798 if (regEnc < 8) {
1799 emitByte(Prefix.REXW);
1800 return regEnc;
1801 } else {
1802 emitByte(Prefix.REXWB);
1803 return regEnc - 8;
1804 }
1805 }
1806
1807 private int prefixAndEncode(int dstEnc, int srcEnc) {
1808 return prefixAndEncode(dstEnc, false, srcEnc, false);
1809 }
1810
1811 private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) {
1812 int srcEnc = srcEncoding;
1813 int dstEnc = dstEncoding;
1814 if (dstEnc < 8) {
1815 if (srcEnc >= 8) {
1816 emitByte(Prefix.REXB);
1817 srcEnc -= 8;
1818 } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
1819 emitByte(Prefix.REX);
1820 }
1821 } else {
1822 if (srcEnc < 8) {
1823 emitByte(Prefix.REXR);
1824 } else {
1825 emitByte(Prefix.REXRB);
1826 srcEnc -= 8;
1827 }
1828 dstEnc -= 8;
1829 }
1830 return dstEnc << 3 | srcEnc;
1831 }
1832
1833 /**
1834 * Emits the REX prefix and returns the encoding of the lower 6 bits of the ModRM byte. If a
1835 * register encoding does not fit into 3 bits, its fourth bit is carried by the prefix.
1836 *
1837 * @param regEncoding the encoding of the register part of the ModRM byte
1838 * @param rmEncoding the encoding of the r/m part of the ModRM byte
1839 * @return the lower 6 bits of the ModRM byte that should be emitted
1840 */
1841 private int prefixqAndEncode(int regEncoding, int rmEncoding) {
1842 int rmEnc = rmEncoding;
1843 int regEnc = regEncoding;
1844 if (regEnc < 8) {
1845 if (rmEnc < 8) {
1846 emitByte(Prefix.REXW);
1847 } else {
1848 emitByte(Prefix.REXWB);
1849 rmEnc -= 8;
1850 }
1851 } else {
1852 if (rmEnc < 8) {
1853 emitByte(Prefix.REXWR);
1854 } else {
1855 emitByte(Prefix.REXWRB);
1856 rmEnc -= 8;
1857 }
1858 regEnc -= 8;
1859 }
1860 return regEnc << 3 | rmEnc;
1861 }
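// Worked example (illustrative): prefixqAndEncode(0, 9), as used by
// testq(rax, r9), emits REX.WB (0x49), reduces the r/m encoding to 1 and
// returns 0b000001; the caller ORs that into the ModRM byte, so testq(rax, r9)
// assembles to 49 85 C1.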
1862
1863 private static boolean needsRex(Register reg) {
1864 return reg.encoding >= MinEncodingNeedsRex;
1865 }
1866
1867 private void prefix(AMD64Address adr) {
1868 if (needsRex(adr.getBase())) {
1869 if (needsRex(adr.getIndex())) {
1870 emitByte(Prefix.REXXB);
1871 } else {
1872 emitByte(Prefix.REXB);
1873 }
1874 } else {
1875 if (needsRex(adr.getIndex())) {
1876 emitByte(Prefix.REXX);
1877 }
1878 }
1879 }
1880
1881 private void prefixq(AMD64Address adr) {
1882 if (needsRex(adr.getBase())) {
1883 if (needsRex(adr.getIndex())) {
1884 emitByte(Prefix.REXWXB);
1885 } else {
1886 emitByte(Prefix.REXWB);
1887 }
1888 } else {
1889 if (needsRex(adr.getIndex())) {
1890 emitByte(Prefix.REXWX);
1891 } else {
1892 emitByte(Prefix.REXW);
1893 }
1894 }
1895 }
1896
1897 private void prefix(AMD64Address adr, Register reg) {
1898 prefix(adr, reg, false);
1899 }
1900
1901 private void prefix(AMD64Address adr, Register reg, boolean byteinst) {
1902 if (reg.encoding < 8) {
1903 if (needsRex(adr.getBase())) {
1904 if (needsRex(adr.getIndex())) {
1905 emitByte(Prefix.REXXB);
1906 } else {
1907 emitByte(Prefix.REXB);
1908 }
1909 } else {
1910 if (needsRex(adr.getIndex())) {
1911 emitByte(Prefix.REXX);
1912 } else if (byteinst && reg.encoding >= 4) {
1913 emitByte(Prefix.REX);
1914 }
1915 }
1916 } else {
1917 if (needsRex(adr.getBase())) {
1918 if (needsRex(adr.getIndex())) {
1919 emitByte(Prefix.REXRXB);
1920 } else {
1921 emitByte(Prefix.REXRB);
1922 }
1923 } else {
1924 if (needsRex(adr.getIndex())) {
1925 emitByte(Prefix.REXRX);
1926 } else {
1927 emitByte(Prefix.REXR);
1928 }
1929 }
1930 }
1931 }
1932
1933 private void prefixq(AMD64Address adr, Register src) {
1934 if (src.encoding < 8) {
1935 if (needsRex(adr.getBase())) {
1936 if (needsRex(adr.getIndex())) {
1937 emitByte(Prefix.REXWXB);
1938 } else {
1939 emitByte(Prefix.REXWB);
1940 }
1941 } else {
1942 if (needsRex(adr.getIndex())) {
1943 emitByte(Prefix.REXWX);
1944 } else {
1945 emitByte(Prefix.REXW);
1946 }
1947 }
1948 } else {
1949 if (needsRex(adr.getBase())) {
1950 if (needsRex(adr.getIndex())) {
1951 emitByte(Prefix.REXWRXB);
1952 } else {
1953 emitByte(Prefix.REXWRB);
1954 }
1955 } else {
1956 if (needsRex(adr.getIndex())) {
1957 emitByte(Prefix.REXWRX);
1958 } else {
1959 emitByte(Prefix.REXWR);
1960 }
1961 }
1962 }
1963 }
1964
1965 public final void addq(Register dst, int imm32) {
1966 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
1967 }
1968
1969 public final void addq(AMD64Address dst, int imm32) {
1970 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
1971 }
1972
1973 public final void addq(Register dst, Register src) {
1974 ADD.rmOp.emit(this, QWORD, dst, src);
1975 }
1976
1977 public final void addq(AMD64Address dst, Register src) {
1978 ADD.mrOp.emit(this, QWORD, dst, src);
1979 }
1980
1981 public final void andq(Register dst, int imm32) {
1982 AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
1983 }
1984
1985 public final void bswapq(Register reg) {
1986 int encode = prefixqAndEncode(reg.encoding);
1987 emitByte(0x0F);
1988 emitByte(0xC8 | encode);
1989 }
1990
1991 public final void cdqq() {
1992 emitByte(Prefix.REXW);
1993 emitByte(0x99);
1994 }
1995
1996 public final void cmovq(ConditionFlag cc, Register dst, Register src) {
1997 int encode = prefixqAndEncode(dst.encoding, src.encoding);
1998 emitByte(0x0F);
1999 emitByte(0x40 | cc.getValue());
2000 emitByte(0xC0 | encode);
2001 }
2002
2003 public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
2004 prefixq(src, dst);
2005 emitByte(0x0F);
2006 emitByte(0x40 | cc.getValue());
2007 emitOperandHelper(dst, src);
2008 }
2009
2010 public final void cmpq(Register dst, int imm32) {
2011 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2012 }
2013
2014 public final void cmpq(Register dst, Register src) {
2015 CMP.rmOp.emit(this, QWORD, dst, src);
2016 }
2017
2018 public final void cmpq(Register dst, AMD64Address src) {
2019 CMP.rmOp.emit(this, QWORD, dst, src);
2020 }
2021
2022 public final void cmpxchgq(Register reg, AMD64Address adr) {
2023 prefixq(adr, reg);
2024 emitByte(0x0F);
2025 emitByte(0xB1);
2026 emitOperandHelper(reg, adr);
2027 }
2028
2029 protected final void decq(Register dst) {
2030 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2031 int encode = prefixqAndEncode(dst.encoding);
2032 emitByte(0xFF);
2033 emitByte(0xC8 | encode);
2034 }
2035
2036 public final void decq(AMD64Address dst) {
2037 DEC.emit(this, QWORD, dst);
2038 }
2039
2040 public final void incq(Register dst) {
2041 // Don't use this directly; use the macro assembler's incrementq() instead.
2042 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2043 int encode = prefixqAndEncode(dst.encoding);
2044 emitByte(0xFF);
2045 emitByte(0xC0 | encode);
2046 }
2047
2048 public final void incq(AMD64Address dst) {
2049 INC.emit(this, QWORD, dst);
2050 }
2051
2052 public final void movq(Register dst, long imm64) {
2053 int encode = prefixqAndEncode(dst.encoding);
2054 emitByte(0xB8 | encode);
2055 emitLong(imm64);
2056 }
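// Encoding sketch (illustrative): movq(rax, imm64) emits REX.W (0x48), then
// B8 | reg, then the eight immediate bytes in little-endian order. The
// movslq(Register, int) variant below instead emits C7 /0 with a 32-bit
// immediate that the CPU sign-extends to 64 bits.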
2057
2058 public final void movslq(Register dst, int imm32) {
2059 int encode = prefixqAndEncode(dst.encoding);
2060 emitByte(0xC7);
2061 emitByte(0xC0 | encode);
2062 emitInt(imm32);
2063 }
2064
2065 public final void movdq(Register dst, Register src) {
2066
2067 // the 0x66 prefix selects the SSE2 (XMM) form; table D-1 lists movd/movq under MMX/SSE2
2068 emitByte(0x66);
2069
2070 if (dst.getRegisterCategory().equals(AMD64.XMM)) {
2071 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2072 emitByte(0x0F);
2073 emitByte(0x6E);
2074 emitByte(0xC0 | encode);
2075 } else if (src.getRegisterCategory().equals(AMD64.XMM)) {
2076
2077 // swap src/dst to get correct prefix
2078 int encode = prefixqAndEncode(src.encoding, dst.encoding);
2079 emitByte(0x0F);
2080 emitByte(0x7E);
2081 emitByte(0xC0 | encode);
2082 } else {
2083 throw new InternalError("should not reach here");
2084 }
2085 }
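// Worked example (illustrative): movdq(xmm0, rax) assembles to 66 48 0F 6E C0
// and the reverse movdq(rax, xmm0) to 66 48 0F 7E C0; the 0x66 prefix selects
// the XMM form and REX.W the 64-bit general-purpose operand.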
2086
2087 public final void movdqu(Register dst, AMD64Address src) {
2088 emitByte(0xF3);
2089 prefix(src, dst);
2090 emitByte(0x0F);
2091 emitByte(0x6F);
2092 emitOperandHelper(dst, src);
2093 }
2094
2095 public final void movslq(AMD64Address dst, int imm32) {
2096 prefixq(dst);
2097 emitByte(0xC7);
2098 emitOperandHelper(0, dst);
2099 emitInt(imm32);
2100 }
2101
2102 public final void movslq(Register dst, AMD64Address src) {
2103 prefixq(src, dst);
2104 emitByte(0x63);
2105 emitOperandHelper(dst, src);
2106 }
2107
2108 public final void movslq(Register dst, Register src) {
2109 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2110 emitByte(0x63);
2111 emitByte(0xC0 | encode);
2112 }
2113
2114 public final void negq(Register dst) {
2115 int encode = prefixqAndEncode(dst.encoding);
2116 emitByte(0xF7);
2117 emitByte(0xD8 | encode);
2118 }
2119
2120 public final void shlq(Register dst, int imm8) {
2121 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2122 int encode = prefixqAndEncode(dst.encoding);
2123 if (imm8 == 1) {
2124 emitByte(0xD1);
2125 emitByte(0xE0 | encode);
2126 } else {
2127 emitByte(0xC1);
2128 emitByte(0xE0 | encode);
2129 emitByte(imm8);
2130 }
2131 }
2132
2133 public final void shrq(Register dst, int imm8) {
2134 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2135 int encode = prefixqAndEncode(dst.encoding);
2136 if (imm8 == 1) {
2137 emitByte(0xD1);
2138 emitByte(0xE8 | encode);
2139 } else {
2140 emitByte(0xC1);
2141 emitByte(0xE8 | encode);
2142 emitByte(imm8);
2143 }
2144 }
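// Encoding sketch (illustrative): the shift-by-one form uses opcode D1, so
// shlq(rax, 1) emits 48 D1 E0, while shlq(rax, 5) uses C1 /4 ib and emits
// 48 C1 E0 05. The asserts above check imm8 >> 1, presumably because 64-bit
// shifts allow counts up to 63, twice the 32-bit range that isShiftCount()
// validates.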
2145
2146 public final void subq(Register dst, int imm32) {
2147 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2148 }
2149
2150 public final void subq(AMD64Address dst, int imm32) {
2151 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2152 }
2153
2154 public final void subqWide(Register dst, int imm32) {
2155 // don't use the sign-extending version, forcing a 32-bit immediate
2156 SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
2157 }
2158
2159 public final void subq(Register dst, Register src) {
2160 SUB.rmOp.emit(this, QWORD, dst, src);
2161 }
2162
2163 public final void testq(Register dst, Register src) {
2164 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2165 emitByte(0x85);
2166 emitByte(0xC0 | encode);
2167 }
2168
2169 public final void xaddl(AMD64Address dst, Register src) {
2170 prefix(dst, src);
2171 emitByte(0x0F);
2172 emitByte(0xC1);
2173 emitOperandHelper(src, dst);
2174 }
2175
2176 public final void xaddq(AMD64Address dst, Register src) {
2177 prefixq(dst, src);
2178 emitByte(0x0F);
2179 emitByte(0xC1);
2180 emitOperandHelper(src, dst);
2181 }
2182
2183 public final void xchgl(Register dst, AMD64Address src) {
2184 prefix(src, dst);
2185 emitByte(0x87);
2186 emitOperandHelper(dst, src);
2187 }
2188
2189 public final void xchgq(Register dst, AMD64Address src) {
2190 prefixq(src, dst);
2191 emitByte(0x87);
2192 emitOperandHelper(dst, src);
2193 }
2194
2195 public final void membar(int barriers) {
2196 if (target.isMP) {
2197 // We only have to handle StoreLoad
2198 if ((barriers & STORE_LOAD) != 0) {
2199 // All usable chips support "locked" instructions which suffice
2200 // as barriers, and are much faster than the alternative of
2201 // executing a cpuid instruction. We emit a locked add [rsp], 0 here,
2202 // which, apart from clobbering the flags, is conveniently a no-op
2203 // otherwise.
2204 // Any change to this code may require revisiting other places in
2205 // the code where this idiom is used, in particular the
2206 // orderAccess code.
2207 lock();
2208 addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
2209 }
2210 }
2211 }
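// Byte-level sketch (assuming lock() emits the 0xF0 prefix): the barrier above
// assembles to F0 83 04 24 00, i.e. a lock-prefixed 83 /0 ib add of 0 to [rsp],
// with the 04 24 ModRM/SIB pair encoding the rsp base.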
2212
2213 @Override
2214 protected final void patchJumpTarget(int branch, int branchTarget) {
2215 int op = getByte(branch);
2216 assert op == 0xE8 // call
2217 || op == 0x00 // jump table entry
2218 || op == 0xE9 // jmp
2219 || op == 0xEB // short jmp
2220 || (op & 0xF0) == 0x70 // short jcc
2221 || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
2222 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" +
2223 branchTarget + ", op=" + op;
2224
2225 if (op == 0x00) {
2226 int offsetToJumpTableBase = getShort(branch + 1);
2227 int jumpTableBase = branch - offsetToJumpTableBase;
2228 int imm32 = branchTarget - jumpTableBase;
2229 emitInt(imm32, branch);
2230 } else if (op == 0xEB || (op & 0xF0) == 0x70) {
2231
2232 // short offset operators (jmp and jcc)
2233 final int imm8 = branchTarget - (branch + 2);
2234 /*
2235 * Since a wrongly patched short branch can potentially produce code that appears to work
2236 * but behaves badly, we always fail with an exception here instead of an assertion.
2237 */
2238 if (!NumUtil.isByte(imm8)) {
2239 throw new InternalError("branch displacement out of range: " + imm8);
2240 }
2241 emitByte(imm8, branch + 1);
2242
2243 } else {
2244
2245 int off = 1;
2246 if (op == 0x0F) {
2247 off = 2;
2248 }
2249
2250 int imm32 = branchTarget - (branch + 4 + off);
2251 emitInt(imm32, branch + off);
2252 }
2253 }
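// Worked example (illustrative): for a long jcc, op == 0x0F, so off == 2 and
// imm32 == branchTarget - (branch + 6), i.e. the displacement is relative to
// the end of the 6-byte instruction. Patching a jcc at branch == 100 toward
// branchTarget == 160 therefore stores 0x36 (54) at branch + 2.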
2254
2255 public void nullCheck(AMD64Address address) {
2256 testl(AMD64.rax, address);
2257 }
2258
2259 @Override
2260 public void align(int modulus) {
2261 if (position() % modulus != 0) {
2262 nop(modulus - (position() % modulus));
2263 }
2264 }
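// For instance (illustrative), align(16) at position 13 emits nop(3), i.e. the
// bytes 66 66 90, so the next instruction starts at offset 16.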
2265
2266 /**
2267 * Emits a direct call instruction. Note that the actual call target is not specified, because
2268 * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
2269 * responsible for adding the call address to the appropriate patching tables.
2270 */
2271 public final void call() {
2272 emitByte(0xE8);
2273 emitInt(0);
2274 }
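// Usage sketch (illustrative; the patching machinery itself lives elsewhere):
// after call() returns, the four zero displacement bytes start at
// position() - 4, and the eventual patch must store target - position() there,
// since the displacement is relative to the end of the instruction.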
2275
2276 public final void call(Register src) {
2277 int encode = prefixAndEncode(src.encoding);
2278 emitByte(0xFF);
2279 emitByte(0xD0 | encode);
2280 }
2281
2282 public final void int3() {
2283 emitByte(0xCC);
2284 }
2285
2286 private void emitx87(int b1, int b2, int i) {
2287 assert 0 <= i && i < 8 : "illegal stack offset";
2288 emitByte(b1);
2289 emitByte(b2 + i);
2290 }
2291
2292 public final void fldd(AMD64Address src) {
2293 emitByte(0xDD);
2294 emitOperandHelper(0, src);
2295 }
2296
2297 public final void flds(AMD64Address src) {
2298 emitByte(0xD9);
2299 emitOperandHelper(0, src);
2300 }
2301
2302 public final void fldln2() {
2303 emitByte(0xD9);
2304 emitByte(0xED);
2305 }
2306
2307 public final void fldlg2() {
2308 emitByte(0xD9);
2309 emitByte(0xEC);
2310 }
2311
2312 public final void fyl2x() {
2313 emitByte(0xD9);
2314 emitByte(0xF1);
2315 }
2316
2317 public final void fstps(AMD64Address src) {
2318 emitByte(0xD9);
2319 emitOperandHelper(3, src);
2320 }
2321
2322 public final void fstpd(AMD64Address src) {
2323 emitByte(0xDD);
2324 emitOperandHelper(3, src);
2325 }
2326
2327 private void emitFPUArith(int b1, int b2, int i) {
2328 assert 0 <= i && i < 8 : "illegal FPU register: " + i;
2329 emitByte(b1);
2330 emitByte(b2 + i);
2331 }
2332
2333 public void ffree(int i) {
2334 emitFPUArith(0xDD, 0xC0, i);
2335 }
2336
2337 public void fincstp() {
2338 emitByte(0xD9);
2339 emitByte(0xF7);
2340 }
2341
2342 public void fxch(int i) {
2343 emitFPUArith(0xD9, 0xC8, i);
2344 }
2345
2346 public void fnstswAX() {
2347 emitByte(0xDF);
2348 emitByte(0xE0);
2349 }
2350
2351 public void fwait() {
2352 emitByte(0x9B);
2353 }
2354
2355 public void fprem() {
2356 emitByte(0xD9);
2357 emitByte(0xF8);
2358 }
2359
2360 public final void fsin() {
2361 emitByte(0xD9);
2362 emitByte(0xFE);
2363 }
2364
2365 public final void fcos() {
2366 emitByte(0xD9);
2367 emitByte(0xFF);
2368 }
2369
2370 public final void fptan() {
2371 emitByte(0xD9);
2372 emitByte(0xF2);
2373 }
2374
2375 public final void fstp(int i) {
2376 emitx87(0xDD, 0xD8, i);
2377 }
2378
2379 @Override
2380 public AMD64Address makeAddress(Register base, int displacement) {
2381 return new AMD64Address(base, displacement);
2382 }
2383
2384 @Override
2385 public AMD64Address getPlaceholder() {
2386 return Placeholder;
2387 }
2388
2389 private void prefetchPrefix(AMD64Address src) {
2390 prefix(src);
2391 emitByte(0x0F);
2392 }
2393
2394 public void prefetchnta(AMD64Address src) {
2395 prefetchPrefix(src);
2396 emitByte(0x18);
2397 emitOperandHelper(0, src);
2398 }
2399
2400 void prefetchr(AMD64Address src) {
2401 assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
2402 prefetchPrefix(src);
2403 emitByte(0x0D);
2404 emitOperandHelper(0, src);
2405 }
2406
2407 public void prefetcht0(AMD64Address src) {
2408 assert supports(CPUFeature.SSE);
2409 prefetchPrefix(src);
2410 emitByte(0x18);
2411 emitOperandHelper(1, src);
2412 }
2413
2414 public void prefetcht1(AMD64Address src) {
2415 assert supports(CPUFeature.SSE);
2416 prefetchPrefix(src);
2417 emitByte(0x18);
2418 emitOperandHelper(2, src);
2419 }
2420
2421 public void prefetcht2(AMD64Address src) {
2422 assert supports(CPUFeature.SSE);
2423 prefix(src);
2424 emitByte(0x0F);
2425 emitByte(0x18);
2426 emitOperandHelper(3, src);
2427 }
2428
2429 public void prefetchw(AMD64Address src) {
2430 assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
2431 prefix(src);
2432 emitByte(0x0F);
2433 emitByte(0x0D);
2434 emitOperandHelper(1, src);
2435 }
2436
2437 /**
2438 * Emits an instruction that is guaranteed to be illegal (UD2, bytes 0x0F 0x0B). This is
2439 * used when we deliberately want to crash the program (e.g., for debugging).
2440 */
2441 public void illegal() {
2442 emitByte(0x0F);
2443 emitByte(0x0B);
2444 }
2445 }