001/*
002 * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved.
003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004 *
005 * This code is free software; you can redistribute it and/or modify it
006 * under the terms of the GNU General Public License version 2 only, as
007 * published by the Free Software Foundation.
008 *
009 * This code is distributed in the hope that it will be useful, but WITHOUT
010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
011 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
012 * version 2 for more details (a copy is included in the LICENSE file that
013 * accompanied this code).
014 *
015 * You should have received a copy of the GNU General Public License version
016 * 2 along with this work; if not, write to the Free Software Foundation,
017 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
018 *
019 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
020 * or visit www.oracle.com if you need additional information or have any
021 * questions.
022 */
023package com.oracle.graal.asm.amd64;
024
025import jdk.internal.jvmci.amd64.*;
026import jdk.internal.jvmci.amd64.AMD64.*;
027import jdk.internal.jvmci.code.*;
028import jdk.internal.jvmci.code.Register.*;
029
030import com.oracle.graal.asm.*;
031
032import static com.oracle.graal.asm.NumUtil.*;
033import static com.oracle.graal.asm.amd64.AMD64AsmOptions.*;
034import static com.oracle.graal.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.*;
035import static com.oracle.graal.asm.amd64.AMD64Assembler.AMD64MOp.*;
036import static com.oracle.graal.asm.amd64.AMD64Assembler.OperandSize.*;
037import static jdk.internal.jvmci.amd64.AMD64.*;
038import static jdk.internal.jvmci.code.MemoryBarriers.*;
039
040/**
041 * This class implements an assembler that can encode most X86 instructions.
042 */
043public class AMD64Assembler extends Assembler {
044
045    private static final int MinEncodingNeedsRex = 8;
046
047    /**
048     * A sentinel value used as a place holder in an instruction stream for an address that will be
049     * patched.
050     */
051    private static final AMD64Address Placeholder = new AMD64Address(rip);
052
053    /**
054     * The x86 condition codes used for conditional jumps/moves.
055     */
056    public enum ConditionFlag {
057        Zero(0x4, "|zero|"),
058        NotZero(0x5, "|nzero|"),
059        Equal(0x4, "="),
060        NotEqual(0x5, "!="),
061        Less(0xc, "<"),
062        LessEqual(0xe, "<="),
063        Greater(0xf, ">"),
064        GreaterEqual(0xd, ">="),
065        Below(0x2, "|<|"),
066        BelowEqual(0x6, "|<=|"),
067        Above(0x7, "|>|"),
068        AboveEqual(0x3, "|>=|"),
069        Overflow(0x0, "|of|"),
070        NoOverflow(0x1, "|nof|"),
071        CarrySet(0x2, "|carry|"),
072        CarryClear(0x3, "|ncarry|"),
073        Negative(0x8, "|neg|"),
074        Positive(0x9, "|pos|"),
075        Parity(0xa, "|par|"),
076        NoParity(0xb, "|npar|");
077
078        private final int value;
079        private final String operator;
080
081        private ConditionFlag(int value, String operator) {
082            this.value = value;
083            this.operator = operator;
084        }
085
086        public ConditionFlag negate() {
087            switch (this) {
088                case Zero:
089                    return NotZero;
090                case NotZero:
091                    return Zero;
092                case Equal:
093                    return NotEqual;
094                case NotEqual:
095                    return Equal;
096                case Less:
097                    return GreaterEqual;
098                case LessEqual:
099                    return Greater;
100                case Greater:
101                    return LessEqual;
102                case GreaterEqual:
103                    return Less;
104                case Below:
105                    return AboveEqual;
106                case BelowEqual:
107                    return Above;
108                case Above:
109                    return BelowEqual;
110                case AboveEqual:
111                    return Below;
112                case Overflow:
113                    return NoOverflow;
114                case NoOverflow:
115                    return Overflow;
116                case CarrySet:
117                    return CarryClear;
118                case CarryClear:
119                    return CarrySet;
120                case Negative:
121                    return Positive;
122                case Positive:
123                    return Negative;
124                case Parity:
125                    return NoParity;
126                case NoParity:
127                    return Parity;
128            }
129            throw new IllegalArgumentException();
130        }
131
132        public int getValue() {
133            return value;
134        }
135
136        @Override
137        public String toString() {
138            return operator;
139        }
140    }
141
142    /**
143     * Constants for X86 prefix bytes.
144     */
145    private static class Prefix {
146
147        private static final int REX = 0x40;
148        private static final int REXB = 0x41;
149        private static final int REXX = 0x42;
150        private static final int REXXB = 0x43;
151        private static final int REXR = 0x44;
152        private static final int REXRB = 0x45;
153        private static final int REXRX = 0x46;
154        private static final int REXRXB = 0x47;
155        private static final int REXW = 0x48;
156        private static final int REXWB = 0x49;
157        private static final int REXWX = 0x4A;
158        private static final int REXWXB = 0x4B;
159        private static final int REXWR = 0x4C;
160        private static final int REXWRB = 0x4D;
161        private static final int REXWRX = 0x4E;
162        private static final int REXWRXB = 0x4F;
163    }
164
165    /**
166     * The x86 operand sizes.
167     */
168    public static enum OperandSize {
169        BYTE(1) {
170            @Override
171            protected void emitImmediate(AMD64Assembler asm, int imm) {
172                assert imm == (byte) imm;
173                asm.emitByte(imm);
174            }
175        },
176
177        WORD(2, 0x66) {
178            @Override
179            protected void emitImmediate(AMD64Assembler asm, int imm) {
180                assert imm == (short) imm;
181                asm.emitShort(imm);
182            }
183        },
184
185        DWORD(4) {
186            @Override
187            protected void emitImmediate(AMD64Assembler asm, int imm) {
188                asm.emitInt(imm);
189            }
190        },
191
192        QWORD(8) {
193            @Override
194            protected void emitImmediate(AMD64Assembler asm, int imm) {
195                asm.emitInt(imm);
196            }
197        },
198
199        SS(4, 0xF3, true),
200
201        SD(8, 0xF2, true),
202
203        PS(16, true),
204
205        PD(16, 0x66, true);
206
207        private final int sizePrefix;
208
209        private final int bytes;
210        private final boolean xmm;
211
212        private OperandSize(int bytes) {
213            this(bytes, 0);
214        }
215
216        private OperandSize(int bytes, int sizePrefix) {
217            this(bytes, sizePrefix, false);
218        }
219
220        private OperandSize(int bytes, boolean xmm) {
221            this(bytes, 0, xmm);
222        }
223
224        private OperandSize(int bytes, int sizePrefix, boolean xmm) {
225            this.sizePrefix = sizePrefix;
226            this.bytes = bytes;
227            this.xmm = xmm;
228        }
229
230        public int getBytes() {
231            return bytes;
232        }
233
234        public boolean isXmmType() {
235            return xmm;
236        }
237
238        /**
239         * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
240         * as sign-extended 32-bit values.
241         *
242         * @param asm
243         * @param imm
244         */
245        protected void emitImmediate(AMD64Assembler asm, int imm) {
246            assert false;
247        }
248    }
249
250    /**
251     * Operand size and register type constraints.
252     */
253    private static enum OpAssertion {
254        ByteAssertion(CPU, CPU, BYTE),
255        IntegerAssertion(CPU, CPU, WORD, DWORD, QWORD),
256        No16BitAssertion(CPU, CPU, DWORD, QWORD),
257        No32BitAssertion(CPU, CPU, WORD, QWORD),
258        QwordOnlyAssertion(CPU, CPU, QWORD),
259        FloatingAssertion(XMM, XMM, SS, SD, PS, PD),
260        PackedFloatingAssertion(XMM, XMM, PS, PD),
261        SingleAssertion(XMM, XMM, SS),
262        DoubleAssertion(XMM, XMM, SD),
263        IntToFloatingAssertion(XMM, CPU, DWORD, QWORD),
264        FloatingToIntAssertion(CPU, XMM, DWORD, QWORD);
265
266        private final RegisterCategory resultCategory;
267        private final RegisterCategory inputCategory;
268        private final OperandSize[] allowedSizes;
269
270        private OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
271            this.resultCategory = resultCategory;
272            this.inputCategory = inputCategory;
273            this.allowedSizes = allowedSizes;
274        }
275
276        protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
277            assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
278            assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;
279
280            for (OperandSize s : allowedSizes) {
281                if (size == s) {
282                    return true;
283                }
284            }
285
286            assert false : "invalid operand size " + size + " used in " + op;
287            return false;
288        }
289    }
290
291    /**
292     * The register to which {@link Register#Frame} and {@link Register#CallerFrame} are bound.
293     */
294    public final Register frameRegister;
295
296    /**
297     * Constructs an assembler for the AMD64 architecture.
298     *
299     * @param registerConfig the register configuration used to bind {@link Register#Frame} and
300     *            {@link Register#CallerFrame} to physical registers. This value can be null if this
301     *            assembler instance will not be used to assemble instructions using these logical
302     *            registers.
303     */
304    public AMD64Assembler(TargetDescription target, RegisterConfig registerConfig) {
305        super(target);
306        this.frameRegister = registerConfig == null ? null : registerConfig.getFrameRegister();
307    }
308
309    private boolean supports(CPUFeature feature) {
310        return ((AMD64) target.arch).getFeatures().contains(feature);
311    }
312
313    private static int encode(Register r) {
314        assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
315        return r.encoding & 0x7;
316    }
317
318    /**
319     * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
320     * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
321     * field. The X bit must be 0.
322     */
323    protected static int getRXB(Register reg, Register rm) {
324        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
325        rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
326        return rxb;
327    }
328
329    /**
330     * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
331     * are two cases for the memory operand:<br>
332     * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.<br>
333     * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
334     */
335    protected static int getRXB(Register reg, AMD64Address rm) {
336        int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
337        if (!rm.getIndex().equals(Register.None)) {
338            rxb |= (rm.getIndex().encoding & 0x08) >> 2;
339        }
340        if (!rm.getBase().equals(Register.None)) {
341            rxb |= (rm.getBase().encoding & 0x08) >> 3;
342        }
343        return rxb;
344    }
345
346    /**
347     * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
348     * <p>
349     * Format: [ 11 reg r/m ]
350     */
351    protected void emitModRM(int reg, Register rm) {
352        assert (reg & 0x07) == reg;
353        emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
354    }
355
356    /**
357     * Emit the ModR/M byte for two register operands.
358     * <p>
359     * Format: [ 11 reg r/m ]
360     */
361    protected void emitModRM(Register reg, Register rm) {
362        emitModRM(reg.encoding & 0x07, rm);
363    }
364
365    /**
366     * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
367     */
368    protected void emitOperandHelper(Register reg, AMD64Address addr) {
369        assert !reg.equals(Register.None);
370        emitOperandHelper(encode(reg), addr);
371    }
372
373    /**
374     * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
375     * extension in the R field.
376     */
377    protected void emitOperandHelper(int reg, AMD64Address addr) {
378        assert (reg & 0x07) == reg;
379        int regenc = reg << 3;
380
381        Register base = addr.getBase();
382        Register index = addr.getIndex();
383
384        AMD64Address.Scale scale = addr.getScale();
385        int disp = addr.getDisplacement();
386
387        if (base.equals(Register.Frame)) {
388            assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration";
389            base = frameRegister;
390        }
391
392        if (base.equals(AMD64.rip)) { // also matches Placeholder
393            // [00 000 101] disp32
394            assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
395            emitByte(0x05 | regenc);
396            emitInt(disp);
397        } else if (base.isValid()) {
398            int baseenc = base.isValid() ? encode(base) : 0;
399            if (index.isValid()) {
400                int indexenc = encode(index) << 3;
401                // [base + indexscale + disp]
402                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
403                    // [base + indexscale]
404                    // [00 reg 100][ss index base]
405                    assert !index.equals(rsp) : "illegal addressing mode";
406                    emitByte(0x04 | regenc);
407                    emitByte(scale.log2 << 6 | indexenc | baseenc);
408                } else if (isByte(disp)) {
409                    // [base + indexscale + imm8]
410                    // [01 reg 100][ss index base] imm8
411                    assert !index.equals(rsp) : "illegal addressing mode";
412                    emitByte(0x44 | regenc);
413                    emitByte(scale.log2 << 6 | indexenc | baseenc);
414                    emitByte(disp & 0xFF);
415                } else {
416                    // [base + indexscale + disp32]
417                    // [10 reg 100][ss index base] disp32
418                    assert !index.equals(rsp) : "illegal addressing mode";
419                    emitByte(0x84 | regenc);
420                    emitByte(scale.log2 << 6 | indexenc | baseenc);
421                    emitInt(disp);
422                }
423            } else if (base.equals(rsp) || base.equals(r12)) {
424                // [rsp + disp]
425                if (disp == 0) {
426                    // [rsp]
427                    // [00 reg 100][00 100 100]
428                    emitByte(0x04 | regenc);
429                    emitByte(0x24);
430                } else if (isByte(disp)) {
431                    // [rsp + imm8]
432                    // [01 reg 100][00 100 100] disp8
433                    emitByte(0x44 | regenc);
434                    emitByte(0x24);
435                    emitByte(disp & 0xFF);
436                } else {
437                    // [rsp + imm32]
438                    // [10 reg 100][00 100 100] disp32
439                    emitByte(0x84 | regenc);
440                    emitByte(0x24);
441                    emitInt(disp);
442                }
443            } else {
444                // [base + disp]
445                assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
446                if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
447                    // [base]
448                    // [00 reg base]
449                    emitByte(0x00 | regenc | baseenc);
450                } else if (isByte(disp)) {
451                    // [base + disp8]
452                    // [01 reg base] disp8
453                    emitByte(0x40 | regenc | baseenc);
454                    emitByte(disp & 0xFF);
455                } else {
456                    // [base + disp32]
457                    // [10 reg base] disp32
458                    emitByte(0x80 | regenc | baseenc);
459                    emitInt(disp);
460                }
461            }
462        } else {
463            if (index.isValid()) {
464                int indexenc = encode(index) << 3;
465                // [indexscale + disp]
466                // [00 reg 100][ss index 101] disp32
467                assert !index.equals(rsp) : "illegal addressing mode";
468                emitByte(0x04 | regenc);
469                emitByte(scale.log2 << 6 | indexenc | 0x05);
470                emitInt(disp);
471            } else {
472                // [disp] ABSOLUTE
473                // [00 reg 100][00 100 101] disp32
474                emitByte(0x04 | regenc);
475                emitByte(0x25);
476                emitInt(disp);
477            }
478        }
479    }
480
481    /**
482     * Base class for AMD64 opcodes.
483     */
484    public static class AMD64Op {
485
486        protected static final int P_0F = 0x0F;
487        protected static final int P_0F38 = 0x380F;
488        protected static final int P_0F3A = 0x3A0F;
489
490        private final String opcode;
491
492        private final int prefix1;
493        private final int prefix2;
494        private final int op;
495
496        private final boolean dstIsByte;
497        private final boolean srcIsByte;
498
499        private final OpAssertion assertion;
500        private final CPUFeature feature;
501
502        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
503            this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
504        }
505
506        protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
507            this.opcode = opcode;
508            this.prefix1 = prefix1;
509            this.prefix2 = prefix2;
510            this.op = op;
511
512            this.dstIsByte = dstIsByte;
513            this.srcIsByte = srcIsByte;
514
515            this.assertion = assertion;
516            this.feature = feature;
517        }
518
519        protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
520            if (prefix1 != 0) {
521                asm.emitByte(prefix1);
522            }
523            if (size.sizePrefix != 0) {
524                asm.emitByte(size.sizePrefix);
525            }
526            int rexPrefix = 0x40 | rxb;
527            if (size == QWORD) {
528                rexPrefix |= 0x08;
529            }
530            if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
531                asm.emitByte(rexPrefix);
532            }
533            if (prefix2 > 0xFF) {
534                asm.emitShort(prefix2);
535            } else if (prefix2 > 0) {
536                asm.emitByte(prefix2);
537            }
538            asm.emitByte(op);
539        }
540
541        protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
542            assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
543            assert assertion.checkOperands(this, size, resultReg, inputReg);
544            return true;
545        }
546
547        @Override
548        public String toString() {
549            return opcode;
550        }
551    }
552
553    /**
554     * Base class for AMD64 opcodes with immediate operands.
555     */
556    public static class AMD64ImmOp extends AMD64Op {
557
558        private final boolean immIsByte;
559
560        protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
561            super(opcode, 0, prefix, op, assertion, null);
562            this.immIsByte = immIsByte;
563        }
564
565        protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
566            if (immIsByte) {
567                assert imm == (byte) imm;
568                asm.emitByte(imm);
569            } else {
570                size.emitImmediate(asm, imm);
571            }
572        }
573    }
574
575    /**
576     * Opcode with operand order of either RM or MR.
577     */
578    public abstract static class AMD64RROp extends AMD64Op {
579
580        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
581            super(opcode, prefix1, prefix2, op, assertion, feature);
582        }
583
584        protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
585            super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
586        }
587
588        public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
589    }
590
591    /**
592     * Opcode with operand order of RM.
593     */
594    public static class AMD64RMOp extends AMD64RROp {
595        // @formatter:off
596        public static final AMD64RMOp IMUL   = new AMD64RMOp("IMUL",         P_0F, 0xAF);
597        public static final AMD64RMOp BSF    = new AMD64RMOp("BSF",          P_0F, 0xBC);
598        public static final AMD64RMOp BSR    = new AMD64RMOp("BSR",          P_0F, 0xBD);
599        public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
600        public static final AMD64RMOp TZCNT  = new AMD64RMOp("TZCNT",  0xF3, P_0F, 0xBC, CPUFeature.BMI1);
601        public static final AMD64RMOp LZCNT  = new AMD64RMOp("LZCNT",  0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
602        public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB",       P_0F, 0xB6, false, true, OpAssertion.IntegerAssertion);
603        public static final AMD64RMOp MOVZX  = new AMD64RMOp("MOVZX",        P_0F, 0xB7, OpAssertion.No16BitAssertion);
604        public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB",       P_0F, 0xBE, false, true, OpAssertion.IntegerAssertion);
605        public static final AMD64RMOp MOVSX  = new AMD64RMOp("MOVSX",        P_0F, 0xBF, OpAssertion.No16BitAssertion);
606        public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD",             0x63, OpAssertion.QwordOnlyAssertion);
607        public static final AMD64RMOp MOVB   = new AMD64RMOp("MOVB",               0x8A, OpAssertion.ByteAssertion);
608        public static final AMD64RMOp MOV    = new AMD64RMOp("MOV",                0x8B);
609
610        // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
611        public static final AMD64RMOp MOVD   = new AMD64RMOp("MOVD",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
612        public static final AMD64RMOp MOVQ   = new AMD64RMOp("MOVQ",   0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
613        public static final AMD64RMOp MOVSS  = new AMD64RMOp("MOVSS",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
614        public static final AMD64RMOp MOVSD  = new AMD64RMOp("MOVSD",        P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
615
616        // TEST is documented as MR operation, but it's symmetric, and using it as RM operation is more convenient.
617        public static final AMD64RMOp TESTB  = new AMD64RMOp("TEST",               0x84, OpAssertion.ByteAssertion);
618        public static final AMD64RMOp TEST   = new AMD64RMOp("TEST",               0x85);
619        // @formatter:on
620
621        protected AMD64RMOp(String opcode, int op) {
622            this(opcode, 0, op);
623        }
624
625        protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
626            this(opcode, 0, op, assertion);
627        }
628
629        protected AMD64RMOp(String opcode, int prefix, int op) {
630            this(opcode, 0, prefix, op, null);
631        }
632
633        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
634            this(opcode, 0, prefix, op, assertion, null);
635        }
636
637        protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
638            this(opcode, 0, prefix, op, assertion, feature);
639        }
640
641        protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
642            super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
643        }
644
645        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
646            this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
647        }
648
649        protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
650            super(opcode, prefix1, prefix2, op, assertion, feature);
651        }
652
653        @Override
654        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
655            assert verify(asm, size, dst, src);
656            emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
657            asm.emitModRM(dst, src);
658        }
659
660        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
661            assert verify(asm, size, dst, null);
662            emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
663            asm.emitOperandHelper(dst, src);
664        }
665    }
666
667    /**
668     * Opcode with operand order of MR.
669     */
670    public static class AMD64MROp extends AMD64RROp {
671        // @formatter:off
672        public static final AMD64MROp MOVB   = new AMD64MROp("MOVB",               0x88, OpAssertion.ByteAssertion);
673        public static final AMD64MROp MOV    = new AMD64MROp("MOV",                0x89);
674
675        // MOVD and MOVQ are the same opcode, just with different operand size prefix
676        // Note that as MR opcodes, they have reverse operand order, so the IntToFloatingAssertion must be used.
677        public static final AMD64MROp MOVD   = new AMD64MROp("MOVD",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
678        public static final AMD64MROp MOVQ   = new AMD64MROp("MOVQ",   0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
679
680        // MOVSS and MOVSD are the same opcode, just with different operand size prefix
681        public static final AMD64MROp MOVSS  = new AMD64MROp("MOVSS",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
682        public static final AMD64MROp MOVSD  = new AMD64MROp("MOVSD",        P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
683        // @formatter:on
684
685        protected AMD64MROp(String opcode, int op) {
686            this(opcode, 0, op);
687        }
688
689        protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
690            this(opcode, 0, op, assertion);
691        }
692
693        protected AMD64MROp(String opcode, int prefix, int op) {
694            this(opcode, prefix, op, OpAssertion.IntegerAssertion);
695        }
696
697        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
698            this(opcode, prefix, op, assertion, null);
699        }
700
701        protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
702            this(opcode, 0, prefix, op, assertion, feature);
703        }
704
705        protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
706            super(opcode, prefix1, prefix2, op, assertion, feature);
707        }
708
709        @Override
710        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
711            assert verify(asm, size, src, dst);
712            emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
713            asm.emitModRM(src, dst);
714        }
715
716        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
717            assert verify(asm, size, null, src);
718            emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
719            asm.emitOperandHelper(src, dst);
720        }
721    }
722
723    /**
724     * Opcodes with operand order of M.
725     */
726    public static class AMD64MOp extends AMD64Op {
727        // @formatter:off
728        public static final AMD64MOp NOT  = new AMD64MOp("NOT",  0xF7, 2);
729        public static final AMD64MOp NEG  = new AMD64MOp("NEG",  0xF7, 3);
730        public static final AMD64MOp MUL  = new AMD64MOp("MUL",  0xF7, 4);
731        public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
732        public static final AMD64MOp DIV  = new AMD64MOp("DIV",  0xF7, 6);
733        public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
734        public static final AMD64MOp INC  = new AMD64MOp("INC",  0xFF, 0);
735        public static final AMD64MOp DEC  = new AMD64MOp("DEC",  0xFF, 1);
736        public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6);
737        public static final AMD64MOp POP  = new AMD64MOp("POP",  0x8F, 0, OpAssertion.No32BitAssertion);
738        // @formatter:on
739
740        private final int ext;
741
742        protected AMD64MOp(String opcode, int op, int ext) {
743            this(opcode, 0, op, ext);
744        }
745
746        protected AMD64MOp(String opcode, int prefix, int op, int ext) {
747            this(opcode, prefix, op, ext, OpAssertion.IntegerAssertion);
748        }
749
750        protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) {
751            this(opcode, 0, op, ext, assertion);
752        }
753
754        protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
755            super(opcode, 0, prefix, op, assertion, null);
756            this.ext = ext;
757        }
758
759        public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
760            assert verify(asm, size, dst, null);
761            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
762            asm.emitModRM(ext, dst);
763        }
764
765        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
766            assert verify(asm, size, null, null);
767            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
768            asm.emitOperandHelper(ext, dst);
769        }
770    }
771
772    /**
773     * Opcodes with operand order of MI.
774     */
775    public static class AMD64MIOp extends AMD64ImmOp {
776        // @formatter:off
777        public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true,  0xC6, 0, OpAssertion.ByteAssertion);
778        public static final AMD64MIOp MOV  = new AMD64MIOp("MOV",  false, 0xC7, 0);
779        public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
780        // @formatter:on
781
782        private final int ext;
783
784        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
785            this(opcode, immIsByte, op, ext, OpAssertion.IntegerAssertion);
786        }
787
788        protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
789            this(opcode, immIsByte, 0, op, ext, assertion);
790        }
791
792        protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
793            super(opcode, immIsByte, prefix, op, assertion);
794            this.ext = ext;
795        }
796
797        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
798            assert verify(asm, size, dst, null);
799            emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
800            asm.emitModRM(ext, dst);
801            emitImmediate(asm, size, imm);
802        }
803
804        public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
805            assert verify(asm, size, null, null);
806            emitOpcode(asm, size, getRXB(null, dst), 0, 0);
807            asm.emitOperandHelper(ext, dst);
808            emitImmediate(asm, size, imm);
809        }
810    }
811
812    /**
813     * Opcodes with operand order of RMI.
814     */
815    public static class AMD64RMIOp extends AMD64ImmOp {
816        // @formatter:off
817        public static final AMD64RMIOp IMUL    = new AMD64RMIOp("IMUL", false, 0x69);
818        public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true,  0x6B);
819        // @formatter:on
820
821        protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
822            this(opcode, immIsByte, 0, op, OpAssertion.IntegerAssertion);
823        }
824
825        protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
826            super(opcode, immIsByte, prefix, op, assertion);
827        }
828
829        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
830            assert verify(asm, size, dst, src);
831            emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
832            asm.emitModRM(dst, src);
833            emitImmediate(asm, size, imm);
834        }
835
836        public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
837            assert verify(asm, size, dst, null);
838            emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
839            asm.emitOperandHelper(dst, src);
840            emitImmediate(asm, size, imm);
841        }
842    }
843
844    public static class SSEOp extends AMD64RMOp {
845        // @formatter:off
846        public static final SSEOp CVTSI2SS  = new SSEOp("CVTSI2SS",  0xF3, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
847        public static final SSEOp CVTSI2SD  = new SSEOp("CVTSI2SS",  0xF2, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
848        public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
849        public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
850        public static final SSEOp UCOMIS    = new SSEOp("UCOMIS",          P_0F, 0x2E, OpAssertion.PackedFloatingAssertion);
851        public static final SSEOp SQRT      = new SSEOp("SQRT",            P_0F, 0x51);
852        public static final SSEOp AND       = new SSEOp("AND",             P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
853        public static final SSEOp ANDN      = new SSEOp("ANDN",            P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
854        public static final SSEOp OR        = new SSEOp("OR",              P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
855        public static final SSEOp XOR       = new SSEOp("XOR",             P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
856        public static final SSEOp ADD       = new SSEOp("ADD",             P_0F, 0x58);
857        public static final SSEOp MUL       = new SSEOp("MUL",             P_0F, 0x59);
858        public static final SSEOp CVTSS2SD  = new SSEOp("CVTSS2SD",        P_0F, 0x5A, OpAssertion.SingleAssertion);
859        public static final SSEOp CVTSD2SS  = new SSEOp("CVTSD2SS",        P_0F, 0x5A, OpAssertion.DoubleAssertion);
860        public static final SSEOp SUB       = new SSEOp("SUB",             P_0F, 0x5C);
861        public static final SSEOp MIN       = new SSEOp("MIN",             P_0F, 0x5D);
862        public static final SSEOp DIV       = new SSEOp("DIV",             P_0F, 0x5E);
863        public static final SSEOp MAX       = new SSEOp("MAX",             P_0F, 0x5F);
864        // @formatter:on
865
866        protected SSEOp(String opcode, int prefix, int op) {
867            this(opcode, prefix, op, OpAssertion.FloatingAssertion);
868        }
869
870        protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
871            this(opcode, 0, prefix, op, assertion);
872        }
873
874        protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
875            super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
876        }
877    }
878
879    /**
880     * Arithmetic operation with operand order of RM, MR or MI.
881     */
882    public static final class AMD64BinaryArithmetic {
883        // @formatter:off
884        public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
885        public static final AMD64BinaryArithmetic OR  = new AMD64BinaryArithmetic("OR",  1);
886        public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
887        public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
888        public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
889        public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
890        public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
891        public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
892        // @formatter:on
893
894        private final AMD64MIOp byteImmOp;
895        private final AMD64MROp byteMrOp;
896        private final AMD64RMOp byteRmOp;
897
898        private final AMD64MIOp immOp;
899        private final AMD64MIOp immSxOp;
900        private final AMD64MROp mrOp;
901        private final AMD64RMOp rmOp;
902
903        private AMD64BinaryArithmetic(String opcode, int code) {
904            int baseOp = code << 3;
905
906            byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
907            byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
908            byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);
909
910            immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.IntegerAssertion);
911            immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.IntegerAssertion);
912            mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.IntegerAssertion);
913            rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.IntegerAssertion);
914        }
915
916        public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
917            if (size == BYTE) {
918                return byteImmOp;
919            } else if (sx) {
920                return immSxOp;
921            } else {
922                return immOp;
923            }
924        }
925
926        public AMD64MROp getMROpcode(OperandSize size) {
927            if (size == BYTE) {
928                return byteMrOp;
929            } else {
930                return mrOp;
931            }
932        }
933
934        public AMD64RMOp getRMOpcode(OperandSize size) {
935            if (size == BYTE) {
936                return byteRmOp;
937            } else {
938                return rmOp;
939            }
940        }
941    }
942
943    /**
944     * Shift operation with operand order of M1, MC or MI.
945     */
946    public static final class AMD64Shift {
947        // @formatter:off
948        public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
949        public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
950        public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
951        public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
952        public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
953        public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
954        public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
955        // @formatter:on
956
957        public final AMD64MOp m1Op;
958        public final AMD64MOp mcOp;
959        public final AMD64MIOp miOp;
960
961        private AMD64Shift(String opcode, int code) {
962            m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.IntegerAssertion);
963            mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.IntegerAssertion);
964            miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.IntegerAssertion);
965        }
966    }
967
968    public final void addl(AMD64Address dst, int imm32) {
969        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
970    }
971
972    public final void addl(Register dst, int imm32) {
973        ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
974    }
975
976    private void addrNop4() {
977        // 4 bytes: NOP DWORD PTR [EAX+0]
978        emitByte(0x0F);
979        emitByte(0x1F);
980        emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
981        emitByte(0); // 8-bits offset (1 byte)
982    }
983
984    private void addrNop5() {
985        // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
986        emitByte(0x0F);
987        emitByte(0x1F);
988        emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
989        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
990        emitByte(0); // 8-bits offset (1 byte)
991    }
992
993    private void addrNop7() {
994        // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
995        emitByte(0x0F);
996        emitByte(0x1F);
997        emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
998        emitInt(0); // 32-bits offset (4 bytes)
999    }
1000
1001    private void addrNop8() {
1002        // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
1003        emitByte(0x0F);
1004        emitByte(0x1F);
1005        emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
1006        emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1007        emitInt(0); // 32-bits offset (4 bytes)
1008    }
1009
1010    public final void andl(Register dst, int imm32) {
1011        AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1012    }
1013
1014    public final void bswapl(Register reg) {
1015        int encode = prefixAndEncode(reg.encoding);
1016        emitByte(0x0F);
1017        emitByte(0xC8 | encode);
1018    }
1019
1020    public final void cdql() {
1021        emitByte(0x99);
1022    }
1023
1024    public final void cmovl(ConditionFlag cc, Register dst, Register src) {
1025        int encode = prefixAndEncode(dst.encoding, src.encoding);
1026        emitByte(0x0F);
1027        emitByte(0x40 | cc.getValue());
1028        emitByte(0xC0 | encode);
1029    }
1030
1031    public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
1032        prefix(src, dst);
1033        emitByte(0x0F);
1034        emitByte(0x40 | cc.getValue());
1035        emitOperandHelper(dst, src);
1036    }
1037
1038    public final void cmpl(Register dst, int imm32) {
1039        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1040    }
1041
1042    public final void cmpl(Register dst, Register src) {
1043        CMP.rmOp.emit(this, DWORD, dst, src);
1044    }
1045
1046    public final void cmpl(Register dst, AMD64Address src) {
1047        CMP.rmOp.emit(this, DWORD, dst, src);
1048    }
1049
1050    public final void cmpl(AMD64Address dst, int imm32) {
1051        CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1052    }
1053
1054    // The 32-bit cmpxchg compares the value at adr with the contents of X86.rax,
1055    // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,.
1056    // The ZF is set if the compared values were equal, and cleared otherwise.
1057    public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
1058        prefix(adr, reg);
1059        emitByte(0x0F);
1060        emitByte(0xB1);
1061        emitOperandHelper(reg, adr);
1062    }
1063
1064    protected final void decl(AMD64Address dst) {
1065        prefix(dst);
1066        emitByte(0xFF);
1067        emitOperandHelper(1, dst);
1068    }
1069
1070    public final void hlt() {
1071        emitByte(0xF4);
1072    }
1073
1074    public final void imull(Register dst, Register src, int value) {
1075        if (isByte(value)) {
1076            AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
1077        } else {
1078            AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
1079        }
1080    }
1081
1082    protected final void incl(AMD64Address dst) {
1083        prefix(dst);
1084        emitByte(0xFF);
1085        emitOperandHelper(0, dst);
1086    }
1087
1088    public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
1089        int shortSize = 2;
1090        int longSize = 6;
1091        long disp = jumpTarget - position();
1092        if (!forceDisp32 && isByte(disp - shortSize)) {
1093            // 0111 tttn #8-bit disp
1094            emitByte(0x70 | cc.getValue());
1095            emitByte((int) ((disp - shortSize) & 0xFF));
1096        } else {
1097            // 0000 1111 1000 tttn #32-bit disp
1098            assert isInt(disp - longSize) : "must be 32bit offset (call4)";
1099            emitByte(0x0F);
1100            emitByte(0x80 | cc.getValue());
1101            emitInt((int) (disp - longSize));
1102        }
1103    }
1104
1105    public final void jcc(ConditionFlag cc, Label l) {
1106        assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
1107        if (l.isBound()) {
1108            jcc(cc, l.position(), false);
1109        } else {
1110            // Note: could eliminate cond. jumps to this jump if condition
1111            // is the same however, seems to be rather unlikely case.
1112            // Note: use jccb() if label to be bound is very close to get
1113            // an 8-bit displacement
1114            l.addPatchAt(position());
1115            emitByte(0x0F);
1116            emitByte(0x80 | cc.getValue());
1117            emitInt(0);
1118        }
1119
1120    }
1121
1122    public final void jccb(ConditionFlag cc, Label l) {
1123        if (l.isBound()) {
1124            int shortSize = 2;
1125            int entry = l.position();
1126            assert isByte(entry - (position() + shortSize)) : "Dispacement too large for a short jmp";
1127            long disp = entry - position();
1128            // 0111 tttn #8-bit disp
1129            emitByte(0x70 | cc.getValue());
1130            emitByte((int) ((disp - shortSize) & 0xFF));
1131        } else {
1132            l.addPatchAt(position());
1133            emitByte(0x70 | cc.getValue());
1134            emitByte(0);
1135        }
1136    }
1137
1138    public final void jmp(int jumpTarget, boolean forceDisp32) {
1139        int shortSize = 2;
1140        int longSize = 5;
1141        long disp = jumpTarget - position();
1142        if (!forceDisp32 && isByte(disp - shortSize)) {
1143            emitByte(0xEB);
1144            emitByte((int) ((disp - shortSize) & 0xFF));
1145        } else {
1146            emitByte(0xE9);
1147            emitInt((int) (disp - longSize));
1148        }
1149    }
1150
1151    @Override
1152    public final void jmp(Label l) {
1153        if (l.isBound()) {
1154            jmp(l.position(), false);
1155        } else {
1156            // By default, forward jumps are always 32-bit displacements, since
1157            // we can't yet know where the label will be bound. If you're sure that
1158            // the forward jump will not run beyond 256 bytes, use jmpb to
1159            // force an 8-bit displacement.
1160
1161            l.addPatchAt(position());
1162            emitByte(0xE9);
1163            emitInt(0);
1164        }
1165    }
1166
1167    public final void jmp(Register entry) {
1168        int encode = prefixAndEncode(entry.encoding);
1169        emitByte(0xFF);
1170        emitByte(0xE0 | encode);
1171    }
1172
1173    public final void jmpb(Label l) {
1174        if (l.isBound()) {
1175            int shortSize = 2;
1176            int entry = l.position();
1177            assert isByte((entry - position()) + shortSize) : "Dispacement too large for a short jmp";
1178            long offs = entry - position();
1179            emitByte(0xEB);
1180            emitByte((int) ((offs - shortSize) & 0xFF));
1181        } else {
1182
1183            l.addPatchAt(position());
1184            emitByte(0xEB);
1185            emitByte(0);
1186        }
1187    }
1188
1189    public final void leaq(Register dst, AMD64Address src) {
1190        prefixq(src, dst);
1191        emitByte(0x8D);
1192        emitOperandHelper(dst, src);
1193    }
1194
1195    public final void leave() {
1196        emitByte(0xC9);
1197    }
1198
1199    public final void lock() {
1200        emitByte(0xF0);
1201    }
1202
1203    public final void movapd(Register dst, Register src) {
1204        assert dst.getRegisterCategory().equals(AMD64.XMM);
1205        assert src.getRegisterCategory().equals(AMD64.XMM);
1206        int dstenc = dst.encoding;
1207        int srcenc = src.encoding;
1208        emitByte(0x66);
1209        if (dstenc < 8) {
1210            if (srcenc >= 8) {
1211                emitByte(Prefix.REXB);
1212                srcenc -= 8;
1213            }
1214        } else {
1215            if (srcenc < 8) {
1216                emitByte(Prefix.REXR);
1217            } else {
1218                emitByte(Prefix.REXRB);
1219                srcenc -= 8;
1220            }
1221            dstenc -= 8;
1222        }
1223        emitByte(0x0F);
1224        emitByte(0x28);
1225        emitByte(0xC0 | dstenc << 3 | srcenc);
1226    }
1227
1228    public final void movaps(Register dst, Register src) {
1229        assert dst.getRegisterCategory().equals(AMD64.XMM);
1230        assert src.getRegisterCategory().equals(AMD64.XMM);
1231        int dstenc = dst.encoding;
1232        int srcenc = src.encoding;
1233        if (dstenc < 8) {
1234            if (srcenc >= 8) {
1235                emitByte(Prefix.REXB);
1236                srcenc -= 8;
1237            }
1238        } else {
1239            if (srcenc < 8) {
1240                emitByte(Prefix.REXR);
1241            } else {
1242                emitByte(Prefix.REXRB);
1243                srcenc -= 8;
1244            }
1245            dstenc -= 8;
1246        }
1247        emitByte(0x0F);
1248        emitByte(0x28);
1249        emitByte(0xC0 | dstenc << 3 | srcenc);
1250    }
1251
1252    public final void movb(AMD64Address dst, int imm8) {
1253        prefix(dst);
1254        emitByte(0xC6);
1255        emitOperandHelper(0, dst);
1256        emitByte(imm8);
1257    }
1258
1259    public final void movb(AMD64Address dst, Register src) {
1260        assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
1261        prefix(dst, src, true);
1262        emitByte(0x88);
1263        emitOperandHelper(src, dst);
1264    }
1265
1266    public final void movl(Register dst, int imm32) {
1267        int encode = prefixAndEncode(dst.encoding);
1268        emitByte(0xB8 | encode);
1269        emitInt(imm32);
1270    }
1271
1272    public final void movl(Register dst, Register src) {
1273        int encode = prefixAndEncode(dst.encoding, src.encoding);
1274        emitByte(0x8B);
1275        emitByte(0xC0 | encode);
1276    }
1277
1278    public final void movl(Register dst, AMD64Address src) {
1279        prefix(src, dst);
1280        emitByte(0x8B);
1281        emitOperandHelper(dst, src);
1282    }
1283
1284    public final void movl(AMD64Address dst, int imm32) {
1285        prefix(dst);
1286        emitByte(0xC7);
1287        emitOperandHelper(0, dst);
1288        emitInt(imm32);
1289    }
1290
1291    public final void movl(AMD64Address dst, Register src) {
1292        prefix(dst, src);
1293        emitByte(0x89);
1294        emitOperandHelper(src, dst);
1295    }
1296
1297    /**
1298     * New CPUs require use of movsd and movss to avoid partial register stall when loading from
1299     * memory. But for old Opteron use movlpd instead of movsd. The selection is done in
1300     * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
1301     * {@link AMD64MacroAssembler#movflt(Register, Register)}.
1302     */
1303    public final void movlpd(Register dst, AMD64Address src) {
1304        assert dst.getRegisterCategory().equals(AMD64.XMM);
1305        emitByte(0x66);
1306        prefix(src, dst);
1307        emitByte(0x0F);
1308        emitByte(0x12);
1309        emitOperandHelper(dst, src);
1310    }
1311
1312    public final void movq(Register dst, AMD64Address src) {
1313        if (dst.getRegisterCategory().equals(AMD64.XMM)) {
1314            emitByte(0xF3);
1315            prefixq(src, dst);
1316            emitByte(0x0F);
1317            emitByte(0x7E);
1318            emitOperandHelper(dst, src);
1319        } else {
1320            prefixq(src, dst);
1321            emitByte(0x8B);
1322            emitOperandHelper(dst, src);
1323        }
1324    }
1325
1326    public final void movq(Register dst, Register src) {
1327        int encode = prefixqAndEncode(dst.encoding, src.encoding);
1328        emitByte(0x8B);
1329        emitByte(0xC0 | encode);
1330    }
1331
1332    public final void movq(AMD64Address dst, Register src) {
1333        if (src.getRegisterCategory().equals(AMD64.XMM)) {
1334            emitByte(0x66);
1335            prefixq(dst, src);
1336            emitByte(0x0F);
1337            emitByte(0xD6);
1338            emitOperandHelper(src, dst);
1339        } else {
1340            prefixq(dst, src);
1341            emitByte(0x89);
1342            emitOperandHelper(src, dst);
1343        }
1344    }
1345
1346    public final void movsbl(Register dst, AMD64Address src) {
1347        prefix(src, dst);
1348        emitByte(0x0F);
1349        emitByte(0xBE);
1350        emitOperandHelper(dst, src);
1351    }
1352
1353    public final void movsbl(Register dst, Register src) {
1354        int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
1355        emitByte(0x0F);
1356        emitByte(0xBE);
1357        emitByte(0xC0 | encode);
1358    }
1359
1360    public final void movsbq(Register dst, AMD64Address src) {
1361        prefixq(src, dst);
1362        emitByte(0x0F);
1363        emitByte(0xBE);
1364        emitOperandHelper(dst, src);
1365    }
1366
1367    public final void movsbq(Register dst, Register src) {
1368        int encode = prefixqAndEncode(dst.encoding, src.encoding);
1369        emitByte(0x0F);
1370        emitByte(0xBE);
1371        emitByte(0xC0 | encode);
1372    }
1373
1374    public final void movsd(Register dst, Register src) {
1375        assert dst.getRegisterCategory().equals(AMD64.XMM);
1376        assert src.getRegisterCategory().equals(AMD64.XMM);
1377        emitByte(0xF2);
1378        int encode = prefixAndEncode(dst.encoding, src.encoding);
1379        emitByte(0x0F);
1380        emitByte(0x10);
1381        emitByte(0xC0 | encode);
1382    }
1383
1384    public final void movsd(Register dst, AMD64Address src) {
1385        assert dst.getRegisterCategory().equals(AMD64.XMM);
1386        emitByte(0xF2);
1387        prefix(src, dst);
1388        emitByte(0x0F);
1389        emitByte(0x10);
1390        emitOperandHelper(dst, src);
1391    }
1392
1393    public final void movsd(AMD64Address dst, Register src) {
1394        assert src.getRegisterCategory().equals(AMD64.XMM);
1395        emitByte(0xF2);
1396        prefix(dst, src);
1397        emitByte(0x0F);
1398        emitByte(0x11);
1399        emitOperandHelper(src, dst);
1400    }
1401
1402    public final void movss(Register dst, Register src) {
1403        assert dst.getRegisterCategory().equals(AMD64.XMM);
1404        assert src.getRegisterCategory().equals(AMD64.XMM);
1405        emitByte(0xF3);
1406        int encode = prefixAndEncode(dst.encoding, src.encoding);
1407        emitByte(0x0F);
1408        emitByte(0x10);
1409        emitByte(0xC0 | encode);
1410    }
1411
1412    public final void movss(Register dst, AMD64Address src) {
1413        assert dst.getRegisterCategory().equals(AMD64.XMM);
1414        emitByte(0xF3);
1415        prefix(src, dst);
1416        emitByte(0x0F);
1417        emitByte(0x10);
1418        emitOperandHelper(dst, src);
1419    }
1420
1421    public final void movss(AMD64Address dst, Register src) {
1422        assert src.getRegisterCategory().equals(AMD64.XMM);
1423        emitByte(0xF3);
1424        prefix(dst, src);
1425        emitByte(0x0F);
1426        emitByte(0x11);
1427        emitOperandHelper(src, dst);
1428    }
1429
1430    public final void movswl(Register dst, AMD64Address src) {
1431        prefix(src, dst);
1432        emitByte(0x0F);
1433        emitByte(0xBF);
1434        emitOperandHelper(dst, src);
1435    }
1436
1437    public final void movw(AMD64Address dst, int imm16) {
1438        emitByte(0x66); // switch to 16-bit mode
1439        prefix(dst);
1440        emitByte(0xC7);
1441        emitOperandHelper(0, dst);
1442        emitShort(imm16);
1443    }
1444
1445    public final void movw(AMD64Address dst, Register src) {
1446        emitByte(0x66);
1447        prefix(dst, src);
1448        emitByte(0x89);
1449        emitOperandHelper(src, dst);
1450    }
1451
1452    public final void movzbl(Register dst, AMD64Address src) {
1453        prefix(src, dst);
1454        emitByte(0x0F);
1455        emitByte(0xB6);
1456        emitOperandHelper(dst, src);
1457    }
1458
1459    public final void movzwl(Register dst, AMD64Address src) {
1460        prefix(src, dst);
1461        emitByte(0x0F);
1462        emitByte(0xB7);
1463        emitOperandHelper(dst, src);
1464    }
1465
1466    @Override
1467    public final void ensureUniquePC() {
1468        nop();
1469    }
1470
1471    public final void nop() {
1472        nop(1);
1473    }
1474
1475    public void nop(int count) {
1476        int i = count;
1477        if (UseNormalNop) {
1478            assert i > 0 : " ";
1479            // The fancy nops aren't currently recognized by debuggers making it a
1480            // pain to disassemble code while debugging. If assert are on clearly
1481            // speed is not an issue so simply use the single byte traditional nop
1482            // to do alignment.
1483
1484            for (; i > 0; i--) {
1485                emitByte(0x90);
1486            }
1487            return;
1488        }
1489
1490        if (UseAddressNop) {
1491            //
1492            // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD.
1493            // 1: 0x90
1494            // 2: 0x66 0x90
1495            // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1496            // 4: 0x0F 0x1F 0x40 0x00
1497            // 5: 0x0F 0x1F 0x44 0x00 0x00
1498            // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1499            // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1500            // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1501            // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1502            // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1503            // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1504
1505            // The rest coding is AMD specific - use consecutive Address nops
1506
1507            // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
1508            // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
1509            // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1510            // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1511            // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1512            // Size prefixes (0x66) are added for larger sizes
1513
1514            while (i >= 22) {
1515                i -= 11;
1516                emitByte(0x66); // size prefix
1517                emitByte(0x66); // size prefix
1518                emitByte(0x66); // size prefix
1519                addrNop8();
1520            }
1521            // Generate first nop for size between 21-12
1522            switch (i) {
1523                case 21:
1524                    i -= 1;
1525                    emitByte(0x66); // size prefix
1526                    // fall through
1527                case 20:
1528                    // fall through
1529                case 19:
1530                    i -= 1;
1531                    emitByte(0x66); // size prefix
1532                    // fall through
1533                case 18:
1534                    // fall through
1535                case 17:
1536                    i -= 1;
1537                    emitByte(0x66); // size prefix
1538                    // fall through
1539                case 16:
1540                    // fall through
1541                case 15:
1542                    i -= 8;
1543                    addrNop8();
1544                    break;
1545                case 14:
1546                case 13:
1547                    i -= 7;
1548                    addrNop7();
1549                    break;
1550                case 12:
1551                    i -= 6;
1552                    emitByte(0x66); // size prefix
1553                    addrNop5();
1554                    break;
1555                default:
1556                    assert i < 12;
1557            }
1558
1559            // Generate second nop for size between 11-1
1560            switch (i) {
1561                case 11:
1562                    emitByte(0x66); // size prefix
1563                    emitByte(0x66); // size prefix
1564                    emitByte(0x66); // size prefix
1565                    addrNop8();
1566                    break;
1567                case 10:
1568                    emitByte(0x66); // size prefix
1569                    emitByte(0x66); // size prefix
1570                    addrNop8();
1571                    break;
1572                case 9:
1573                    emitByte(0x66); // size prefix
1574                    addrNop8();
1575                    break;
1576                case 8:
1577                    addrNop8();
1578                    break;
1579                case 7:
1580                    addrNop7();
1581                    break;
1582                case 6:
1583                    emitByte(0x66); // size prefix
1584                    addrNop5();
1585                    break;
1586                case 5:
1587                    addrNop5();
1588                    break;
1589                case 4:
1590                    addrNop4();
1591                    break;
1592                case 3:
1593                    // Don't use "0x0F 0x1F 0x00" - need patching safe padding
1594                    emitByte(0x66); // size prefix
1595                    emitByte(0x66); // size prefix
1596                    emitByte(0x90); // nop
1597                    break;
1598                case 2:
1599                    emitByte(0x66); // size prefix
1600                    emitByte(0x90); // nop
1601                    break;
1602                case 1:
1603                    emitByte(0x90); // nop
1604                    break;
1605                default:
1606                    assert i == 0;
1607            }
1608            return;
1609        }
1610
1611        // Using nops with size prefixes "0x66 0x90".
1612        // From AMD Optimization Guide:
1613        // 1: 0x90
1614        // 2: 0x66 0x90
1615        // 3: 0x66 0x66 0x90
1616        // 4: 0x66 0x66 0x66 0x90
1617        // 5: 0x66 0x66 0x90 0x66 0x90
1618        // 6: 0x66 0x66 0x90 0x66 0x66 0x90
1619        // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
1620        // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
1621        // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
1622        // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
1623        //
1624        while (i > 12) {
1625            i -= 4;
1626            emitByte(0x66); // size prefix
1627            emitByte(0x66);
1628            emitByte(0x66);
1629            emitByte(0x90); // nop
1630        }
1631        // 1 - 12 nops
1632        if (i > 8) {
1633            if (i > 9) {
1634                i -= 1;
1635                emitByte(0x66);
1636            }
1637            i -= 3;
1638            emitByte(0x66);
1639            emitByte(0x66);
1640            emitByte(0x90);
1641        }
1642        // 1 - 8 nops
1643        if (i > 4) {
1644            if (i > 6) {
1645                i -= 1;
1646                emitByte(0x66);
1647            }
1648            i -= 3;
1649            emitByte(0x66);
1650            emitByte(0x66);
1651            emitByte(0x90);
1652        }
1653        switch (i) {
1654            case 4:
1655                emitByte(0x66);
1656                emitByte(0x66);
1657                emitByte(0x66);
1658                emitByte(0x90);
1659                break;
1660            case 3:
1661                emitByte(0x66);
1662                emitByte(0x66);
1663                emitByte(0x90);
1664                break;
1665            case 2:
1666                emitByte(0x66);
1667                emitByte(0x90);
1668                break;
1669            case 1:
1670                emitByte(0x90);
1671                break;
1672            default:
1673                assert i == 0;
1674        }
1675    }
1676
1677    public final void pop(Register dst) {
1678        int encode = prefixAndEncode(dst.encoding);
1679        emitByte(0x58 | encode);
1680    }
1681
1682    public void popfq() {
1683        emitByte(0x9D);
1684    }
1685
1686    public final void ptest(Register dst, Register src) {
1687        assert supports(CPUFeature.SSE4_1);
1688        emitByte(0x66);
1689        int encode = prefixAndEncode(dst.encoding, src.encoding);
1690        emitByte(0x0F);
1691        emitByte(0x38);
1692        emitByte(0x17);
1693        emitByte(0xC0 | encode);
1694    }
1695
1696    public final void push(Register src) {
1697        int encode = prefixAndEncode(src.encoding);
1698        emitByte(0x50 | encode);
1699    }
1700
1701    public void pushfq() {
1702        emitByte(0x9c);
1703    }
1704
1705    public final void pxor(Register dst, Register src) {
1706        emitByte(0x66);
1707        int encode = prefixAndEncode(dst.encoding, src.encoding);
1708        emitByte(0x0F);
1709        emitByte(0xEF);
1710        emitByte(0xC0 | encode);
1711    }
1712
1713    public final void ret(int imm16) {
1714        if (imm16 == 0) {
1715            emitByte(0xC3);
1716        } else {
1717            emitByte(0xC2);
1718            emitShort(imm16);
1719        }
1720    }
1721
1722    public final void subl(AMD64Address dst, int imm32) {
1723        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1724    }
1725
1726    public final void subl(Register dst, int imm32) {
1727        SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1728    }
1729
1730    public final void testl(Register dst, int imm32) {
1731        // not using emitArith because test
1732        // doesn't support sign-extension of
1733        // 8bit operands
1734        int encode = dst.encoding;
1735        if (encode == 0) {
1736            emitByte(0xA9);
1737        } else {
1738            encode = prefixAndEncode(encode);
1739            emitByte(0xF7);
1740            emitByte(0xC0 | encode);
1741        }
1742        emitInt(imm32);
1743    }
1744
1745    public final void testl(Register dst, Register src) {
1746        int encode = prefixAndEncode(dst.encoding, src.encoding);
1747        emitByte(0x85);
1748        emitByte(0xC0 | encode);
1749    }
1750
1751    public final void testl(Register dst, AMD64Address src) {
1752        prefix(src, dst);
1753        emitByte(0x85);
1754        emitOperandHelper(dst, src);
1755    }
1756
1757    public final void xorl(Register dst, Register src) {
1758        XOR.rmOp.emit(this, DWORD, dst, src);
1759    }
1760
1761    public final void xorpd(Register dst, Register src) {
1762        emitByte(0x66);
1763        xorps(dst, src);
1764    }
1765
1766    public final void xorps(Register dst, Register src) {
1767        assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1768        int encode = prefixAndEncode(dst.encoding, src.encoding);
1769        emitByte(0x0F);
1770        emitByte(0x57);
1771        emitByte(0xC0 | encode);
1772    }
1773
1774    protected final void decl(Register dst) {
1775        // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
1776        int encode = prefixAndEncode(dst.encoding);
1777        emitByte(0xFF);
1778        emitByte(0xC8 | encode);
1779    }
1780
1781    protected final void incl(Register dst) {
1782        // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
1783        int encode = prefixAndEncode(dst.encoding);
1784        emitByte(0xFF);
1785        emitByte(0xC0 | encode);
1786    }
1787
1788    private int prefixAndEncode(int regEnc) {
1789        return prefixAndEncode(regEnc, false);
1790    }
1791
1792    private int prefixAndEncode(int regEnc, boolean byteinst) {
1793        if (regEnc >= 8) {
1794            emitByte(Prefix.REXB);
1795            return regEnc - 8;
1796        } else if (byteinst && regEnc >= 4) {
1797            emitByte(Prefix.REX);
1798        }
1799        return regEnc;
1800    }
1801
1802    private int prefixqAndEncode(int regEnc) {
1803        if (regEnc < 8) {
1804            emitByte(Prefix.REXW);
1805            return regEnc;
1806        } else {
1807            emitByte(Prefix.REXWB);
1808            return regEnc - 8;
1809        }
1810    }
1811
1812    private int prefixAndEncode(int dstEnc, int srcEnc) {
1813        return prefixAndEncode(dstEnc, false, srcEnc, false);
1814    }
1815
1816    private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) {
1817        int srcEnc = srcEncoding;
1818        int dstEnc = dstEncoding;
1819        if (dstEnc < 8) {
1820            if (srcEnc >= 8) {
1821                emitByte(Prefix.REXB);
1822                srcEnc -= 8;
1823            } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
1824                emitByte(Prefix.REX);
1825            }
1826        } else {
1827            if (srcEnc < 8) {
1828                emitByte(Prefix.REXR);
1829            } else {
1830                emitByte(Prefix.REXRB);
1831                srcEnc -= 8;
1832            }
1833            dstEnc -= 8;
1834        }
1835        return dstEnc << 3 | srcEnc;
1836    }
1837
1838    /**
1839     * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand
1840     * prefix. If the given operands exceed 3 bits, the 4th bit is encoded in the prefix.
1841     *
1842     * @param regEncoding the encoding of the register part of the ModRM-Byte
1843     * @param rmEncoding the encoding of the r/m part of the ModRM-Byte
1844     * @return the lower 6 bits of the ModRM-Byte that should be emitted
1845     */
1846    private int prefixqAndEncode(int regEncoding, int rmEncoding) {
1847        int rmEnc = rmEncoding;
1848        int regEnc = regEncoding;
1849        if (regEnc < 8) {
1850            if (rmEnc < 8) {
1851                emitByte(Prefix.REXW);
1852            } else {
1853                emitByte(Prefix.REXWB);
1854                rmEnc -= 8;
1855            }
1856        } else {
1857            if (rmEnc < 8) {
1858                emitByte(Prefix.REXWR);
1859            } else {
1860                emitByte(Prefix.REXWRB);
1861                rmEnc -= 8;
1862            }
1863            regEnc -= 8;
1864        }
1865        return regEnc << 3 | rmEnc;
1866    }
1867
1868    private static boolean needsRex(Register reg) {
1869        return reg.encoding >= MinEncodingNeedsRex;
1870    }
1871
1872    private void prefix(AMD64Address adr) {
1873        if (needsRex(adr.getBase())) {
1874            if (needsRex(adr.getIndex())) {
1875                emitByte(Prefix.REXXB);
1876            } else {
1877                emitByte(Prefix.REXB);
1878            }
1879        } else {
1880            if (needsRex(adr.getIndex())) {
1881                emitByte(Prefix.REXX);
1882            }
1883        }
1884    }
1885
1886    private void prefixq(AMD64Address adr) {
1887        if (needsRex(adr.getBase())) {
1888            if (needsRex(adr.getIndex())) {
1889                emitByte(Prefix.REXWXB);
1890            } else {
1891                emitByte(Prefix.REXWB);
1892            }
1893        } else {
1894            if (needsRex(adr.getIndex())) {
1895                emitByte(Prefix.REXWX);
1896            } else {
1897                emitByte(Prefix.REXW);
1898            }
1899        }
1900    }
1901
1902    private void prefix(AMD64Address adr, Register reg) {
1903        prefix(adr, reg, false);
1904    }
1905
1906    private void prefix(AMD64Address adr, Register reg, boolean byteinst) {
1907        if (reg.encoding < 8) {
1908            if (needsRex(adr.getBase())) {
1909                if (needsRex(adr.getIndex())) {
1910                    emitByte(Prefix.REXXB);
1911                } else {
1912                    emitByte(Prefix.REXB);
1913                }
1914            } else {
1915                if (needsRex(adr.getIndex())) {
1916                    emitByte(Prefix.REXX);
1917                } else if (byteinst && reg.encoding >= 4) {
1918                    emitByte(Prefix.REX);
1919                }
1920            }
1921        } else {
1922            if (needsRex(adr.getBase())) {
1923                if (needsRex(adr.getIndex())) {
1924                    emitByte(Prefix.REXRXB);
1925                } else {
1926                    emitByte(Prefix.REXRB);
1927                }
1928            } else {
1929                if (needsRex(adr.getIndex())) {
1930                    emitByte(Prefix.REXRX);
1931                } else {
1932                    emitByte(Prefix.REXR);
1933                }
1934            }
1935        }
1936    }
1937
1938    private void prefixq(AMD64Address adr, Register src) {
1939        if (src.encoding < 8) {
1940            if (needsRex(adr.getBase())) {
1941                if (needsRex(adr.getIndex())) {
1942                    emitByte(Prefix.REXWXB);
1943                } else {
1944                    emitByte(Prefix.REXWB);
1945                }
1946            } else {
1947                if (needsRex(adr.getIndex())) {
1948                    emitByte(Prefix.REXWX);
1949                } else {
1950                    emitByte(Prefix.REXW);
1951                }
1952            }
1953        } else {
1954            if (needsRex(adr.getBase())) {
1955                if (needsRex(adr.getIndex())) {
1956                    emitByte(Prefix.REXWRXB);
1957                } else {
1958                    emitByte(Prefix.REXWRB);
1959                }
1960            } else {
1961                if (needsRex(adr.getIndex())) {
1962                    emitByte(Prefix.REXWRX);
1963                } else {
1964                    emitByte(Prefix.REXWR);
1965                }
1966            }
1967        }
1968    }
1969
1970    public final void addq(Register dst, int imm32) {
1971        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
1972    }
1973
1974    public final void addq(AMD64Address dst, int imm32) {
1975        ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
1976    }
1977
1978    public final void addq(Register dst, Register src) {
1979        ADD.rmOp.emit(this, QWORD, dst, src);
1980    }
1981
1982    public final void addq(AMD64Address dst, Register src) {
1983        ADD.mrOp.emit(this, QWORD, dst, src);
1984    }
1985
1986    public final void andq(Register dst, int imm32) {
1987        AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
1988    }
1989
1990    public final void bswapq(Register reg) {
1991        int encode = prefixqAndEncode(reg.encoding);
1992        emitByte(0x0F);
1993        emitByte(0xC8 | encode);
1994    }
1995
1996    public final void cdqq() {
1997        emitByte(Prefix.REXW);
1998        emitByte(0x99);
1999    }
2000
2001    public final void cmovq(ConditionFlag cc, Register dst, Register src) {
2002        int encode = prefixqAndEncode(dst.encoding, src.encoding);
2003        emitByte(0x0F);
2004        emitByte(0x40 | cc.getValue());
2005        emitByte(0xC0 | encode);
2006    }
2007
2008    public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
2009        prefixq(src, dst);
2010        emitByte(0x0F);
2011        emitByte(0x40 | cc.getValue());
2012        emitOperandHelper(dst, src);
2013    }
2014
2015    public final void cmpq(Register dst, int imm32) {
2016        CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2017    }
2018
2019    public final void cmpq(Register dst, Register src) {
2020        CMP.rmOp.emit(this, QWORD, dst, src);
2021    }
2022
2023    public final void cmpq(Register dst, AMD64Address src) {
2024        CMP.rmOp.emit(this, QWORD, dst, src);
2025    }
2026
2027    public final void cmpxchgq(Register reg, AMD64Address adr) {
2028        prefixq(adr, reg);
2029        emitByte(0x0F);
2030        emitByte(0xB1);
2031        emitOperandHelper(reg, adr);
2032    }
2033
2034    protected final void decq(Register dst) {
2035        // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2036        int encode = prefixqAndEncode(dst.encoding);
2037        emitByte(0xFF);
2038        emitByte(0xC8 | encode);
2039    }
2040
2041    public final void decq(AMD64Address dst) {
2042        DEC.emit(this, QWORD, dst);
2043    }
2044
2045    public final void incq(Register dst) {
2046        // Don't use it directly. Use Macroincrementq() instead.
2047        // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2048        int encode = prefixqAndEncode(dst.encoding);
2049        emitByte(0xFF);
2050        emitByte(0xC0 | encode);
2051    }
2052
2053    public final void incq(AMD64Address dst) {
2054        INC.emit(this, QWORD, dst);
2055    }
2056
2057    public final void movq(Register dst, long imm64) {
2058        int encode = prefixqAndEncode(dst.encoding);
2059        emitByte(0xB8 | encode);
2060        emitLong(imm64);
2061    }
2062
2063    public final void movslq(Register dst, int imm32) {
2064        int encode = prefixqAndEncode(dst.encoding);
2065        emitByte(0xC7);
2066        emitByte(0xC0 | encode);
2067        emitInt(imm32);
2068    }
2069
2070    public final void movdq(Register dst, Register src) {
2071
2072        // table D-1 says MMX/SSE2
2073        emitByte(0x66);
2074
2075        if (dst.getRegisterCategory().equals(AMD64.XMM)) {
2076            int encode = prefixqAndEncode(dst.encoding, src.encoding);
2077            emitByte(0x0F);
2078            emitByte(0x6E);
2079            emitByte(0xC0 | encode);
2080        } else if (src.getRegisterCategory().equals(AMD64.XMM)) {
2081
2082            // swap src/dst to get correct prefix
2083            int encode = prefixqAndEncode(src.encoding, dst.encoding);
2084            emitByte(0x0F);
2085            emitByte(0x7E);
2086            emitByte(0xC0 | encode);
2087        } else {
2088            throw new InternalError("should not reach here");
2089        }
2090    }
2091
2092    public final void movdqu(Register dst, AMD64Address src) {
2093        emitByte(0xF3);
2094        prefix(src, dst);
2095        emitByte(0x0F);
2096        emitByte(0x6F);
2097        emitOperandHelper(dst, src);
2098    }
2099
2100    public final void movslq(AMD64Address dst, int imm32) {
2101        prefixq(dst);
2102        emitByte(0xC7);
2103        emitOperandHelper(0, dst);
2104        emitInt(imm32);
2105    }
2106
2107    public final void movslq(Register dst, AMD64Address src) {
2108        prefixq(src, dst);
2109        emitByte(0x63);
2110        emitOperandHelper(dst, src);
2111    }
2112
2113    public final void movslq(Register dst, Register src) {
2114        int encode = prefixqAndEncode(dst.encoding, src.encoding);
2115        emitByte(0x63);
2116        emitByte(0xC0 | encode);
2117    }
2118
2119    public final void negq(Register dst) {
2120        int encode = prefixqAndEncode(dst.encoding);
2121        emitByte(0xF7);
2122        emitByte(0xD8 | encode);
2123    }
2124
2125    public final void shlq(Register dst, int imm8) {
2126        assert isShiftCount(imm8 >> 1) : "illegal shift count";
2127        int encode = prefixqAndEncode(dst.encoding);
2128        if (imm8 == 1) {
2129            emitByte(0xD1);
2130            emitByte(0xE0 | encode);
2131        } else {
2132            emitByte(0xC1);
2133            emitByte(0xE0 | encode);
2134            emitByte(imm8);
2135        }
2136    }
2137
2138    public final void shrq(Register dst, int imm8) {
2139        assert isShiftCount(imm8 >> 1) : "illegal shift count";
2140        int encode = prefixqAndEncode(dst.encoding);
2141        if (imm8 == 1) {
2142            emitByte(0xD1);
2143            emitByte(0xE8 | encode);
2144        } else {
2145            emitByte(0xC1);
2146            emitByte(0xE8 | encode);
2147            emitByte(imm8);
2148        }
2149    }
2150
2151    public final void subq(Register dst, int imm32) {
2152        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2153    }
2154
2155    public final void subq(AMD64Address dst, int imm32) {
2156        SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2157    }
2158
2159    public final void subqWide(Register dst, int imm32) {
2160        // don't use the sign-extending version, forcing a 32-bit immediate
2161        SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
2162    }
2163
2164    public final void subq(Register dst, Register src) {
2165        SUB.rmOp.emit(this, QWORD, dst, src);
2166    }
2167
2168    public final void testq(Register dst, Register src) {
2169        int encode = prefixqAndEncode(dst.encoding, src.encoding);
2170        emitByte(0x85);
2171        emitByte(0xC0 | encode);
2172    }
2173
2174    public final void xaddl(AMD64Address dst, Register src) {
2175        prefix(dst, src);
2176        emitByte(0x0F);
2177        emitByte(0xC1);
2178        emitOperandHelper(src, dst);
2179    }
2180
2181    public final void xaddq(AMD64Address dst, Register src) {
2182        prefixq(dst, src);
2183        emitByte(0x0F);
2184        emitByte(0xC1);
2185        emitOperandHelper(src, dst);
2186    }
2187
2188    public final void xchgl(Register dst, AMD64Address src) {
2189        prefix(src, dst);
2190        emitByte(0x87);
2191        emitOperandHelper(dst, src);
2192    }
2193
2194    public final void xchgq(Register dst, AMD64Address src) {
2195        prefixq(src, dst);
2196        emitByte(0x87);
2197        emitOperandHelper(dst, src);
2198    }
2199
2200    public final void membar(int barriers) {
2201        if (target.isMP) {
2202            // We only have to handle StoreLoad
2203            if ((barriers & STORE_LOAD) != 0) {
2204                // All usable chips support "locked" instructions which suffice
2205                // as barriers, and are much faster than the alternative of
2206                // using cpuid instruction. We use here a locked add [rsp],0.
2207                // This is conveniently otherwise a no-op except for blowing
2208                // flags.
2209                // Any change to this code may need to revisit other places in
2210                // the code where this idiom is used, in particular the
2211                // orderAccess code.
2212                lock();
2213                addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
2214            }
2215        }
2216    }
2217
2218    @Override
2219    protected final void patchJumpTarget(int branch, int branchTarget) {
2220        int op = getByte(branch);
2221        assert op == 0xE8 // call
2222                        ||
2223                        op == 0x00 // jump table entry
2224                        || op == 0xE9 // jmp
2225                        || op == 0xEB // short jmp
2226                        || (op & 0xF0) == 0x70 // short jcc
2227                        || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
2228        : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
2229
2230        if (op == 0x00) {
2231            int offsetToJumpTableBase = getShort(branch + 1);
2232            int jumpTableBase = branch - offsetToJumpTableBase;
2233            int imm32 = branchTarget - jumpTableBase;
2234            emitInt(imm32, branch);
2235        } else if (op == 0xEB || (op & 0xF0) == 0x70) {
2236
2237            // short offset operators (jmp and jcc)
2238            final int imm8 = branchTarget - (branch + 2);
2239            /*
2240             * Since a wrongly patched short branch can potentially lead to working but really bad
2241             * behaving code we should always fail with an exception instead of having an assert.
2242             */
2243            if (!NumUtil.isByte(imm8)) {
2244                throw new InternalError("branch displacement out of range: " + imm8);
2245            }
2246            emitByte(imm8, branch + 1);
2247
2248        } else {
2249
2250            int off = 1;
2251            if (op == 0x0F) {
2252                off = 2;
2253            }
2254
2255            int imm32 = branchTarget - (branch + 4 + off);
2256            emitInt(imm32, branch + off);
2257        }
2258    }
2259
2260    public void nullCheck(AMD64Address address) {
2261        testl(AMD64.rax, address);
2262    }
2263
2264    @Override
2265    public void align(int modulus) {
2266        if (position() % modulus != 0) {
2267            nop(modulus - (position() % modulus));
2268        }
2269    }
2270
2271    /**
2272     * Emits a direct call instruction. Note that the actual call target is not specified, because
2273     * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
2274     * responsible to add the call address to the appropriate patching tables.
2275     */
2276    public final void call() {
2277        emitByte(0xE8);
2278        emitInt(0);
2279    }
2280
2281    public final void call(Register src) {
2282        int encode = prefixAndEncode(src.encoding);
2283        emitByte(0xFF);
2284        emitByte(0xD0 | encode);
2285    }
2286
2287    public final void int3() {
2288        emitByte(0xCC);
2289    }
2290
2291    private void emitx87(int b1, int b2, int i) {
2292        assert 0 <= i && i < 8 : "illegal stack offset";
2293        emitByte(b1);
2294        emitByte(b2 + i);
2295    }
2296
2297    public final void fldd(AMD64Address src) {
2298        emitByte(0xDD);
2299        emitOperandHelper(0, src);
2300    }
2301
2302    public final void flds(AMD64Address src) {
2303        emitByte(0xD9);
2304        emitOperandHelper(0, src);
2305    }
2306
2307    public final void fldln2() {
2308        emitByte(0xD9);
2309        emitByte(0xED);
2310    }
2311
2312    public final void fldlg2() {
2313        emitByte(0xD9);
2314        emitByte(0xEC);
2315    }
2316
2317    public final void fyl2x() {
2318        emitByte(0xD9);
2319        emitByte(0xF1);
2320    }
2321
2322    public final void fstps(AMD64Address src) {
2323        emitByte(0xD9);
2324        emitOperandHelper(3, src);
2325    }
2326
2327    public final void fstpd(AMD64Address src) {
2328        emitByte(0xDD);
2329        emitOperandHelper(3, src);
2330    }
2331
2332    private void emitFPUArith(int b1, int b2, int i) {
2333        assert 0 <= i && i < 8 : "illegal FPU register: " + i;
2334        emitByte(b1);
2335        emitByte(b2 + i);
2336    }
2337
2338    public void ffree(int i) {
2339        emitFPUArith(0xDD, 0xC0, i);
2340    }
2341
2342    public void fincstp() {
2343        emitByte(0xD9);
2344        emitByte(0xF7);
2345    }
2346
2347    public void fxch(int i) {
2348        emitFPUArith(0xD9, 0xC8, i);
2349    }
2350
2351    public void fnstswAX() {
2352        emitByte(0xDF);
2353        emitByte(0xE0);
2354    }
2355
2356    public void fwait() {
2357        emitByte(0x9B);
2358    }
2359
2360    public void fprem() {
2361        emitByte(0xD9);
2362        emitByte(0xF8);
2363    }
2364
2365    public final void fsin() {
2366        emitByte(0xD9);
2367        emitByte(0xFE);
2368    }
2369
2370    public final void fcos() {
2371        emitByte(0xD9);
2372        emitByte(0xFF);
2373    }
2374
2375    public final void fptan() {
2376        emitByte(0xD9);
2377        emitByte(0xF2);
2378    }
2379
2380    public final void fstp(int i) {
2381        emitx87(0xDD, 0xD8, i);
2382    }
2383
2384    @Override
2385    public AMD64Address makeAddress(Register base, int displacement) {
2386        return new AMD64Address(base, displacement);
2387    }
2388
2389    @Override
2390    public AMD64Address getPlaceholder() {
2391        return Placeholder;
2392    }
2393
2394    private void prefetchPrefix(AMD64Address src) {
2395        prefix(src);
2396        emitByte(0x0F);
2397    }
2398
2399    public void prefetchnta(AMD64Address src) {
2400        prefetchPrefix(src);
2401        emitByte(0x18);
2402        emitOperandHelper(0, src);
2403    }
2404
2405    void prefetchr(AMD64Address src) {
2406        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
2407        prefetchPrefix(src);
2408        emitByte(0x0D);
2409        emitOperandHelper(0, src);
2410    }
2411
2412    public void prefetcht0(AMD64Address src) {
2413        assert supports(CPUFeature.SSE);
2414        prefetchPrefix(src);
2415        emitByte(0x18);
2416        emitOperandHelper(1, src);
2417    }
2418
2419    public void prefetcht1(AMD64Address src) {
2420        assert supports(CPUFeature.SSE);
2421        prefetchPrefix(src);
2422        emitByte(0x18);
2423        emitOperandHelper(2, src);
2424    }
2425
2426    public void prefetcht2(AMD64Address src) {
2427        assert supports(CPUFeature.SSE);
2428        prefix(src);
2429        emitByte(0x0f);
2430        emitByte(0x18);
2431        emitOperandHelper(3, src);
2432    }
2433
2434    public void prefetchw(AMD64Address src) {
2435        assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
2436        prefix(src);
2437        emitByte(0x0f);
2438        emitByte(0x0D);
2439        emitOperandHelper(1, src);
2440    }
2441
2442    /**
2443     * Emits an instruction which is considered to be illegal. This is used if we deliberately want
2444     * to crash the program (debugging etc.).
2445     */
2446    public void illegal() {
2447        emitByte(0x0f);
2448        emitByte(0x0b);
2449    }
2450}