comparison graal/com.oracle.jvmci.asm.amd64/src/com/oracle/jvmci/asm/amd64/AMD64Assembler.java @ 21708:6df25b1418be

moved com.oracle.asm.** to jvmci-util.jar (JBS:GRAAL-53)
author Doug Simon <doug.simon@oracle.com>
date Wed, 03 Jun 2015 18:06:44 +0200
parents graal/com.oracle.graal.asm.amd64/src/com/oracle/graal/asm/amd64/AMD64Assembler.java@5024c80224c7
children
comparing revisions 21707:e0f311284930 and 21708:6df25b1418be
1 /*
2 * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23 package com.oracle.jvmci.asm.amd64;
24
25 import com.oracle.jvmci.amd64.*;
26 import com.oracle.jvmci.amd64.AMD64.*;
27 import com.oracle.jvmci.asm.*;
28 import com.oracle.jvmci.code.Register;
29 import com.oracle.jvmci.code.TargetDescription;
30 import com.oracle.jvmci.code.RegisterConfig;
31
32 import static com.oracle.jvmci.amd64.AMD64.*;
33 import static com.oracle.jvmci.asm.NumUtil.*;
34 import static com.oracle.jvmci.asm.amd64.AMD64AsmOptions.*;
35 import static com.oracle.jvmci.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.*;
36 import static com.oracle.jvmci.asm.amd64.AMD64Assembler.AMD64MOp.*;
37 import static com.oracle.jvmci.asm.amd64.AMD64Assembler.OperandSize.*;
38 import static com.oracle.jvmci.code.MemoryBarriers.*;
39
40 import com.oracle.jvmci.code.Register.RegisterCategory;
41
42 /**
43 * This class implements an assembler that can encode most X86 instructions.
44 */
45 public class AMD64Assembler extends Assembler {
46
47 private static final int MinEncodingNeedsRex = 8;
48
49 /**
50 * A sentinel value used as a placeholder in an instruction stream for an address that will be
51 * patched.
52 */
53 private static final AMD64Address Placeholder = new AMD64Address(rip);
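// Illustrative note (an assumption, not part of the original file): Placeholder is
// rip-relative with displacement 0, so emitOperandHelper below encodes it as
// [00 reg 101] followed by a 32-bit displacement of 0, which the patching code
// later overwrites once the real address is known.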
54
55 /**
56 * The x86 condition codes used for conditional jumps/moves.
57 */
58 public enum ConditionFlag {
59 Zero(0x4, "|zero|"),
60 NotZero(0x5, "|nzero|"),
61 Equal(0x4, "="),
62 NotEqual(0x5, "!="),
63 Less(0xc, "<"),
64 LessEqual(0xe, "<="),
65 Greater(0xf, ">"),
66 GreaterEqual(0xd, ">="),
67 Below(0x2, "|<|"),
68 BelowEqual(0x6, "|<=|"),
69 Above(0x7, "|>|"),
70 AboveEqual(0x3, "|>=|"),
71 Overflow(0x0, "|of|"),
72 NoOverflow(0x1, "|nof|"),
73 CarrySet(0x2, "|carry|"),
74 CarryClear(0x3, "|ncarry|"),
75 Negative(0x8, "|neg|"),
76 Positive(0x9, "|pos|"),
77 Parity(0xa, "|par|"),
78 NoParity(0xb, "|npar|");
79
80 private final int value;
81 private final String operator;
82
83 private ConditionFlag(int value, String operator) {
84 this.value = value;
85 this.operator = operator;
86 }
87
88 public ConditionFlag negate() {
89 switch (this) {
90 case Zero:
91 return NotZero;
92 case NotZero:
93 return Zero;
94 case Equal:
95 return NotEqual;
96 case NotEqual:
97 return Equal;
98 case Less:
99 return GreaterEqual;
100 case LessEqual:
101 return Greater;
102 case Greater:
103 return LessEqual;
104 case GreaterEqual:
105 return Less;
106 case Below:
107 return AboveEqual;
108 case BelowEqual:
109 return Above;
110 case Above:
111 return BelowEqual;
112 case AboveEqual:
113 return Below;
114 case Overflow:
115 return NoOverflow;
116 case NoOverflow:
117 return Overflow;
118 case CarrySet:
119 return CarryClear;
120 case CarryClear:
121 return CarrySet;
122 case Negative:
123 return Positive;
124 case Positive:
125 return Negative;
126 case Parity:
127 return NoParity;
128 case NoParity:
129 return Parity;
130 }
131 throw new IllegalArgumentException();
132 }
133
134 public int getValue() {
135 return value;
136 }
137
138 @Override
139 public String toString() {
140 return operator;
141 }
142 }
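// Illustrative sketch (not part of the original file): x86 encodes negated
// conditions by flipping the low bit of the 4-bit condition code, so the
// switch in negate() above is equivalent to toggling bit 0 of the encoding.
// A hypothetical self-check using only the methods defined above:
private static boolean conditionFlagsAreConsistent() {
    for (ConditionFlag cc : ConditionFlag.values()) {
        assert cc.negate().negate() == cc : "negate must be an involution";
        assert cc.negate().getValue() == (cc.getValue() ^ 1) : "negation flips the low bit";
    }
    return true;
}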
143
144 /**
145 * Constants for X86 prefix bytes.
146 */
147 private static class Prefix {
148
149 private static final int REX = 0x40;
150 private static final int REXB = 0x41;
151 private static final int REXX = 0x42;
152 private static final int REXXB = 0x43;
153 private static final int REXR = 0x44;
154 private static final int REXRB = 0x45;
155 private static final int REXRX = 0x46;
156 private static final int REXRXB = 0x47;
157 private static final int REXW = 0x48;
158 private static final int REXWB = 0x49;
159 private static final int REXWX = 0x4A;
160 private static final int REXWXB = 0x4B;
161 private static final int REXWR = 0x4C;
162 private static final int REXWRB = 0x4D;
163 private static final int REXWRX = 0x4E;
164 private static final int REXWRXB = 0x4F;
165 }
166
167 /**
168 * The x86 operand sizes.
169 */
170 public static enum OperandSize {
171 BYTE(1) {
172 @Override
173 protected void emitImmediate(AMD64Assembler asm, int imm) {
174 assert imm == (byte) imm;
175 asm.emitByte(imm);
176 }
177 },
178
179 WORD(2, 0x66) {
180 @Override
181 protected void emitImmediate(AMD64Assembler asm, int imm) {
182 assert imm == (short) imm;
183 asm.emitShort(imm);
184 }
185 },
186
187 DWORD(4) {
188 @Override
189 protected void emitImmediate(AMD64Assembler asm, int imm) {
190 asm.emitInt(imm);
191 }
192 },
193
194 QWORD(8) {
195 @Override
196 protected void emitImmediate(AMD64Assembler asm, int imm) {
197 asm.emitInt(imm);
198 }
199 },
200
201 SS(4, 0xF3, true),
202
203 SD(8, 0xF2, true),
204
205 PS(16, true),
206
207 PD(16, 0x66, true);
208
209 private final int sizePrefix;
210
211 private final int bytes;
212 private final boolean xmm;
213
214 private OperandSize(int bytes) {
215 this(bytes, 0);
216 }
217
218 private OperandSize(int bytes, int sizePrefix) {
219 this(bytes, sizePrefix, false);
220 }
221
222 private OperandSize(int bytes, boolean xmm) {
223 this(bytes, 0, xmm);
224 }
225
226 private OperandSize(int bytes, int sizePrefix, boolean xmm) {
227 this.sizePrefix = sizePrefix;
228 this.bytes = bytes;
229 this.xmm = xmm;
230 }
231
232 public int getBytes() {
233 return bytes;
234 }
235
236 public boolean isXmmType() {
237 return xmm;
238 }
239
240 /**
241 * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded
242 * as sign-extended 32-bit values.
243 *
244 * @param asm the assembler used to emit the immediate
245 * @param imm the immediate value to emit
246 */
247 protected void emitImmediate(AMD64Assembler asm, int imm) {
248 assert false;
249 }
250 }
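// Illustrative sketch (an assumption, not part of the original file): immediates
// are emitted little-endian at the operand size, except QWORD, which is encoded
// as a sign-extended 32-bit value as documented above. A hypothetical helper
// demonstrating the rule:
private static void emitExampleImmediates(AMD64Assembler asm) {
    DWORD.emitImmediate(asm, -1);       // emits FF FF FF FF
    WORD.emitImmediate(asm, 0x1234);    // emits 34 12 (little-endian)
    QWORD.emitImmediate(asm, 42);       // still four bytes: 2A 00 00 00
}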
251
252 /**
253 * Operand size and register type constraints.
254 */
255 private static enum OpAssertion {
256 ByteAssertion(CPU, CPU, BYTE),
257 IntegerAssertion(CPU, CPU, WORD, DWORD, QWORD),
258 No16BitAssertion(CPU, CPU, DWORD, QWORD),
259 QwordOnlyAssertion(CPU, CPU, QWORD),
260 FloatingAssertion(XMM, XMM, SS, SD, PS, PD),
261 PackedFloatingAssertion(XMM, XMM, PS, PD),
262 SingleAssertion(XMM, XMM, SS),
263 DoubleAssertion(XMM, XMM, SD),
264 IntToFloatingAssertion(XMM, CPU, DWORD, QWORD),
265 FloatingToIntAssertion(CPU, XMM, DWORD, QWORD);
266
267 private final RegisterCategory resultCategory;
268 private final RegisterCategory inputCategory;
269 private final OperandSize[] allowedSizes;
270
271 private OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) {
272 this.resultCategory = resultCategory;
273 this.inputCategory = inputCategory;
274 this.allowedSizes = allowedSizes;
275 }
276
277 protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) {
278 assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op;
279 assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op;
280
281 for (OperandSize s : allowedSizes) {
282 if (size == s) {
283 return true;
284 }
285 }
286
287 assert false : "invalid operand size " + size + " used in " + op;
288 return false;
289 }
290 }
291
292 /**
293 * The register to which {@link Register#Frame} and {@link Register#CallerFrame} are bound.
294 */
295 public final Register frameRegister;
296
297 /**
298 * Constructs an assembler for the AMD64 architecture.
299 *
300 * @param registerConfig the register configuration used to bind {@link Register#Frame} and
301 * {@link Register#CallerFrame} to physical registers. This value can be null if this
302 * assembler instance will not be used to assemble instructions using these logical
303 * registers.
304 */
305 public AMD64Assembler(TargetDescription target, RegisterConfig registerConfig) {
306 super(target);
307 this.frameRegister = registerConfig == null ? null : registerConfig.getFrameRegister();
308 }
309
310 private boolean supports(CPUFeature feature) {
311 return ((AMD64) target.arch).getFeatures().contains(feature);
312 }
313
314 private static int encode(Register r) {
315 assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
316 return r.encoding & 0x7;
317 }
318
319 /**
320 * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a
321 * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm
322 * field. The X bit must be 0.
323 */
324 protected static int getRXB(Register reg, Register rm) {
325 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
326 rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3;
327 return rxb;
328 }
329
330 /**
331 * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There
332 * are two cases for the memory operand:<br>
333 * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.<br>
334 * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base.
335 */
336 protected static int getRXB(Register reg, AMD64Address rm) {
337 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1;
338 if (!rm.getIndex().equals(Register.None)) {
339 rxb |= (rm.getIndex().encoding & 0x08) >> 2;
340 }
341 if (!rm.getBase().equals(Register.None)) {
342 rxb |= (rm.getBase().encoding & 0x08) >> 3;
343 }
344 return rxb;
345 }
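// Worked example (illustrative assumption, not in the original file): for
// reg = r9 (encoding 9 = 0b1001) and rm = rax (encoding 0), getRXB returns
// (9 & 0x08) >> 1 = 0b100, i.e. only the R bit is set, so emitOpcode below
// turns it into the REX.R prefix 0x40 | 0b100 = 0x44.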
346
347 /**
348 * Emit the ModR/M byte for one register operand and an opcode extension in the R field.
349 * <p>
350 * Format: [ 11 reg r/m ]
351 */
352 protected void emitModRM(int reg, Register rm) {
353 assert (reg & 0x07) == reg;
354 emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07));
355 }
356
357 /**
358 * Emit the ModR/M byte for two register operands.
359 * <p>
360 * Format: [ 11 reg r/m ]
361 */
362 protected void emitModRM(Register reg, Register rm) {
363 emitModRM(reg.encoding & 0x07, rm);
364 }
365
366 /**
367 * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand.
368 */
369 protected void emitOperandHelper(Register reg, AMD64Address addr) {
370 assert !reg.equals(Register.None);
371 emitOperandHelper(encode(reg), addr);
372 }
373
374 /**
375 * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode
376 * extension in the R field.
377 */
378 protected void emitOperandHelper(int reg, AMD64Address addr) {
379 assert (reg & 0x07) == reg;
380 int regenc = reg << 3;
381
382 Register base = addr.getBase();
383 Register index = addr.getIndex();
384
385 AMD64Address.Scale scale = addr.getScale();
386 int disp = addr.getDisplacement();
387
388 if (base.equals(Register.Frame)) {
389 assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration";
390 base = frameRegister;
391 }
392
393 if (base.equals(AMD64.rip)) { // also matches Placeholder
394 // [00 000 101] disp32
395 assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
396 emitByte(0x05 | regenc);
397 emitInt(disp);
398 } else if (base.isValid()) {
399 int baseenc = base.isValid() ? encode(base) : 0;
400 if (index.isValid()) {
401 int indexenc = encode(index) << 3;
402 // [base + indexscale + disp]
403 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
404 // [base + indexscale]
405 // [00 reg 100][ss index base]
406 assert !index.equals(rsp) : "illegal addressing mode";
407 emitByte(0x04 | regenc);
408 emitByte(scale.log2 << 6 | indexenc | baseenc);
409 } else if (isByte(disp)) {
410 // [base + indexscale + imm8]
411 // [01 reg 100][ss index base] imm8
412 assert !index.equals(rsp) : "illegal addressing mode";
413 emitByte(0x44 | regenc);
414 emitByte(scale.log2 << 6 | indexenc | baseenc);
415 emitByte(disp & 0xFF);
416 } else {
417 // [base + indexscale + disp32]
418 // [10 reg 100][ss index base] disp32
419 assert !index.equals(rsp) : "illegal addressing mode";
420 emitByte(0x84 | regenc);
421 emitByte(scale.log2 << 6 | indexenc | baseenc);
422 emitInt(disp);
423 }
424 } else if (base.equals(rsp) || base.equals(r12)) {
425 // [rsp + disp]
426 if (disp == 0) {
427 // [rsp]
428 // [00 reg 100][00 100 100]
429 emitByte(0x04 | regenc);
430 emitByte(0x24);
431 } else if (isByte(disp)) {
432 // [rsp + imm8]
433 // [01 reg 100][00 100 100] disp8
434 emitByte(0x44 | regenc);
435 emitByte(0x24);
436 emitByte(disp & 0xFF);
437 } else {
438 // [rsp + imm32]
439 // [10 reg 100][00 100 100] disp32
440 emitByte(0x84 | regenc);
441 emitByte(0x24);
442 emitInt(disp);
443 }
444 } else {
445 // [base + disp]
446 assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
447 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
448 // [base]
449 // [00 reg base]
450 emitByte(0x00 | regenc | baseenc);
451 } else if (isByte(disp)) {
452 // [base + disp8]
453 // [01 reg base] disp8
454 emitByte(0x40 | regenc | baseenc);
455 emitByte(disp & 0xFF);
456 } else {
457 // [base + disp32]
458 // [10 reg base] disp32
459 emitByte(0x80 | regenc | baseenc);
460 emitInt(disp);
461 }
462 }
463 } else {
464 if (index.isValid()) {
465 int indexenc = encode(index) << 3;
466 // [indexscale + disp]
467 // [00 reg 100][ss index 101] disp32
468 assert !index.equals(rsp) : "illegal addressing mode";
469 emitByte(0x04 | regenc);
470 emitByte(scale.log2 << 6 | indexenc | 0x05);
471 emitInt(disp);
472 } else {
473 // [disp] ABSOLUTE
474 // [00 reg 100][00 100 101] disp32
475 emitByte(0x04 | regenc);
476 emitByte(0x25);
477 emitInt(disp);
478 }
479 }
480 }
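// Worked example (illustrative assumption, not part of the original file):
// emitOperandHelper(rdx, [rbx + rcx*4 + 16]) takes the [base + indexscale + imm8]
// path above and emits three bytes:
//   0x54  ModRM [01 010 100]: mod=disp8, reg=rdx(2), rm=100 (SIB follows)
//   0x8B  SIB   [10 001 011]: scale=*4, index=rcx(1), base=rbx(3)
//   0x10  disp8 = 16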
481
482 /**
483 * Base class for AMD64 opcodes.
484 */
485 public static class AMD64Op {
486
487 protected static final int P_0F = 0x0F;
488 protected static final int P_0F38 = 0x380F;
489 protected static final int P_0F3A = 0x3A0F;
490
491 private final String opcode;
492
493 private final int prefix1;
494 private final int prefix2;
495 private final int op;
496
497 private final boolean dstIsByte;
498 private final boolean srcIsByte;
499
500 private final OpAssertion assertion;
501 private final CPUFeature feature;
502
503 protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
504 this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature);
505 }
506
507 protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
508 this.opcode = opcode;
509 this.prefix1 = prefix1;
510 this.prefix2 = prefix2;
511 this.op = op;
512
513 this.dstIsByte = dstIsByte;
514 this.srcIsByte = srcIsByte;
515
516 this.assertion = assertion;
517 this.feature = feature;
518 }
519
520 protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) {
521 if (prefix1 != 0) {
522 asm.emitByte(prefix1);
523 }
524 if (size.sizePrefix != 0) {
525 asm.emitByte(size.sizePrefix);
526 }
527 int rexPrefix = 0x40 | rxb;
528 if (size == QWORD) {
529 rexPrefix |= 0x08;
530 }
531 if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) {
532 asm.emitByte(rexPrefix);
533 }
534 if (prefix2 > 0xFF) {
535 asm.emitShort(prefix2);
536 } else if (prefix2 > 0) {
537 asm.emitByte(prefix2);
538 }
539 asm.emitByte(op);
540 }
541
542 protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) {
543 assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode);
544 assert assertion.checkOperands(this, size, resultReg, inputReg);
545 return true;
546 }
547
548 @Override
549 public String toString() {
550 return opcode;
551 }
552 }
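// Worked example (illustrative assumption, not in the original file): emitting
// MOV r9, rax at QWORD size via the RM form (opcode 0x8B) computes rxb = 0b100
// for r9, so emitOpcode produces 0x4C (REX.WR) followed by 0x8B, and the ModRM
// byte 0xC8 follows - three bytes in total: 4C 8B C8.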
553
554 /**
555 * Base class for AMD64 opcodes with immediate operands.
556 */
557 public static class AMD64ImmOp extends AMD64Op {
558
559 private final boolean immIsByte;
560
561 protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
562 super(opcode, 0, prefix, op, assertion, null);
563 this.immIsByte = immIsByte;
564 }
565
566 protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) {
567 if (immIsByte) {
568 assert imm == (byte) imm;
569 asm.emitByte(imm);
570 } else {
571 size.emitImmediate(asm, imm);
572 }
573 }
574 }
575
576 /**
577 * Opcode with operand order of either RM or MR.
578 */
579 public abstract static class AMD64RROp extends AMD64Op {
580
581 protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
582 super(opcode, prefix1, prefix2, op, assertion, feature);
583 }
584
585 protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) {
586 super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature);
587 }
588
589 public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src);
590 }
591
592 /**
593 * Opcode with operand order of RM.
594 */
595 public static class AMD64RMOp extends AMD64RROp {
596 // @formatter:off
597 public static final AMD64RMOp IMUL = new AMD64RMOp("IMUL", P_0F, 0xAF);
598 public static final AMD64RMOp BSF = new AMD64RMOp("BSF", P_0F, 0xBC);
599 public static final AMD64RMOp BSR = new AMD64RMOp("BSR", P_0F, 0xBD);
600 public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT);
601 public static final AMD64RMOp TZCNT = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, CPUFeature.BMI1);
602 public static final AMD64RMOp LZCNT = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, CPUFeature.LZCNT);
603 public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.IntegerAssertion);
604 public static final AMD64RMOp MOVZX = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.No16BitAssertion);
605 public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.IntegerAssertion);
606 public static final AMD64RMOp MOVSX = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.No16BitAssertion);
607 public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordOnlyAssertion);
608 public static final AMD64RMOp MOVB = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion);
609 public static final AMD64RMOp MOV = new AMD64RMOp("MOV", 0x8B);
610
611 // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix
612 public static final AMD64RMOp MOVD = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
613 public static final AMD64RMOp MOVQ = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
614 public static final AMD64RMOp MOVSS = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
615 public static final AMD64RMOp MOVSD = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE);
616
617 // TEST is documented as an MR operation, but it is symmetric, and using it as an RM operation is more convenient.
618 public static final AMD64RMOp TESTB = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion);
619 public static final AMD64RMOp TEST = new AMD64RMOp("TEST", 0x85);
620 // @formatter:on
621
622 protected AMD64RMOp(String opcode, int op) {
623 this(opcode, 0, op);
624 }
625
626 protected AMD64RMOp(String opcode, int op, OpAssertion assertion) {
627 this(opcode, 0, op, assertion);
628 }
629
630 protected AMD64RMOp(String opcode, int prefix, int op) {
631 this(opcode, 0, prefix, op, null);
632 }
633
634 protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) {
635 this(opcode, 0, prefix, op, assertion, null);
636 }
637
638 protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
639 this(opcode, 0, prefix, op, assertion, feature);
640 }
641
642 protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) {
643 super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null);
644 }
645
646 protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) {
647 this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature);
648 }
649
650 protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
651 super(opcode, prefix1, prefix2, op, assertion, feature);
652 }
653
654 @Override
655 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
656 assert verify(asm, size, dst, src);
657 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
658 asm.emitModRM(dst, src);
659 }
660
661 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) {
662 assert verify(asm, size, dst, null);
663 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
664 asm.emitOperandHelper(dst, src);
665 }
666 }
667
668 /**
669 * Opcode with operand order of MR.
670 */
671 public static class AMD64MROp extends AMD64RROp {
672 // @formatter:off
673 public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion);
674 public static final AMD64MROp MOV = new AMD64MROp("MOV", 0x89);
675
676 // MOVD and MOVQ are the same opcode, just with different operand size prefix
677 // Note that as MR opcodes, they have reverse operand order, so the IntToFloatingAssertion must be used.
678 public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
679 public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2);
680
681 // MOVSS and MOVSD are the same opcode, just with different operand size prefix
682 public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
683 public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE);
684 // @formatter:on
685
686 protected AMD64MROp(String opcode, int op) {
687 this(opcode, 0, op);
688 }
689
690 protected AMD64MROp(String opcode, int op, OpAssertion assertion) {
691 this(opcode, 0, op, assertion);
692 }
693
694 protected AMD64MROp(String opcode, int prefix, int op) {
695 this(opcode, prefix, op, OpAssertion.IntegerAssertion);
696 }
697
698 protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) {
699 this(opcode, prefix, op, assertion, null);
700 }
701
702 protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) {
703 this(opcode, 0, prefix, op, assertion, feature);
704 }
705
706 protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) {
707 super(opcode, prefix1, prefix2, op, assertion, feature);
708 }
709
710 @Override
711 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) {
712 assert verify(asm, size, src, dst);
713 emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding);
714 asm.emitModRM(src, dst);
715 }
716
717 public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) {
718 assert verify(asm, size, null, src);
719 emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0);
720 asm.emitOperandHelper(src, dst);
721 }
722 }
723
724 /**
725 * Opcodes with operand order of M.
726 */
727 public static class AMD64MOp extends AMD64Op {
728 // @formatter:off
729 public static final AMD64MOp NOT = new AMD64MOp("NOT", 0xF7, 2);
730 public static final AMD64MOp NEG = new AMD64MOp("NEG", 0xF7, 3);
731 public static final AMD64MOp MUL = new AMD64MOp("MUL", 0xF7, 4);
732 public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5);
733 public static final AMD64MOp DIV = new AMD64MOp("DIV", 0xF7, 6);
734 public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7);
735 public static final AMD64MOp INC = new AMD64MOp("INC", 0xFF, 0);
736 public static final AMD64MOp DEC = new AMD64MOp("DEC", 0xFF, 1);
737 // @formatter:on
738
739 private final int ext;
740
741 protected AMD64MOp(String opcode, int op, int ext) {
742 this(opcode, 0, op, ext);
743 }
744
745 protected AMD64MOp(String opcode, int prefix, int op, int ext) {
746 this(opcode, prefix, op, ext, OpAssertion.IntegerAssertion);
747 }
748
749 protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) {
750 super(opcode, 0, prefix, op, assertion, null);
751 this.ext = ext;
752 }
753
754 public final void emit(AMD64Assembler asm, OperandSize size, Register dst) {
755 assert verify(asm, size, dst, null);
756 emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
757 asm.emitModRM(ext, dst);
758 }
759
760 public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) {
761 assert verify(asm, size, null, null);
762 emitOpcode(asm, size, getRXB(null, dst), 0, 0);
763 asm.emitOperandHelper(ext, dst);
764 }
765 }
766
767 /**
768 * Opcodes with operand order of MI.
769 */
770 public static class AMD64MIOp extends AMD64ImmOp {
771 // @formatter:off
772 public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion);
773 public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0);
774 public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0);
775 // @formatter:on
776
777 private final int ext;
778
779 protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) {
780 this(opcode, immIsByte, op, ext, OpAssertion.IntegerAssertion);
781 }
782
783 protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) {
784 this(opcode, immIsByte, 0, op, ext, assertion);
785 }
786
787 protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) {
788 super(opcode, immIsByte, prefix, op, assertion);
789 this.ext = ext;
790 }
791
792 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) {
793 assert verify(asm, size, dst, null);
794 emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding);
795 asm.emitModRM(ext, dst);
796 emitImmediate(asm, size, imm);
797 }
798
799 public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) {
800 assert verify(asm, size, null, null);
801 emitOpcode(asm, size, getRXB(null, dst), 0, 0);
802 asm.emitOperandHelper(ext, dst);
803 emitImmediate(asm, size, imm);
804 }
805 }
806
807 /**
808 * Opcodes with operand order of RMI.
809 */
810 public static class AMD64RMIOp extends AMD64ImmOp {
811 // @formatter:off
812 public static final AMD64RMIOp IMUL = new AMD64RMIOp("IMUL", false, 0x69);
813 public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B);
814 // @formatter:on
815
816 protected AMD64RMIOp(String opcode, boolean immIsByte, int op) {
817 this(opcode, immIsByte, 0, op, OpAssertion.IntegerAssertion);
818 }
819
820 protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) {
821 super(opcode, immIsByte, prefix, op, assertion);
822 }
823
824 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) {
825 assert verify(asm, size, dst, src);
826 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding);
827 asm.emitModRM(dst, src);
828 emitImmediate(asm, size, imm);
829 }
830
831 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) {
832 assert verify(asm, size, dst, null);
833 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0);
834 asm.emitOperandHelper(dst, src);
835 emitImmediate(asm, size, imm);
836 }
837 }
838
839 public static class SSEOp extends AMD64RMOp {
840 // @formatter:off
841 public static final SSEOp CVTSI2SS = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
842 public static final SSEOp CVTSI2SD = new SSEOp("CVTSI2SD", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion);
843 public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
844 public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion);
845 public static final SSEOp UCOMIS = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatingAssertion);
846 public static final SSEOp SQRT = new SSEOp("SQRT", P_0F, 0x51);
847 public static final SSEOp AND = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatingAssertion);
848 public static final SSEOp ANDN = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatingAssertion);
849 public static final SSEOp OR = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatingAssertion);
850 public static final SSEOp XOR = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatingAssertion);
851 public static final SSEOp ADD = new SSEOp("ADD", P_0F, 0x58);
852 public static final SSEOp MUL = new SSEOp("MUL", P_0F, 0x59);
853 public static final SSEOp CVTSS2SD = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion);
854 public static final SSEOp CVTSD2SS = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion);
855 public static final SSEOp SUB = new SSEOp("SUB", P_0F, 0x5C);
856 public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D);
857 public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E);
858 public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F);
859 // @formatter:on
860
861 protected SSEOp(String opcode, int prefix, int op) {
862 this(opcode, prefix, op, OpAssertion.FloatingAssertion);
863 }
864
865 protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) {
866 this(opcode, 0, prefix, op, assertion);
867 }
868
869 protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) {
870 super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2);
871 }
872 }
873
874 /**
875 * Arithmetic operation with operand order of RM, MR or MI.
876 */
877 public static final class AMD64BinaryArithmetic {
878 // @formatter:off
879 public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0);
880 public static final AMD64BinaryArithmetic OR = new AMD64BinaryArithmetic("OR", 1);
881 public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2);
882 public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3);
883 public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4);
884 public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5);
885 public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6);
886 public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7);
887 // @formatter:on
888
889 private final AMD64MIOp byteImmOp;
890 private final AMD64MROp byteMrOp;
891 private final AMD64RMOp byteRmOp;
892
893 private final AMD64MIOp immOp;
894 private final AMD64MIOp immSxOp;
895 private final AMD64MROp mrOp;
896 private final AMD64RMOp rmOp;
897
898 private AMD64BinaryArithmetic(String opcode, int code) {
899 int baseOp = code << 3;
900
901 byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion);
902 byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion);
903 byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion);
904
905 immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.IntegerAssertion);
906 immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.IntegerAssertion);
907 mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.IntegerAssertion);
908 rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.IntegerAssertion);
909 }
910
911 public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) {
912 if (size == BYTE) {
913 return byteImmOp;
914 } else if (sx) {
915 return immSxOp;
916 } else {
917 return immOp;
918 }
919 }
920
921 public AMD64MROp getMROpcode(OperandSize size) {
922 if (size == BYTE) {
923 return byteMrOp;
924 } else {
925 return mrOp;
926 }
927 }
928
929 public AMD64RMOp getRMOpcode(OperandSize size) {
930 if (size == BYTE) {
931 return byteRmOp;
932 } else {
933 return rmOp;
934 }
935 }
936 }
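// Worked example (illustrative, not part of the original file): SUB has
// code 5, so baseOp = 5 << 3 = 0x28. That yields byte-MR 0x28, MR 0x29,
// byte-RM 0x2A and RM 0x2B, while the immediate forms use 0x80/0x81/0x83
// with 5 in the ModRM reg field - matching the x86 manual's SUB encodings.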
937
938 /**
939 * Shift operation with operand order of M1, MC or MI.
940 */
941 public static final class AMD64Shift {
942 // @formatter:off
943 public static final AMD64Shift ROL = new AMD64Shift("ROL", 0);
944 public static final AMD64Shift ROR = new AMD64Shift("ROR", 1);
945 public static final AMD64Shift RCL = new AMD64Shift("RCL", 2);
946 public static final AMD64Shift RCR = new AMD64Shift("RCR", 3);
947 public static final AMD64Shift SHL = new AMD64Shift("SHL", 4);
948 public static final AMD64Shift SHR = new AMD64Shift("SHR", 5);
949 public static final AMD64Shift SAR = new AMD64Shift("SAR", 7);
950 // @formatter:on
951
952 public final AMD64MOp m1Op;
953 public final AMD64MOp mcOp;
954 public final AMD64MIOp miOp;
955
956 private AMD64Shift(String opcode, int code) {
957 m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.IntegerAssertion);
958 mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.IntegerAssertion);
959 miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.IntegerAssertion);
960 }
961 }
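// Worked example (illustrative assumption): SHL has extension code 4, so
// "shl eax, 3" goes through SHL.miOp (0xC1 /4 ib) and emits C1 E0 03,
// where ModRM 0xE0 = [11 100 000] carries ext=4 in the reg field and
// rm = rax.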
962
963 public final void addl(AMD64Address dst, int imm32) {
964 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
965 }
966
967 public final void addl(Register dst, int imm32) {
968 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
969 }
970
971 private void addrNop4() {
972 // 4 bytes: NOP DWORD PTR [EAX+0]
973 emitByte(0x0F);
974 emitByte(0x1F);
975 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
976 emitByte(0); // 8-bits offset (1 byte)
977 }
978
979 private void addrNop5() {
980 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
981 emitByte(0x0F);
982 emitByte(0x1F);
983 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
984 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
985 emitByte(0); // 8-bits offset (1 byte)
986 }
987
988 private void addrNop7() {
989 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
990 emitByte(0x0F);
991 emitByte(0x1F);
992 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
993 emitInt(0); // 32-bits offset (4 bytes)
994 }
995
996 private void addrNop8() {
997 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
998 emitByte(0x0F);
999 emitByte(0x1F);
1000 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
1001 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
1002 emitInt(0); // 32-bits offset (4 bytes)
1003 }
1004
1005 public final void andl(Register dst, int imm32) {
1006 AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1007 }
1008
1009 public final void bswapl(Register reg) {
1010 int encode = prefixAndEncode(reg.encoding);
1011 emitByte(0x0F);
1012 emitByte(0xC8 | encode);
1013 }
1014
1015 public final void cdql() {
1016 emitByte(0x99);
1017 }
1018
1019 public final void cmovl(ConditionFlag cc, Register dst, Register src) {
1020 int encode = prefixAndEncode(dst.encoding, src.encoding);
1021 emitByte(0x0F);
1022 emitByte(0x40 | cc.getValue());
1023 emitByte(0xC0 | encode);
1024 }
1025
1026 public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) {
1027 prefix(src, dst);
1028 emitByte(0x0F);
1029 emitByte(0x40 | cc.getValue());
1030 emitOperandHelper(dst, src);
1031 }
1032
1033 public final void cmpl(Register dst, int imm32) {
1034 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1035 }
1036
1037 public final void cmpl(Register dst, Register src) {
1038 CMP.rmOp.emit(this, DWORD, dst, src);
1039 }
1040
1041 public final void cmpl(Register dst, AMD64Address src) {
1042 CMP.rmOp.emit(this, DWORD, dst, src);
1043 }
1044
1045 public final void cmpl(AMD64Address dst, int imm32) {
1046 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1047 }
1048
1049 // The 32-bit cmpxchg compares the value at adr with the contents of rax; if they are equal,
1050 // reg is stored into adr, otherwise the value at adr is loaded into rax.
1051 // The ZF is set if the compared values were equal, and cleared otherwise.
1052 public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg
1053 prefix(adr, reg);
1054 emitByte(0x0F);
1055 emitByte(0xB1);
1056 emitOperandHelper(reg, adr);
1057 }
1058
1059 protected final void decl(AMD64Address dst) {
1060 prefix(dst);
1061 emitByte(0xFF);
1062 emitOperandHelper(1, dst);
1063 }
1064
1065 public final void hlt() {
1066 emitByte(0xF4);
1067 }
1068
1069 public final void imull(Register dst, Register src, int value) {
1070 if (isByte(value)) {
1071 AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value);
1072 } else {
1073 AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value);
1074 }
1075 }
1076
1077 protected final void incl(AMD64Address dst) {
1078 prefix(dst);
1079 emitByte(0xFF);
1080 emitOperandHelper(0, dst);
1081 }
1082
1083 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
1084 int shortSize = 2;
1085 int longSize = 6;
1086 long disp = jumpTarget - position();
1087 if (!forceDisp32 && isByte(disp - shortSize)) {
1088 // 0111 tttn #8-bit disp
1089 emitByte(0x70 | cc.getValue());
1090 emitByte((int) ((disp - shortSize) & 0xFF));
1091 } else {
1092 // 0000 1111 1000 tttn #32-bit disp
1093 assert isInt(disp - longSize) : "must be 32bit offset (call4)";
1094 emitByte(0x0F);
1095 emitByte(0x80 | cc.getValue());
1096 emitInt((int) (disp - longSize));
1097 }
1098 }
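// Worked example (illustrative assumption, not in the original file): a
// backward branch to a target 10 bytes before the current position has
// disp = -10; disp - shortSize = -12 fits in a byte, so the two-byte form
// is chosen and jcc(Equal, ...) emits 74 F4.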
1099
1100 public final void jcc(ConditionFlag cc, Label l) {
1101 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc";
1102 if (l.isBound()) {
1103 jcc(cc, l.position(), false);
1104 } else {
1105 // Note: conditional jumps to this jump could be eliminated if the condition
1106 // is the same; however, that seems to be a rather unlikely case.
1107 // Note: use jccb() if the label to be bound is very close, to get
1108 // an 8-bit displacement.
1109 l.addPatchAt(position());
1110 emitByte(0x0F);
1111 emitByte(0x80 | cc.getValue());
1112 emitInt(0);
1113 }
1114
1115 }
1116
1117 public final void jccb(ConditionFlag cc, Label l) {
1118 if (l.isBound()) {
1119 int shortSize = 2;
1120 int entry = l.position();
1121 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
1122 long disp = entry - position();
1123 // 0111 tttn #8-bit disp
1124 emitByte(0x70 | cc.getValue());
1125 emitByte((int) ((disp - shortSize) & 0xFF));
1126 } else {
1127 l.addPatchAt(position());
1128 emitByte(0x70 | cc.getValue());
1129 emitByte(0);
1130 }
1131 }
1132
1133 public final void jmp(int jumpTarget, boolean forceDisp32) {
1134 int shortSize = 2;
1135 int longSize = 5;
1136 long disp = jumpTarget - position();
1137 if (!forceDisp32 && isByte(disp - shortSize)) {
1138 emitByte(0xEB);
1139 emitByte((int) ((disp - shortSize) & 0xFF));
1140 } else {
1141 emitByte(0xE9);
1142 emitInt((int) (disp - longSize));
1143 }
1144 }
1145
1146 @Override
1147 public final void jmp(Label l) {
1148 if (l.isBound()) {
1149 jmp(l.position(), false);
1150 } else {
1151 // By default, forward jumps are always 32-bit displacements, since
1152 // we can't yet know where the label will be bound. If you're sure that
1153 // the forward jump will not run beyond 127 bytes, use jmpb to
1154 // force an 8-bit displacement.
1155
1156 l.addPatchAt(position());
1157 emitByte(0xE9);
1158 emitInt(0);
1159 }
1160 }
1161
1162 public final void jmp(Register entry) {
1163 int encode = prefixAndEncode(entry.encoding);
1164 emitByte(0xFF);
1165 emitByte(0xE0 | encode);
1166 }
1167
1168 public final void jmpb(Label l) {
1169 if (l.isBound()) {
1170 int shortSize = 2;
1171 int entry = l.position();
1172 assert isByte(entry - (position() + shortSize)) : "Displacement too large for a short jmp";
1173 long offs = entry - position();
1174 emitByte(0xEB);
1175 emitByte((int) ((offs - shortSize) & 0xFF));
1176 } else {
1177
1178 l.addPatchAt(position());
1179 emitByte(0xEB);
1180 emitByte(0);
1181 }
1182 }
1183
1184 public final void leaq(Register dst, AMD64Address src) {
1185 prefixq(src, dst);
1186 emitByte(0x8D);
1187 emitOperandHelper(dst, src);
1188 }
1189
1190 public final void leave() {
1191 emitByte(0xC9);
1192 }
1193
1194 public final void lock() {
1195 emitByte(0xF0);
1196 }
1197
1198 public final void movapd(Register dst, Register src) {
1199 assert dst.getRegisterCategory().equals(AMD64.XMM);
1200 assert src.getRegisterCategory().equals(AMD64.XMM);
1201 int dstenc = dst.encoding;
1202 int srcenc = src.encoding;
1203 emitByte(0x66);
1204 if (dstenc < 8) {
1205 if (srcenc >= 8) {
1206 emitByte(Prefix.REXB);
1207 srcenc -= 8;
1208 }
1209 } else {
1210 if (srcenc < 8) {
1211 emitByte(Prefix.REXR);
1212 } else {
1213 emitByte(Prefix.REXRB);
1214 srcenc -= 8;
1215 }
1216 dstenc -= 8;
1217 }
1218 emitByte(0x0F);
1219 emitByte(0x28);
1220 emitByte(0xC0 | dstenc << 3 | srcenc);
1221 }
1222
1223 public final void movaps(Register dst, Register src) {
1224 assert dst.getRegisterCategory().equals(AMD64.XMM);
1225 assert src.getRegisterCategory().equals(AMD64.XMM);
1226 int dstenc = dst.encoding;
1227 int srcenc = src.encoding;
1228 if (dstenc < 8) {
1229 if (srcenc >= 8) {
1230 emitByte(Prefix.REXB);
1231 srcenc -= 8;
1232 }
1233 } else {
1234 if (srcenc < 8) {
1235 emitByte(Prefix.REXR);
1236 } else {
1237 emitByte(Prefix.REXRB);
1238 srcenc -= 8;
1239 }
1240 dstenc -= 8;
1241 }
1242 emitByte(0x0F);
1243 emitByte(0x28);
1244 emitByte(0xC0 | dstenc << 3 | srcenc);
1245 }
1246
1247 public final void movb(AMD64Address dst, int imm8) {
1248 prefix(dst);
1249 emitByte(0xC6);
1250 emitOperandHelper(0, dst);
1251 emitByte(imm8);
1252 }
1253
1254 public final void movb(AMD64Address dst, Register src) {
1255 assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register";
1256 prefix(dst, src, true);
1257 emitByte(0x88);
1258 emitOperandHelper(src, dst);
1259 }
1260
1261 public final void movl(Register dst, int imm32) {
1262 int encode = prefixAndEncode(dst.encoding);
1263 emitByte(0xB8 | encode);
1264 emitInt(imm32);
1265 }
1266
1267 public final void movl(Register dst, Register src) {
1268 int encode = prefixAndEncode(dst.encoding, src.encoding);
1269 emitByte(0x8B);
1270 emitByte(0xC0 | encode);
1271 }
1272
1273 public final void movl(Register dst, AMD64Address src) {
1274 prefix(src, dst);
1275 emitByte(0x8B);
1276 emitOperandHelper(dst, src);
1277 }
1278
1279 public final void movl(AMD64Address dst, int imm32) {
1280 prefix(dst);
1281 emitByte(0xC7);
1282 emitOperandHelper(0, dst);
1283 emitInt(imm32);
1284 }
1285
1286 public final void movl(AMD64Address dst, Register src) {
1287 prefix(dst, src);
1288 emitByte(0x89);
1289 emitOperandHelper(src, dst);
1290 }
1291
1292 /**
1293 * Newer CPUs require the use of movsd and movss to avoid a partial register stall when loading
1294 * from memory. But for old Opteron, movlpd is used instead of movsd. The selection is done in
1295 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and
1296 * {@link AMD64MacroAssembler#movflt(Register, Register)}.
1297 */
1298 public final void movlpd(Register dst, AMD64Address src) {
1299 assert dst.getRegisterCategory().equals(AMD64.XMM);
1300 emitByte(0x66);
1301 prefix(src, dst);
1302 emitByte(0x0F);
1303 emitByte(0x12);
1304 emitOperandHelper(dst, src);
1305 }
1306
1307 public final void movq(Register dst, AMD64Address src) {
1308 if (dst.getRegisterCategory().equals(AMD64.XMM)) {
1309 emitByte(0xF3);
1310 prefixq(src, dst);
1311 emitByte(0x0F);
1312 emitByte(0x7E);
1313 emitOperandHelper(dst, src);
1314 } else {
1315 prefixq(src, dst);
1316 emitByte(0x8B);
1317 emitOperandHelper(dst, src);
1318 }
1319 }
1320
1321 public final void movq(Register dst, Register src) {
1322 int encode = prefixqAndEncode(dst.encoding, src.encoding);
1323 emitByte(0x8B);
1324 emitByte(0xC0 | encode);
1325 }
1326
1327 public final void movq(AMD64Address dst, Register src) {
1328 if (src.getRegisterCategory().equals(AMD64.XMM)) {
1329 emitByte(0x66);
1330 prefixq(dst, src);
1331 emitByte(0x0F);
1332 emitByte(0xD6);
1333 emitOperandHelper(src, dst);
1334 } else {
1335 prefixq(dst, src);
1336 emitByte(0x89);
1337 emitOperandHelper(src, dst);
1338 }
1339 }
1340
1341 public final void movsbl(Register dst, AMD64Address src) {
1342 prefix(src, dst);
1343 emitByte(0x0F);
1344 emitByte(0xBE);
1345 emitOperandHelper(dst, src);
1346 }
1347
1348 public final void movsbl(Register dst, Register src) {
1349 int encode = prefixAndEncode(dst.encoding, false, src.encoding, true);
1350 emitByte(0x0F);
1351 emitByte(0xBE);
1352 emitByte(0xC0 | encode);
1353 }
1354
1355 public final void movsbq(Register dst, AMD64Address src) {
1356 prefixq(src, dst);
1357 emitByte(0x0F);
1358 emitByte(0xBE);
1359 emitOperandHelper(dst, src);
1360 }
1361
1362 public final void movsbq(Register dst, Register src) {
1363 int encode = prefixqAndEncode(dst.encoding, src.encoding);
1364 emitByte(0x0F);
1365 emitByte(0xBE);
1366 emitByte(0xC0 | encode);
1367 }
1368
1369 public final void movsd(Register dst, Register src) {
1370 assert dst.getRegisterCategory().equals(AMD64.XMM);
1371 assert src.getRegisterCategory().equals(AMD64.XMM);
1372 emitByte(0xF2);
1373 int encode = prefixAndEncode(dst.encoding, src.encoding);
1374 emitByte(0x0F);
1375 emitByte(0x10);
1376 emitByte(0xC0 | encode);
1377 }
1378
1379 public final void movsd(Register dst, AMD64Address src) {
1380 assert dst.getRegisterCategory().equals(AMD64.XMM);
1381 emitByte(0xF2);
1382 prefix(src, dst);
1383 emitByte(0x0F);
1384 emitByte(0x10);
1385 emitOperandHelper(dst, src);
1386 }
1387
1388 public final void movsd(AMD64Address dst, Register src) {
1389 assert src.getRegisterCategory().equals(AMD64.XMM);
1390 emitByte(0xF2);
1391 prefix(dst, src);
1392 emitByte(0x0F);
1393 emitByte(0x11);
1394 emitOperandHelper(src, dst);
1395 }
1396
1397 public final void movss(Register dst, Register src) {
1398 assert dst.getRegisterCategory().equals(AMD64.XMM);
1399 assert src.getRegisterCategory().equals(AMD64.XMM);
1400 emitByte(0xF3);
1401 int encode = prefixAndEncode(dst.encoding, src.encoding);
1402 emitByte(0x0F);
1403 emitByte(0x10);
1404 emitByte(0xC0 | encode);
1405 }
1406
1407 public final void movss(Register dst, AMD64Address src) {
1408 assert dst.getRegisterCategory().equals(AMD64.XMM);
1409 emitByte(0xF3);
1410 prefix(src, dst);
1411 emitByte(0x0F);
1412 emitByte(0x10);
1413 emitOperandHelper(dst, src);
1414 }
1415
1416 public final void movss(AMD64Address dst, Register src) {
1417 assert src.getRegisterCategory().equals(AMD64.XMM);
1418 emitByte(0xF3);
1419 prefix(dst, src);
1420 emitByte(0x0F);
1421 emitByte(0x11);
1422 emitOperandHelper(src, dst);
1423 }
1424
1425 public final void movswl(Register dst, AMD64Address src) {
1426 prefix(src, dst);
1427 emitByte(0x0F);
1428 emitByte(0xBF);
1429 emitOperandHelper(dst, src);
1430 }
1431
1432 public final void movw(AMD64Address dst, int imm16) {
1433 emitByte(0x66); // operand-size override prefix (16-bit operand)
1434 prefix(dst);
1435 emitByte(0xC7);
1436 emitOperandHelper(0, dst);
1437 emitShort(imm16);
1438 }
1439
1440 public final void movw(AMD64Address dst, Register src) {
1441 emitByte(0x66);
1442 prefix(dst, src);
1443 emitByte(0x89);
1444 emitOperandHelper(src, dst);
1445 }
1446
1447 public final void movzbl(Register dst, AMD64Address src) {
1448 prefix(src, dst);
1449 emitByte(0x0F);
1450 emitByte(0xB6);
1451 emitOperandHelper(dst, src);
1452 }
1453
1454 public final void movzwl(Register dst, AMD64Address src) {
1455 prefix(src, dst);
1456 emitByte(0x0F);
1457 emitByte(0xB7);
1458 emitOperandHelper(dst, src);
1459 }
1460
1461 @Override
1462 public final void ensureUniquePC() {
1463 nop();
1464 }
1465
1466 public final void nop() {
1467 nop(1);
1468 }
1469
1470 public void nop(int count) {
1471 int i = count;
1472 if (UseNormalNop) {
1473 assert i > 0 : " ";
1474 // The fancy nops aren't currently recognized by debuggers, making it a
1475 // pain to disassemble code while debugging. If asserts are on, speed is
1476 // clearly not an issue, so simply use the traditional single-byte nop
1477 // to do alignment.
1478
1479 for (; i > 0; i--) {
1480 emitByte(0x90);
1481 }
1482 return;
1483 }
1484
1485 if (UseAddressNop) {
1486 //
1487 // Using multi-byte nops "0x0F 0x1F [Address]" for AMD.
1488 // 1: 0x90
1489 // 2: 0x66 0x90
1490 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1491 // 4: 0x0F 0x1F 0x40 0x00
1492 // 5: 0x0F 0x1F 0x44 0x00 0x00
1493 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1494 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1495 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1496 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1497 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1498 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1499
1500 // The rest of the encoding is AMD-specific - use consecutive address nops
1501
1502 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
1503 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
1504 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1505 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1506 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1507 // Size prefixes (0x66) are added for larger sizes
1508
1509 while (i >= 22) {
1510 i -= 11;
1511 emitByte(0x66); // size prefix
1512 emitByte(0x66); // size prefix
1513 emitByte(0x66); // size prefix
1514 addrNop8();
1515 }
1516 // Generate the first nop for sizes between 12 and 21
1517 switch (i) {
1518 case 21:
1519 i -= 1;
1520 emitByte(0x66); // size prefix
1521 // fall through
1522 case 20:
1523 // fall through
1524 case 19:
1525 i -= 1;
1526 emitByte(0x66); // size prefix
1527 // fall through
1528 case 18:
1529 // fall through
1530 case 17:
1531 i -= 1;
1532 emitByte(0x66); // size prefix
1533 // fall through
1534 case 16:
1535 // fall through
1536 case 15:
1537 i -= 8;
1538 addrNop8();
1539 break;
1540 case 14:
1541 case 13:
1542 i -= 7;
1543 addrNop7();
1544 break;
1545 case 12:
1546 i -= 6;
1547 emitByte(0x66); // size prefix
1548 addrNop5();
1549 break;
1550 default:
1551 assert i < 12;
1552 }
1553
1554 // Generate the second nop for sizes between 1 and 11
1555 switch (i) {
1556 case 11:
1557 emitByte(0x66); // size prefix
1558 emitByte(0x66); // size prefix
1559 emitByte(0x66); // size prefix
1560 addrNop8();
1561 break;
1562 case 10:
1563 emitByte(0x66); // size prefix
1564 emitByte(0x66); // size prefix
1565 addrNop8();
1566 break;
1567 case 9:
1568 emitByte(0x66); // size prefix
1569 addrNop8();
1570 break;
1571 case 8:
1572 addrNop8();
1573 break;
1574 case 7:
1575 addrNop7();
1576 break;
1577 case 6:
1578 emitByte(0x66); // size prefix
1579 addrNop5();
1580 break;
1581 case 5:
1582 addrNop5();
1583 break;
1584 case 4:
1585 addrNop4();
1586 break;
1587 case 3:
1588 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
1589 emitByte(0x66); // size prefix
1590 emitByte(0x66); // size prefix
1591 emitByte(0x90); // nop
1592 break;
1593 case 2:
1594 emitByte(0x66); // size prefix
1595 emitByte(0x90); // nop
1596 break;
1597 case 1:
1598 emitByte(0x90); // nop
1599 break;
1600 default:
1601 assert i == 0;
1602 }
1603 return;
1604 }
1605
1606 // Using nops with size prefixes "0x66 0x90".
1607 // From AMD Optimization Guide:
1608 // 1: 0x90
1609 // 2: 0x66 0x90
1610 // 3: 0x66 0x66 0x90
1611 // 4: 0x66 0x66 0x66 0x90
1612 // 5: 0x66 0x66 0x90 0x66 0x90
1613 // 6: 0x66 0x66 0x90 0x66 0x66 0x90
1614 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
1615 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
1616 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
1617 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
1618 //
1619 while (i > 12) {
1620 i -= 4;
1621 emitByte(0x66); // size prefix
1622 emitByte(0x66);
1623 emitByte(0x66);
1624 emitByte(0x90); // nop
1625 }
1626 // 1 - 12 nops
1627 if (i > 8) {
1628 if (i > 9) {
1629 i -= 1;
1630 emitByte(0x66);
1631 }
1632 i -= 3;
1633 emitByte(0x66);
1634 emitByte(0x66);
1635 emitByte(0x90);
1636 }
1637 // 1 - 8 nops
1638 if (i > 4) {
1639 if (i > 6) {
1640 i -= 1;
1641 emitByte(0x66);
1642 }
1643 i -= 3;
1644 emitByte(0x66);
1645 emitByte(0x66);
1646 emitByte(0x90);
1647 }
1648 switch (i) {
1649 case 4:
1650 emitByte(0x66);
1651 emitByte(0x66);
1652 emitByte(0x66);
1653 emitByte(0x90);
1654 break;
1655 case 3:
1656 emitByte(0x66);
1657 emitByte(0x66);
1658 emitByte(0x90);
1659 break;
1660 case 2:
1661 emitByte(0x66);
1662 emitByte(0x90);
1663 break;
1664 case 1:
1665 emitByte(0x90);
1666 break;
1667 default:
1668 assert i == 0;
1669 }
1670 }
1671
1672 public final void pop(Register dst) {
1673 int encode = prefixAndEncode(dst.encoding);
1674 emitByte(0x58 | encode);
1675 }
1676
1677 public void popfq() {
1678 emitByte(0x9D);
1679 }
1680
1681 public final void ptest(Register dst, Register src) {
1682 assert supports(CPUFeature.SSE4_1);
1683 emitByte(0x66);
1684 int encode = prefixAndEncode(dst.encoding, src.encoding);
1685 emitByte(0x0F);
1686 emitByte(0x38);
1687 emitByte(0x17);
1688 emitByte(0xC0 | encode);
1689 }
1690
1691 public final void push(Register src) {
1692 int encode = prefixAndEncode(src.encoding);
1693 emitByte(0x50 | encode);
1694 }
1695
1696 public void pushfq() {
1697 emitByte(0x9c);
1698 }
1699
1700 public final void pxor(Register dst, Register src) {
1701 emitByte(0x66);
1702 int encode = prefixAndEncode(dst.encoding, src.encoding);
1703 emitByte(0x0F);
1704 emitByte(0xEF);
1705 emitByte(0xC0 | encode);
1706 }
1707
1708 public final void ret(int imm16) {
1709 if (imm16 == 0) {
1710 emitByte(0xC3);
1711 } else {
1712 emitByte(0xC2);
1713 emitShort(imm16);
1714 }
1715 }
1716
1717 public final void subl(AMD64Address dst, int imm32) {
1718 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1719 }
1720
1721 public final void subl(Register dst, int imm32) {
1722 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32);
1723 }
1724
1725 public final void testl(Register dst, int imm32) {
1726 // Not using emitArith because test
1727 // does not support sign-extension of
1728 // 8-bit operands.
1729 int encode = dst.encoding;
1730 if (encode == 0) {
1731 emitByte(0xA9);
1732 } else {
1733 encode = prefixAndEncode(encode);
1734 emitByte(0xF7);
1735 emitByte(0xC0 | encode);
1736 }
1737 emitInt(imm32);
1738 }
1739
1740 public final void testl(Register dst, Register src) {
1741 int encode = prefixAndEncode(dst.encoding, src.encoding);
1742 emitByte(0x85);
1743 emitByte(0xC0 | encode);
1744 }
1745
1746 public final void testl(Register dst, AMD64Address src) {
1747 prefix(src, dst);
1748 emitByte(0x85);
1749 emitOperandHelper(dst, src);
1750 }
1751
1752 public final void xorl(Register dst, Register src) {
1753 XOR.rmOp.emit(this, DWORD, dst, src);
1754 }
1755
1756 public final void xorpd(Register dst, Register src) {
1757 emitByte(0x66);
1758 xorps(dst, src);
1759 }
1760
1761 public final void xorps(Register dst, Register src) {
1762 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM);
1763 int encode = prefixAndEncode(dst.encoding, src.encoding);
1764 emitByte(0x0F);
1765 emitByte(0x57);
1766 emitByte(0xC0 | encode);
1767 }
1768
1769 protected final void decl(Register dst) {
1770 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
1771 int encode = prefixAndEncode(dst.encoding);
1772 emitByte(0xFF);
1773 emitByte(0xC8 | encode);
1774 }
1775
1776 protected final void incl(Register dst) {
1777 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
1778 int encode = prefixAndEncode(dst.encoding);
1779 emitByte(0xFF);
1780 emitByte(0xC0 | encode);
1781 }
1782
1783 private int prefixAndEncode(int regEnc) {
1784 return prefixAndEncode(regEnc, false);
1785 }
1786
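// Note on byteinst below: without a REX prefix, byte-register encodings 4-7
// address AH, CH, DH and BH; emitting a plain REX prefix (0x40) makes the same
// encodings address SPL, BPL, SIL and DIL instead.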
1787 private int prefixAndEncode(int regEnc, boolean byteinst) {
1788 if (regEnc >= 8) {
1789 emitByte(Prefix.REXB);
1790 return regEnc - 8;
1791 } else if (byteinst && regEnc >= 4) {
1792 emitByte(Prefix.REX);
1793 }
1794 return regEnc;
1795 }
1796
1797 private int prefixqAndEncode(int regEnc) {
1798 if (regEnc < 8) {
1799 emitByte(Prefix.REXW);
1800 return regEnc;
1801 } else {
1802 emitByte(Prefix.REXWB);
1803 return regEnc - 8;
1804 }
1805 }
1806
1807 private int prefixAndEncode(int dstEnc, int srcEnc) {
1808 return prefixAndEncode(dstEnc, false, srcEnc, false);
1809 }
1810
1811 private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) {
1812 int srcEnc = srcEncoding;
1813 int dstEnc = dstEncoding;
1814 if (dstEnc < 8) {
1815 if (srcEnc >= 8) {
1816 emitByte(Prefix.REXB);
1817 srcEnc -= 8;
1818 } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) {
1819 emitByte(Prefix.REX);
1820 }
1821 } else {
1822 if (srcEnc < 8) {
1823 emitByte(Prefix.REXR);
1824 } else {
1825 emitByte(Prefix.REXRB);
1826 srcEnc -= 8;
1827 }
1828 dstEnc -= 8;
1829 }
1830 return dstEnc << 3 | srcEnc;
1831 }
1832
1833 /**
1834 * Emits the REX prefix and returns the encoding of the lower 6 bits of the ModRM byte. If a
1835 * register encoding does not fit into 3 bits, its fourth bit is carried by the prefix.
1836 *
1837 * @param regEncoding the encoding of the register part of the ModRM byte
1838 * @param rmEncoding the encoding of the r/m part of the ModRM byte
1839 * @return the lower 6 bits of the ModRM byte that should be emitted
1840 */
1841 private int prefixqAndEncode(int regEncoding, int rmEncoding) {
1842 int rmEnc = rmEncoding;
1843 int regEnc = regEncoding;
1844 if (regEnc < 8) {
1845 if (rmEnc < 8) {
1846 emitByte(Prefix.REXW);
1847 } else {
1848 emitByte(Prefix.REXWB);
1849 rmEnc -= 8;
1850 }
1851 } else {
1852 if (rmEnc < 8) {
1853 emitByte(Prefix.REXWR);
1854 } else {
1855 emitByte(Prefix.REXWRB);
1856 rmEnc -= 8;
1857 }
1858 regEnc -= 8;
1859 }
1860 return regEnc << 3 | rmEnc;
1861 }
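// Worked example (illustrative): prefixqAndEncode(0, 9), as used by
// testq(rax, r9), emits REX.WB (0x49), reduces the r/m encoding to 1 and
// returns 0b000001; the caller ORs that into the ModRM byte, so testq(rax, r9)
// assembles to 49 85 C1.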
1862
1863 private static boolean needsRex(Register reg) {
1864 return reg.encoding >= MinEncodingNeedsRex;
1865 }
1866
1867 private void prefix(AMD64Address adr) {
1868 if (needsRex(adr.getBase())) {
1869 if (needsRex(adr.getIndex())) {
1870 emitByte(Prefix.REXXB);
1871 } else {
1872 emitByte(Prefix.REXB);
1873 }
1874 } else {
1875 if (needsRex(adr.getIndex())) {
1876 emitByte(Prefix.REXX);
1877 }
1878 }
1879 }
1880
1881 private void prefixq(AMD64Address adr) {
1882 if (needsRex(adr.getBase())) {
1883 if (needsRex(adr.getIndex())) {
1884 emitByte(Prefix.REXWXB);
1885 } else {
1886 emitByte(Prefix.REXWB);
1887 }
1888 } else {
1889 if (needsRex(adr.getIndex())) {
1890 emitByte(Prefix.REXWX);
1891 } else {
1892 emitByte(Prefix.REXW);
1893 }
1894 }
1895 }
1896
1897 private void prefix(AMD64Address adr, Register reg) {
1898 prefix(adr, reg, false);
1899 }
1900
1901 private void prefix(AMD64Address adr, Register reg, boolean byteinst) {
1902 if (reg.encoding < 8) {
1903 if (needsRex(adr.getBase())) {
1904 if (needsRex(adr.getIndex())) {
1905 emitByte(Prefix.REXXB);
1906 } else {
1907 emitByte(Prefix.REXB);
1908 }
1909 } else {
1910 if (needsRex(adr.getIndex())) {
1911 emitByte(Prefix.REXX);
1912 } else if (byteinst && reg.encoding >= 4) {
1913 emitByte(Prefix.REX);
1914 }
1915 }
1916 } else {
1917 if (needsRex(adr.getBase())) {
1918 if (needsRex(adr.getIndex())) {
1919 emitByte(Prefix.REXRXB);
1920 } else {
1921 emitByte(Prefix.REXRB);
1922 }
1923 } else {
1924 if (needsRex(adr.getIndex())) {
1925 emitByte(Prefix.REXRX);
1926 } else {
1927 emitByte(Prefix.REXR);
1928 }
1929 }
1930 }
1931 }
1932
1933 private void prefixq(AMD64Address adr, Register src) {
1934 if (src.encoding < 8) {
1935 if (needsRex(adr.getBase())) {
1936 if (needsRex(adr.getIndex())) {
1937 emitByte(Prefix.REXWXB);
1938 } else {
1939 emitByte(Prefix.REXWB);
1940 }
1941 } else {
1942 if (needsRex(adr.getIndex())) {
1943 emitByte(Prefix.REXWX);
1944 } else {
1945 emitByte(Prefix.REXW);
1946 }
1947 }
1948 } else {
1949 if (needsRex(adr.getBase())) {
1950 if (needsRex(adr.getIndex())) {
1951 emitByte(Prefix.REXWRXB);
1952 } else {
1953 emitByte(Prefix.REXWRB);
1954 }
1955 } else {
1956 if (needsRex(adr.getIndex())) {
1957 emitByte(Prefix.REXWRX);
1958 } else {
1959 emitByte(Prefix.REXWR);
1960 }
1961 }
1962 }
1963 }
1964
1965 public final void addq(Register dst, int imm32) {
1966 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
1967 }
1968
1969 public final void addq(AMD64Address dst, int imm32) {
1970 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
1971 }
1972
1973 public final void addq(Register dst, Register src) {
1974 ADD.rmOp.emit(this, QWORD, dst, src);
1975 }
1976
1977 public final void addq(AMD64Address dst, Register src) {
1978 ADD.mrOp.emit(this, QWORD, dst, src);
1979 }
1980
1981 public final void andq(Register dst, int imm32) {
1982 AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
1983 }
1984
1985 public final void bswapq(Register reg) {
1986 int encode = prefixqAndEncode(reg.encoding);
1987 emitByte(0x0F);
1988 emitByte(0xC8 | encode);
1989 }
1990
1991 public final void cdqq() {
1992 emitByte(Prefix.REXW);
1993 emitByte(0x99);
1994 }
1995
1996 public final void cmovq(ConditionFlag cc, Register dst, Register src) {
1997 int encode = prefixqAndEncode(dst.encoding, src.encoding);
1998 emitByte(0x0F);
1999 emitByte(0x40 | cc.getValue());
2000 emitByte(0xC0 | encode);
2001 }
2002
2003 public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) {
2004 prefixq(src, dst);
2005 emitByte(0x0F);
2006 emitByte(0x40 | cc.getValue());
2007 emitOperandHelper(dst, src);
2008 }
2009
2010 public final void cmpq(Register dst, int imm32) {
2011 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2012 }
2013
2014 public final void cmpq(Register dst, Register src) {
2015 CMP.rmOp.emit(this, QWORD, dst, src);
2016 }
2017
2018 public final void cmpq(Register dst, AMD64Address src) {
2019 CMP.rmOp.emit(this, QWORD, dst, src);
2020 }
2021
2022 public final void cmpxchgq(Register reg, AMD64Address adr) {
2023 prefixq(adr, reg);
2024 emitByte(0x0F);
2025 emitByte(0xB1);
2026 emitOperandHelper(reg, adr);
2027 }
2028
2029 protected final void decq(Register dst) {
2030 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2031 int encode = prefixqAndEncode(dst.encoding);
2032 emitByte(0xFF);
2033 emitByte(0xC8 | encode);
2034 }
2035
2036 public final void decq(AMD64Address dst) {
2037 DEC.emit(this, QWORD, dst);
2038 }
2039
2040 public final void incq(Register dst) {
2041 // Don't use this directly; use the macro assembler's incrementq() instead.
2042 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2043 int encode = prefixqAndEncode(dst.encoding);
2044 emitByte(0xFF);
2045 emitByte(0xC0 | encode);
2046 }
2047
2048 public final void incq(AMD64Address dst) {
2049 INC.emit(this, QWORD, dst);
2050 }
2051
2052 public final void movq(Register dst, long imm64) {
2053 int encode = prefixqAndEncode(dst.encoding);
2054 emitByte(0xB8 | encode);
2055 emitLong(imm64);
2056 }
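// Encoding sketch (illustrative): movq(rax, imm64) emits REX.W (0x48), then
// B8 | reg, then the eight immediate bytes in little-endian order. The
// movslq(Register, int) variant below instead emits C7 /0 with a 32-bit
// immediate that the CPU sign-extends to 64 bits.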
2057
2058 public final void movslq(Register dst, int imm32) {
2059 int encode = prefixqAndEncode(dst.encoding);
2060 emitByte(0xC7);
2061 emitByte(0xC0 | encode);
2062 emitInt(imm32);
2063 }
2064
2065 public final void movdq(Register dst, Register src) {
2066
2067 // the 0x66 prefix selects the SSE2 (XMM) form; table D-1 lists movd/movq under MMX/SSE2
2068 emitByte(0x66);
2069
2070 if (dst.getRegisterCategory().equals(AMD64.XMM)) {
2071 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2072 emitByte(0x0F);
2073 emitByte(0x6E);
2074 emitByte(0xC0 | encode);
2075 } else if (src.getRegisterCategory().equals(AMD64.XMM)) {
2076
2077 // swap src/dst to get correct prefix
2078 int encode = prefixqAndEncode(src.encoding, dst.encoding);
2079 emitByte(0x0F);
2080 emitByte(0x7E);
2081 emitByte(0xC0 | encode);
2082 } else {
2083 throw new InternalError("should not reach here");
2084 }
2085 }
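// Worked example (illustrative): movdq(xmm0, rax) assembles to 66 48 0F 6E C0
// and the reverse movdq(rax, xmm0) to 66 48 0F 7E C0; the 0x66 prefix selects
// the XMM form and REX.W the 64-bit general-purpose operand.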
2086
2087 public final void movdqu(Register dst, AMD64Address src) {
2088 emitByte(0xF3);
2089 prefix(src, dst);
2090 emitByte(0x0F);
2091 emitByte(0x6F);
2092 emitOperandHelper(dst, src);
2093 }
2094
2095 public final void movslq(AMD64Address dst, int imm32) {
2096 prefixq(dst);
2097 emitByte(0xC7);
2098 emitOperandHelper(0, dst);
2099 emitInt(imm32);
2100 }
2101
2102 public final void movslq(Register dst, AMD64Address src) {
2103 prefixq(src, dst);
2104 emitByte(0x63);
2105 emitOperandHelper(dst, src);
2106 }
2107
2108 public final void movslq(Register dst, Register src) {
2109 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2110 emitByte(0x63);
2111 emitByte(0xC0 | encode);
2112 }
2113
2114 public final void negq(Register dst) {
2115 int encode = prefixqAndEncode(dst.encoding);
2116 emitByte(0xF7);
2117 emitByte(0xD8 | encode);
2118 }
2119
2120 public final void shlq(Register dst, int imm8) {
2121 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2122 int encode = prefixqAndEncode(dst.encoding);
2123 if (imm8 == 1) {
2124 emitByte(0xD1);
2125 emitByte(0xE0 | encode);
2126 } else {
2127 emitByte(0xC1);
2128 emitByte(0xE0 | encode);
2129 emitByte(imm8);
2130 }
2131 }
2132
2133 public final void shrq(Register dst, int imm8) {
2134 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2135 int encode = prefixqAndEncode(dst.encoding);
2136 if (imm8 == 1) {
2137 emitByte(0xD1);
2138 emitByte(0xE8 | encode);
2139 } else {
2140 emitByte(0xC1);
2141 emitByte(0xE8 | encode);
2142 emitByte(imm8);
2143 }
2144 }
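// Encoding sketch (illustrative): the shift-by-one form uses opcode D1, so
// shlq(rax, 1) emits 48 D1 E0, while shlq(rax, 5) uses C1 /4 ib and emits
// 48 C1 E0 05. The asserts above check imm8 >> 1, presumably because 64-bit
// shifts allow counts up to 63, twice the 32-bit range that isShiftCount()
// validates.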
2145
2146 public final void subq(Register dst, int imm32) {
2147 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2148 }
2149
2150 public final void subq(AMD64Address dst, int imm32) {
2151 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32);
2152 }
2153
2154 public final void subqWide(Register dst, int imm32) {
2155 // don't use the sign-extending version, forcing a 32-bit immediate
2156 SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32);
2157 }
2158
2159 public final void subq(Register dst, Register src) {
2160 SUB.rmOp.emit(this, QWORD, dst, src);
2161 }
2162
2163 public final void testq(Register dst, Register src) {
2164 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2165 emitByte(0x85);
2166 emitByte(0xC0 | encode);
2167 }
2168
2169 public final void xaddl(AMD64Address dst, Register src) {
2170 prefix(dst, src);
2171 emitByte(0x0F);
2172 emitByte(0xC1);
2173 emitOperandHelper(src, dst);
2174 }
2175
2176 public final void xaddq(AMD64Address dst, Register src) {
2177 prefixq(dst, src);
2178 emitByte(0x0F);
2179 emitByte(0xC1);
2180 emitOperandHelper(src, dst);
2181 }
2182
2183 public final void xchgl(Register dst, AMD64Address src) {
2184 prefix(src, dst);
2185 emitByte(0x87);
2186 emitOperandHelper(dst, src);
2187 }
2188
2189 public final void xchgq(Register dst, AMD64Address src) {
2190 prefixq(src, dst);
2191 emitByte(0x87);
2192 emitOperandHelper(dst, src);
2193 }
2194
2195 public final void membar(int barriers) {
2196 if (target.isMP) {
2197 // We only have to handle StoreLoad
2198 if ((barriers & STORE_LOAD) != 0) {
2199 // All usable chips support "locked" instructions which suffice
2200 // as barriers, and are much faster than the alternative of
2201 // executing a cpuid instruction. We emit a locked add [rsp], 0 here,
2202 // which, apart from clobbering the flags, is conveniently a no-op
2203 // otherwise.
2204 // Any change to this code may require revisiting other places in
2205 // the code where this idiom is used, in particular the
2206 // orderAccess code.
2207 lock();
2208 addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here
2209 }
2210 }
2211 }
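// Byte-level sketch (assuming lock() emits the 0xF0 prefix): the barrier above
// assembles to F0 83 04 24 00, i.e. a lock-prefixed 83 /0 ib add of 0 to [rsp],
// with the 04 24 ModRM/SIB pair encoding the rsp base.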
2212
2213 @Override
2214 protected final void patchJumpTarget(int branch, int branchTarget) {
2215 int op = getByte(branch);
2216 assert op == 0xE8 // call
2217 || op == 0x00 // jump table entry
2218 || op == 0xE9 // jmp
2219 || op == 0xEB // short jmp
2220 || (op & 0xF0) == 0x70 // short jcc
2221 || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc
2222 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" +
2223 branchTarget + ", op=" + op;
2224
2225 if (op == 0x00) {
2226 int offsetToJumpTableBase = getShort(branch + 1);
2227 int jumpTableBase = branch - offsetToJumpTableBase;
2228 int imm32 = branchTarget - jumpTableBase;
2229 emitInt(imm32, branch);
2230 } else if (op == 0xEB || (op & 0xF0) == 0x70) {
2231
2232 // short offset operators (jmp and jcc)
2233 final int imm8 = branchTarget - (branch + 2);
2234 /*
2235 * Since a wrongly patched short branch can potentially produce code that appears to work
2236 * but behaves badly, we always fail with an exception here instead of an assertion.
2237 */
2238 if (!NumUtil.isByte(imm8)) {
2239 throw new InternalError("branch displacement out of range: " + imm8);
2240 }
2241 emitByte(imm8, branch + 1);
2242
2243 } else {
2244
2245 int off = 1;
2246 if (op == 0x0F) {
2247 off = 2;
2248 }
2249
2250 int imm32 = branchTarget - (branch + 4 + off);
2251 emitInt(imm32, branch + off);
2252 }
2253 }
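// Worked example (illustrative): for a long jcc, op == 0x0F, so off == 2 and
// imm32 == branchTarget - (branch + 6), i.e. the displacement is relative to
// the end of the 6-byte instruction. Patching a jcc at branch == 100 toward
// branchTarget == 160 therefore stores 0x36 (54) at branch + 2.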
2254
2255 public void nullCheck(AMD64Address address) {
2256 testl(AMD64.rax, address);
2257 }
2258
2259 @Override
2260 public void align(int modulus) {
2261 if (position() % modulus != 0) {
2262 nop(modulus - (position() % modulus));
2263 }
2264 }
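// For instance (illustrative), align(16) at position 13 emits nop(3), i.e. the
// bytes 66 66 90, so the next instruction starts at offset 16.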
2265
2266 /**
2267 * Emits a direct call instruction. Note that the actual call target is not specified, because
2268 * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is
2269 * responsible for adding the call address to the appropriate patching tables.
2270 */
2271 public final void call() {
2272 emitByte(0xE8);
2273 emitInt(0);
2274 }
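// Usage sketch (illustrative; the patching machinery itself lives elsewhere):
// after call() returns, the four zero displacement bytes start at
// position() - 4, and the eventual patch must store target - position() there,
// since the displacement is relative to the end of the instruction.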
2275
2276 public final void call(Register src) {
2277 int encode = prefixAndEncode(src.encoding);
2278 emitByte(0xFF);
2279 emitByte(0xD0 | encode);
2280 }
2281
2282 public final void int3() {
2283 emitByte(0xCC);
2284 }
2285
2286 private void emitx87(int b1, int b2, int i) {
2287 assert 0 <= i && i < 8 : "illegal stack offset";
2288 emitByte(b1);
2289 emitByte(b2 + i);
2290 }
2291
2292 public final void fldd(AMD64Address src) {
2293 emitByte(0xDD);
2294 emitOperandHelper(0, src);
2295 }
2296
2297 public final void flds(AMD64Address src) {
2298 emitByte(0xD9);
2299 emitOperandHelper(0, src);
2300 }
2301
2302 public final void fldln2() {
2303 emitByte(0xD9);
2304 emitByte(0xED);
2305 }
2306
2307 public final void fldlg2() {
2308 emitByte(0xD9);
2309 emitByte(0xEC);
2310 }
2311
2312 public final void fyl2x() {
2313 emitByte(0xD9);
2314 emitByte(0xF1);
2315 }
2316
2317 public final void fstps(AMD64Address src) {
2318 emitByte(0xD9);
2319 emitOperandHelper(3, src);
2320 }
2321
2322 public final void fstpd(AMD64Address src) {
2323 emitByte(0xDD);
2324 emitOperandHelper(3, src);
2325 }
2326
2327 private void emitFPUArith(int b1, int b2, int i) {
2328 assert 0 <= i && i < 8 : "illegal FPU register: " + i;
2329 emitByte(b1);
2330 emitByte(b2 + i);
2331 }
2332
2333 public void ffree(int i) {
2334 emitFPUArith(0xDD, 0xC0, i);
2335 }
2336
2337 public void fincstp() {
2338 emitByte(0xD9);
2339 emitByte(0xF7);
2340 }
2341
2342 public void fxch(int i) {
2343 emitFPUArith(0xD9, 0xC8, i);
2344 }
2345
2346 public void fnstswAX() {
2347 emitByte(0xDF);
2348 emitByte(0xE0);
2349 }
2350
2351 public void fwait() {
2352 emitByte(0x9B);
2353 }
2354
2355 public void fprem() {
2356 emitByte(0xD9);
2357 emitByte(0xF8);
2358 }
2359
2360 public final void fsin() {
2361 emitByte(0xD9);
2362 emitByte(0xFE);
2363 }
2364
2365 public final void fcos() {
2366 emitByte(0xD9);
2367 emitByte(0xFF);
2368 }
2369
2370 public final void fptan() {
2371 emitByte(0xD9);
2372 emitByte(0xF2);
2373 }
2374
2375 public final void fstp(int i) {
2376 emitx87(0xDD, 0xD8, i);
2377 }
2378
2379 @Override
2380 public AMD64Address makeAddress(Register base, int displacement) {
2381 return new AMD64Address(base, displacement);
2382 }
2383
2384 @Override
2385 public AMD64Address getPlaceholder() {
2386 return Placeholder;
2387 }
2388
2389 private void prefetchPrefix(AMD64Address src) {
2390 prefix(src);
2391 emitByte(0x0F);
2392 }
2393
2394 public void prefetchnta(AMD64Address src) {
2395 prefetchPrefix(src);
2396 emitByte(0x18);
2397 emitOperandHelper(0, src);
2398 }
2399
2400 void prefetchr(AMD64Address src) {
2401 assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
2402 prefetchPrefix(src);
2403 emitByte(0x0D);
2404 emitOperandHelper(0, src);
2405 }
2406
2407 public void prefetcht0(AMD64Address src) {
2408 assert supports(CPUFeature.SSE);
2409 prefetchPrefix(src);
2410 emitByte(0x18);
2411 emitOperandHelper(1, src);
2412 }
2413
2414 public void prefetcht1(AMD64Address src) {
2415 assert supports(CPUFeature.SSE);
2416 prefetchPrefix(src);
2417 emitByte(0x18);
2418 emitOperandHelper(2, src);
2419 }
2420
2421 public void prefetcht2(AMD64Address src) {
2422 assert supports(CPUFeature.SSE);
2423 prefix(src);
2424 emitByte(0x0F);
2425 emitByte(0x18);
2426 emitOperandHelper(3, src);
2427 }
2428
2429 public void prefetchw(AMD64Address src) {
2430 assert supports(CPUFeature.AMD_3DNOW_PREFETCH);
2431 prefix(src);
2432 emitByte(0x0F);
2433 emitByte(0x0D);
2434 emitOperandHelper(1, src);
2435 }
2436
2437 /**
2438 * Emits an instruction that is guaranteed to be illegal (UD2, bytes 0x0F 0x0B). This is
2439 * used when we deliberately want to crash the program (e.g., for debugging).
2440 */
2441 public void illegal() {
2442 emitByte(0x0F);
2443 emitByte(0x0B);
2444 }
2445 }