001/* 002 * Copyright (c) 2009, 2015, Oracle and/or its affiliates. All rights reserved. 003 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 004 * 005 * This code is free software; you can redistribute it and/or modify it 006 * under the terms of the GNU General Public License version 2 only, as 007 * published by the Free Software Foundation. 008 * 009 * This code is distributed in the hope that it will be useful, but WITHOUT 010 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 011 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 012 * version 2 for more details (a copy is included in the LICENSE file that 013 * accompanied this code). 014 * 015 * You should have received a copy of the GNU General Public License version 016 * 2 along with this work; if not, write to the Free Software Foundation, 017 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 018 * 019 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 020 * or visit www.oracle.com if you need additional information or have any 021 * questions. 022 */ 023package com.oracle.graal.asm.amd64; 024 025import jdk.internal.jvmci.amd64.*; 026import jdk.internal.jvmci.amd64.AMD64.*; 027import jdk.internal.jvmci.code.*; 028import jdk.internal.jvmci.code.Register.*; 029 030import com.oracle.graal.asm.*; 031 032import static com.oracle.graal.asm.NumUtil.*; 033import static com.oracle.graal.asm.amd64.AMD64AsmOptions.*; 034import static com.oracle.graal.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.*; 035import static com.oracle.graal.asm.amd64.AMD64Assembler.AMD64MOp.*; 036import static com.oracle.graal.asm.amd64.AMD64Assembler.OperandSize.*; 037import static jdk.internal.jvmci.amd64.AMD64.*; 038import static jdk.internal.jvmci.code.MemoryBarriers.*; 039 040/** 041 * This class implements an assembler that can encode most X86 instructions. 042 */ 043public class AMD64Assembler extends Assembler { 044 045 private static final int MinEncodingNeedsRex = 8; 046 047 /** 048 * A sentinel value used as a place holder in an instruction stream for an address that will be 049 * patched. 050 */ 051 private static final AMD64Address Placeholder = new AMD64Address(rip); 052 053 /** 054 * The x86 condition codes used for conditional jumps/moves. 055 */ 056 public enum ConditionFlag { 057 Zero(0x4, "|zero|"), 058 NotZero(0x5, "|nzero|"), 059 Equal(0x4, "="), 060 NotEqual(0x5, "!="), 061 Less(0xc, "<"), 062 LessEqual(0xe, "<="), 063 Greater(0xf, ">"), 064 GreaterEqual(0xd, ">="), 065 Below(0x2, "|<|"), 066 BelowEqual(0x6, "|<=|"), 067 Above(0x7, "|>|"), 068 AboveEqual(0x3, "|>=|"), 069 Overflow(0x0, "|of|"), 070 NoOverflow(0x1, "|nof|"), 071 CarrySet(0x2, "|carry|"), 072 CarryClear(0x3, "|ncarry|"), 073 Negative(0x8, "|neg|"), 074 Positive(0x9, "|pos|"), 075 Parity(0xa, "|par|"), 076 NoParity(0xb, "|npar|"); 077 078 private final int value; 079 private final String operator; 080 081 private ConditionFlag(int value, String operator) { 082 this.value = value; 083 this.operator = operator; 084 } 085 086 public ConditionFlag negate() { 087 switch (this) { 088 case Zero: 089 return NotZero; 090 case NotZero: 091 return Zero; 092 case Equal: 093 return NotEqual; 094 case NotEqual: 095 return Equal; 096 case Less: 097 return GreaterEqual; 098 case LessEqual: 099 return Greater; 100 case Greater: 101 return LessEqual; 102 case GreaterEqual: 103 return Less; 104 case Below: 105 return AboveEqual; 106 case BelowEqual: 107 return Above; 108 case Above: 109 return BelowEqual; 110 case AboveEqual: 111 return Below; 112 case Overflow: 113 return NoOverflow; 114 case NoOverflow: 115 return Overflow; 116 case CarrySet: 117 return CarryClear; 118 case CarryClear: 119 return CarrySet; 120 case Negative: 121 return Positive; 122 case Positive: 123 return Negative; 124 case Parity: 125 return NoParity; 126 case NoParity: 127 return Parity; 128 } 129 throw new IllegalArgumentException(); 130 } 131 132 public int getValue() { 133 return value; 134 } 135 136 @Override 137 public String toString() { 138 return operator; 139 } 140 } 141 142 /** 143 * Constants for X86 prefix bytes. 144 */ 145 private static class Prefix { 146 147 private static final int REX = 0x40; 148 private static final int REXB = 0x41; 149 private static final int REXX = 0x42; 150 private static final int REXXB = 0x43; 151 private static final int REXR = 0x44; 152 private static final int REXRB = 0x45; 153 private static final int REXRX = 0x46; 154 private static final int REXRXB = 0x47; 155 private static final int REXW = 0x48; 156 private static final int REXWB = 0x49; 157 private static final int REXWX = 0x4A; 158 private static final int REXWXB = 0x4B; 159 private static final int REXWR = 0x4C; 160 private static final int REXWRB = 0x4D; 161 private static final int REXWRX = 0x4E; 162 private static final int REXWRXB = 0x4F; 163 } 164 165 /** 166 * The x86 operand sizes. 167 */ 168 public static enum OperandSize { 169 BYTE(1) { 170 @Override 171 protected void emitImmediate(AMD64Assembler asm, int imm) { 172 assert imm == (byte) imm; 173 asm.emitByte(imm); 174 } 175 }, 176 177 WORD(2, 0x66) { 178 @Override 179 protected void emitImmediate(AMD64Assembler asm, int imm) { 180 assert imm == (short) imm; 181 asm.emitShort(imm); 182 } 183 }, 184 185 DWORD(4) { 186 @Override 187 protected void emitImmediate(AMD64Assembler asm, int imm) { 188 asm.emitInt(imm); 189 } 190 }, 191 192 QWORD(8) { 193 @Override 194 protected void emitImmediate(AMD64Assembler asm, int imm) { 195 asm.emitInt(imm); 196 } 197 }, 198 199 SS(4, 0xF3, true), 200 201 SD(8, 0xF2, true), 202 203 PS(16, true), 204 205 PD(16, 0x66, true); 206 207 private final int sizePrefix; 208 209 private final int bytes; 210 private final boolean xmm; 211 212 private OperandSize(int bytes) { 213 this(bytes, 0); 214 } 215 216 private OperandSize(int bytes, int sizePrefix) { 217 this(bytes, sizePrefix, false); 218 } 219 220 private OperandSize(int bytes, boolean xmm) { 221 this(bytes, 0, xmm); 222 } 223 224 private OperandSize(int bytes, int sizePrefix, boolean xmm) { 225 this.sizePrefix = sizePrefix; 226 this.bytes = bytes; 227 this.xmm = xmm; 228 } 229 230 public int getBytes() { 231 return bytes; 232 } 233 234 public boolean isXmmType() { 235 return xmm; 236 } 237 238 /** 239 * Emit an immediate of this size. Note that immediate {@link #QWORD} operands are encoded 240 * as sign-extended 32-bit values. 241 * 242 * @param asm 243 * @param imm 244 */ 245 protected void emitImmediate(AMD64Assembler asm, int imm) { 246 assert false; 247 } 248 } 249 250 /** 251 * Operand size and register type constraints. 252 */ 253 private static enum OpAssertion { 254 ByteAssertion(CPU, CPU, BYTE), 255 IntegerAssertion(CPU, CPU, WORD, DWORD, QWORD), 256 No16BitAssertion(CPU, CPU, DWORD, QWORD), 257 No32BitAssertion(CPU, CPU, WORD, QWORD), 258 QwordOnlyAssertion(CPU, CPU, QWORD), 259 FloatingAssertion(XMM, XMM, SS, SD, PS, PD), 260 PackedFloatingAssertion(XMM, XMM, PS, PD), 261 SingleAssertion(XMM, XMM, SS), 262 DoubleAssertion(XMM, XMM, SD), 263 IntToFloatingAssertion(XMM, CPU, DWORD, QWORD), 264 FloatingToIntAssertion(CPU, XMM, DWORD, QWORD); 265 266 private final RegisterCategory resultCategory; 267 private final RegisterCategory inputCategory; 268 private final OperandSize[] allowedSizes; 269 270 private OpAssertion(RegisterCategory resultCategory, RegisterCategory inputCategory, OperandSize... allowedSizes) { 271 this.resultCategory = resultCategory; 272 this.inputCategory = inputCategory; 273 this.allowedSizes = allowedSizes; 274 } 275 276 protected boolean checkOperands(AMD64Op op, OperandSize size, Register resultReg, Register inputReg) { 277 assert resultReg == null || resultCategory.equals(resultReg.getRegisterCategory()) : "invalid result register " + resultReg + " used in " + op; 278 assert inputReg == null || inputCategory.equals(inputReg.getRegisterCategory()) : "invalid input register " + inputReg + " used in " + op; 279 280 for (OperandSize s : allowedSizes) { 281 if (size == s) { 282 return true; 283 } 284 } 285 286 assert false : "invalid operand size " + size + " used in " + op; 287 return false; 288 } 289 } 290 291 /** 292 * The register to which {@link Register#Frame} and {@link Register#CallerFrame} are bound. 293 */ 294 public final Register frameRegister; 295 296 /** 297 * Constructs an assembler for the AMD64 architecture. 298 * 299 * @param registerConfig the register configuration used to bind {@link Register#Frame} and 300 * {@link Register#CallerFrame} to physical registers. This value can be null if this 301 * assembler instance will not be used to assemble instructions using these logical 302 * registers. 303 */ 304 public AMD64Assembler(TargetDescription target, RegisterConfig registerConfig) { 305 super(target); 306 this.frameRegister = registerConfig == null ? null : registerConfig.getFrameRegister(); 307 } 308 309 private boolean supports(CPUFeature feature) { 310 return ((AMD64) target.arch).getFeatures().contains(feature); 311 } 312 313 private static int encode(Register r) { 314 assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding; 315 return r.encoding & 0x7; 316 } 317 318 /** 319 * Get RXB bits for register-register instruction. In that encoding, ModRM.rm contains a 320 * register index. The R bit extends the ModRM.reg field and the B bit extends the ModRM.rm 321 * field. The X bit must be 0. 322 */ 323 protected static int getRXB(Register reg, Register rm) { 324 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1; 325 rxb |= (rm == null ? 0 : rm.encoding & 0x08) >> 3; 326 return rxb; 327 } 328 329 /** 330 * Get RXB bits for register-memory instruction. The R bit extends the ModRM.reg field. There 331 * are two cases for the memory operand:<br> 332 * ModRM.rm contains the base register: In that case, B extends the ModRM.rm field and X = 0.<br> 333 * There is an SIB byte: In that case, X extends SIB.index and B extends SIB.base. 334 */ 335 protected static int getRXB(Register reg, AMD64Address rm) { 336 int rxb = (reg == null ? 0 : reg.encoding & 0x08) >> 1; 337 if (!rm.getIndex().equals(Register.None)) { 338 rxb |= (rm.getIndex().encoding & 0x08) >> 2; 339 } 340 if (!rm.getBase().equals(Register.None)) { 341 rxb |= (rm.getBase().encoding & 0x08) >> 3; 342 } 343 return rxb; 344 } 345 346 /** 347 * Emit the ModR/M byte for one register operand and an opcode extension in the R field. 348 * <p> 349 * Format: [ 11 reg r/m ] 350 */ 351 protected void emitModRM(int reg, Register rm) { 352 assert (reg & 0x07) == reg; 353 emitByte(0xC0 | (reg << 3) | (rm.encoding & 0x07)); 354 } 355 356 /** 357 * Emit the ModR/M byte for two register operands. 358 * <p> 359 * Format: [ 11 reg r/m ] 360 */ 361 protected void emitModRM(Register reg, Register rm) { 362 emitModRM(reg.encoding & 0x07, rm); 363 } 364 365 /** 366 * Emits the ModR/M byte and optionally the SIB byte for one register and one memory operand. 367 */ 368 protected void emitOperandHelper(Register reg, AMD64Address addr) { 369 assert !reg.equals(Register.None); 370 emitOperandHelper(encode(reg), addr); 371 } 372 373 /** 374 * Emits the ModR/M byte and optionally the SIB byte for one memory operand and an opcode 375 * extension in the R field. 376 */ 377 protected void emitOperandHelper(int reg, AMD64Address addr) { 378 assert (reg & 0x07) == reg; 379 int regenc = reg << 3; 380 381 Register base = addr.getBase(); 382 Register index = addr.getIndex(); 383 384 AMD64Address.Scale scale = addr.getScale(); 385 int disp = addr.getDisplacement(); 386 387 if (base.equals(Register.Frame)) { 388 assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration"; 389 base = frameRegister; 390 } 391 392 if (base.equals(AMD64.rip)) { // also matches Placeholder 393 // [00 000 101] disp32 394 assert index.equals(Register.None) : "cannot use RIP relative addressing with index register"; 395 emitByte(0x05 | regenc); 396 emitInt(disp); 397 } else if (base.isValid()) { 398 int baseenc = base.isValid() ? encode(base) : 0; 399 if (index.isValid()) { 400 int indexenc = encode(index) << 3; 401 // [base + indexscale + disp] 402 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) { 403 // [base + indexscale] 404 // [00 reg 100][ss index base] 405 assert !index.equals(rsp) : "illegal addressing mode"; 406 emitByte(0x04 | regenc); 407 emitByte(scale.log2 << 6 | indexenc | baseenc); 408 } else if (isByte(disp)) { 409 // [base + indexscale + imm8] 410 // [01 reg 100][ss index base] imm8 411 assert !index.equals(rsp) : "illegal addressing mode"; 412 emitByte(0x44 | regenc); 413 emitByte(scale.log2 << 6 | indexenc | baseenc); 414 emitByte(disp & 0xFF); 415 } else { 416 // [base + indexscale + disp32] 417 // [10 reg 100][ss index base] disp32 418 assert !index.equals(rsp) : "illegal addressing mode"; 419 emitByte(0x84 | regenc); 420 emitByte(scale.log2 << 6 | indexenc | baseenc); 421 emitInt(disp); 422 } 423 } else if (base.equals(rsp) || base.equals(r12)) { 424 // [rsp + disp] 425 if (disp == 0) { 426 // [rsp] 427 // [00 reg 100][00 100 100] 428 emitByte(0x04 | regenc); 429 emitByte(0x24); 430 } else if (isByte(disp)) { 431 // [rsp + imm8] 432 // [01 reg 100][00 100 100] disp8 433 emitByte(0x44 | regenc); 434 emitByte(0x24); 435 emitByte(disp & 0xFF); 436 } else { 437 // [rsp + imm32] 438 // [10 reg 100][00 100 100] disp32 439 emitByte(0x84 | regenc); 440 emitByte(0x24); 441 emitInt(disp); 442 } 443 } else { 444 // [base + disp] 445 assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode"; 446 if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) { 447 // [base] 448 // [00 reg base] 449 emitByte(0x00 | regenc | baseenc); 450 } else if (isByte(disp)) { 451 // [base + disp8] 452 // [01 reg base] disp8 453 emitByte(0x40 | regenc | baseenc); 454 emitByte(disp & 0xFF); 455 } else { 456 // [base + disp32] 457 // [10 reg base] disp32 458 emitByte(0x80 | regenc | baseenc); 459 emitInt(disp); 460 } 461 } 462 } else { 463 if (index.isValid()) { 464 int indexenc = encode(index) << 3; 465 // [indexscale + disp] 466 // [00 reg 100][ss index 101] disp32 467 assert !index.equals(rsp) : "illegal addressing mode"; 468 emitByte(0x04 | regenc); 469 emitByte(scale.log2 << 6 | indexenc | 0x05); 470 emitInt(disp); 471 } else { 472 // [disp] ABSOLUTE 473 // [00 reg 100][00 100 101] disp32 474 emitByte(0x04 | regenc); 475 emitByte(0x25); 476 emitInt(disp); 477 } 478 } 479 } 480 481 /** 482 * Base class for AMD64 opcodes. 483 */ 484 public static class AMD64Op { 485 486 protected static final int P_0F = 0x0F; 487 protected static final int P_0F38 = 0x380F; 488 protected static final int P_0F3A = 0x3A0F; 489 490 private final String opcode; 491 492 private final int prefix1; 493 private final int prefix2; 494 private final int op; 495 496 private final boolean dstIsByte; 497 private final boolean srcIsByte; 498 499 private final OpAssertion assertion; 500 private final CPUFeature feature; 501 502 protected AMD64Op(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) { 503 this(opcode, prefix1, prefix2, op, assertion == OpAssertion.ByteAssertion, assertion == OpAssertion.ByteAssertion, assertion, feature); 504 } 505 506 protected AMD64Op(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) { 507 this.opcode = opcode; 508 this.prefix1 = prefix1; 509 this.prefix2 = prefix2; 510 this.op = op; 511 512 this.dstIsByte = dstIsByte; 513 this.srcIsByte = srcIsByte; 514 515 this.assertion = assertion; 516 this.feature = feature; 517 } 518 519 protected final void emitOpcode(AMD64Assembler asm, OperandSize size, int rxb, int dstEnc, int srcEnc) { 520 if (prefix1 != 0) { 521 asm.emitByte(prefix1); 522 } 523 if (size.sizePrefix != 0) { 524 asm.emitByte(size.sizePrefix); 525 } 526 int rexPrefix = 0x40 | rxb; 527 if (size == QWORD) { 528 rexPrefix |= 0x08; 529 } 530 if (rexPrefix != 0x40 || (dstIsByte && dstEnc >= 4) || (srcIsByte && srcEnc >= 4)) { 531 asm.emitByte(rexPrefix); 532 } 533 if (prefix2 > 0xFF) { 534 asm.emitShort(prefix2); 535 } else if (prefix2 > 0) { 536 asm.emitByte(prefix2); 537 } 538 asm.emitByte(op); 539 } 540 541 protected final boolean verify(AMD64Assembler asm, OperandSize size, Register resultReg, Register inputReg) { 542 assert feature == null || asm.supports(feature) : String.format("unsupported feature %s required for %s", feature, opcode); 543 assert assertion.checkOperands(this, size, resultReg, inputReg); 544 return true; 545 } 546 547 @Override 548 public String toString() { 549 return opcode; 550 } 551 } 552 553 /** 554 * Base class for AMD64 opcodes with immediate operands. 555 */ 556 public static class AMD64ImmOp extends AMD64Op { 557 558 private final boolean immIsByte; 559 560 protected AMD64ImmOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) { 561 super(opcode, 0, prefix, op, assertion, null); 562 this.immIsByte = immIsByte; 563 } 564 565 protected final void emitImmediate(AMD64Assembler asm, OperandSize size, int imm) { 566 if (immIsByte) { 567 assert imm == (byte) imm; 568 asm.emitByte(imm); 569 } else { 570 size.emitImmediate(asm, imm); 571 } 572 } 573 } 574 575 /** 576 * Opcode with operand order of either RM or MR. 577 */ 578 public abstract static class AMD64RROp extends AMD64Op { 579 580 protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) { 581 super(opcode, prefix1, prefix2, op, assertion, feature); 582 } 583 584 protected AMD64RROp(String opcode, int prefix1, int prefix2, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion, CPUFeature feature) { 585 super(opcode, prefix1, prefix2, op, dstIsByte, srcIsByte, assertion, feature); 586 } 587 588 public abstract void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src); 589 } 590 591 /** 592 * Opcode with operand order of RM. 593 */ 594 public static class AMD64RMOp extends AMD64RROp { 595 // @formatter:off 596 public static final AMD64RMOp IMUL = new AMD64RMOp("IMUL", P_0F, 0xAF); 597 public static final AMD64RMOp BSF = new AMD64RMOp("BSF", P_0F, 0xBC); 598 public static final AMD64RMOp BSR = new AMD64RMOp("BSR", P_0F, 0xBD); 599 public static final AMD64RMOp POPCNT = new AMD64RMOp("POPCNT", 0xF3, P_0F, 0xB8, CPUFeature.POPCNT); 600 public static final AMD64RMOp TZCNT = new AMD64RMOp("TZCNT", 0xF3, P_0F, 0xBC, CPUFeature.BMI1); 601 public static final AMD64RMOp LZCNT = new AMD64RMOp("LZCNT", 0xF3, P_0F, 0xBD, CPUFeature.LZCNT); 602 public static final AMD64RMOp MOVZXB = new AMD64RMOp("MOVZXB", P_0F, 0xB6, false, true, OpAssertion.IntegerAssertion); 603 public static final AMD64RMOp MOVZX = new AMD64RMOp("MOVZX", P_0F, 0xB7, OpAssertion.No16BitAssertion); 604 public static final AMD64RMOp MOVSXB = new AMD64RMOp("MOVSXB", P_0F, 0xBE, false, true, OpAssertion.IntegerAssertion); 605 public static final AMD64RMOp MOVSX = new AMD64RMOp("MOVSX", P_0F, 0xBF, OpAssertion.No16BitAssertion); 606 public static final AMD64RMOp MOVSXD = new AMD64RMOp("MOVSXD", 0x63, OpAssertion.QwordOnlyAssertion); 607 public static final AMD64RMOp MOVB = new AMD64RMOp("MOVB", 0x8A, OpAssertion.ByteAssertion); 608 public static final AMD64RMOp MOV = new AMD64RMOp("MOV", 0x8B); 609 610 // MOVD/MOVQ and MOVSS/MOVSD are the same opcode, just with different operand size prefix 611 public static final AMD64RMOp MOVD = new AMD64RMOp("MOVD", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2); 612 public static final AMD64RMOp MOVQ = new AMD64RMOp("MOVQ", 0x66, P_0F, 0x6E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2); 613 public static final AMD64RMOp MOVSS = new AMD64RMOp("MOVSS", P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE); 614 public static final AMD64RMOp MOVSD = new AMD64RMOp("MOVSD", P_0F, 0x10, OpAssertion.FloatingAssertion, CPUFeature.SSE); 615 616 // TEST is documented as MR operation, but it's symmetric, and using it as RM operation is more convenient. 617 public static final AMD64RMOp TESTB = new AMD64RMOp("TEST", 0x84, OpAssertion.ByteAssertion); 618 public static final AMD64RMOp TEST = new AMD64RMOp("TEST", 0x85); 619 // @formatter:on 620 621 protected AMD64RMOp(String opcode, int op) { 622 this(opcode, 0, op); 623 } 624 625 protected AMD64RMOp(String opcode, int op, OpAssertion assertion) { 626 this(opcode, 0, op, assertion); 627 } 628 629 protected AMD64RMOp(String opcode, int prefix, int op) { 630 this(opcode, 0, prefix, op, null); 631 } 632 633 protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion) { 634 this(opcode, 0, prefix, op, assertion, null); 635 } 636 637 protected AMD64RMOp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) { 638 this(opcode, 0, prefix, op, assertion, feature); 639 } 640 641 protected AMD64RMOp(String opcode, int prefix, int op, boolean dstIsByte, boolean srcIsByte, OpAssertion assertion) { 642 super(opcode, 0, prefix, op, dstIsByte, srcIsByte, assertion, null); 643 } 644 645 protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, CPUFeature feature) { 646 this(opcode, prefix1, prefix2, op, OpAssertion.IntegerAssertion, feature); 647 } 648 649 protected AMD64RMOp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) { 650 super(opcode, prefix1, prefix2, op, assertion, feature); 651 } 652 653 @Override 654 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) { 655 assert verify(asm, size, dst, src); 656 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding); 657 asm.emitModRM(dst, src); 658 } 659 660 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src) { 661 assert verify(asm, size, dst, null); 662 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0); 663 asm.emitOperandHelper(dst, src); 664 } 665 } 666 667 /** 668 * Opcode with operand order of MR. 669 */ 670 public static class AMD64MROp extends AMD64RROp { 671 // @formatter:off 672 public static final AMD64MROp MOVB = new AMD64MROp("MOVB", 0x88, OpAssertion.ByteAssertion); 673 public static final AMD64MROp MOV = new AMD64MROp("MOV", 0x89); 674 675 // MOVD and MOVQ are the same opcode, just with different operand size prefix 676 // Note that as MR opcodes, they have reverse operand order, so the IntToFloatingAssertion must be used. 677 public static final AMD64MROp MOVD = new AMD64MROp("MOVD", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2); 678 public static final AMD64MROp MOVQ = new AMD64MROp("MOVQ", 0x66, P_0F, 0x7E, OpAssertion.IntToFloatingAssertion, CPUFeature.SSE2); 679 680 // MOVSS and MOVSD are the same opcode, just with different operand size prefix 681 public static final AMD64MROp MOVSS = new AMD64MROp("MOVSS", P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE); 682 public static final AMD64MROp MOVSD = new AMD64MROp("MOVSD", P_0F, 0x11, OpAssertion.FloatingAssertion, CPUFeature.SSE); 683 // @formatter:on 684 685 protected AMD64MROp(String opcode, int op) { 686 this(opcode, 0, op); 687 } 688 689 protected AMD64MROp(String opcode, int op, OpAssertion assertion) { 690 this(opcode, 0, op, assertion); 691 } 692 693 protected AMD64MROp(String opcode, int prefix, int op) { 694 this(opcode, prefix, op, OpAssertion.IntegerAssertion); 695 } 696 697 protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion) { 698 this(opcode, prefix, op, assertion, null); 699 } 700 701 protected AMD64MROp(String opcode, int prefix, int op, OpAssertion assertion, CPUFeature feature) { 702 this(opcode, 0, prefix, op, assertion, feature); 703 } 704 705 protected AMD64MROp(String opcode, int prefix1, int prefix2, int op, OpAssertion assertion, CPUFeature feature) { 706 super(opcode, prefix1, prefix2, op, assertion, feature); 707 } 708 709 @Override 710 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src) { 711 assert verify(asm, size, src, dst); 712 emitOpcode(asm, size, getRXB(src, dst), src.encoding, dst.encoding); 713 asm.emitModRM(src, dst); 714 } 715 716 public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, Register src) { 717 assert verify(asm, size, null, src); 718 emitOpcode(asm, size, getRXB(src, dst), src.encoding, 0); 719 asm.emitOperandHelper(src, dst); 720 } 721 } 722 723 /** 724 * Opcodes with operand order of M. 725 */ 726 public static class AMD64MOp extends AMD64Op { 727 // @formatter:off 728 public static final AMD64MOp NOT = new AMD64MOp("NOT", 0xF7, 2); 729 public static final AMD64MOp NEG = new AMD64MOp("NEG", 0xF7, 3); 730 public static final AMD64MOp MUL = new AMD64MOp("MUL", 0xF7, 4); 731 public static final AMD64MOp IMUL = new AMD64MOp("IMUL", 0xF7, 5); 732 public static final AMD64MOp DIV = new AMD64MOp("DIV", 0xF7, 6); 733 public static final AMD64MOp IDIV = new AMD64MOp("IDIV", 0xF7, 7); 734 public static final AMD64MOp INC = new AMD64MOp("INC", 0xFF, 0); 735 public static final AMD64MOp DEC = new AMD64MOp("DEC", 0xFF, 1); 736 public static final AMD64MOp PUSH = new AMD64MOp("PUSH", 0xFF, 6); 737 public static final AMD64MOp POP = new AMD64MOp("POP", 0x8F, 0, OpAssertion.No32BitAssertion); 738 // @formatter:on 739 740 private final int ext; 741 742 protected AMD64MOp(String opcode, int op, int ext) { 743 this(opcode, 0, op, ext); 744 } 745 746 protected AMD64MOp(String opcode, int prefix, int op, int ext) { 747 this(opcode, prefix, op, ext, OpAssertion.IntegerAssertion); 748 } 749 750 protected AMD64MOp(String opcode, int op, int ext, OpAssertion assertion) { 751 this(opcode, 0, op, ext, assertion); 752 } 753 754 protected AMD64MOp(String opcode, int prefix, int op, int ext, OpAssertion assertion) { 755 super(opcode, 0, prefix, op, assertion, null); 756 this.ext = ext; 757 } 758 759 public final void emit(AMD64Assembler asm, OperandSize size, Register dst) { 760 assert verify(asm, size, dst, null); 761 emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding); 762 asm.emitModRM(ext, dst); 763 } 764 765 public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst) { 766 assert verify(asm, size, null, null); 767 emitOpcode(asm, size, getRXB(null, dst), 0, 0); 768 asm.emitOperandHelper(ext, dst); 769 } 770 } 771 772 /** 773 * Opcodes with operand order of MI. 774 */ 775 public static class AMD64MIOp extends AMD64ImmOp { 776 // @formatter:off 777 public static final AMD64MIOp MOVB = new AMD64MIOp("MOVB", true, 0xC6, 0, OpAssertion.ByteAssertion); 778 public static final AMD64MIOp MOV = new AMD64MIOp("MOV", false, 0xC7, 0); 779 public static final AMD64MIOp TEST = new AMD64MIOp("TEST", false, 0xF7, 0); 780 // @formatter:on 781 782 private final int ext; 783 784 protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext) { 785 this(opcode, immIsByte, op, ext, OpAssertion.IntegerAssertion); 786 } 787 788 protected AMD64MIOp(String opcode, boolean immIsByte, int op, int ext, OpAssertion assertion) { 789 this(opcode, immIsByte, 0, op, ext, assertion); 790 } 791 792 protected AMD64MIOp(String opcode, boolean immIsByte, int prefix, int op, int ext, OpAssertion assertion) { 793 super(opcode, immIsByte, prefix, op, assertion); 794 this.ext = ext; 795 } 796 797 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, int imm) { 798 assert verify(asm, size, dst, null); 799 emitOpcode(asm, size, getRXB(null, dst), 0, dst.encoding); 800 asm.emitModRM(ext, dst); 801 emitImmediate(asm, size, imm); 802 } 803 804 public final void emit(AMD64Assembler asm, OperandSize size, AMD64Address dst, int imm) { 805 assert verify(asm, size, null, null); 806 emitOpcode(asm, size, getRXB(null, dst), 0, 0); 807 asm.emitOperandHelper(ext, dst); 808 emitImmediate(asm, size, imm); 809 } 810 } 811 812 /** 813 * Opcodes with operand order of RMI. 814 */ 815 public static class AMD64RMIOp extends AMD64ImmOp { 816 // @formatter:off 817 public static final AMD64RMIOp IMUL = new AMD64RMIOp("IMUL", false, 0x69); 818 public static final AMD64RMIOp IMUL_SX = new AMD64RMIOp("IMUL", true, 0x6B); 819 // @formatter:on 820 821 protected AMD64RMIOp(String opcode, boolean immIsByte, int op) { 822 this(opcode, immIsByte, 0, op, OpAssertion.IntegerAssertion); 823 } 824 825 protected AMD64RMIOp(String opcode, boolean immIsByte, int prefix, int op, OpAssertion assertion) { 826 super(opcode, immIsByte, prefix, op, assertion); 827 } 828 829 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, Register src, int imm) { 830 assert verify(asm, size, dst, src); 831 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, src.encoding); 832 asm.emitModRM(dst, src); 833 emitImmediate(asm, size, imm); 834 } 835 836 public final void emit(AMD64Assembler asm, OperandSize size, Register dst, AMD64Address src, int imm) { 837 assert verify(asm, size, dst, null); 838 emitOpcode(asm, size, getRXB(dst, src), dst.encoding, 0); 839 asm.emitOperandHelper(dst, src); 840 emitImmediate(asm, size, imm); 841 } 842 } 843 844 public static class SSEOp extends AMD64RMOp { 845 // @formatter:off 846 public static final SSEOp CVTSI2SS = new SSEOp("CVTSI2SS", 0xF3, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion); 847 public static final SSEOp CVTSI2SD = new SSEOp("CVTSI2SS", 0xF2, P_0F, 0x2A, OpAssertion.IntToFloatingAssertion); 848 public static final SSEOp CVTTSS2SI = new SSEOp("CVTTSS2SI", 0xF3, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion); 849 public static final SSEOp CVTTSD2SI = new SSEOp("CVTTSD2SI", 0xF2, P_0F, 0x2C, OpAssertion.FloatingToIntAssertion); 850 public static final SSEOp UCOMIS = new SSEOp("UCOMIS", P_0F, 0x2E, OpAssertion.PackedFloatingAssertion); 851 public static final SSEOp SQRT = new SSEOp("SQRT", P_0F, 0x51); 852 public static final SSEOp AND = new SSEOp("AND", P_0F, 0x54, OpAssertion.PackedFloatingAssertion); 853 public static final SSEOp ANDN = new SSEOp("ANDN", P_0F, 0x55, OpAssertion.PackedFloatingAssertion); 854 public static final SSEOp OR = new SSEOp("OR", P_0F, 0x56, OpAssertion.PackedFloatingAssertion); 855 public static final SSEOp XOR = new SSEOp("XOR", P_0F, 0x57, OpAssertion.PackedFloatingAssertion); 856 public static final SSEOp ADD = new SSEOp("ADD", P_0F, 0x58); 857 public static final SSEOp MUL = new SSEOp("MUL", P_0F, 0x59); 858 public static final SSEOp CVTSS2SD = new SSEOp("CVTSS2SD", P_0F, 0x5A, OpAssertion.SingleAssertion); 859 public static final SSEOp CVTSD2SS = new SSEOp("CVTSD2SS", P_0F, 0x5A, OpAssertion.DoubleAssertion); 860 public static final SSEOp SUB = new SSEOp("SUB", P_0F, 0x5C); 861 public static final SSEOp MIN = new SSEOp("MIN", P_0F, 0x5D); 862 public static final SSEOp DIV = new SSEOp("DIV", P_0F, 0x5E); 863 public static final SSEOp MAX = new SSEOp("MAX", P_0F, 0x5F); 864 // @formatter:on 865 866 protected SSEOp(String opcode, int prefix, int op) { 867 this(opcode, prefix, op, OpAssertion.FloatingAssertion); 868 } 869 870 protected SSEOp(String opcode, int prefix, int op, OpAssertion assertion) { 871 this(opcode, 0, prefix, op, assertion); 872 } 873 874 protected SSEOp(String opcode, int mandatoryPrefix, int prefix, int op, OpAssertion assertion) { 875 super(opcode, mandatoryPrefix, prefix, op, assertion, CPUFeature.SSE2); 876 } 877 } 878 879 /** 880 * Arithmetic operation with operand order of RM, MR or MI. 881 */ 882 public static final class AMD64BinaryArithmetic { 883 // @formatter:off 884 public static final AMD64BinaryArithmetic ADD = new AMD64BinaryArithmetic("ADD", 0); 885 public static final AMD64BinaryArithmetic OR = new AMD64BinaryArithmetic("OR", 1); 886 public static final AMD64BinaryArithmetic ADC = new AMD64BinaryArithmetic("ADC", 2); 887 public static final AMD64BinaryArithmetic SBB = new AMD64BinaryArithmetic("SBB", 3); 888 public static final AMD64BinaryArithmetic AND = new AMD64BinaryArithmetic("AND", 4); 889 public static final AMD64BinaryArithmetic SUB = new AMD64BinaryArithmetic("SUB", 5); 890 public static final AMD64BinaryArithmetic XOR = new AMD64BinaryArithmetic("XOR", 6); 891 public static final AMD64BinaryArithmetic CMP = new AMD64BinaryArithmetic("CMP", 7); 892 // @formatter:on 893 894 private final AMD64MIOp byteImmOp; 895 private final AMD64MROp byteMrOp; 896 private final AMD64RMOp byteRmOp; 897 898 private final AMD64MIOp immOp; 899 private final AMD64MIOp immSxOp; 900 private final AMD64MROp mrOp; 901 private final AMD64RMOp rmOp; 902 903 private AMD64BinaryArithmetic(String opcode, int code) { 904 int baseOp = code << 3; 905 906 byteImmOp = new AMD64MIOp(opcode, true, 0, 0x80, code, OpAssertion.ByteAssertion); 907 byteMrOp = new AMD64MROp(opcode, 0, baseOp, OpAssertion.ByteAssertion); 908 byteRmOp = new AMD64RMOp(opcode, 0, baseOp | 0x02, OpAssertion.ByteAssertion); 909 910 immOp = new AMD64MIOp(opcode, false, 0, 0x81, code, OpAssertion.IntegerAssertion); 911 immSxOp = new AMD64MIOp(opcode, true, 0, 0x83, code, OpAssertion.IntegerAssertion); 912 mrOp = new AMD64MROp(opcode, 0, baseOp | 0x01, OpAssertion.IntegerAssertion); 913 rmOp = new AMD64RMOp(opcode, 0, baseOp | 0x03, OpAssertion.IntegerAssertion); 914 } 915 916 public AMD64MIOp getMIOpcode(OperandSize size, boolean sx) { 917 if (size == BYTE) { 918 return byteImmOp; 919 } else if (sx) { 920 return immSxOp; 921 } else { 922 return immOp; 923 } 924 } 925 926 public AMD64MROp getMROpcode(OperandSize size) { 927 if (size == BYTE) { 928 return byteMrOp; 929 } else { 930 return mrOp; 931 } 932 } 933 934 public AMD64RMOp getRMOpcode(OperandSize size) { 935 if (size == BYTE) { 936 return byteRmOp; 937 } else { 938 return rmOp; 939 } 940 } 941 } 942 943 /** 944 * Shift operation with operand order of M1, MC or MI. 945 */ 946 public static final class AMD64Shift { 947 // @formatter:off 948 public static final AMD64Shift ROL = new AMD64Shift("ROL", 0); 949 public static final AMD64Shift ROR = new AMD64Shift("ROR", 1); 950 public static final AMD64Shift RCL = new AMD64Shift("RCL", 2); 951 public static final AMD64Shift RCR = new AMD64Shift("RCR", 3); 952 public static final AMD64Shift SHL = new AMD64Shift("SHL", 4); 953 public static final AMD64Shift SHR = new AMD64Shift("SHR", 5); 954 public static final AMD64Shift SAR = new AMD64Shift("SAR", 7); 955 // @formatter:on 956 957 public final AMD64MOp m1Op; 958 public final AMD64MOp mcOp; 959 public final AMD64MIOp miOp; 960 961 private AMD64Shift(String opcode, int code) { 962 m1Op = new AMD64MOp(opcode, 0, 0xD1, code, OpAssertion.IntegerAssertion); 963 mcOp = new AMD64MOp(opcode, 0, 0xD3, code, OpAssertion.IntegerAssertion); 964 miOp = new AMD64MIOp(opcode, true, 0, 0xC1, code, OpAssertion.IntegerAssertion); 965 } 966 } 967 968 public final void addl(AMD64Address dst, int imm32) { 969 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 970 } 971 972 public final void addl(Register dst, int imm32) { 973 ADD.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 974 } 975 976 private void addrNop4() { 977 // 4 bytes: NOP DWORD PTR [EAX+0] 978 emitByte(0x0F); 979 emitByte(0x1F); 980 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc); 981 emitByte(0); // 8-bits offset (1 byte) 982 } 983 984 private void addrNop5() { 985 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 986 emitByte(0x0F); 987 emitByte(0x1F); 988 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4); 989 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 990 emitByte(0); // 8-bits offset (1 byte) 991 } 992 993 private void addrNop7() { 994 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 995 emitByte(0x0F); 996 emitByte(0x1F); 997 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc); 998 emitInt(0); // 32-bits offset (4 bytes) 999 } 1000 1001 private void addrNop8() { 1002 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 1003 emitByte(0x0F); 1004 emitByte(0x1F); 1005 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4); 1006 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); 1007 emitInt(0); // 32-bits offset (4 bytes) 1008 } 1009 1010 public final void andl(Register dst, int imm32) { 1011 AND.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1012 } 1013 1014 public final void bswapl(Register reg) { 1015 int encode = prefixAndEncode(reg.encoding); 1016 emitByte(0x0F); 1017 emitByte(0xC8 | encode); 1018 } 1019 1020 public final void cdql() { 1021 emitByte(0x99); 1022 } 1023 1024 public final void cmovl(ConditionFlag cc, Register dst, Register src) { 1025 int encode = prefixAndEncode(dst.encoding, src.encoding); 1026 emitByte(0x0F); 1027 emitByte(0x40 | cc.getValue()); 1028 emitByte(0xC0 | encode); 1029 } 1030 1031 public final void cmovl(ConditionFlag cc, Register dst, AMD64Address src) { 1032 prefix(src, dst); 1033 emitByte(0x0F); 1034 emitByte(0x40 | cc.getValue()); 1035 emitOperandHelper(dst, src); 1036 } 1037 1038 public final void cmpl(Register dst, int imm32) { 1039 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1040 } 1041 1042 public final void cmpl(Register dst, Register src) { 1043 CMP.rmOp.emit(this, DWORD, dst, src); 1044 } 1045 1046 public final void cmpl(Register dst, AMD64Address src) { 1047 CMP.rmOp.emit(this, DWORD, dst, src); 1048 } 1049 1050 public final void cmpl(AMD64Address dst, int imm32) { 1051 CMP.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1052 } 1053 1054 // The 32-bit cmpxchg compares the value at adr with the contents of X86.rax, 1055 // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,. 1056 // The ZF is set if the compared values were equal, and cleared otherwise. 1057 public final void cmpxchgl(Register reg, AMD64Address adr) { // cmpxchg 1058 prefix(adr, reg); 1059 emitByte(0x0F); 1060 emitByte(0xB1); 1061 emitOperandHelper(reg, adr); 1062 } 1063 1064 protected final void decl(AMD64Address dst) { 1065 prefix(dst); 1066 emitByte(0xFF); 1067 emitOperandHelper(1, dst); 1068 } 1069 1070 public final void hlt() { 1071 emitByte(0xF4); 1072 } 1073 1074 public final void imull(Register dst, Register src, int value) { 1075 if (isByte(value)) { 1076 AMD64RMIOp.IMUL_SX.emit(this, DWORD, dst, src, value); 1077 } else { 1078 AMD64RMIOp.IMUL.emit(this, DWORD, dst, src, value); 1079 } 1080 } 1081 1082 protected final void incl(AMD64Address dst) { 1083 prefix(dst); 1084 emitByte(0xFF); 1085 emitOperandHelper(0, dst); 1086 } 1087 1088 public void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { 1089 int shortSize = 2; 1090 int longSize = 6; 1091 long disp = jumpTarget - position(); 1092 if (!forceDisp32 && isByte(disp - shortSize)) { 1093 // 0111 tttn #8-bit disp 1094 emitByte(0x70 | cc.getValue()); 1095 emitByte((int) ((disp - shortSize) & 0xFF)); 1096 } else { 1097 // 0000 1111 1000 tttn #32-bit disp 1098 assert isInt(disp - longSize) : "must be 32bit offset (call4)"; 1099 emitByte(0x0F); 1100 emitByte(0x80 | cc.getValue()); 1101 emitInt((int) (disp - longSize)); 1102 } 1103 } 1104 1105 public final void jcc(ConditionFlag cc, Label l) { 1106 assert (0 <= cc.getValue()) && (cc.getValue() < 16) : "illegal cc"; 1107 if (l.isBound()) { 1108 jcc(cc, l.position(), false); 1109 } else { 1110 // Note: could eliminate cond. jumps to this jump if condition 1111 // is the same however, seems to be rather unlikely case. 1112 // Note: use jccb() if label to be bound is very close to get 1113 // an 8-bit displacement 1114 l.addPatchAt(position()); 1115 emitByte(0x0F); 1116 emitByte(0x80 | cc.getValue()); 1117 emitInt(0); 1118 } 1119 1120 } 1121 1122 public final void jccb(ConditionFlag cc, Label l) { 1123 if (l.isBound()) { 1124 int shortSize = 2; 1125 int entry = l.position(); 1126 assert isByte(entry - (position() + shortSize)) : "Dispacement too large for a short jmp"; 1127 long disp = entry - position(); 1128 // 0111 tttn #8-bit disp 1129 emitByte(0x70 | cc.getValue()); 1130 emitByte((int) ((disp - shortSize) & 0xFF)); 1131 } else { 1132 l.addPatchAt(position()); 1133 emitByte(0x70 | cc.getValue()); 1134 emitByte(0); 1135 } 1136 } 1137 1138 public final void jmp(int jumpTarget, boolean forceDisp32) { 1139 int shortSize = 2; 1140 int longSize = 5; 1141 long disp = jumpTarget - position(); 1142 if (!forceDisp32 && isByte(disp - shortSize)) { 1143 emitByte(0xEB); 1144 emitByte((int) ((disp - shortSize) & 0xFF)); 1145 } else { 1146 emitByte(0xE9); 1147 emitInt((int) (disp - longSize)); 1148 } 1149 } 1150 1151 @Override 1152 public final void jmp(Label l) { 1153 if (l.isBound()) { 1154 jmp(l.position(), false); 1155 } else { 1156 // By default, forward jumps are always 32-bit displacements, since 1157 // we can't yet know where the label will be bound. If you're sure that 1158 // the forward jump will not run beyond 256 bytes, use jmpb to 1159 // force an 8-bit displacement. 1160 1161 l.addPatchAt(position()); 1162 emitByte(0xE9); 1163 emitInt(0); 1164 } 1165 } 1166 1167 public final void jmp(Register entry) { 1168 int encode = prefixAndEncode(entry.encoding); 1169 emitByte(0xFF); 1170 emitByte(0xE0 | encode); 1171 } 1172 1173 public final void jmpb(Label l) { 1174 if (l.isBound()) { 1175 int shortSize = 2; 1176 int entry = l.position(); 1177 assert isByte((entry - position()) + shortSize) : "Dispacement too large for a short jmp"; 1178 long offs = entry - position(); 1179 emitByte(0xEB); 1180 emitByte((int) ((offs - shortSize) & 0xFF)); 1181 } else { 1182 1183 l.addPatchAt(position()); 1184 emitByte(0xEB); 1185 emitByte(0); 1186 } 1187 } 1188 1189 public final void leaq(Register dst, AMD64Address src) { 1190 prefixq(src, dst); 1191 emitByte(0x8D); 1192 emitOperandHelper(dst, src); 1193 } 1194 1195 public final void leave() { 1196 emitByte(0xC9); 1197 } 1198 1199 public final void lock() { 1200 emitByte(0xF0); 1201 } 1202 1203 public final void movapd(Register dst, Register src) { 1204 assert dst.getRegisterCategory().equals(AMD64.XMM); 1205 assert src.getRegisterCategory().equals(AMD64.XMM); 1206 int dstenc = dst.encoding; 1207 int srcenc = src.encoding; 1208 emitByte(0x66); 1209 if (dstenc < 8) { 1210 if (srcenc >= 8) { 1211 emitByte(Prefix.REXB); 1212 srcenc -= 8; 1213 } 1214 } else { 1215 if (srcenc < 8) { 1216 emitByte(Prefix.REXR); 1217 } else { 1218 emitByte(Prefix.REXRB); 1219 srcenc -= 8; 1220 } 1221 dstenc -= 8; 1222 } 1223 emitByte(0x0F); 1224 emitByte(0x28); 1225 emitByte(0xC0 | dstenc << 3 | srcenc); 1226 } 1227 1228 public final void movaps(Register dst, Register src) { 1229 assert dst.getRegisterCategory().equals(AMD64.XMM); 1230 assert src.getRegisterCategory().equals(AMD64.XMM); 1231 int dstenc = dst.encoding; 1232 int srcenc = src.encoding; 1233 if (dstenc < 8) { 1234 if (srcenc >= 8) { 1235 emitByte(Prefix.REXB); 1236 srcenc -= 8; 1237 } 1238 } else { 1239 if (srcenc < 8) { 1240 emitByte(Prefix.REXR); 1241 } else { 1242 emitByte(Prefix.REXRB); 1243 srcenc -= 8; 1244 } 1245 dstenc -= 8; 1246 } 1247 emitByte(0x0F); 1248 emitByte(0x28); 1249 emitByte(0xC0 | dstenc << 3 | srcenc); 1250 } 1251 1252 public final void movb(AMD64Address dst, int imm8) { 1253 prefix(dst); 1254 emitByte(0xC6); 1255 emitOperandHelper(0, dst); 1256 emitByte(imm8); 1257 } 1258 1259 public final void movb(AMD64Address dst, Register src) { 1260 assert src.getRegisterCategory().equals(AMD64.CPU) : "must have byte register"; 1261 prefix(dst, src, true); 1262 emitByte(0x88); 1263 emitOperandHelper(src, dst); 1264 } 1265 1266 public final void movl(Register dst, int imm32) { 1267 int encode = prefixAndEncode(dst.encoding); 1268 emitByte(0xB8 | encode); 1269 emitInt(imm32); 1270 } 1271 1272 public final void movl(Register dst, Register src) { 1273 int encode = prefixAndEncode(dst.encoding, src.encoding); 1274 emitByte(0x8B); 1275 emitByte(0xC0 | encode); 1276 } 1277 1278 public final void movl(Register dst, AMD64Address src) { 1279 prefix(src, dst); 1280 emitByte(0x8B); 1281 emitOperandHelper(dst, src); 1282 } 1283 1284 public final void movl(AMD64Address dst, int imm32) { 1285 prefix(dst); 1286 emitByte(0xC7); 1287 emitOperandHelper(0, dst); 1288 emitInt(imm32); 1289 } 1290 1291 public final void movl(AMD64Address dst, Register src) { 1292 prefix(dst, src); 1293 emitByte(0x89); 1294 emitOperandHelper(src, dst); 1295 } 1296 1297 /** 1298 * New CPUs require use of movsd and movss to avoid partial register stall when loading from 1299 * memory. But for old Opteron use movlpd instead of movsd. The selection is done in 1300 * {@link AMD64MacroAssembler#movdbl(Register, AMD64Address)} and 1301 * {@link AMD64MacroAssembler#movflt(Register, Register)}. 1302 */ 1303 public final void movlpd(Register dst, AMD64Address src) { 1304 assert dst.getRegisterCategory().equals(AMD64.XMM); 1305 emitByte(0x66); 1306 prefix(src, dst); 1307 emitByte(0x0F); 1308 emitByte(0x12); 1309 emitOperandHelper(dst, src); 1310 } 1311 1312 public final void movq(Register dst, AMD64Address src) { 1313 if (dst.getRegisterCategory().equals(AMD64.XMM)) { 1314 emitByte(0xF3); 1315 prefixq(src, dst); 1316 emitByte(0x0F); 1317 emitByte(0x7E); 1318 emitOperandHelper(dst, src); 1319 } else { 1320 prefixq(src, dst); 1321 emitByte(0x8B); 1322 emitOperandHelper(dst, src); 1323 } 1324 } 1325 1326 public final void movq(Register dst, Register src) { 1327 int encode = prefixqAndEncode(dst.encoding, src.encoding); 1328 emitByte(0x8B); 1329 emitByte(0xC0 | encode); 1330 } 1331 1332 public final void movq(AMD64Address dst, Register src) { 1333 if (src.getRegisterCategory().equals(AMD64.XMM)) { 1334 emitByte(0x66); 1335 prefixq(dst, src); 1336 emitByte(0x0F); 1337 emitByte(0xD6); 1338 emitOperandHelper(src, dst); 1339 } else { 1340 prefixq(dst, src); 1341 emitByte(0x89); 1342 emitOperandHelper(src, dst); 1343 } 1344 } 1345 1346 public final void movsbl(Register dst, AMD64Address src) { 1347 prefix(src, dst); 1348 emitByte(0x0F); 1349 emitByte(0xBE); 1350 emitOperandHelper(dst, src); 1351 } 1352 1353 public final void movsbl(Register dst, Register src) { 1354 int encode = prefixAndEncode(dst.encoding, false, src.encoding, true); 1355 emitByte(0x0F); 1356 emitByte(0xBE); 1357 emitByte(0xC0 | encode); 1358 } 1359 1360 public final void movsbq(Register dst, AMD64Address src) { 1361 prefixq(src, dst); 1362 emitByte(0x0F); 1363 emitByte(0xBE); 1364 emitOperandHelper(dst, src); 1365 } 1366 1367 public final void movsbq(Register dst, Register src) { 1368 int encode = prefixqAndEncode(dst.encoding, src.encoding); 1369 emitByte(0x0F); 1370 emitByte(0xBE); 1371 emitByte(0xC0 | encode); 1372 } 1373 1374 public final void movsd(Register dst, Register src) { 1375 assert dst.getRegisterCategory().equals(AMD64.XMM); 1376 assert src.getRegisterCategory().equals(AMD64.XMM); 1377 emitByte(0xF2); 1378 int encode = prefixAndEncode(dst.encoding, src.encoding); 1379 emitByte(0x0F); 1380 emitByte(0x10); 1381 emitByte(0xC0 | encode); 1382 } 1383 1384 public final void movsd(Register dst, AMD64Address src) { 1385 assert dst.getRegisterCategory().equals(AMD64.XMM); 1386 emitByte(0xF2); 1387 prefix(src, dst); 1388 emitByte(0x0F); 1389 emitByte(0x10); 1390 emitOperandHelper(dst, src); 1391 } 1392 1393 public final void movsd(AMD64Address dst, Register src) { 1394 assert src.getRegisterCategory().equals(AMD64.XMM); 1395 emitByte(0xF2); 1396 prefix(dst, src); 1397 emitByte(0x0F); 1398 emitByte(0x11); 1399 emitOperandHelper(src, dst); 1400 } 1401 1402 public final void movss(Register dst, Register src) { 1403 assert dst.getRegisterCategory().equals(AMD64.XMM); 1404 assert src.getRegisterCategory().equals(AMD64.XMM); 1405 emitByte(0xF3); 1406 int encode = prefixAndEncode(dst.encoding, src.encoding); 1407 emitByte(0x0F); 1408 emitByte(0x10); 1409 emitByte(0xC0 | encode); 1410 } 1411 1412 public final void movss(Register dst, AMD64Address src) { 1413 assert dst.getRegisterCategory().equals(AMD64.XMM); 1414 emitByte(0xF3); 1415 prefix(src, dst); 1416 emitByte(0x0F); 1417 emitByte(0x10); 1418 emitOperandHelper(dst, src); 1419 } 1420 1421 public final void movss(AMD64Address dst, Register src) { 1422 assert src.getRegisterCategory().equals(AMD64.XMM); 1423 emitByte(0xF3); 1424 prefix(dst, src); 1425 emitByte(0x0F); 1426 emitByte(0x11); 1427 emitOperandHelper(src, dst); 1428 } 1429 1430 public final void movswl(Register dst, AMD64Address src) { 1431 prefix(src, dst); 1432 emitByte(0x0F); 1433 emitByte(0xBF); 1434 emitOperandHelper(dst, src); 1435 } 1436 1437 public final void movw(AMD64Address dst, int imm16) { 1438 emitByte(0x66); // switch to 16-bit mode 1439 prefix(dst); 1440 emitByte(0xC7); 1441 emitOperandHelper(0, dst); 1442 emitShort(imm16); 1443 } 1444 1445 public final void movw(AMD64Address dst, Register src) { 1446 emitByte(0x66); 1447 prefix(dst, src); 1448 emitByte(0x89); 1449 emitOperandHelper(src, dst); 1450 } 1451 1452 public final void movzbl(Register dst, AMD64Address src) { 1453 prefix(src, dst); 1454 emitByte(0x0F); 1455 emitByte(0xB6); 1456 emitOperandHelper(dst, src); 1457 } 1458 1459 public final void movzwl(Register dst, AMD64Address src) { 1460 prefix(src, dst); 1461 emitByte(0x0F); 1462 emitByte(0xB7); 1463 emitOperandHelper(dst, src); 1464 } 1465 1466 @Override 1467 public final void ensureUniquePC() { 1468 nop(); 1469 } 1470 1471 public final void nop() { 1472 nop(1); 1473 } 1474 1475 public void nop(int count) { 1476 int i = count; 1477 if (UseNormalNop) { 1478 assert i > 0 : " "; 1479 // The fancy nops aren't currently recognized by debuggers making it a 1480 // pain to disassemble code while debugging. If assert are on clearly 1481 // speed is not an issue so simply use the single byte traditional nop 1482 // to do alignment. 1483 1484 for (; i > 0; i--) { 1485 emitByte(0x90); 1486 } 1487 return; 1488 } 1489 1490 if (UseAddressNop) { 1491 // 1492 // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD. 1493 // 1: 0x90 1494 // 2: 0x66 0x90 1495 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) 1496 // 4: 0x0F 0x1F 0x40 0x00 1497 // 5: 0x0F 0x1F 0x44 0x00 0x00 1498 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 1499 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1500 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1501 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1502 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1503 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1504 1505 // The rest coding is AMD specific - use consecutive Address nops 1506 1507 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 1508 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 1509 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1510 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 1511 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 1512 // Size prefixes (0x66) are added for larger sizes 1513 1514 while (i >= 22) { 1515 i -= 11; 1516 emitByte(0x66); // size prefix 1517 emitByte(0x66); // size prefix 1518 emitByte(0x66); // size prefix 1519 addrNop8(); 1520 } 1521 // Generate first nop for size between 21-12 1522 switch (i) { 1523 case 21: 1524 i -= 1; 1525 emitByte(0x66); // size prefix 1526 // fall through 1527 case 20: 1528 // fall through 1529 case 19: 1530 i -= 1; 1531 emitByte(0x66); // size prefix 1532 // fall through 1533 case 18: 1534 // fall through 1535 case 17: 1536 i -= 1; 1537 emitByte(0x66); // size prefix 1538 // fall through 1539 case 16: 1540 // fall through 1541 case 15: 1542 i -= 8; 1543 addrNop8(); 1544 break; 1545 case 14: 1546 case 13: 1547 i -= 7; 1548 addrNop7(); 1549 break; 1550 case 12: 1551 i -= 6; 1552 emitByte(0x66); // size prefix 1553 addrNop5(); 1554 break; 1555 default: 1556 assert i < 12; 1557 } 1558 1559 // Generate second nop for size between 11-1 1560 switch (i) { 1561 case 11: 1562 emitByte(0x66); // size prefix 1563 emitByte(0x66); // size prefix 1564 emitByte(0x66); // size prefix 1565 addrNop8(); 1566 break; 1567 case 10: 1568 emitByte(0x66); // size prefix 1569 emitByte(0x66); // size prefix 1570 addrNop8(); 1571 break; 1572 case 9: 1573 emitByte(0x66); // size prefix 1574 addrNop8(); 1575 break; 1576 case 8: 1577 addrNop8(); 1578 break; 1579 case 7: 1580 addrNop7(); 1581 break; 1582 case 6: 1583 emitByte(0x66); // size prefix 1584 addrNop5(); 1585 break; 1586 case 5: 1587 addrNop5(); 1588 break; 1589 case 4: 1590 addrNop4(); 1591 break; 1592 case 3: 1593 // Don't use "0x0F 0x1F 0x00" - need patching safe padding 1594 emitByte(0x66); // size prefix 1595 emitByte(0x66); // size prefix 1596 emitByte(0x90); // nop 1597 break; 1598 case 2: 1599 emitByte(0x66); // size prefix 1600 emitByte(0x90); // nop 1601 break; 1602 case 1: 1603 emitByte(0x90); // nop 1604 break; 1605 default: 1606 assert i == 0; 1607 } 1608 return; 1609 } 1610 1611 // Using nops with size prefixes "0x66 0x90". 1612 // From AMD Optimization Guide: 1613 // 1: 0x90 1614 // 2: 0x66 0x90 1615 // 3: 0x66 0x66 0x90 1616 // 4: 0x66 0x66 0x66 0x90 1617 // 5: 0x66 0x66 0x90 0x66 0x90 1618 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 1619 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 1620 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 1621 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 1622 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 1623 // 1624 while (i > 12) { 1625 i -= 4; 1626 emitByte(0x66); // size prefix 1627 emitByte(0x66); 1628 emitByte(0x66); 1629 emitByte(0x90); // nop 1630 } 1631 // 1 - 12 nops 1632 if (i > 8) { 1633 if (i > 9) { 1634 i -= 1; 1635 emitByte(0x66); 1636 } 1637 i -= 3; 1638 emitByte(0x66); 1639 emitByte(0x66); 1640 emitByte(0x90); 1641 } 1642 // 1 - 8 nops 1643 if (i > 4) { 1644 if (i > 6) { 1645 i -= 1; 1646 emitByte(0x66); 1647 } 1648 i -= 3; 1649 emitByte(0x66); 1650 emitByte(0x66); 1651 emitByte(0x90); 1652 } 1653 switch (i) { 1654 case 4: 1655 emitByte(0x66); 1656 emitByte(0x66); 1657 emitByte(0x66); 1658 emitByte(0x90); 1659 break; 1660 case 3: 1661 emitByte(0x66); 1662 emitByte(0x66); 1663 emitByte(0x90); 1664 break; 1665 case 2: 1666 emitByte(0x66); 1667 emitByte(0x90); 1668 break; 1669 case 1: 1670 emitByte(0x90); 1671 break; 1672 default: 1673 assert i == 0; 1674 } 1675 } 1676 1677 public final void pop(Register dst) { 1678 int encode = prefixAndEncode(dst.encoding); 1679 emitByte(0x58 | encode); 1680 } 1681 1682 public void popfq() { 1683 emitByte(0x9D); 1684 } 1685 1686 public final void ptest(Register dst, Register src) { 1687 assert supports(CPUFeature.SSE4_1); 1688 emitByte(0x66); 1689 int encode = prefixAndEncode(dst.encoding, src.encoding); 1690 emitByte(0x0F); 1691 emitByte(0x38); 1692 emitByte(0x17); 1693 emitByte(0xC0 | encode); 1694 } 1695 1696 public final void push(Register src) { 1697 int encode = prefixAndEncode(src.encoding); 1698 emitByte(0x50 | encode); 1699 } 1700 1701 public void pushfq() { 1702 emitByte(0x9c); 1703 } 1704 1705 public final void pxor(Register dst, Register src) { 1706 emitByte(0x66); 1707 int encode = prefixAndEncode(dst.encoding, src.encoding); 1708 emitByte(0x0F); 1709 emitByte(0xEF); 1710 emitByte(0xC0 | encode); 1711 } 1712 1713 public final void ret(int imm16) { 1714 if (imm16 == 0) { 1715 emitByte(0xC3); 1716 } else { 1717 emitByte(0xC2); 1718 emitShort(imm16); 1719 } 1720 } 1721 1722 public final void subl(AMD64Address dst, int imm32) { 1723 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1724 } 1725 1726 public final void subl(Register dst, int imm32) { 1727 SUB.getMIOpcode(DWORD, isByte(imm32)).emit(this, DWORD, dst, imm32); 1728 } 1729 1730 public final void testl(Register dst, int imm32) { 1731 // not using emitArith because test 1732 // doesn't support sign-extension of 1733 // 8bit operands 1734 int encode = dst.encoding; 1735 if (encode == 0) { 1736 emitByte(0xA9); 1737 } else { 1738 encode = prefixAndEncode(encode); 1739 emitByte(0xF7); 1740 emitByte(0xC0 | encode); 1741 } 1742 emitInt(imm32); 1743 } 1744 1745 public final void testl(Register dst, Register src) { 1746 int encode = prefixAndEncode(dst.encoding, src.encoding); 1747 emitByte(0x85); 1748 emitByte(0xC0 | encode); 1749 } 1750 1751 public final void testl(Register dst, AMD64Address src) { 1752 prefix(src, dst); 1753 emitByte(0x85); 1754 emitOperandHelper(dst, src); 1755 } 1756 1757 public final void xorl(Register dst, Register src) { 1758 XOR.rmOp.emit(this, DWORD, dst, src); 1759 } 1760 1761 public final void xorpd(Register dst, Register src) { 1762 emitByte(0x66); 1763 xorps(dst, src); 1764 } 1765 1766 public final void xorps(Register dst, Register src) { 1767 assert dst.getRegisterCategory().equals(AMD64.XMM) && src.getRegisterCategory().equals(AMD64.XMM); 1768 int encode = prefixAndEncode(dst.encoding, src.encoding); 1769 emitByte(0x0F); 1770 emitByte(0x57); 1771 emitByte(0xC0 | encode); 1772 } 1773 1774 protected final void decl(Register dst) { 1775 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) 1776 int encode = prefixAndEncode(dst.encoding); 1777 emitByte(0xFF); 1778 emitByte(0xC8 | encode); 1779 } 1780 1781 protected final void incl(Register dst) { 1782 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 1783 int encode = prefixAndEncode(dst.encoding); 1784 emitByte(0xFF); 1785 emitByte(0xC0 | encode); 1786 } 1787 1788 private int prefixAndEncode(int regEnc) { 1789 return prefixAndEncode(regEnc, false); 1790 } 1791 1792 private int prefixAndEncode(int regEnc, boolean byteinst) { 1793 if (regEnc >= 8) { 1794 emitByte(Prefix.REXB); 1795 return regEnc - 8; 1796 } else if (byteinst && regEnc >= 4) { 1797 emitByte(Prefix.REX); 1798 } 1799 return regEnc; 1800 } 1801 1802 private int prefixqAndEncode(int regEnc) { 1803 if (regEnc < 8) { 1804 emitByte(Prefix.REXW); 1805 return regEnc; 1806 } else { 1807 emitByte(Prefix.REXWB); 1808 return regEnc - 8; 1809 } 1810 } 1811 1812 private int prefixAndEncode(int dstEnc, int srcEnc) { 1813 return prefixAndEncode(dstEnc, false, srcEnc, false); 1814 } 1815 1816 private int prefixAndEncode(int dstEncoding, boolean dstIsByte, int srcEncoding, boolean srcIsByte) { 1817 int srcEnc = srcEncoding; 1818 int dstEnc = dstEncoding; 1819 if (dstEnc < 8) { 1820 if (srcEnc >= 8) { 1821 emitByte(Prefix.REXB); 1822 srcEnc -= 8; 1823 } else if ((srcIsByte && srcEnc >= 4) || (dstIsByte && dstEnc >= 4)) { 1824 emitByte(Prefix.REX); 1825 } 1826 } else { 1827 if (srcEnc < 8) { 1828 emitByte(Prefix.REXR); 1829 } else { 1830 emitByte(Prefix.REXRB); 1831 srcEnc -= 8; 1832 } 1833 dstEnc -= 8; 1834 } 1835 return dstEnc << 3 | srcEnc; 1836 } 1837 1838 /** 1839 * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand 1840 * prefix. If the given operands exceed 3 bits, the 4th bit is encoded in the prefix. 1841 * 1842 * @param regEncoding the encoding of the register part of the ModRM-Byte 1843 * @param rmEncoding the encoding of the r/m part of the ModRM-Byte 1844 * @return the lower 6 bits of the ModRM-Byte that should be emitted 1845 */ 1846 private int prefixqAndEncode(int regEncoding, int rmEncoding) { 1847 int rmEnc = rmEncoding; 1848 int regEnc = regEncoding; 1849 if (regEnc < 8) { 1850 if (rmEnc < 8) { 1851 emitByte(Prefix.REXW); 1852 } else { 1853 emitByte(Prefix.REXWB); 1854 rmEnc -= 8; 1855 } 1856 } else { 1857 if (rmEnc < 8) { 1858 emitByte(Prefix.REXWR); 1859 } else { 1860 emitByte(Prefix.REXWRB); 1861 rmEnc -= 8; 1862 } 1863 regEnc -= 8; 1864 } 1865 return regEnc << 3 | rmEnc; 1866 } 1867 1868 private static boolean needsRex(Register reg) { 1869 return reg.encoding >= MinEncodingNeedsRex; 1870 } 1871 1872 private void prefix(AMD64Address adr) { 1873 if (needsRex(adr.getBase())) { 1874 if (needsRex(adr.getIndex())) { 1875 emitByte(Prefix.REXXB); 1876 } else { 1877 emitByte(Prefix.REXB); 1878 } 1879 } else { 1880 if (needsRex(adr.getIndex())) { 1881 emitByte(Prefix.REXX); 1882 } 1883 } 1884 } 1885 1886 private void prefixq(AMD64Address adr) { 1887 if (needsRex(adr.getBase())) { 1888 if (needsRex(adr.getIndex())) { 1889 emitByte(Prefix.REXWXB); 1890 } else { 1891 emitByte(Prefix.REXWB); 1892 } 1893 } else { 1894 if (needsRex(adr.getIndex())) { 1895 emitByte(Prefix.REXWX); 1896 } else { 1897 emitByte(Prefix.REXW); 1898 } 1899 } 1900 } 1901 1902 private void prefix(AMD64Address adr, Register reg) { 1903 prefix(adr, reg, false); 1904 } 1905 1906 private void prefix(AMD64Address adr, Register reg, boolean byteinst) { 1907 if (reg.encoding < 8) { 1908 if (needsRex(adr.getBase())) { 1909 if (needsRex(adr.getIndex())) { 1910 emitByte(Prefix.REXXB); 1911 } else { 1912 emitByte(Prefix.REXB); 1913 } 1914 } else { 1915 if (needsRex(adr.getIndex())) { 1916 emitByte(Prefix.REXX); 1917 } else if (byteinst && reg.encoding >= 4) { 1918 emitByte(Prefix.REX); 1919 } 1920 } 1921 } else { 1922 if (needsRex(adr.getBase())) { 1923 if (needsRex(adr.getIndex())) { 1924 emitByte(Prefix.REXRXB); 1925 } else { 1926 emitByte(Prefix.REXRB); 1927 } 1928 } else { 1929 if (needsRex(adr.getIndex())) { 1930 emitByte(Prefix.REXRX); 1931 } else { 1932 emitByte(Prefix.REXR); 1933 } 1934 } 1935 } 1936 } 1937 1938 private void prefixq(AMD64Address adr, Register src) { 1939 if (src.encoding < 8) { 1940 if (needsRex(adr.getBase())) { 1941 if (needsRex(adr.getIndex())) { 1942 emitByte(Prefix.REXWXB); 1943 } else { 1944 emitByte(Prefix.REXWB); 1945 } 1946 } else { 1947 if (needsRex(adr.getIndex())) { 1948 emitByte(Prefix.REXWX); 1949 } else { 1950 emitByte(Prefix.REXW); 1951 } 1952 } 1953 } else { 1954 if (needsRex(adr.getBase())) { 1955 if (needsRex(adr.getIndex())) { 1956 emitByte(Prefix.REXWRXB); 1957 } else { 1958 emitByte(Prefix.REXWRB); 1959 } 1960 } else { 1961 if (needsRex(adr.getIndex())) { 1962 emitByte(Prefix.REXWRX); 1963 } else { 1964 emitByte(Prefix.REXWR); 1965 } 1966 } 1967 } 1968 } 1969 1970 public final void addq(Register dst, int imm32) { 1971 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 1972 } 1973 1974 public final void addq(AMD64Address dst, int imm32) { 1975 ADD.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 1976 } 1977 1978 public final void addq(Register dst, Register src) { 1979 ADD.rmOp.emit(this, QWORD, dst, src); 1980 } 1981 1982 public final void addq(AMD64Address dst, Register src) { 1983 ADD.mrOp.emit(this, QWORD, dst, src); 1984 } 1985 1986 public final void andq(Register dst, int imm32) { 1987 AND.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 1988 } 1989 1990 public final void bswapq(Register reg) { 1991 int encode = prefixqAndEncode(reg.encoding); 1992 emitByte(0x0F); 1993 emitByte(0xC8 | encode); 1994 } 1995 1996 public final void cdqq() { 1997 emitByte(Prefix.REXW); 1998 emitByte(0x99); 1999 } 2000 2001 public final void cmovq(ConditionFlag cc, Register dst, Register src) { 2002 int encode = prefixqAndEncode(dst.encoding, src.encoding); 2003 emitByte(0x0F); 2004 emitByte(0x40 | cc.getValue()); 2005 emitByte(0xC0 | encode); 2006 } 2007 2008 public final void cmovq(ConditionFlag cc, Register dst, AMD64Address src) { 2009 prefixq(src, dst); 2010 emitByte(0x0F); 2011 emitByte(0x40 | cc.getValue()); 2012 emitOperandHelper(dst, src); 2013 } 2014 2015 public final void cmpq(Register dst, int imm32) { 2016 CMP.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2017 } 2018 2019 public final void cmpq(Register dst, Register src) { 2020 CMP.rmOp.emit(this, QWORD, dst, src); 2021 } 2022 2023 public final void cmpq(Register dst, AMD64Address src) { 2024 CMP.rmOp.emit(this, QWORD, dst, src); 2025 } 2026 2027 public final void cmpxchgq(Register reg, AMD64Address adr) { 2028 prefixq(adr, reg); 2029 emitByte(0x0F); 2030 emitByte(0xB1); 2031 emitOperandHelper(reg, adr); 2032 } 2033 2034 protected final void decq(Register dst) { 2035 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 2036 int encode = prefixqAndEncode(dst.encoding); 2037 emitByte(0xFF); 2038 emitByte(0xC8 | encode); 2039 } 2040 2041 public final void decq(AMD64Address dst) { 2042 DEC.emit(this, QWORD, dst); 2043 } 2044 2045 public final void incq(Register dst) { 2046 // Don't use it directly. Use Macroincrementq() instead. 2047 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) 2048 int encode = prefixqAndEncode(dst.encoding); 2049 emitByte(0xFF); 2050 emitByte(0xC0 | encode); 2051 } 2052 2053 public final void incq(AMD64Address dst) { 2054 INC.emit(this, QWORD, dst); 2055 } 2056 2057 public final void movq(Register dst, long imm64) { 2058 int encode = prefixqAndEncode(dst.encoding); 2059 emitByte(0xB8 | encode); 2060 emitLong(imm64); 2061 } 2062 2063 public final void movslq(Register dst, int imm32) { 2064 int encode = prefixqAndEncode(dst.encoding); 2065 emitByte(0xC7); 2066 emitByte(0xC0 | encode); 2067 emitInt(imm32); 2068 } 2069 2070 public final void movdq(Register dst, Register src) { 2071 2072 // table D-1 says MMX/SSE2 2073 emitByte(0x66); 2074 2075 if (dst.getRegisterCategory().equals(AMD64.XMM)) { 2076 int encode = prefixqAndEncode(dst.encoding, src.encoding); 2077 emitByte(0x0F); 2078 emitByte(0x6E); 2079 emitByte(0xC0 | encode); 2080 } else if (src.getRegisterCategory().equals(AMD64.XMM)) { 2081 2082 // swap src/dst to get correct prefix 2083 int encode = prefixqAndEncode(src.encoding, dst.encoding); 2084 emitByte(0x0F); 2085 emitByte(0x7E); 2086 emitByte(0xC0 | encode); 2087 } else { 2088 throw new InternalError("should not reach here"); 2089 } 2090 } 2091 2092 public final void movdqu(Register dst, AMD64Address src) { 2093 emitByte(0xF3); 2094 prefix(src, dst); 2095 emitByte(0x0F); 2096 emitByte(0x6F); 2097 emitOperandHelper(dst, src); 2098 } 2099 2100 public final void movslq(AMD64Address dst, int imm32) { 2101 prefixq(dst); 2102 emitByte(0xC7); 2103 emitOperandHelper(0, dst); 2104 emitInt(imm32); 2105 } 2106 2107 public final void movslq(Register dst, AMD64Address src) { 2108 prefixq(src, dst); 2109 emitByte(0x63); 2110 emitOperandHelper(dst, src); 2111 } 2112 2113 public final void movslq(Register dst, Register src) { 2114 int encode = prefixqAndEncode(dst.encoding, src.encoding); 2115 emitByte(0x63); 2116 emitByte(0xC0 | encode); 2117 } 2118 2119 public final void negq(Register dst) { 2120 int encode = prefixqAndEncode(dst.encoding); 2121 emitByte(0xF7); 2122 emitByte(0xD8 | encode); 2123 } 2124 2125 public final void shlq(Register dst, int imm8) { 2126 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2127 int encode = prefixqAndEncode(dst.encoding); 2128 if (imm8 == 1) { 2129 emitByte(0xD1); 2130 emitByte(0xE0 | encode); 2131 } else { 2132 emitByte(0xC1); 2133 emitByte(0xE0 | encode); 2134 emitByte(imm8); 2135 } 2136 } 2137 2138 public final void shrq(Register dst, int imm8) { 2139 assert isShiftCount(imm8 >> 1) : "illegal shift count"; 2140 int encode = prefixqAndEncode(dst.encoding); 2141 if (imm8 == 1) { 2142 emitByte(0xD1); 2143 emitByte(0xE8 | encode); 2144 } else { 2145 emitByte(0xC1); 2146 emitByte(0xE8 | encode); 2147 emitByte(imm8); 2148 } 2149 } 2150 2151 public final void subq(Register dst, int imm32) { 2152 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2153 } 2154 2155 public final void subq(AMD64Address dst, int imm32) { 2156 SUB.getMIOpcode(QWORD, isByte(imm32)).emit(this, QWORD, dst, imm32); 2157 } 2158 2159 public final void subqWide(Register dst, int imm32) { 2160 // don't use the sign-extending version, forcing a 32-bit immediate 2161 SUB.getMIOpcode(QWORD, false).emit(this, QWORD, dst, imm32); 2162 } 2163 2164 public final void subq(Register dst, Register src) { 2165 SUB.rmOp.emit(this, QWORD, dst, src); 2166 } 2167 2168 public final void testq(Register dst, Register src) { 2169 int encode = prefixqAndEncode(dst.encoding, src.encoding); 2170 emitByte(0x85); 2171 emitByte(0xC0 | encode); 2172 } 2173 2174 public final void xaddl(AMD64Address dst, Register src) { 2175 prefix(dst, src); 2176 emitByte(0x0F); 2177 emitByte(0xC1); 2178 emitOperandHelper(src, dst); 2179 } 2180 2181 public final void xaddq(AMD64Address dst, Register src) { 2182 prefixq(dst, src); 2183 emitByte(0x0F); 2184 emitByte(0xC1); 2185 emitOperandHelper(src, dst); 2186 } 2187 2188 public final void xchgl(Register dst, AMD64Address src) { 2189 prefix(src, dst); 2190 emitByte(0x87); 2191 emitOperandHelper(dst, src); 2192 } 2193 2194 public final void xchgq(Register dst, AMD64Address src) { 2195 prefixq(src, dst); 2196 emitByte(0x87); 2197 emitOperandHelper(dst, src); 2198 } 2199 2200 public final void membar(int barriers) { 2201 if (target.isMP) { 2202 // We only have to handle StoreLoad 2203 if ((barriers & STORE_LOAD) != 0) { 2204 // All usable chips support "locked" instructions which suffice 2205 // as barriers, and are much faster than the alternative of 2206 // using cpuid instruction. We use here a locked add [rsp],0. 2207 // This is conveniently otherwise a no-op except for blowing 2208 // flags. 2209 // Any change to this code may need to revisit other places in 2210 // the code where this idiom is used, in particular the 2211 // orderAccess code. 2212 lock(); 2213 addl(new AMD64Address(rsp, 0), 0); // Assert the lock# signal here 2214 } 2215 } 2216 } 2217 2218 @Override 2219 protected final void patchJumpTarget(int branch, int branchTarget) { 2220 int op = getByte(branch); 2221 assert op == 0xE8 // call 2222 || 2223 op == 0x00 // jump table entry 2224 || op == 0xE9 // jmp 2225 || op == 0xEB // short jmp 2226 || (op & 0xF0) == 0x70 // short jcc 2227 || op == 0x0F && (getByte(branch + 1) & 0xF0) == 0x80 // jcc 2228 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op; 2229 2230 if (op == 0x00) { 2231 int offsetToJumpTableBase = getShort(branch + 1); 2232 int jumpTableBase = branch - offsetToJumpTableBase; 2233 int imm32 = branchTarget - jumpTableBase; 2234 emitInt(imm32, branch); 2235 } else if (op == 0xEB || (op & 0xF0) == 0x70) { 2236 2237 // short offset operators (jmp and jcc) 2238 final int imm8 = branchTarget - (branch + 2); 2239 /* 2240 * Since a wrongly patched short branch can potentially lead to working but really bad 2241 * behaving code we should always fail with an exception instead of having an assert. 2242 */ 2243 if (!NumUtil.isByte(imm8)) { 2244 throw new InternalError("branch displacement out of range: " + imm8); 2245 } 2246 emitByte(imm8, branch + 1); 2247 2248 } else { 2249 2250 int off = 1; 2251 if (op == 0x0F) { 2252 off = 2; 2253 } 2254 2255 int imm32 = branchTarget - (branch + 4 + off); 2256 emitInt(imm32, branch + off); 2257 } 2258 } 2259 2260 public void nullCheck(AMD64Address address) { 2261 testl(AMD64.rax, address); 2262 } 2263 2264 @Override 2265 public void align(int modulus) { 2266 if (position() % modulus != 0) { 2267 nop(modulus - (position() % modulus)); 2268 } 2269 } 2270 2271 /** 2272 * Emits a direct call instruction. Note that the actual call target is not specified, because 2273 * all calls need patching anyway. Therefore, 0 is emitted as the call target, and the user is 2274 * responsible to add the call address to the appropriate patching tables. 2275 */ 2276 public final void call() { 2277 emitByte(0xE8); 2278 emitInt(0); 2279 } 2280 2281 public final void call(Register src) { 2282 int encode = prefixAndEncode(src.encoding); 2283 emitByte(0xFF); 2284 emitByte(0xD0 | encode); 2285 } 2286 2287 public final void int3() { 2288 emitByte(0xCC); 2289 } 2290 2291 private void emitx87(int b1, int b2, int i) { 2292 assert 0 <= i && i < 8 : "illegal stack offset"; 2293 emitByte(b1); 2294 emitByte(b2 + i); 2295 } 2296 2297 public final void fldd(AMD64Address src) { 2298 emitByte(0xDD); 2299 emitOperandHelper(0, src); 2300 } 2301 2302 public final void flds(AMD64Address src) { 2303 emitByte(0xD9); 2304 emitOperandHelper(0, src); 2305 } 2306 2307 public final void fldln2() { 2308 emitByte(0xD9); 2309 emitByte(0xED); 2310 } 2311 2312 public final void fldlg2() { 2313 emitByte(0xD9); 2314 emitByte(0xEC); 2315 } 2316 2317 public final void fyl2x() { 2318 emitByte(0xD9); 2319 emitByte(0xF1); 2320 } 2321 2322 public final void fstps(AMD64Address src) { 2323 emitByte(0xD9); 2324 emitOperandHelper(3, src); 2325 } 2326 2327 public final void fstpd(AMD64Address src) { 2328 emitByte(0xDD); 2329 emitOperandHelper(3, src); 2330 } 2331 2332 private void emitFPUArith(int b1, int b2, int i) { 2333 assert 0 <= i && i < 8 : "illegal FPU register: " + i; 2334 emitByte(b1); 2335 emitByte(b2 + i); 2336 } 2337 2338 public void ffree(int i) { 2339 emitFPUArith(0xDD, 0xC0, i); 2340 } 2341 2342 public void fincstp() { 2343 emitByte(0xD9); 2344 emitByte(0xF7); 2345 } 2346 2347 public void fxch(int i) { 2348 emitFPUArith(0xD9, 0xC8, i); 2349 } 2350 2351 public void fnstswAX() { 2352 emitByte(0xDF); 2353 emitByte(0xE0); 2354 } 2355 2356 public void fwait() { 2357 emitByte(0x9B); 2358 } 2359 2360 public void fprem() { 2361 emitByte(0xD9); 2362 emitByte(0xF8); 2363 } 2364 2365 public final void fsin() { 2366 emitByte(0xD9); 2367 emitByte(0xFE); 2368 } 2369 2370 public final void fcos() { 2371 emitByte(0xD9); 2372 emitByte(0xFF); 2373 } 2374 2375 public final void fptan() { 2376 emitByte(0xD9); 2377 emitByte(0xF2); 2378 } 2379 2380 public final void fstp(int i) { 2381 emitx87(0xDD, 0xD8, i); 2382 } 2383 2384 @Override 2385 public AMD64Address makeAddress(Register base, int displacement) { 2386 return new AMD64Address(base, displacement); 2387 } 2388 2389 @Override 2390 public AMD64Address getPlaceholder() { 2391 return Placeholder; 2392 } 2393 2394 private void prefetchPrefix(AMD64Address src) { 2395 prefix(src); 2396 emitByte(0x0F); 2397 } 2398 2399 public void prefetchnta(AMD64Address src) { 2400 prefetchPrefix(src); 2401 emitByte(0x18); 2402 emitOperandHelper(0, src); 2403 } 2404 2405 void prefetchr(AMD64Address src) { 2406 assert supports(CPUFeature.AMD_3DNOW_PREFETCH); 2407 prefetchPrefix(src); 2408 emitByte(0x0D); 2409 emitOperandHelper(0, src); 2410 } 2411 2412 public void prefetcht0(AMD64Address src) { 2413 assert supports(CPUFeature.SSE); 2414 prefetchPrefix(src); 2415 emitByte(0x18); 2416 emitOperandHelper(1, src); 2417 } 2418 2419 public void prefetcht1(AMD64Address src) { 2420 assert supports(CPUFeature.SSE); 2421 prefetchPrefix(src); 2422 emitByte(0x18); 2423 emitOperandHelper(2, src); 2424 } 2425 2426 public void prefetcht2(AMD64Address src) { 2427 assert supports(CPUFeature.SSE); 2428 prefix(src); 2429 emitByte(0x0f); 2430 emitByte(0x18); 2431 emitOperandHelper(3, src); 2432 } 2433 2434 public void prefetchw(AMD64Address src) { 2435 assert supports(CPUFeature.AMD_3DNOW_PREFETCH); 2436 prefix(src); 2437 emitByte(0x0f); 2438 emitByte(0x0D); 2439 emitOperandHelper(1, src); 2440 } 2441 2442 /** 2443 * Emits an instruction which is considered to be illegal. This is used if we deliberately want 2444 * to crash the program (debugging etc.). 2445 */ 2446 public void illegal() { 2447 emitByte(0x0f); 2448 emitByte(0x0b); 2449 } 2450}