comparison graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/AMD64Assembler.java @ 6497:64b7dd2075c0

renamed projects com.oracle.max.asm* to com.oracle.graal.asm*
author Doug Simon <doug.simon@oracle.com>
date Wed, 03 Oct 2012 17:42:12 +0200
parents graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/AMD64Assembler.java@85c1b84f8fd9
children 6bc8aa568cb9
comparison
equal deleted inserted replaced
6496:16d1411409b4 6497:64b7dd2075c0
1 /*
2 * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23 package com.oracle.max.asm.amd64;
24
25 import static com.oracle.graal.api.code.ValueUtil.*;
26 import static com.oracle.max.asm.NumUtil.*;
27 import static com.oracle.max.asm.amd64.AMD64.*;
28 import static com.oracle.max.asm.amd64.AMD64AsmOptions.*;
29 import static com.oracle.max.criutils.MemoryBarriers.*;
30
31 import com.oracle.graal.api.code.*;
32 import com.oracle.graal.api.meta.*;
33 import com.oracle.max.asm.*;
34
35 /**
36 * This class implements an assembler that can encode most X86 instructions.
37 */
38 public class AMD64Assembler extends AbstractAssembler {
39 /**
40 * The kind for pointers and raw registers. Since we know we are 64 bit here, we can hardcode it.
41 */
42 private static final Kind Word = Kind.Long;
43
44 private static final int MinEncodingNeedsRex = 8;
45
/**
 * The x86 condition codes used for conditional jumps/moves. Each constant carries its
 * 4-bit condition encoding ({@link #value}) and a printable symbol ({@link #operator}).
 * Some constants are aliases that share an encoding (for example {@code zero} and {@code equal}).
 */
public enum ConditionFlag {
    zero(0x4, "|zero|"),
    notZero(0x5, "|nzero|"),
    equal(0x4, "="),
    notEqual(0x5, "!="),
    less(0xc, "<"),
    lessEqual(0xe, "<="),
    greater(0xf, ">"),
    greaterEqual(0xd, ">="),
    below(0x2, "|<|"),
    belowEqual(0x6, "|<=|"),
    above(0x7, "|>|"),
    aboveEqual(0x3, "|>=|"),
    overflow(0x0, "|of|"),
    noOverflow(0x1, "|nof|"),
    carrySet(0x2, "|carry|"),
    carryClear(0x3, "|ncarry|"),
    negative(0x8, "|neg|"),
    positive(0x9, "|pos|"),
    parity(0xa, "|par|"),
    noParity(0xb, "|npar|");

    /** Encoding of the condition as OR-ed into jcc/cmov opcodes by the assembler. */
    public final int value;

    /** Human-readable symbol for the condition. */
    public final String operator;

    private ConditionFlag(int encoding, String symbol) {
        value = encoding;
        operator = symbol;
    }

    /**
     * Returns the opposite condition, keeping the naming family of the receiver
     * (e.g. {@code zero} maps to {@code notZero}, {@code equal} to {@code notEqual}).
     */
    public ConditionFlag negate() {
        switch (this) {
            // equality family
            case zero:
                return notZero;
            case notZero:
                return zero;
            case equal:
                return notEqual;
            case notEqual:
                return equal;

            // signed ordering
            case less:
                return greaterEqual;
            case greaterEqual:
                return less;
            case lessEqual:
                return greater;
            case greater:
                return lessEqual;

            // unsigned ordering
            case below:
                return aboveEqual;
            case aboveEqual:
                return below;
            case belowEqual:
                return above;
            case above:
                return belowEqual;

            // individual flags
            case overflow:
                return noOverflow;
            case noOverflow:
                return overflow;
            case carrySet:
                return carryClear;
            case carryClear:
                return carrySet;
            case negative:
                return positive;
            case positive:
                return negative;
            case parity:
                return noParity;
            case noParity:
                return parity;

            default:
                throw new IllegalArgumentException();
        }
    }
}
105
106 /**
107 * Constants for X86 prefix bytes.
108 */
109 private static class Prefix {
110 private static final int REX = 0x40;
111 private static final int REXB = 0x41;
112 private static final int REXX = 0x42;
113 private static final int REXXB = 0x43;
114 private static final int REXR = 0x44;
115 private static final int REXRB = 0x45;
116 private static final int REXRX = 0x46;
117 private static final int REXRXB = 0x47;
118 private static final int REXW = 0x48;
119 private static final int REXWB = 0x49;
120 private static final int REXWX = 0x4A;
121 private static final int REXWXB = 0x4B;
122 private static final int REXWR = 0x4C;
123 private static final int REXWRB = 0x4D;
124 private static final int REXWRX = 0x4E;
125 private static final int REXWRXB = 0x4F;
126 }
127
/**
 * The register to which {@link Register#Frame} and {@link Register#CallerFrame} are bound.
 * It is {@code null} when the assembler was created without a register configuration.
 */
public final Register frameRegister;

/**
 * Constructs an assembler for the AMD64 architecture.
 *
 * @param target the target description handed to the superclass constructor
 * @param registerConfig the register configuration used to bind {@link Register#Frame} and
 *            {@link Register#CallerFrame} to physical registers. This value can be null if this
 *            assembler instance will not be used to assemble instructions using these logical
 *            registers.
 */
public AMD64Assembler(TargetDescription target, RegisterConfig registerConfig) {
    super(target);
    if (registerConfig != null) {
        this.frameRegister = registerConfig.getFrameRegister();
    } else {
        this.frameRegister = null;
    }
}
144
145 private static int encode(Register r) {
146 assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
147 return r.encoding & 0x7;
148 }
149
150 private void emitArithB(int op1, int op2, Register dst, int imm8) {
151 assert dst.isByte() : "must have byte register";
152 assert isUByte(op1) && isUByte(op2) : "wrong opcode";
153 assert isUByte(imm8) : "not a byte";
154 assert (op1 & 0x01) == 0 : "should be 8bit operation";
155 emitByte(op1);
156 emitByte(op2 | encode(dst));
157 emitByte(imm8);
158 }
159
160 private void emitArith(int op1, int op2, Register dst, int imm32) {
161 assert isUByte(op1) && isUByte(op2) : "wrong opcode";
162 assert (op1 & 0x01) == 1 : "should be 32bit operation";
163 assert (op1 & 0x02) == 0 : "sign-extension bit should not be set";
164 if (isByte(imm32)) {
165 emitByte(op1 | 0x02); // set sign bit
166 emitByte(op2 | encode(dst));
167 emitByte(imm32 & 0xFF);
168 } else {
169 emitByte(op1);
170 emitByte(op2 | encode(dst));
171 emitInt(imm32);
172 }
173 }
174
175 // immediate-to-memory forms
176 private void emitArithOperand(int op1, Register rm, Address adr, int imm32) {
177 assert (op1 & 0x01) == 1 : "should be 32bit operation";
178 assert (op1 & 0x02) == 0 : "sign-extension bit should not be set";
179 if (isByte(imm32)) {
180 emitByte(op1 | 0x02); // set sign bit
181 emitOperandHelper(rm, adr);
182 emitByte(imm32 & 0xFF);
183 } else {
184 emitByte(op1);
185 emitOperandHelper(rm, adr);
186 emitInt(imm32);
187 }
188 }
189
190 private void emitArith(int op1, int op2, Register dst, Register src) {
191 assert isUByte(op1) && isUByte(op2) : "wrong opcode";
192 emitByte(op1);
193 emitByte(op2 | encode(dst) << 3 | encode(src));
194 }
195
196 private void emitOperandHelper(Register reg, Address addr) {
197 Register base = isLegal(addr.getBase()) ? asRegister(addr.getBase()) : Register.None;
198 Register index = isLegal(addr.getIndex()) ? asRegister(addr.getIndex()) : Register.None;
199
200 Address.Scale scale = addr.getScale();
201 int disp = addr.getDisplacement();
202
203 if (base == Register.Frame) {
204 assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration";
205 base = frameRegister;
206 // } else if (base == Register.CallerFrame) {
207 // assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration";
208 // base = frameRegister;
209 // disp += targetMethod.frameSize() + 8;
210 }
211
212 // Encode the registers as needed in the fields they are used in
213
214 assert reg != Register.None;
215 int regenc = encode(reg) << 3;
216
217 if (base == AMD64.rip) {
218 // [00 000 101] disp32
219 emitByte(0x05 | regenc);
220 emitInt(disp);
221 } else if (addr == Address.Placeholder) {
222 // [00 000 101] disp32
223 emitByte(0x05 | regenc);
224 emitInt(0);
225
226 } else if (base.isValid()) {
227 int baseenc = base.isValid() ? encode(base) : 0;
228 if (index.isValid()) {
229 int indexenc = encode(index) << 3;
230 // [base + indexscale + disp]
231 if (disp == 0 && base != rbp && (base != r13)) {
232 // [base + indexscale]
233 // [00 reg 100][ss index base]
234 assert index != rsp : "illegal addressing mode";
235 emitByte(0x04 | regenc);
236 emitByte(scale.log2 << 6 | indexenc | baseenc);
237 } else if (isByte(disp)) {
238 // [base + indexscale + imm8]
239 // [01 reg 100][ss index base] imm8
240 assert index != rsp : "illegal addressing mode";
241 emitByte(0x44 | regenc);
242 emitByte(scale.log2 << 6 | indexenc | baseenc);
243 emitByte(disp & 0xFF);
244 } else {
245 // [base + indexscale + disp32]
246 // [10 reg 100][ss index base] disp32
247 assert index != rsp : "illegal addressing mode";
248 emitByte(0x84 | regenc);
249 emitByte(scale.log2 << 6 | indexenc | baseenc);
250 emitInt(disp);
251 }
252 } else if (base == rsp || (base == r12)) {
253 // [rsp + disp]
254 if (disp == 0) {
255 // [rsp]
256 // [00 reg 100][00 100 100]
257 emitByte(0x04 | regenc);
258 emitByte(0x24);
259 } else if (isByte(disp)) {
260 // [rsp + imm8]
261 // [01 reg 100][00 100 100] disp8
262 emitByte(0x44 | regenc);
263 emitByte(0x24);
264 emitByte(disp & 0xFF);
265 } else {
266 // [rsp + imm32]
267 // [10 reg 100][00 100 100] disp32
268 emitByte(0x84 | regenc);
269 emitByte(0x24);
270 emitInt(disp);
271 }
272 } else {
273 // [base + disp]
274 assert base != rsp && (base != r12) : "illegal addressing mode";
275 if (disp == 0 && base != rbp && (base != r13)) {
276 // [base]
277 // [00 reg base]
278 emitByte(0x00 | regenc | baseenc);
279 } else if (isByte(disp)) {
280 // [base + disp8]
281 // [01 reg base] disp8
282 emitByte(0x40 | regenc | baseenc);
283 emitByte(disp & 0xFF);
284 } else {
285 // [base + disp32]
286 // [10 reg base] disp32
287 emitByte(0x80 | regenc | baseenc);
288 emitInt(disp);
289 }
290 }
291 } else {
292 if (index.isValid()) {
293 int indexenc = encode(index) << 3;
294 // [indexscale + disp]
295 // [00 reg 100][ss index 101] disp32
296 assert index != rsp : "illegal addressing mode";
297 emitByte(0x04 | regenc);
298 emitByte(scale.log2 << 6 | indexenc | 0x05);
299 emitInt(disp);
300 } else {
301 // [disp] ABSOLUTE
302 // [00 reg 100][00 100 101] disp32
303 emitByte(0x04 | regenc);
304 emitByte(0x25);
305 emitInt(disp);
306 }
307 }
308 }
309
310 public final void addl(Address dst, int imm32) {
311 prefix(dst);
312 emitArithOperand(0x81, rax, dst, imm32);
313 }
314
315 public final void addl(Address dst, Register src) {
316 prefix(dst, src);
317 emitByte(0x01);
318 emitOperandHelper(src, dst);
319 }
320
321 public final void addl(Register dst, int imm32) {
322 prefix(dst);
323 emitArith(0x81, 0xC0, dst, imm32);
324 }
325
326 public final void addl(Register dst, Address src) {
327 prefix(src, dst);
328 emitByte(0x03);
329 emitOperandHelper(dst, src);
330 }
331
332 public final void addl(Register dst, Register src) {
333 prefixAndEncode(dst.encoding, src.encoding);
334 emitArith(0x03, 0xC0, dst, src);
335 }
336
337 private void addrNop4() {
338 // 4 bytes: NOP DWORD PTR [EAX+0]
339 emitByte(0x0F);
340 emitByte(0x1F);
341 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
342 emitByte(0); // 8-bits offset (1 byte)
343 }
344
345 private void addrNop5() {
346 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
347 emitByte(0x0F);
348 emitByte(0x1F);
349 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
350 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
351 emitByte(0); // 8-bits offset (1 byte)
352 }
353
354 private void addrNop7() {
355 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
356 emitByte(0x0F);
357 emitByte(0x1F);
358 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
359 emitInt(0); // 32-bits offset (4 bytes)
360 }
361
362 private void addrNop8() {
363 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
364 emitByte(0x0F);
365 emitByte(0x1F);
366 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
367 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
368 emitInt(0); // 32-bits offset (4 bytes)
369 }
370
371 public final void addsd(Register dst, Register src) {
372 assert dst.isFpu() && src.isFpu();
373 emitByte(0xF2);
374 int encode = prefixAndEncode(dst.encoding, src.encoding);
375 emitByte(0x0F);
376 emitByte(0x58);
377 emitByte(0xC0 | encode);
378 }
379
380 public final void addsd(Register dst, Address src) {
381 assert dst.isFpu();
382 emitByte(0xF2);
383 prefix(src, dst);
384 emitByte(0x0F);
385 emitByte(0x58);
386 emitOperandHelper(dst, src);
387 }
388
389 public final void addss(Register dst, Register src) {
390 assert dst.isFpu() && src.isFpu();
391 emitByte(0xF3);
392 int encode = prefixAndEncode(dst.encoding, src.encoding);
393 emitByte(0x0F);
394 emitByte(0x58);
395 emitByte(0xC0 | encode);
396 }
397
398 public final void addss(Register dst, Address src) {
399 assert dst.isFpu();
400 emitByte(0xF3);
401 prefix(src, dst);
402 emitByte(0x0F);
403 emitByte(0x58);
404 emitOperandHelper(dst, src);
405 }
406
407 public final void andl(Register dst, int imm32) {
408 prefix(dst);
409 emitArith(0x81, 0xE0, dst, imm32);
410 }
411
412 public final void andl(Register dst, Address src) {
413 prefix(src, dst);
414 emitByte(0x23);
415 emitOperandHelper(dst, src);
416 }
417
418 public final void andl(Register dst, Register src) {
419 prefixAndEncode(dst.encoding, src.encoding);
420 emitArith(0x23, 0xC0, dst, src);
421 }
422
423 public final void bsfq(Register dst, Register src) {
424 int encode = prefixqAndEncode(dst.encoding, src.encoding);
425 emitByte(0x0F);
426 emitByte(0xBC);
427 emitByte(0xC0 | encode);
428 }
429
430 public final void bsfq(Register dst, Address src) {
431 prefixq(src, dst);
432 emitByte(0xBC);
433 emitOperandHelper(dst, src);
434 }
435
436 public final void bsrq(Register dst, Register src) {
437 int encode = prefixqAndEncode(dst.encoding, src.encoding);
438 emitByte(0x0F);
439 emitByte(0xBD);
440 emitByte(0xC0 | encode);
441 }
442
443
444 public final void bsrq(Register dst, Address src) {
445 prefixq(src, dst);
446 emitByte(0xBD);
447 emitOperandHelper(dst, src);
448 }
449
450 public final void bsrl(Register dst, Register src) {
451 int encode = prefixAndEncode(dst.encoding, src.encoding);
452 emitByte(0x0F);
453 emitByte(0xBD);
454 emitByte(0xC0 | encode);
455 }
456
457
458 public final void bsrl(Register dst, Address src) {
459 prefix(src, dst);
460 emitByte(0xBD);
461 emitOperandHelper(dst, src);
462 }
463
464 public final void bswapl(Register reg) { // bswap
465 int encode = prefixAndEncode(reg.encoding);
466 emitByte(0x0F);
467 emitByte(0xC8 | encode);
468 }
469
470 public final void btli(Address src, int imm8) {
471 prefixq(src);
472 emitByte(0x0F);
473 emitByte(0xBA);
474 emitOperandHelper(rsp, src);
475 emitByte(imm8);
476 }
477
478 public final void cdql() {
479 emitByte(0x99);
480 }
481
482 public final void cmovl(ConditionFlag cc, Register dst, Register src) {
483 int encode = prefixAndEncode(dst.encoding, src.encoding);
484 emitByte(0x0F);
485 emitByte(0x40 | cc.value);
486 emitByte(0xC0 | encode);
487 }
488
489 public final void cmovl(ConditionFlag cc, Register dst, Address src) {
490 prefix(src, dst);
491 emitByte(0x0F);
492 emitByte(0x40 | cc.value);
493 emitOperandHelper(dst, src);
494 }
495
496 public final void cmpb(Address dst, int imm8) {
497 prefix(dst);
498 emitByte(0x80);
499 emitOperandHelper(rdi, dst);
500 emitByte(imm8);
501 }
502
503 public final void cmpl(Address dst, int imm32) {
504 prefix(dst);
505 emitByte(0x81);
506 emitOperandHelper(rdi, dst);
507 emitInt(imm32);
508 }
509
510 public final void cmpl(Register dst, int imm32) {
511 prefix(dst);
512 emitArith(0x81, 0xF8, dst, imm32);
513 }
514
515 public final void cmpl(Register dst, Register src) {
516 prefixAndEncode(dst.encoding, src.encoding);
517 emitArith(0x3B, 0xC0, dst, src);
518 }
519
520 public final void cmpl(Register dst, Address src) {
521 prefix(src, dst);
522 emitByte(0x3B);
523 emitOperandHelper(dst, src);
524 }
525
526 // The 32-bit cmpxchg compares the value at adr with the contents of X86.rax,
527 // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,.
528 // The ZF is set if the compared values were equal, and cleared otherwise.
529 public final void cmpxchgl(Register reg, Address adr) { // cmpxchg
530 if ((Atomics & 2) != 0) {
531 // caveat: no instructionmark, so this isn't relocatable.
532 // Emit a synthetic, non-atomic, CAS equivalent.
533 // Beware. The synthetic form sets all ICCs, not just ZF.
534 // cmpxchg r,[m] is equivalent to X86.rax, = CAS (m, X86.rax, r)
535 cmpl(rax, adr);
536 movl(rax, adr);
537 if (reg != rax) {
538 Label l = new Label();
539 jcc(ConditionFlag.notEqual, l);
540 movl(adr, reg);
541 bind(l);
542 }
543 } else {
544
545 prefix(adr, reg);
546 emitByte(0x0F);
547 emitByte(0xB1);
548 emitOperandHelper(reg, adr);
549 }
550 }
551
552 public final void comisd(Register dst, Address src) {
553 assert dst.isFpu();
554 // NOTE: dbx seems to decode this as comiss even though the
555 // 0x66 is there. Strangly ucomisd comes out correct
556 emitByte(0x66);
557 comiss(dst, src);
558 }
559
560 public final void comiss(Register dst, Address src) {
561 assert dst.isFpu();
562
563 prefix(src, dst);
564 emitByte(0x0F);
565 emitByte(0x2F);
566 emitOperandHelper(dst, src);
567 }
568
569 public final void cvtdq2pd(Register dst, Register src) {
570 assert dst.isFpu();
571 assert src.isFpu();
572
573 emitByte(0xF3);
574 int encode = prefixAndEncode(dst.encoding, src.encoding);
575 emitByte(0x0F);
576 emitByte(0xE6);
577 emitByte(0xC0 | encode);
578 }
579
580 public final void cvtdq2ps(Register dst, Register src) {
581 assert dst.isFpu();
582 assert src.isFpu();
583 int encode = prefixAndEncode(dst.encoding, src.encoding);
584 emitByte(0x0F);
585 emitByte(0x5B);
586 emitByte(0xC0 | encode);
587 }
588
589 public final void cvtsd2ss(Register dst, Register src) {
590 assert dst.isFpu();
591 assert src.isFpu();
592 emitByte(0xF2);
593 int encode = prefixAndEncode(dst.encoding, src.encoding);
594 emitByte(0x0F);
595 emitByte(0x5A);
596 emitByte(0xC0 | encode);
597 }
598
599 public final void cvtsi2sdl(Register dst, Register src) {
600 assert dst.isFpu();
601 emitByte(0xF2);
602 int encode = prefixAndEncode(dst.encoding, src.encoding);
603 emitByte(0x0F);
604 emitByte(0x2A);
605 emitByte(0xC0 | encode);
606 }
607
608 public final void cvtsi2ssl(Register dst, Register src) {
609 assert dst.isFpu();
610 emitByte(0xF3);
611 int encode = prefixAndEncode(dst.encoding, src.encoding);
612 emitByte(0x0F);
613 emitByte(0x2A);
614 emitByte(0xC0 | encode);
615 }
616
617 public final void cvtss2sd(Register dst, Register src) {
618 assert dst.isFpu();
619 assert src.isFpu();
620 emitByte(0xF3);
621 int encode = prefixAndEncode(dst.encoding, src.encoding);
622 emitByte(0x0F);
623 emitByte(0x5A);
624 emitByte(0xC0 | encode);
625 }
626
627 public final void cvttsd2sil(Register dst, Register src) {
628 assert src.isFpu();
629 emitByte(0xF2);
630 int encode = prefixAndEncode(dst.encoding, src.encoding);
631 emitByte(0x0F);
632 emitByte(0x2C);
633 emitByte(0xC0 | encode);
634 }
635
636 public final void cvttss2sil(Register dst, Register src) {
637 assert src.isFpu();
638 emitByte(0xF3);
639 int encode = prefixAndEncode(dst.encoding, src.encoding);
640 emitByte(0x0F);
641 emitByte(0x2C);
642 emitByte(0xC0 | encode);
643 }
644
645 public final void decl(Address dst) {
646 // Don't use it directly. Use Macrodecrement() instead.
647 prefix(dst);
648 emitByte(0xFF);
649 emitOperandHelper(rcx, dst);
650 }
651
652 public final void divsd(Register dst, Address src) {
653 assert dst.isFpu();
654 emitByte(0xF2);
655 prefix(src, dst);
656 emitByte(0x0F);
657 emitByte(0x5E);
658 emitOperandHelper(dst, src);
659 }
660
661 public final void divsd(Register dst, Register src) {
662 assert dst.isFpu();
663 assert src.isFpu();
664 emitByte(0xF2);
665 int encode = prefixAndEncode(dst.encoding, src.encoding);
666 emitByte(0x0F);
667 emitByte(0x5E);
668 emitByte(0xC0 | encode);
669 }
670
671 public final void divss(Register dst, Address src) {
672 assert dst.isFpu();
673 emitByte(0xF3);
674 prefix(src, dst);
675 emitByte(0x0F);
676 emitByte(0x5E);
677 emitOperandHelper(dst, src);
678 }
679
680 public final void divss(Register dst, Register src) {
681 assert dst.isFpu();
682 assert src.isFpu();
683 emitByte(0xF3);
684 int encode = prefixAndEncode(dst.encoding, src.encoding);
685 emitByte(0x0F);
686 emitByte(0x5E);
687 emitByte(0xC0 | encode);
688 }
689
690 public final void hlt() {
691 emitByte(0xF4);
692 }
693
694 public final void idivl(Register src) {
695 int encode = prefixAndEncode(src.encoding);
696 emitByte(0xF7);
697 emitByte(0xF8 | encode);
698 }
699
700 public final void divl(Register src) {
701 int encode = prefixAndEncode(src.encoding);
702 emitByte(0xF7);
703 emitByte(0xF0 | encode);
704 }
705
706 public final void imull(Register dst, Register src) {
707 int encode = prefixAndEncode(dst.encoding, src.encoding);
708 emitByte(0x0F);
709 emitByte(0xAF);
710 emitByte(0xC0 | encode);
711 }
712
713 public final void imull(Register dst, Register src, int value) {
714 int encode = prefixAndEncode(dst.encoding, src.encoding);
715 if (isByte(value)) {
716 emitByte(0x6B);
717 emitByte(0xC0 | encode);
718 emitByte(value & 0xFF);
719 } else {
720 emitByte(0x69);
721 emitByte(0xC0 | encode);
722 emitInt(value);
723 }
724 }
725
726 public final void incl(Address dst) {
727 // Don't use it directly. Use Macroincrement() instead.
728 prefix(dst);
729 emitByte(0xFF);
730 emitOperandHelper(rax, dst);
731 }
732
733 public final void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) {
734 int shortSize = 2;
735 int longSize = 6;
736 long disp = jumpTarget - codeBuffer.position();
737 if (!forceDisp32 && isByte(disp - shortSize)) {
738 // 0111 tttn #8-bit disp
739 emitByte(0x70 | cc.value);
740 emitByte((int) ((disp - shortSize) & 0xFF));
741 } else {
742 // 0000 1111 1000 tttn #32-bit disp
743 assert isInt(disp - longSize) : "must be 32bit offset (call4)";
744 emitByte(0x0F);
745 emitByte(0x80 | cc.value);
746 emitInt((int) (disp - longSize));
747 }
748 }
749
750 public final void jcc(ConditionFlag cc, Label l) {
751 assert (0 <= cc.value) && (cc.value < 16) : "illegal cc";
752 if (l.isBound()) {
753 jcc(cc, l.position(), false);
754 } else {
755 // Note: could eliminate cond. jumps to this jump if condition
756 // is the same however, seems to be rather unlikely case.
757 // Note: use jccb() if label to be bound is very close to get
758 // an 8-bit displacement
759 l.addPatchAt(codeBuffer.position());
760 emitByte(0x0F);
761 emitByte(0x80 | cc.value);
762 emitInt(0);
763 }
764
765 }
766
767 public final void jccb(ConditionFlag cc, Label l) {
768 if (l.isBound()) {
769 int shortSize = 2;
770 int entry = l.position();
771 assert isByte(entry - (codeBuffer.position() + shortSize)) : "Dispacement too large for a short jmp";
772 long disp = entry - codeBuffer.position();
773 // 0111 tttn #8-bit disp
774 emitByte(0x70 | cc.value);
775 emitByte((int) ((disp - shortSize) & 0xFF));
776 } else {
777
778 l.addPatchAt(codeBuffer.position());
779 emitByte(0x70 | cc.value);
780 emitByte(0);
781 }
782 }
783
784 public final void jmp(Address adr) {
785 prefix(adr);
786 emitByte(0xFF);
787 emitOperandHelper(rsp, adr);
788 }
789
790 public final void jmp(int jumpTarget, boolean forceDisp32) {
791 int shortSize = 2;
792 int longSize = 5;
793 long disp = jumpTarget - codeBuffer.position();
794 if (!forceDisp32 && isByte(disp - shortSize)) {
795 emitByte(0xEB);
796 emitByte((int) ((disp - shortSize) & 0xFF));
797 } else {
798 emitByte(0xE9);
799 emitInt((int) (disp - longSize));
800 }
801 }
802
803 @Override
804 public final void jmp(Label l) {
805 if (l.isBound()) {
806 jmp(l.position(), false);
807 } else {
808 // By default, forward jumps are always 32-bit displacements, since
809 // we can't yet know where the label will be bound. If you're sure that
810 // the forward jump will not run beyond 256 bytes, use jmpb to
811 // force an 8-bit displacement.
812
813 l.addPatchAt(codeBuffer.position());
814 emitByte(0xE9);
815 emitInt(0);
816 }
817 }
818
819 public final void jmp(Register entry) {
820 int encode = prefixAndEncode(entry.encoding);
821 emitByte(0xFF);
822 emitByte(0xE0 | encode);
823 }
824
825 public final void jmpb(Label l) {
826 if (l.isBound()) {
827 int shortSize = 2;
828 int entry = l.position();
829 assert isByte((entry - codeBuffer.position()) + shortSize) : "Dispacement too large for a short jmp";
830 long offs = entry - codeBuffer.position();
831 emitByte(0xEB);
832 emitByte((int) ((offs - shortSize) & 0xFF));
833 } else {
834
835 l.addPatchAt(codeBuffer.position());
836 emitByte(0xEB);
837 emitByte(0);
838 }
839 }
840
841 public final void leaq(Register dst, Address src) {
842 prefixq(src, dst);
843 emitByte(0x8D);
844 emitOperandHelper(dst, src);
845 }
846
847 public final void enter(int imm16, int imm8) {
848 emitByte(0xC8);
849 emitShort(imm16);
850 emitByte(imm8);
851 }
852
853 public final void leave() {
854 emitByte(0xC9);
855 }
856
857 public final void lock() {
858 if ((Atomics & 1) != 0) {
859 // Emit either nothing, a NOP, or a NOP: prefix
860 emitByte(0x90);
861 } else {
862 emitByte(0xF0);
863 }
864 }
865
866 // Emit mfence instruction
867 public final void mfence() {
868 emitByte(0x0F);
869 emitByte(0xAE);
870 emitByte(0xF0);
871 }
872
873 public final void mov(Register dst, Register src) {
874 movq(dst, src);
875 }
876
877 public final void movapd(Register dst, Register src) {
878 assert dst.isFpu();
879 assert src.isFpu();
880 int dstenc = dst.encoding;
881 int srcenc = src.encoding;
882 emitByte(0x66);
883 if (dstenc < 8) {
884 if (srcenc >= 8) {
885 emitByte(Prefix.REXB);
886 srcenc -= 8;
887 }
888 } else {
889 if (srcenc < 8) {
890 emitByte(Prefix.REXR);
891 } else {
892 emitByte(Prefix.REXRB);
893 srcenc -= 8;
894 }
895 dstenc -= 8;
896 }
897 emitByte(0x0F);
898 emitByte(0x28);
899 emitByte(0xC0 | dstenc << 3 | srcenc);
900 }
901
902 public final void movaps(Register dst, Register src) {
903 assert dst.isFpu();
904 assert src.isFpu();
905 int dstenc = dst.encoding;
906 int srcenc = src.encoding;
907 if (dstenc < 8) {
908 if (srcenc >= 8) {
909 emitByte(Prefix.REXB);
910 srcenc -= 8;
911 }
912 } else {
913 if (srcenc < 8) {
914 emitByte(Prefix.REXR);
915 } else {
916 emitByte(Prefix.REXRB);
917 srcenc -= 8;
918 }
919 dstenc -= 8;
920 }
921 emitByte(0x0F);
922 emitByte(0x28);
923 emitByte(0xC0 | dstenc << 3 | srcenc);
924 }
925
926 public final void movb(Register dst, Address src) {
927 prefix(src, dst); // , true)
928 emitByte(0x8A);
929 emitOperandHelper(dst, src);
930 }
931
932 public final void movb(Address dst, int imm8) {
933 prefix(dst);
934 emitByte(0xC6);
935 emitOperandHelper(rax, dst);
936 emitByte(imm8);
937 }
938
939 public final void movb(Address dst, Register src) {
940 assert src.isByte() : "must have byte register";
941 prefix(dst, src); // , true)
942 emitByte(0x88);
943 emitOperandHelper(src, dst);
944 }
945
946 public final void movdl(Register dst, Register src) {
947 if (dst.isFpu()) {
948 assert !src.isFpu() : "does this hold?";
949 emitByte(0x66);
950 int encode = prefixAndEncode(dst.encoding, src.encoding);
951 emitByte(0x0F);
952 emitByte(0x6E);
953 emitByte(0xC0 | encode);
954 } else if (src.isFpu()) {
955 assert !dst.isFpu();
956 emitByte(0x66);
957 // swap src/dst to get correct prefix
958 int encode = prefixAndEncode(src.encoding, dst.encoding);
959 emitByte(0x0F);
960 emitByte(0x7E);
961 emitByte(0xC0 | encode);
962 }
963 }
964
965 public final void movdqa(Register dst, Address src) {
966 assert dst.isFpu();
967 emitByte(0x66);
968 prefix(src, dst);
969 emitByte(0x0F);
970 emitByte(0x6F);
971 emitOperandHelper(dst, src);
972 }
973
974 public final void movdqa(Register dst, Register src) {
975 assert dst.isFpu();
976 emitByte(0x66);
977 int encode = prefixqAndEncode(dst.encoding, src.encoding);
978 emitByte(0x0F);
979 emitByte(0x6F);
980 emitByte(0xC0 | encode);
981 }
982
983 public final void movdqa(Address dst, Register src) {
984 assert src.isFpu();
985 emitByte(0x66);
986 prefix(dst, src);
987 emitByte(0x0F);
988 emitByte(0x7F);
989 emitOperandHelper(src, dst);
990 }
991
992 public final void movdqu(Register dst, Address src) {
993 assert dst.isFpu();
994 emitByte(0xF3);
995 prefix(src, dst);
996 emitByte(0x0F);
997 emitByte(0x6F);
998 emitOperandHelper(dst, src);
999 }
1000
1001 public final void movdqu(Register dst, Register src) {
1002 assert dst.isFpu();
1003 assert src.isFpu();
1004
1005 emitByte(0xF3);
1006 int encode = prefixqAndEncode(dst.encoding, src.encoding);
1007 emitByte(0x0F);
1008 emitByte(0x6F);
1009 emitByte(0xC0 | encode);
1010 }
1011
1012 public final void movdqu(Address dst, Register src) {
1013 assert src.isFpu();
1014
1015 emitByte(0xF3);
1016 prefix(dst, src);
1017 emitByte(0x0F);
1018 emitByte(0x7F);
1019 emitOperandHelper(src, dst);
1020 }
1021
1022 public final void movl(Register dst, int imm32) {
1023 int encode = prefixAndEncode(dst.encoding);
1024 emitByte(0xB8 | encode);
1025 emitInt(imm32);
1026 }
1027
1028 public final void movl(Register dst, Register src) {
1029 int encode = prefixAndEncode(dst.encoding, src.encoding);
1030 emitByte(0x8B);
1031 emitByte(0xC0 | encode);
1032 }
1033
1034 public final void movl(Register dst, Address src) {
1035 prefix(src, dst);
1036 emitByte(0x8B);
1037 emitOperandHelper(dst, src);
1038 }
1039
1040 public final void movl(Address dst, int imm32) {
1041 prefix(dst);
1042 emitByte(0xC7);
1043 emitOperandHelper(rax, dst);
1044 emitInt(imm32);
1045 }
1046
1047 public final void movl(Address dst, Register src) {
1048 prefix(dst, src);
1049 emitByte(0x89);
1050 emitOperandHelper(src, dst);
1051 }
1052
1053 /**
1054 * New CPUs require use of movsd and movss to avoid partial register stall
1055 * when loading from memory. But for old Opteron use movlpd instead of movsd.
1056 * The selection is done in {@link AMD64MacroAssembler#movdbl(Register, Address)}
1057 * and {@link AMD64MacroAssembler#movflt(Register, Register)}.
1058 */
1059 public final void movlpd(Register dst, Address src) {
1060 assert dst.isFpu();
1061 emitByte(0x66);
1062 prefix(src, dst);
1063 emitByte(0x0F);
1064 emitByte(0x12);
1065 emitOperandHelper(dst, src);
1066 }
1067
1068 public final void movlpd(Address dst, Register src) {
1069 assert src.isFpu();
1070 emitByte(0x66);
1071 prefix(dst, src);
1072 emitByte(0x0F);
1073 emitByte(0x13);
1074 emitOperandHelper(src, dst);
1075 }
1076
1077 public final void movq(Register dst, Address src) {
1078 if (dst.isFpu()) {
1079 emitByte(0xF3);
1080 prefixq(src, dst);
1081 emitByte(0x0F);
1082 emitByte(0x7E);
1083 emitOperandHelper(dst, src);
1084 } else {
1085 prefixq(src, dst);
1086 emitByte(0x8B);
1087 emitOperandHelper(dst, src);
1088 }
1089 }
1090
1091 public final void movq(Register dst, Register src) {
1092 int encode = prefixqAndEncode(dst.encoding, src.encoding);
1093 emitByte(0x8B);
1094 emitByte(0xC0 | encode);
1095 }
1096
1097 public final void movq(Address dst, Register src) {
1098 if (src.isFpu()) {
1099 emitByte(0x66);
1100 prefixq(dst, src);
1101 emitByte(0x0F);
1102 emitByte(0xD6);
1103 emitOperandHelper(src, dst);
1104 } else {
1105 prefixq(dst, src);
1106 emitByte(0x89);
1107 emitOperandHelper(src, dst);
1108 }
1109 }
1110
1111 public final void movsxb(Register dst, Address src) { // movsxb
1112 prefix(src, dst);
1113 emitByte(0x0F);
1114 emitByte(0xBE);
1115 emitOperandHelper(dst, src);
1116 }
1117
1118 public final void movsxb(Register dst, Register src) { // movsxb
1119 int encode = prefixAndEncode(dst.encoding, src.encoding, true);
1120 emitByte(0x0F);
1121 emitByte(0xBE);
1122 emitByte(0xC0 | encode);
1123 }
1124
1125 public final void movsd(Register dst, Register src) {
1126 assert dst.isFpu();
1127 assert src.isFpu();
1128 emitByte(0xF2);
1129 int encode = prefixAndEncode(dst.encoding, src.encoding);
1130 emitByte(0x0F);
1131 emitByte(0x10);
1132 emitByte(0xC0 | encode);
1133 }
1134
1135 public final void movsd(Register dst, Address src) {
1136 assert dst.isFpu();
1137 emitByte(0xF2);
1138 prefix(src, dst);
1139 emitByte(0x0F);
1140 emitByte(0x10);
1141 emitOperandHelper(dst, src);
1142 }
1143
1144 public final void movsd(Address dst, Register src) {
1145 assert src.isFpu();
1146 emitByte(0xF2);
1147 prefix(dst, src);
1148 emitByte(0x0F);
1149 emitByte(0x11);
1150 emitOperandHelper(src, dst);
1151 }
1152
1153 public final void movss(Register dst, Register src) {
1154 assert dst.isFpu();
1155 assert src.isFpu();
1156 emitByte(0xF3);
1157 int encode = prefixAndEncode(dst.encoding, src.encoding);
1158 emitByte(0x0F);
1159 emitByte(0x10);
1160 emitByte(0xC0 | encode);
1161 }
1162
1163 public final void movss(Register dst, Address src) {
1164 assert dst.isFpu();
1165 emitByte(0xF3);
1166 prefix(src, dst);
1167 emitByte(0x0F);
1168 emitByte(0x10);
1169 emitOperandHelper(dst, src);
1170 }
1171
1172 public final void movss(Address dst, Register src) {
1173 assert src.isFpu();
1174 emitByte(0xF3);
1175 prefix(dst, src);
1176 emitByte(0x0F);
1177 emitByte(0x11);
1178 emitOperandHelper(src, dst);
1179 }
1180
1181 public final void movswl(Register dst, Address src) {
1182 prefix(src, dst);
1183 emitByte(0x0F);
1184 emitByte(0xBF);
1185 emitOperandHelper(dst, src);
1186 }
1187
1188 public final void movsxw(Register dst, Register src) { // movsxw
1189 int encode = prefixAndEncode(dst.encoding, src.encoding);
1190 emitByte(0x0F);
1191 emitByte(0xBF);
1192 emitByte(0xC0 | encode);
1193 }
1194
1195 public final void movsxw(Register dst, Address src) { // movsxw
1196 prefix(src, dst);
1197 emitByte(0x0F);
1198 emitByte(0xBF);
1199 emitOperandHelper(dst, src);
1200 }
1201
1202 public final void movzxd(Register dst, Register src) { // movzxd
1203 int encode = prefixAndEncode(dst.encoding, src.encoding);
1204 emitByte(0x63);
1205 emitByte(0xC0 | encode);
1206 }
1207
1208 public final void movzxd(Register dst, Address src) { // movzxd
1209 prefix(src, dst);
1210 emitByte(0x63);
1211 emitOperandHelper(dst, src);
1212 }
1213
1214 public final void movw(Address dst, int imm16) {
1215 emitByte(0x66); // switch to 16-bit mode
1216 prefix(dst);
1217 emitByte(0xC7);
1218 emitOperandHelper(rax, dst);
1219 emitShort(imm16);
1220 }
1221
1222 public final void movw(Register dst, Address src) {
1223 emitByte(0x66);
1224 prefix(src, dst);
1225 emitByte(0x8B);
1226 emitOperandHelper(dst, src);
1227 }
1228
1229 public final void movw(Address dst, Register src) {
1230 emitByte(0x66);
1231 prefix(dst, src);
1232 emitByte(0x89);
1233 emitOperandHelper(src, dst);
1234 }
1235
1236 public final void movzxb(Register dst, Address src) { // movzxb
1237 prefix(src, dst);
1238 emitByte(0x0F);
1239 emitByte(0xB6);
1240 emitOperandHelper(dst, src);
1241 }
1242
1243 public final void movzxb(Register dst, Register src) { // movzxb
1244 int encode = prefixAndEncode(dst.encoding, src.encoding, true);
1245 emitByte(0x0F);
1246 emitByte(0xB6);
1247 emitByte(0xC0 | encode);
1248 }
1249
1250 public final void movzxl(Register dst, Address src) { // movzxw
1251 prefix(src, dst);
1252 emitByte(0x0F);
1253 emitByte(0xB7);
1254 emitOperandHelper(dst, src);
1255 }
1256
1257 public final void movzxl(Register dst, Register src) { // movzxw
1258 int encode = prefixAndEncode(dst.encoding, src.encoding);
1259 emitByte(0x0F);
1260 emitByte(0xB7);
1261 emitByte(0xC0 | encode);
1262 }
1263
1264 public final void mull(Address src) {
1265 prefix(src);
1266 emitByte(0xF7);
1267 emitOperandHelper(rsp, src);
1268 }
1269
1270 public final void mulsd(Register dst, Address src) {
1271 assert dst.isFpu();
1272 emitByte(0xF2);
1273 prefix(src, dst);
1274 emitByte(0x0F);
1275 emitByte(0x59);
1276 emitOperandHelper(dst, src);
1277 }
1278
1279 public final void mulsd(Register dst, Register src) {
1280 assert dst.isFpu();
1281 assert src.isFpu();
1282
1283 emitByte(0xF2);
1284 int encode = prefixAndEncode(dst.encoding, src.encoding);
1285 emitByte(0x0F);
1286 emitByte(0x59);
1287 emitByte(0xC0 | encode);
1288 }
1289
1290 public final void mulss(Register dst, Address src) {
1291 assert dst.isFpu();
1292
1293 emitByte(0xF3);
1294 prefix(src, dst);
1295 emitByte(0x0F);
1296 emitByte(0x59);
1297 emitOperandHelper(dst, src);
1298 }
1299
1300 public final void mulss(Register dst, Register src) {
1301 assert dst.isFpu();
1302 assert src.isFpu();
1303 emitByte(0xF3);
1304 int encode = prefixAndEncode(dst.encoding, src.encoding);
1305 emitByte(0x0F);
1306 emitByte(0x59);
1307 emitByte(0xC0 | encode);
1308 }
1309
1310 public final void negl(Register dst) {
1311 int encode = prefixAndEncode(dst.encoding);
1312 emitByte(0xF7);
1313 emitByte(0xD8 | encode);
1314 }
1315
1316 public final void ensureUniquePC() {
1317 nop();
1318 }
1319
1320 public final void nop() {
1321 nop(1);
1322 }
1323
1324 public void nop(int count) {
1325 int i = count;
1326 if (UseNormalNop) {
1327 assert i > 0 : " ";
1328 // The fancy nops aren't currently recognized by debuggers making it a
1329 // pain to disassemble code while debugging. If assert are on clearly
1330 // speed is not an issue so simply use the single byte traditional nop
1331 // to do alignment.
1332
1333 for (; i > 0; i--) {
1334 emitByte(0x90);
1335 }
1336 return;
1337 }
1338
1339 if (UseAddressNop) {
1340 //
1341 // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD.
1342 // 1: 0x90
1343 // 2: 0x66 0x90
1344 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1345 // 4: 0x0F 0x1F 0x40 0x00
1346 // 5: 0x0F 0x1F 0x44 0x00 0x00
1347 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1348 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1349 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1350 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1351 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1352 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1353
1354 // The rest coding is AMD specific - use consecutive Address nops
1355
1356 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
1357 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
1358 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1359 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1360 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1361 // Size prefixes (0x66) are added for larger sizes
1362
1363 while (i >= 22) {
1364 i -= 11;
1365 emitByte(0x66); // size prefix
1366 emitByte(0x66); // size prefix
1367 emitByte(0x66); // size prefix
1368 addrNop8();
1369 }
1370 // Generate first nop for size between 21-12
1371 switch (i) {
1372 case 21:
1373 i -= 1;
1374 emitByte(0x66); // size prefix
1375 // fall through
1376 case 20:
1377 // fall through
1378 case 19:
1379 i -= 1;
1380 emitByte(0x66); // size prefix
1381 // fall through
1382 case 18:
1383 // fall through
1384 case 17:
1385 i -= 1;
1386 emitByte(0x66); // size prefix
1387 // fall through
1388 case 16:
1389 // fall through
1390 case 15:
1391 i -= 8;
1392 addrNop8();
1393 break;
1394 case 14:
1395 case 13:
1396 i -= 7;
1397 addrNop7();
1398 break;
1399 case 12:
1400 i -= 6;
1401 emitByte(0x66); // size prefix
1402 addrNop5();
1403 break;
1404 default:
1405 assert i < 12;
1406 }
1407
1408 // Generate second nop for size between 11-1
1409 switch (i) {
1410 case 11:
1411 emitByte(0x66); // size prefix
1412 emitByte(0x66); // size prefix
1413 emitByte(0x66); // size prefix
1414 addrNop8();
1415 break;
1416 case 10:
1417 emitByte(0x66); // size prefix
1418 emitByte(0x66); // size prefix
1419 addrNop8();
1420 break;
1421 case 9:
1422 emitByte(0x66); // size prefix
1423 addrNop8();
1424 break;
1425 case 8:
1426 addrNop8();
1427 break;
1428 case 7:
1429 addrNop7();
1430 break;
1431 case 6:
1432 emitByte(0x66); // size prefix
1433 addrNop5();
1434 break;
1435 case 5:
1436 addrNop5();
1437 break;
1438 case 4:
1439 addrNop4();
1440 break;
1441 case 3:
1442 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
1443 emitByte(0x66); // size prefix
1444 emitByte(0x66); // size prefix
1445 emitByte(0x90); // nop
1446 break;
1447 case 2:
1448 emitByte(0x66); // size prefix
1449 emitByte(0x90); // nop
1450 break;
1451 case 1:
1452 emitByte(0x90); // nop
1453 break;
1454 default:
1455 assert i == 0;
1456 }
1457 return;
1458 }
1459
1460 // Using nops with size prefixes "0x66 0x90".
1461 // From AMD Optimization Guide:
1462 // 1: 0x90
1463 // 2: 0x66 0x90
1464 // 3: 0x66 0x66 0x90
1465 // 4: 0x66 0x66 0x66 0x90
1466 // 5: 0x66 0x66 0x90 0x66 0x90
1467 // 6: 0x66 0x66 0x90 0x66 0x66 0x90
1468 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
1469 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
1470 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
1471 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
1472 //
1473 while (i > 12) {
1474 i -= 4;
1475 emitByte(0x66); // size prefix
1476 emitByte(0x66);
1477 emitByte(0x66);
1478 emitByte(0x90); // nop
1479 }
1480 // 1 - 12 nops
1481 if (i > 8) {
1482 if (i > 9) {
1483 i -= 1;
1484 emitByte(0x66);
1485 }
1486 i -= 3;
1487 emitByte(0x66);
1488 emitByte(0x66);
1489 emitByte(0x90);
1490 }
1491 // 1 - 8 nops
1492 if (i > 4) {
1493 if (i > 6) {
1494 i -= 1;
1495 emitByte(0x66);
1496 }
1497 i -= 3;
1498 emitByte(0x66);
1499 emitByte(0x66);
1500 emitByte(0x90);
1501 }
1502 switch (i) {
1503 case 4:
1504 emitByte(0x66);
1505 emitByte(0x66);
1506 emitByte(0x66);
1507 emitByte(0x90);
1508 break;
1509 case 3:
1510 emitByte(0x66);
1511 emitByte(0x66);
1512 emitByte(0x90);
1513 break;
1514 case 2:
1515 emitByte(0x66);
1516 emitByte(0x90);
1517 break;
1518 case 1:
1519 emitByte(0x90);
1520 break;
1521 default:
1522 assert i == 0;
1523 }
1524 }
1525
1526 public final void notl(Register dst) {
1527 int encode = prefixAndEncode(dst.encoding);
1528 emitByte(0xF7);
1529 emitByte(0xD0 | encode);
1530 }
1531
1532 public final void orl(Address dst, int imm32) {
1533 prefix(dst);
1534 emitByte(0x81);
1535 emitOperandHelper(rcx, dst);
1536 emitInt(imm32);
1537 }
1538
1539 public final void orl(Register dst, int imm32) {
1540 prefix(dst);
1541 emitArith(0x81, 0xC8, dst, imm32);
1542 }
1543
1544 public final void orl(Register dst, Address src) {
1545 prefix(src, dst);
1546 emitByte(0x0B);
1547 emitOperandHelper(dst, src);
1548 }
1549
1550 public final void orl(Register dst, Register src) {
1551 prefixAndEncode(dst.encoding, src.encoding);
1552 emitArith(0x0B, 0xC0, dst, src);
1553 }
1554
1555 // generic
1556 public final void pop(Register dst) {
1557 int encode = prefixAndEncode(dst.encoding);
1558 emitByte(0x58 | encode);
1559 }
1560
1561 public final void prefetchPrefix(Address src) {
1562 prefix(src);
1563 emitByte(0x0F);
1564 }
1565
1566 public final void prefetchnta(Address src) {
1567 prefetchPrefix(src);
1568 emitByte(0x18);
1569 emitOperandHelper(rax, src); // 0, src
1570 }
1571
1572 public final void prefetchr(Address src) {
1573 prefetchPrefix(src);
1574 emitByte(0x0D);
1575 emitOperandHelper(rax, src); // 0, src
1576 }
1577
1578 public final void prefetcht0(Address src) {
1579 prefetchPrefix(src);
1580 emitByte(0x18);
1581 emitOperandHelper(rcx, src); // 1, src
1582
1583 }
1584
1585 public final void prefetcht1(Address src) {
1586 prefetchPrefix(src);
1587 emitByte(0x18);
1588 emitOperandHelper(rdx, src); // 2, src
1589 }
1590
1591 public final void prefetcht2(Address src) {
1592 prefetchPrefix(src);
1593 emitByte(0x18);
1594 emitOperandHelper(rbx, src); // 3, src
1595 }
1596
1597 public final void prefetchw(Address src) {
1598 prefetchPrefix(src);
1599 emitByte(0x0D);
1600 emitOperandHelper(rcx, src); // 1, src
1601 }
1602
1603 public final void pshufd(Register dst, Register src, int mode) {
1604 assert dst.isFpu();
1605 assert src.isFpu();
1606 assert isUByte(mode) : "invalid value";
1607
1608 emitByte(0x66);
1609 int encode = prefixAndEncode(dst.encoding, src.encoding);
1610 emitByte(0x0F);
1611 emitByte(0x70);
1612 emitByte(0xC0 | encode);
1613 emitByte(mode & 0xFF);
1614 }
1615
1616 public final void pshufd(Register dst, Address src, int mode) {
1617 assert dst.isFpu();
1618 assert isUByte(mode) : "invalid value";
1619
1620 emitByte(0x66);
1621 prefix(src, dst);
1622 emitByte(0x0F);
1623 emitByte(0x70);
1624 emitOperandHelper(dst, src);
1625 emitByte(mode & 0xFF);
1626
1627 }
1628
1629 public final void pshuflw(Register dst, Register src, int mode) {
1630 assert dst.isFpu();
1631 assert src.isFpu();
1632 assert isUByte(mode) : "invalid value";
1633
1634 emitByte(0xF2);
1635 int encode = prefixAndEncode(dst.encoding, src.encoding);
1636 emitByte(0x0F);
1637 emitByte(0x70);
1638 emitByte(0xC0 | encode);
1639 emitByte(mode & 0xFF);
1640 }
1641
1642 public final void pshuflw(Register dst, Address src, int mode) {
1643 assert dst.isFpu();
1644 assert isUByte(mode) : "invalid value";
1645
1646 emitByte(0xF2);
1647 prefix(src, dst); // QQ new
1648 emitByte(0x0F);
1649 emitByte(0x70);
1650 emitOperandHelper(dst, src);
1651 emitByte(mode & 0xFF);
1652 }
1653
1654 public final void psrlq(Register dst, int shift) {
1655 assert dst.isFpu();
1656 // HMM Table D-1 says sse2 or mmx
1657
1658 int encode = prefixqAndEncode(xmm2.encoding, dst.encoding);
1659 emitByte(0x66);
1660 emitByte(0x0F);
1661 emitByte(0x73);
1662 emitByte(0xC0 | encode);
1663 emitByte(shift);
1664 }
1665
1666 public final void punpcklbw(Register dst, Register src) {
1667 assert dst.isFpu();
1668 assert src.isFpu();
1669 emitByte(0x66);
1670 int encode = prefixAndEncode(dst.encoding, src.encoding);
1671 emitByte(0x0F);
1672 emitByte(0x60);
1673 emitByte(0xC0 | encode);
1674 }
1675
1676 public final void push(int imm32) {
1677 // in 64bits we push 64bits onto the stack but only
1678 // take a 32bit immediate
1679 emitByte(0x68);
1680 emitInt(imm32);
1681 }
1682
1683 public final void push(Register src) {
1684 int encode = prefixAndEncode(src.encoding);
1685 emitByte(0x50 | encode);
1686 }
1687
1688 public final void pushf() {
1689 emitByte(0x9C);
1690 }
1691
1692 public final void pxor(Register dst, Address src) {
1693 assert dst.isFpu();
1694
1695 emitByte(0x66);
1696 prefix(src, dst);
1697 emitByte(0x0F);
1698 emitByte(0xEF);
1699 emitOperandHelper(dst, src);
1700 }
1701
1702 public final void pxor(Register dst, Register src) {
1703 assert dst.isFpu();
1704 assert src.isFpu();
1705
1706 emitByte(0x66);
1707 int encode = prefixAndEncode(dst.encoding, src.encoding);
1708 emitByte(0x0F);
1709 emitByte(0xEF);
1710 emitByte(0xC0 | encode);
1711
1712 }
1713
1714 public final void rcll(Register dst, int imm8) {
1715 assert isShiftCount(imm8) : "illegal shift count";
1716 int encode = prefixAndEncode(dst.encoding);
1717 if (imm8 == 1) {
1718 emitByte(0xD1);
1719 emitByte(0xD0 | encode);
1720 } else {
1721 emitByte(0xC1);
1722 emitByte(0xD0 | encode);
1723 emitByte(imm8);
1724 }
1725 }
1726
1727 public final void pause() {
1728 emitByte(0xF3);
1729 emitByte(0x90);
1730 }
1731
1732 // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx heap words.
1733 public final void repeatMoveWords() {
1734 emitByte(0xF3);
1735 emitByte(Prefix.REXW);
1736 emitByte(0xA5);
1737 }
1738
1739 // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx bytes.
1740 public final void repeatMoveBytes() {
1741 emitByte(0xF3);
1742 emitByte(Prefix.REXW);
1743 emitByte(0xA4);
1744 }
1745
1746 // sets X86.rcx pointer sized words with X86.rax, value at [edi]
1747 // generic
1748 public final void repSet() { // repSet
1749 emitByte(0xF3);
1750 // STOSQ
1751 emitByte(Prefix.REXW);
1752 emitByte(0xAB);
1753 }
1754
1755 // scans X86.rcx pointer sized words at [edi] for occurance of X86.rax,
1756 // generic
1757 public final void repneScan() { // repneScan
1758 emitByte(0xF2);
1759 // SCASQ
1760 emitByte(Prefix.REXW);
1761 emitByte(0xAF);
1762 }
1763
1764 // scans X86.rcx 4 byte words at [edi] for occurance of X86.rax,
1765 // generic
1766 public final void repneScanl() { // repneScan
1767 emitByte(0xF2);
1768 // SCASL
1769 emitByte(0xAF);
1770 }
1771
1772 public final void ret(int imm16) {
1773 if (imm16 == 0) {
1774 emitByte(0xC3);
1775 } else {
1776 emitByte(0xC2);
1777 emitShort(imm16);
1778 }
1779 }
1780
1781 public final void sarl(Register dst, int imm8) {
1782 int encode = prefixAndEncode(dst.encoding);
1783 assert isShiftCount(imm8) : "illegal shift count";
1784 if (imm8 == 1) {
1785 emitByte(0xD1);
1786 emitByte(0xF8 | encode);
1787 } else {
1788 emitByte(0xC1);
1789 emitByte(0xF8 | encode);
1790 emitByte(imm8);
1791 }
1792 }
1793
1794 public final void sarl(Register dst) {
1795 int encode = prefixAndEncode(dst.encoding);
1796 emitByte(0xD3);
1797 emitByte(0xF8 | encode);
1798 }
1799
1800 public final void sbbl(Address dst, int imm32) {
1801 prefix(dst);
1802 emitArithOperand(0x81, rbx, dst, imm32);
1803 }
1804
1805 public final void sbbl(Register dst, int imm32) {
1806 prefix(dst);
1807 emitArith(0x81, 0xD8, dst, imm32);
1808 }
1809
1810 public final void sbbl(Register dst, Address src) {
1811 prefix(src, dst);
1812 emitByte(0x1B);
1813 emitOperandHelper(dst, src);
1814 }
1815
1816 public final void sbbl(Register dst, Register src) {
1817 prefixAndEncode(dst.encoding, src.encoding);
1818 emitArith(0x1B, 0xC0, dst, src);
1819 }
1820
1821 public final void setb(ConditionFlag cc, Register dst) {
1822 assert 0 <= cc.value && cc.value < 16 : "illegal cc";
1823 int encode = prefixAndEncode(dst.encoding, true);
1824 emitByte(0x0F);
1825 emitByte(0x90 | cc.value);
1826 emitByte(0xC0 | encode);
1827 }
1828
1829 public final void shll(Register dst, int imm8) {
1830 assert isShiftCount(imm8) : "illegal shift count";
1831 int encode = prefixAndEncode(dst.encoding);
1832 if (imm8 == 1) {
1833 emitByte(0xD1);
1834 emitByte(0xE0 | encode);
1835 } else {
1836 emitByte(0xC1);
1837 emitByte(0xE0 | encode);
1838 emitByte(imm8);
1839 }
1840 }
1841
1842 public final void shll(Register dst) {
1843 int encode = prefixAndEncode(dst.encoding);
1844 emitByte(0xD3);
1845 emitByte(0xE0 | encode);
1846 }
1847
1848 public final void shrl(Register dst, int imm8) {
1849 assert isShiftCount(imm8) : "illegal shift count";
1850 int encode = prefixAndEncode(dst.encoding);
1851 emitByte(0xC1);
1852 emitByte(0xE8 | encode);
1853 emitByte(imm8);
1854 }
1855
1856 public final void shrl(Register dst) {
1857 int encode = prefixAndEncode(dst.encoding);
1858 emitByte(0xD3);
1859 emitByte(0xE8 | encode);
1860 }
1861
1862 // copies a single word from [esi] to [edi]
1863 public final void smovl() {
1864 emitByte(0xA5);
1865 }
1866
1867 public final void sqrtsd(Register dst, Register src) {
1868 assert dst.isFpu();
1869 assert src.isFpu();
1870 // HMM Table D-1 says sse2
1871 // assert is64 || target.supportsSSE();
1872 emitByte(0xF2);
1873 int encode = prefixAndEncode(dst.encoding, src.encoding);
1874 emitByte(0x0F);
1875 emitByte(0x51);
1876 emitByte(0xC0 | encode);
1877 }
1878
1879 public final void subl(Address dst, int imm32) {
1880 prefix(dst);
1881 if (isByte(imm32)) {
1882 emitByte(0x83);
1883 emitOperandHelper(rbp, dst);
1884 emitByte(imm32 & 0xFF);
1885 } else {
1886 emitByte(0x81);
1887 emitOperandHelper(rbp, dst);
1888 emitInt(imm32);
1889 }
1890 }
1891
1892 public final void subl(Register dst, int imm32) {
1893 prefix(dst);
1894 emitArith(0x81, 0xE8, dst, imm32);
1895 }
1896
1897 public final void subl(Address dst, Register src) {
1898 prefix(dst, src);
1899 emitByte(0x29);
1900 emitOperandHelper(src, dst);
1901 }
1902
1903 public final void subl(Register dst, Address src) {
1904 prefix(src, dst);
1905 emitByte(0x2B);
1906 emitOperandHelper(dst, src);
1907 }
1908
1909 public final void subl(Register dst, Register src) {
1910 prefixAndEncode(dst.encoding, src.encoding);
1911 emitArith(0x2B, 0xC0, dst, src);
1912 }
1913
1914 public final void subsd(Register dst, Register src) {
1915 assert dst.isFpu();
1916 assert src.isFpu();
1917 emitByte(0xF2);
1918 int encode = prefixAndEncode(dst.encoding, src.encoding);
1919 emitByte(0x0F);
1920 emitByte(0x5C);
1921 emitByte(0xC0 | encode);
1922 }
1923
1924 public final void subsd(Register dst, Address src) {
1925 assert dst.isFpu();
1926
1927 emitByte(0xF2);
1928 prefix(src, dst);
1929 emitByte(0x0F);
1930 emitByte(0x5C);
1931 emitOperandHelper(dst, src);
1932 }
1933
1934 public final void subss(Register dst, Register src) {
1935 assert dst.isFpu();
1936 assert src.isFpu();
1937 emitByte(0xF3);
1938 int encode = prefixAndEncode(dst.encoding, src.encoding);
1939 emitByte(0x0F);
1940 emitByte(0x5C);
1941 emitByte(0xC0 | encode);
1942 }
1943
1944 public final void subss(Register dst, Address src) {
1945 assert dst.isFpu();
1946
1947 emitByte(0xF3);
1948 prefix(src, dst);
1949 emitByte(0x0F);
1950 emitByte(0x5C);
1951 emitOperandHelper(dst, src);
1952 }
1953
1954 public final void testb(Register dst, int imm8) {
1955 prefixAndEncode(dst.encoding, true);
1956 emitArithB(0xF6, 0xC0, dst, imm8);
1957 }
1958
1959 public final void testl(Register dst, int imm32) {
1960 // not using emitArith because test
1961 // doesn't support sign-extension of
1962 // 8bit operands
1963 int encode = dst.encoding;
1964 if (encode == 0) {
1965 emitByte(0xA9);
1966 } else {
1967 encode = prefixAndEncode(encode);
1968 emitByte(0xF7);
1969 emitByte(0xC0 | encode);
1970 }
1971 emitInt(imm32);
1972 }
1973
1974 public final void testl(Register dst, Register src) {
1975 prefixAndEncode(dst.encoding, src.encoding);
1976 emitArith(0x85, 0xC0, dst, src);
1977 }
1978
1979 public final void testl(Register dst, Address src) {
1980 prefix(src, dst);
1981 emitByte(0x85);
1982 emitOperandHelper(dst, src);
1983 }
1984
1985 public final void ucomisd(Register dst, Address src) {
1986 assert dst.isFpu();
1987 emitByte(0x66);
1988 ucomiss(dst, src);
1989 }
1990
1991 public final void ucomisd(Register dst, Register src) {
1992 assert dst.isFpu();
1993 assert src.isFpu();
1994 emitByte(0x66);
1995 ucomiss(dst, src);
1996 }
1997
1998 public final void ucomiss(Register dst, Address src) {
1999 assert dst.isFpu();
2000
2001 prefix(src, dst);
2002 emitByte(0x0F);
2003 emitByte(0x2E);
2004 emitOperandHelper(dst, src);
2005 }
2006
2007 public final void ucomiss(Register dst, Register src) {
2008 assert dst.isFpu();
2009 assert src.isFpu();
2010 int encode = prefixAndEncode(dst.encoding, src.encoding);
2011 emitByte(0x0F);
2012 emitByte(0x2E);
2013 emitByte(0xC0 | encode);
2014 }
2015
2016 public final void xaddl(Address dst, Register src) {
2017 assert src.isFpu();
2018
2019 prefix(dst, src);
2020 emitByte(0x0F);
2021 emitByte(0xC1);
2022 emitOperandHelper(src, dst);
2023 }
2024
2025 public final void xchgl(Register dst, Address src) { // xchg
2026 prefix(src, dst);
2027 emitByte(0x87);
2028 emitOperandHelper(dst, src);
2029 }
2030
2031 public final void xchgl(Register dst, Register src) {
2032 int encode = prefixAndEncode(dst.encoding, src.encoding);
2033 emitByte(0x87);
2034 emitByte(0xc0 | encode);
2035 }
2036
2037 public final void xorl(Register dst, int imm32) {
2038 prefix(dst);
2039 emitArith(0x81, 0xF0, dst, imm32);
2040 }
2041
2042 public final void xorl(Register dst, Address src) {
2043 prefix(src, dst);
2044 emitByte(0x33);
2045 emitOperandHelper(dst, src);
2046 }
2047
2048 public final void xorl(Register dst, Register src) {
2049 prefixAndEncode(dst.encoding, src.encoding);
2050 emitArith(0x33, 0xC0, dst, src);
2051 }
2052
2053 public final void andpd(Register dst, Register src) {
2054 emitByte(0x66);
2055 andps(dst, src);
2056 }
2057
2058 public final void andpd(Register dst, Address src) {
2059 emitByte(0x66);
2060 andps(dst, src);
2061 }
2062
2063 public final void andps(Register dst, Register src) {
2064 assert dst.isFpu() && src.isFpu();
2065 int encode = prefixAndEncode(dst.encoding, src.encoding);
2066 emitByte(0x0F);
2067 emitByte(0x54);
2068 emitByte(0xC0 | encode);
2069 }
2070
2071 public final void andps(Register dst, Address src) {
2072 assert dst.isFpu();
2073 prefix(src, dst);
2074 emitByte(0x0F);
2075 emitByte(0x54);
2076 emitOperandHelper(dst, src);
2077 }
2078
2079 public final void orpd(Register dst, Register src) {
2080 emitByte(0x66);
2081 orps(dst, src);
2082 }
2083
2084 public final void orpd(Register dst, Address src) {
2085 emitByte(0x66);
2086 orps(dst, src);
2087 }
2088
2089 public final void orps(Register dst, Register src) {
2090 assert dst.isFpu() && src.isFpu();
2091 int encode = prefixAndEncode(dst.encoding, src.encoding);
2092 emitByte(0x0F);
2093 emitByte(0x56);
2094 emitByte(0xC0 | encode);
2095 }
2096
2097 public final void orps(Register dst, Address src) {
2098 assert dst.isFpu();
2099 prefix(src, dst);
2100 emitByte(0x0F);
2101 emitByte(0x56);
2102 emitOperandHelper(dst, src);
2103 }
2104
2105 public final void xorpd(Register dst, Register src) {
2106 emitByte(0x66);
2107 xorps(dst, src);
2108 }
2109
2110 public final void xorpd(Register dst, Address src) {
2111 emitByte(0x66);
2112 xorps(dst, src);
2113 }
2114
2115 public final void xorps(Register dst, Register src) {
2116 assert dst.isFpu() && src.isFpu();
2117 int encode = prefixAndEncode(dst.encoding, src.encoding);
2118 emitByte(0x0F);
2119 emitByte(0x57);
2120 emitByte(0xC0 | encode);
2121 }
2122
2123 public final void xorps(Register dst, Address src) {
2124 assert dst.isFpu();
2125 prefix(src, dst);
2126 emitByte(0x0F);
2127 emitByte(0x57);
2128 emitOperandHelper(dst, src);
2129 }
2130
2131 // 32bit only pieces of the assembler
2132
2133 public final void decl(Register dst) {
2134 // Don't use it directly. Use Macrodecrementl() instead.
2135 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2136 int encode = prefixAndEncode(dst.encoding);
2137 emitByte(0xFF);
2138 emitByte(0xC8 | encode);
2139 }
2140
2141 public final void incl(Register dst) {
2142 // Don't use it directly. Use Macroincrementl() instead.
2143 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2144 int encode = prefixAndEncode(dst.encoding);
2145 emitByte(0xFF);
2146 emitByte(0xC0 | encode);
2147 }
2148
2149 int prefixAndEncode(int regEnc) {
2150 return prefixAndEncode(regEnc, false);
2151 }
2152
2153 int prefixAndEncode(int regEnc, boolean byteinst) {
2154 if (regEnc >= 8) {
2155 emitByte(Prefix.REXB);
2156 return regEnc - 8;
2157 } else if (byteinst && regEnc >= 4) {
2158 emitByte(Prefix.REX);
2159 }
2160 return regEnc;
2161 }
2162
2163 int prefixqAndEncode(int regEnc) {
2164 if (regEnc < 8) {
2165 emitByte(Prefix.REXW);
2166 return regEnc;
2167 } else {
2168 emitByte(Prefix.REXWB);
2169 return regEnc - 8;
2170 }
2171 }
2172
2173 int prefixAndEncode(int dstEnc, int srcEnc) {
2174 return prefixAndEncode(dstEnc, srcEnc, false);
2175 }
2176
2177 int prefixAndEncode(int dstEncoding, int srcEncoding, boolean byteinst) {
2178 int srcEnc = srcEncoding;
2179 int dstEnc = dstEncoding;
2180 if (dstEnc < 8) {
2181 if (srcEnc >= 8) {
2182 emitByte(Prefix.REXB);
2183 srcEnc -= 8;
2184 } else if (byteinst && srcEnc >= 4) {
2185 emitByte(Prefix.REX);
2186 }
2187 } else {
2188 if (srcEnc < 8) {
2189 emitByte(Prefix.REXR);
2190 } else {
2191 emitByte(Prefix.REXRB);
2192 srcEnc -= 8;
2193 }
2194 dstEnc -= 8;
2195 }
2196 return dstEnc << 3 | srcEnc;
2197 }
2198
2199 /**
2200 * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand prefix. If the given
2201 * operands exceed 3 bits, the 4th bit is encoded in the prefix.
2202 *
2203 * @param regEncoding the encoding of the register part of the ModRM-Byte
2204 * @param rmEncoding the encoding of the r/m part of the ModRM-Byte
2205 * @return the lower 6 bits of the ModRM-Byte that should be emitted
2206 */
2207 private int prefixqAndEncode(int regEncoding, int rmEncoding) {
2208 int rmEnc = rmEncoding;
2209 int regEnc = regEncoding;
2210 if (regEnc < 8) {
2211 if (rmEnc < 8) {
2212 emitByte(Prefix.REXW);
2213 } else {
2214 emitByte(Prefix.REXWB);
2215 rmEnc -= 8;
2216 }
2217 } else {
2218 if (rmEnc < 8) {
2219 emitByte(Prefix.REXWR);
2220 } else {
2221 emitByte(Prefix.REXWRB);
2222 rmEnc -= 8;
2223 }
2224 regEnc -= 8;
2225 }
2226 return regEnc << 3 | rmEnc;
2227 }
2228
2229 private void prefix(Register reg) {
2230 if (reg.encoding >= 8) {
2231 emitByte(Prefix.REXB);
2232 }
2233 }
2234
2235 private static boolean needsRex(Value value) {
2236 return isRegister(value) && asRegister(value).encoding >= MinEncodingNeedsRex;
2237 }
2238
2239
2240 private void prefix(Address adr) {
2241 if (needsRex(adr.getBase())) {
2242 if (needsRex(adr.getIndex())) {
2243 emitByte(Prefix.REXXB);
2244 } else {
2245 emitByte(Prefix.REXB);
2246 }
2247 } else {
2248 if (needsRex(adr.getIndex())) {
2249 emitByte(Prefix.REXX);
2250 }
2251 }
2252 }
2253
2254 private void prefixq(Address adr) {
2255 if (needsRex(adr.getBase())) {
2256 if (needsRex(adr.getIndex())) {
2257 emitByte(Prefix.REXWXB);
2258 } else {
2259 emitByte(Prefix.REXWB);
2260 }
2261 } else {
2262 if (needsRex(adr.getIndex())) {
2263 emitByte(Prefix.REXWX);
2264 } else {
2265 emitByte(Prefix.REXW);
2266 }
2267 }
2268 }
2269
2270 private void prefix(Address adr, Register reg) {
2271 if (reg.encoding < 8) {
2272 if (needsRex(adr.getBase())) {
2273 if (needsRex(adr.getIndex())) {
2274 emitByte(Prefix.REXXB);
2275 } else {
2276 emitByte(Prefix.REXB);
2277 }
2278 } else {
2279 if (needsRex(adr.getIndex())) {
2280 emitByte(Prefix.REXX);
2281 } else if (reg.encoding >= 4) {
2282 emitByte(Prefix.REX);
2283 }
2284 }
2285 } else {
2286 if (needsRex(adr.getBase())) {
2287 if (needsRex(adr.getIndex())) {
2288 emitByte(Prefix.REXRXB);
2289 } else {
2290 emitByte(Prefix.REXRB);
2291 }
2292 } else {
2293 if (needsRex(adr.getIndex())) {
2294 emitByte(Prefix.REXRX);
2295 } else {
2296 emitByte(Prefix.REXR);
2297 }
2298 }
2299 }
2300 }
2301
2302 private void prefixq(Address adr, Register src) {
2303 if (src.encoding < 8) {
2304 if (needsRex(adr.getBase())) {
2305 if (needsRex(adr.getIndex())) {
2306 emitByte(Prefix.REXWXB);
2307 } else {
2308 emitByte(Prefix.REXWB);
2309 }
2310 } else {
2311 if (needsRex(adr.getIndex())) {
2312 emitByte(Prefix.REXWX);
2313 } else {
2314 emitByte(Prefix.REXW);
2315 }
2316 }
2317 } else {
2318 if (needsRex(adr.getBase())) {
2319 if (needsRex(adr.getIndex())) {
2320 emitByte(Prefix.REXWRXB);
2321 } else {
2322 emitByte(Prefix.REXWRB);
2323 }
2324 } else {
2325 if (needsRex(adr.getIndex())) {
2326 emitByte(Prefix.REXWRX);
2327 } else {
2328 emitByte(Prefix.REXWR);
2329 }
2330 }
2331 }
2332 }
2333
2334 public final void addq(Address dst, int imm32) {
2335 prefixq(dst);
2336 emitArithOperand(0x81, rax, dst, imm32);
2337 }
2338
2339 public final void addq(Address dst, Register src) {
2340 prefixq(dst, src);
2341 emitByte(0x01);
2342 emitOperandHelper(src, dst);
2343 }
2344
2345 public final void addq(Register dst, int imm32) {
2346 prefixqAndEncode(dst.encoding);
2347 emitArith(0x81, 0xC0, dst, imm32);
2348 }
2349
2350 public final void addq(Register dst, Address src) {
2351 prefixq(src, dst);
2352 emitByte(0x03);
2353 emitOperandHelper(dst, src);
2354 }
2355
2356 public final void addq(Register dst, Register src) {
2357 prefixqAndEncode(dst.encoding, src.encoding);
2358 emitArith(0x03, 0xC0, dst, src);
2359 }
2360
2361 public final void andq(Register dst, int imm32) {
2362 prefixqAndEncode(dst.encoding);
2363 emitArith(0x81, 0xE0, dst, imm32);
2364 }
2365
2366 public final void andq(Register dst, Address src) {
2367 prefixq(src, dst);
2368 emitByte(0x23);
2369 emitOperandHelper(dst, src);
2370 }
2371
2372 public final void andq(Register dst, Register src) {
2373 prefixqAndEncode(dst.encoding, src.encoding);
2374 emitArith(0x23, 0xC0, dst, src);
2375 }
2376
2377 public final void bswapq(Register reg) {
2378 int encode = prefixqAndEncode(reg.encoding);
2379 emitByte(0x0F);
2380 emitByte(0xC8 | encode);
2381 }
2382
2383 public final void cdqq() {
2384 emitByte(Prefix.REXW);
2385 emitByte(0x99);
2386 }
2387
2388 public final void cmovq(ConditionFlag cc, Register dst, Register src) {
2389 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2390 emitByte(0x0F);
2391 emitByte(0x40 | cc.value);
2392 emitByte(0xC0 | encode);
2393 }
2394
2395 public final void cmovq(ConditionFlag cc, Register dst, Address src) {
2396 prefixq(src, dst);
2397 emitByte(0x0F);
2398 emitByte(0x40 | cc.value);
2399 emitOperandHelper(dst, src);
2400 }
2401
2402 public final void cmpq(Address dst, int imm32) {
2403 prefixq(dst);
2404 emitByte(0x81);
2405 emitOperandHelper(rdi, dst);
2406 emitInt(imm32);
2407 }
2408
2409 public final void cmpq(Register dst, int imm32) {
2410 prefixqAndEncode(dst.encoding);
2411 emitArith(0x81, 0xF8, dst, imm32);
2412 }
2413
2414 public final void cmpq(Address dst, Register src) {
2415 prefixq(dst, src);
2416 emitByte(0x3B);
2417 emitOperandHelper(src, dst);
2418 }
2419
2420 public final void cmpq(Register dst, Register src) {
2421 prefixqAndEncode(dst.encoding, src.encoding);
2422 emitArith(0x3B, 0xC0, dst, src);
2423 }
2424
2425 public final void cmpq(Register dst, Address src) {
2426 prefixq(src, dst);
2427 emitByte(0x3B);
2428 emitOperandHelper(dst, src);
2429 }
2430
2431 public final void cmpxchgq(Register reg, Address adr) {
2432 prefixq(adr, reg);
2433 emitByte(0x0F);
2434 emitByte(0xB1);
2435 emitOperandHelper(reg, adr);
2436 }
2437
2438 public final void cvtsi2sdq(Register dst, Register src) {
2439 assert dst.isFpu();
2440 emitByte(0xF2);
2441 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2442 emitByte(0x0F);
2443 emitByte(0x2A);
2444 emitByte(0xC0 | encode);
2445 }
2446
2447 public final void cvtsi2ssq(Register dst, Register src) {
2448 assert dst.isFpu();
2449 emitByte(0xF3);
2450 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2451 emitByte(0x0F);
2452 emitByte(0x2A);
2453 emitByte(0xC0 | encode);
2454 }
2455
2456 public final void cvttsd2siq(Register dst, Register src) {
2457 assert src.isFpu();
2458 emitByte(0xF2);
2459 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2460 emitByte(0x0F);
2461 emitByte(0x2C);
2462 emitByte(0xC0 | encode);
2463 }
2464
2465 public final void cvttss2siq(Register dst, Register src) {
2466 assert src.isFpu();
2467 emitByte(0xF3);
2468 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2469 emitByte(0x0F);
2470 emitByte(0x2C);
2471 emitByte(0xC0 | encode);
2472 }
2473
2474 public final void decq(Register dst) {
2475 // Don't use it directly. Use Macrodecrementq() instead.
2476 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2477 int encode = prefixqAndEncode(dst.encoding);
2478 emitByte(0xFF);
2479 emitByte(0xC8 | encode);
2480 }
2481
2482 public final void decq(Address dst) {
2483 // Don't use it directly. Use Macrodecrementq() instead.
2484 prefixq(dst);
2485 emitByte(0xFF);
2486 emitOperandHelper(rcx, dst);
2487 }
2488
2489 public final void divq(Register src) {
2490 int encode = prefixqAndEncode(src.encoding);
2491 emitByte(0xF7);
2492 emitByte(0xF0 | encode);
2493 }
2494
2495 public final void idivq(Register src) {
2496 int encode = prefixqAndEncode(src.encoding);
2497 emitByte(0xF7);
2498 emitByte(0xF8 | encode);
2499 }
2500
2501 public final void imulq(Register dst, Register src) {
2502 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2503 emitByte(0x0F);
2504 emitByte(0xAF);
2505 emitByte(0xC0 | encode);
2506 }
2507
2508 public final void imulq(Register dst, Register src, int value) {
2509 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2510 if (isByte(value)) {
2511 emitByte(0x6B);
2512 emitByte(0xC0 | encode);
2513 emitByte(value);
2514 } else {
2515 emitByte(0x69);
2516 emitByte(0xC0 | encode);
2517 emitInt(value);
2518 }
2519 }
2520
2521 public final void incq(Register dst) {
2522 // Don't use it directly. Use Macroincrementq() instead.
2523 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2524 int encode = prefixqAndEncode(dst.encoding);
2525 emitByte(0xFF);
2526 emitByte(0xC0 | encode);
2527 }
2528
2529 public final void incq(Address dst) {
2530 // Don't use it directly. Use Macroincrementq() instead.
2531 prefixq(dst);
2532 emitByte(0xFF);
2533 emitOperandHelper(rax, dst);
2534 }
2535
2536 public final void movq(Register dst, long imm64) {
2537 int encode = prefixqAndEncode(dst.encoding);
2538 emitByte(0xB8 | encode);
2539 emitLong(imm64);
2540 }
2541
2542 public final void movdq(Register dst, Register src) {
2543
2544 // table D-1 says MMX/SSE2
2545 emitByte(0x66);
2546
2547 if (dst.isFpu()) {
2548 assert dst.isFpu();
2549 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2550 emitByte(0x0F);
2551 emitByte(0x6E);
2552 emitByte(0xC0 | encode);
2553 } else if (src.isFpu()) {
2554
2555 // swap src/dst to get correct prefix
2556 int encode = prefixqAndEncode(src.encoding, dst.encoding);
2557 emitByte(0x0F);
2558 emitByte(0x7E);
2559 emitByte(0xC0 | encode);
2560 } else {
2561 throw new InternalError("should not reach here");
2562 }
2563 }
2564
2565 public final void movsbq(Register dst, Address src) {
2566 prefixq(src, dst);
2567 emitByte(0x0F);
2568 emitByte(0xBE);
2569 emitOperandHelper(dst, src);
2570 }
2571
2572 public final void movsbq(Register dst, Register src) {
2573 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2574 emitByte(0x0F);
2575 emitByte(0xBE);
2576 emitByte(0xC0 | encode);
2577 }
2578
2579 public final void movslq(Register dst, int imm32) {
2580 int encode = prefixqAndEncode(dst.encoding);
2581 emitByte(0xC7 | encode);
2582 emitInt(imm32);
2583 // dbx shows movslq(X86.rcx, 3) as movq $0x0000000049000000,(%X86.rbx)
2584 // and movslq(X86.r8, 3); as movl $0x0000000048000000,(%X86.rbx)
2585 // as a result we shouldn't use until tested at runtime...
2586 throw new InternalError("untested");
2587 }
2588
2589 public final void movslq(Address dst, int imm32) {
2590 prefixq(dst);
2591 emitByte(0xC7);
2592 emitOperandHelper(rax, dst);
2593 emitInt(imm32);
2594 }
2595
2596 public final void movslq(Register dst, Address src) {
2597 prefixq(src, dst);
2598 emitByte(0x63);
2599 emitOperandHelper(dst, src);
2600 }
2601
2602 public final void movslq(Register dst, Register src) {
2603 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2604 emitByte(0x63);
2605 emitByte(0xC0 | encode);
2606 }
2607
2608 public final void movswq(Register dst, Address src) {
2609 prefixq(src, dst);
2610 emitByte(0x0F);
2611 emitByte(0xBF);
2612 emitOperandHelper(dst, src);
2613 }
2614
2615 public final void movswq(Register dst, Register src) {
2616 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2617 emitByte(0x0F);
2618 emitByte(0xBF);
2619 emitByte(0xC0 | encode);
2620 }
2621
2622 public final void movzbq(Register dst, Address src) {
2623 prefixq(src, dst);
2624 emitByte(0x0F);
2625 emitByte(0xB6);
2626 emitOperandHelper(dst, src);
2627 }
2628
2629 public final void movzbq(Register dst, Register src) {
2630 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2631 emitByte(0x0F);
2632 emitByte(0xB6);
2633 emitByte(0xC0 | encode);
2634 }
2635
2636 public final void movzwq(Register dst, Address src) {
2637 prefixq(src, dst);
2638 emitByte(0x0F);
2639 emitByte(0xB7);
2640 emitOperandHelper(dst, src);
2641 }
2642
2643 public final void movzwq(Register dst, Register src) {
2644 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2645 emitByte(0x0F);
2646 emitByte(0xB7);
2647 emitByte(0xC0 | encode);
2648 }
2649
2650 public final void negq(Register dst) {
2651 int encode = prefixqAndEncode(dst.encoding);
2652 emitByte(0xF7);
2653 emitByte(0xD8 | encode);
2654 }
2655
2656 public final void notq(Register dst) {
2657 int encode = prefixqAndEncode(dst.encoding);
2658 emitByte(0xF7);
2659 emitByte(0xD0 | encode);
2660 }
2661
2662 public final void orq(Address dst, int imm32) {
2663 prefixq(dst);
2664 emitByte(0x81);
2665 emitOperandHelper(rcx, dst);
2666 emitInt(imm32);
2667 }
2668
2669 public final void orq(Register dst, int imm32) {
2670 prefixqAndEncode(dst.encoding);
2671 emitArith(0x81, 0xC8, dst, imm32);
2672 }
2673
2674 public final void orq(Register dst, Address src) {
2675 prefixq(src, dst);
2676 emitByte(0x0B);
2677 emitOperandHelper(dst, src);
2678 }
2679
2680 public final void orq(Register dst, Register src) {
2681 prefixqAndEncode(dst.encoding, src.encoding);
2682 emitArith(0x0B, 0xC0, dst, src);
2683 }
2684
2685 public final void popq(Address dst) {
2686 prefixq(dst);
2687 emitByte(0x8F);
2688 emitOperandHelper(rax, dst);
2689 }
2690
2691 public final void pushq(Address src) {
2692 prefixq(src);
2693 emitByte(0xFF);
2694 emitOperandHelper(rsi, src);
2695 }
2696
2697 public final void rclq(Register dst, int imm8) {
2698 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2699 int encode = prefixqAndEncode(dst.encoding);
2700 if (imm8 == 1) {
2701 emitByte(0xD1);
2702 emitByte(0xD0 | encode);
2703 } else {
2704 emitByte(0xC1);
2705 emitByte(0xD0 | encode);
2706 emitByte(imm8);
2707 }
2708 }
2709
2710 public final void sarq(Register dst, int imm8) {
2711 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2712 int encode = prefixqAndEncode(dst.encoding);
2713 if (imm8 == 1) {
2714 emitByte(0xD1);
2715 emitByte(0xF8 | encode);
2716 } else {
2717 emitByte(0xC1);
2718 emitByte(0xF8 | encode);
2719 emitByte(imm8);
2720 }
2721 }
2722
2723 public final void sarq(Register dst) {
2724 int encode = prefixqAndEncode(dst.encoding);
2725 emitByte(0xD3);
2726 emitByte(0xF8 | encode);
2727 }
2728
2729 public final void shlq(Register dst, int imm8) {
2730 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2731 int encode = prefixqAndEncode(dst.encoding);
2732 if (imm8 == 1) {
2733 emitByte(0xD1);
2734 emitByte(0xE0 | encode);
2735 } else {
2736 emitByte(0xC1);
2737 emitByte(0xE0 | encode);
2738 emitByte(imm8);
2739 }
2740 }
2741
2742 public final void shlq(Register dst) {
2743 int encode = prefixqAndEncode(dst.encoding);
2744 emitByte(0xD3);
2745 emitByte(0xE0 | encode);
2746 }
2747
2748 public final void shrq(Register dst, int imm8) {
2749 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2750 int encode = prefixqAndEncode(dst.encoding);
2751 emitByte(0xC1);
2752 emitByte(0xE8 | encode);
2753 emitByte(imm8);
2754 }
2755
2756 public final void shrq(Register dst) {
2757 int encode = prefixqAndEncode(dst.encoding);
2758 emitByte(0xD3);
2759 emitByte(0xE8 | encode);
2760 }
2761
2762 public final void sqrtsd(Register dst, Address src) {
2763 assert dst.isFpu();
2764
2765 emitByte(0xF2);
2766 prefix(src, dst);
2767 emitByte(0x0F);
2768 emitByte(0x51);
2769 emitOperandHelper(dst, src);
2770 }
2771
2772 public final void subq(Address dst, int imm32) {
2773 prefixq(dst);
2774 if (isByte(imm32)) {
2775 emitByte(0x83);
2776 emitOperandHelper(rbp, dst);
2777 emitByte(imm32 & 0xFF);
2778 } else {
2779 emitByte(0x81);
2780 emitOperandHelper(rbp, dst);
2781 emitInt(imm32);
2782 }
2783 }
2784
2785 public final void subq(Register dst, int imm32) {
2786 prefixqAndEncode(dst.encoding);
2787 emitArith(0x81, 0xE8, dst, imm32);
2788 }
2789
2790 public final void subq(Address dst, Register src) {
2791 prefixq(dst, src);
2792 emitByte(0x29);
2793 emitOperandHelper(src, dst);
2794 }
2795
2796 public final void subq(Register dst, Address src) {
2797 prefixq(src, dst);
2798 emitByte(0x2B);
2799 emitOperandHelper(dst, src);
2800 }
2801
2802 public final void subq(Register dst, Register src) {
2803 prefixqAndEncode(dst.encoding, src.encoding);
2804 emitArith(0x2B, 0xC0, dst, src);
2805 }
2806
2807 public final void testq(Register dst, int imm32) {
2808 // not using emitArith because test
2809 // doesn't support sign-extension of
2810 // 8bit operands
2811 int encode = dst.encoding;
2812 if (encode == 0) {
2813 emitByte(Prefix.REXW);
2814 emitByte(0xA9);
2815 } else {
2816 encode = prefixqAndEncode(encode);
2817 emitByte(0xF7);
2818 emitByte(0xC0 | encode);
2819 }
2820 emitInt(imm32);
2821 }
2822
2823 public final void testq(Register dst, Register src) {
2824 prefixqAndEncode(dst.encoding, src.encoding);
2825 emitArith(0x85, 0xC0, dst, src);
2826 }
2827
2828 public final void xaddq(Address dst, Register src) {
2829 prefixq(dst, src);
2830 emitByte(0x0F);
2831 emitByte(0xC1);
2832 emitOperandHelper(src, dst);
2833 }
2834
2835 public final void xchgq(Register dst, Address src) {
2836 prefixq(src, dst);
2837 emitByte(0x87);
2838 emitOperandHelper(dst, src);
2839 }
2840
2841 public final void xchgq(Register dst, Register src) {
2842 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2843 emitByte(0x87);
2844 emitByte(0xc0 | encode);
2845 }
2846
2847 public final void xorq(Register dst, int imm32) {
2848 prefixqAndEncode(dst.encoding);
2849 emitArith(0x81, 0xF0, dst, imm32);
2850 }
2851
2852 public final void xorq(Register dst, Register src) {
2853 prefixqAndEncode(dst.encoding, src.encoding);
2854 emitArith(0x33, 0xC0, dst, src);
2855 }
2856
2857 public final void xorq(Register dst, Address src) {
2858
2859 prefixq(src, dst);
2860 emitByte(0x33);
2861 emitOperandHelper(dst, src);
2862
2863 }
2864
2865 public final void membar(int barriers) {
2866 if (target.isMP) {
2867 // We only have to handle StoreLoad
2868 if ((barriers & STORE_LOAD) != 0) {
2869 // All usable chips support "locked" instructions which suffice
2870 // as barriers, and are much faster than the alternative of
2871 // using cpuid instruction. We use here a locked add [rsp],0.
2872 // This is conveniently otherwise a no-op except for blowing
2873 // flags.
2874 // Any change to this code may need to revisit other places in
2875 // the code where this idiom is used, in particular the
2876 // orderAccess code.
2877 lock();
2878 addl(new Address(Word, RSP, 0), 0); // Assert the lock# signal here
2879 }
2880 }
2881 }
2882
2883 @Override
2884 protected final void patchJumpTarget(int branch, int branchTarget) {
2885 int op = codeBuffer.getByte(branch);
2886 assert op == 0xE8 // call
2887 || op == 0x00 // jump table entry
2888 || op == 0xE9 // jmp
2889 || op == 0xEB // short jmp
2890 || (op & 0xF0) == 0x70 // short jcc
2891 || op == 0x0F && (codeBuffer.getByte(branch + 1) & 0xF0) == 0x80 // jcc
2892 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
2893
2894 if (op == 0x00) {
2895 int offsetToJumpTableBase = codeBuffer.getShort(branch + 1);
2896 int jumpTableBase = branch - offsetToJumpTableBase;
2897 int imm32 = branchTarget - jumpTableBase;
2898 codeBuffer.emitInt(imm32, branch);
2899 } else if (op == 0xEB || (op & 0xF0) == 0x70) {
2900
2901 // short offset operators (jmp and jcc)
2902 int imm8 = branchTarget - (branch + 2);
2903 codeBuffer.emitByte(imm8, branch + 1);
2904
2905 } else {
2906
2907 int off = 1;
2908 if (op == 0x0F) {
2909 off = 2;
2910 }
2911
2912 int imm32 = branchTarget - (branch + 4 + off);
2913 codeBuffer.emitInt(imm32, branch + off);
2914 }
2915 }
2916
2917 public void nullCheck(Register r) {
2918 testl(AMD64.rax, new Address(Word, r.asValue(Word), 0));
2919 }
2920
2921 @Override
2922 public void align(int modulus) {
2923 if (codeBuffer.position() % modulus != 0) {
2924 nop(modulus - (codeBuffer.position() % modulus));
2925 }
2926 }
2927
2928 public void pushfq() {
2929 emitByte(0x9c);
2930 }
2931
2932 public void popfq() {
2933 emitByte(0x9D);
2934 }
2935
2936 /**
2937 * Makes sure that a subsequent {@linkplain #call} does not fail the alignment check.
2938 */
2939 public final void alignForPatchableDirectCall() {
2940 int dispStart = codeBuffer.position() + 1;
2941 int mask = target.wordSize - 1;
2942 if ((dispStart & ~mask) != ((dispStart + 3) & ~mask)) {
2943 nop(target.wordSize - (dispStart & mask));
2944 assert ((codeBuffer.position() + 1) & mask) == 0;
2945 }
2946 }
2947
2948 /**
2949 * Emits a direct call instruction. Note that the actual call target is not specified, because all calls
2950 * need patching anyway. Therefore, 0 is emitted as the call target, and the user is responsible
2951 * to add the call address to the appropriate patching tables.
2952 */
2953 public final void call() {
2954 emitByte(0xE8);
2955 emitInt(0);
2956 }
2957
2958 public final void call(Register src) {
2959 int encode = prefixAndEncode(src.encoding);
2960 emitByte(0xFF);
2961 emitByte(0xD0 | encode);
2962 }
2963
2964 public void int3() {
2965 emitByte(0xCC);
2966 }
2967
2968 public void enter(short imm16, byte imm8) {
2969 emitByte(0xC8);
2970 // appended:
2971 emitByte(imm16 & 0xff);
2972 emitByte((imm16 >> 8) & 0xff);
2973 emitByte(imm8);
2974 }
2975
2976 private void emitx87(int b1, int b2, int i) {
2977 assert 0 <= i && i < 8 : "illegal stack offset";
2978 emitByte(b1);
2979 emitByte(b2 + i);
2980 }
2981
2982 public void fld(Address src) {
2983 emitByte(0xDD);
2984 emitOperandHelper(rax, src);
2985 }
2986
2987 public void fld(int i) {
2988 emitx87(0xD9, 0xC0, i);
2989 }
2990
2991 public void fldln2() {
2992 emitByte(0xD9);
2993 emitByte(0xED);
2994 }
2995
2996 public void fldlg2() {
2997 emitByte(0xD9);
2998 emitByte(0xEC);
2999 }
3000
3001 public void fyl2x() {
3002 emitByte(0xD9);
3003 emitByte(0xF1);
3004 }
3005
3006 public void fstp(Address src) {
3007 emitByte(0xDD);
3008 emitOperandHelper(rbx, src);
3009 }
3010
3011 public void fsin() {
3012 emitByte(0xD9);
3013 emitByte(0xFE);
3014 }
3015
3016 public void fcos() {
3017 emitByte(0xD9);
3018 emitByte(0xFF);
3019 }
3020
3021 public void fptan() {
3022 emitByte(0xD9);
3023 emitByte(0xF2);
3024 }
3025
3026 public void fstp(int i) {
3027 emitx87(0xDD, 0xD8, i);
3028 }
3029
3030 @Override
3031 public void bangStack(int disp) {
3032 movq(new Address(target.wordKind, AMD64.RSP, -disp), AMD64.rax);
3033 }
3034 }