comparison graal/com.oracle.max.asm/src/com/oracle/max/asm/target/amd64/AMD64Assembler.java @ 3733:e233f5660da4

Added Java files from Maxine project.
author Thomas Wuerthinger <thomas.wuerthinger@oracle.com>
date Sat, 17 Dec 2011 19:59:18 +0100
parents
children bc8527f3071c
comparison
equal deleted inserted replaced
3732:3e2e8b8abdaf 3733:e233f5660da4
1 /*
2 * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23 package com.oracle.max.asm.target.amd64;
24
25 import static com.oracle.max.asm.NumUtil.*;
26 import static com.oracle.max.asm.target.amd64.AMD64.*;
27 import static com.oracle.max.cri.intrinsics.MemoryBarriers.*;
28
29 import com.oracle.max.asm.*;
30 import com.sun.cri.ci.*;
31 import com.sun.cri.ri.*;
32
33 /**
34 * This class implements an assembler that can encode most X86 instructions.
35 */
36 public class AMD64Assembler extends AbstractAssembler {
/**
 * The kind for pointers and raw registers. Since we know we are 64 bit here, we can hardcode it
 * to {@link CiKind#Long}.
 */
private static final CiKind Word = CiKind.Long;

// Presumably the lowest register encoding that requires a REX prefix bit; its uses are
// outside this excerpt -- TODO confirm.
private static final int MinEncodingNeedsRex = 8;
43
/**
 * The x86 condition codes used for conditional jumps/moves. Several constants are aliases
 * that share the same 4-bit encoding (for example {@code zero}/{@code equal} and
 * {@code below}/{@code carrySet}).
 */
public enum ConditionFlag {
    // equality / zero tests
    zero(0x4),
    notZero(0x5),
    equal(0x4),
    notEqual(0x5),
    // signed comparisons
    less(0xC),
    lessEqual(0xE),
    greater(0xF),
    greaterEqual(0xD),
    // unsigned comparisons
    below(0x2),
    belowEqual(0x6),
    above(0x7),
    aboveEqual(0x3),
    // individual flag tests
    overflow(0x0),
    noOverflow(0x1),
    carrySet(0x2),
    carryClear(0x3),
    negative(0x8),
    positive(0x9),
    parity(0xA),
    noParity(0xB);

    /** Cached copy of {@link #values()}, in declaration order. */
    public static final ConditionFlag[] values = values();

    /** The condition-code bits that are OR-ed into the opcode byte by the emitters. */
    public final int value;

    ConditionFlag(int encoding) {
        value = encoding;
    }
}
77
78 /**
79 * Constants for X86 prefix bytes.
80 */
81 private class Prefix {
82 private static final int REX = 0x40;
83 private static final int REXB = 0x41;
84 private static final int REXX = 0x42;
85 private static final int REXXB = 0x43;
86 private static final int REXR = 0x44;
87 private static final int REXRB = 0x45;
88 private static final int REXRX = 0x46;
89 private static final int REXRXB = 0x47;
90 private static final int REXW = 0x48;
91 private static final int REXWB = 0x49;
92 private static final int REXWX = 0x4A;
93 private static final int REXWXB = 0x4B;
94 private static final int REXWR = 0x4C;
95 private static final int REXWRB = 0x4D;
96 private static final int REXWRX = 0x4E;
97 private static final int REXWRXB = 0x4F;
98 }
99
/**
 * The register to which {@link CiRegister#Frame} and {@link CiRegister#CallerFrame} are bound.
 * It is {@code null} when the assembler was created without a register configuration.
 */
public final CiRegister frameRegister;

/**
 * Constructs an assembler for the AMD64 architecture.
 *
 * @param target the target description handed to the superclass constructor
 * @param registerConfig the register configuration used to bind {@link CiRegister#Frame} and
 *            {@link CiRegister#CallerFrame} to physical registers. This value can be null if this assembler
 *            instance will not be used to assemble instructions using these logical registers.
 */
public AMD64Assembler(CiTarget target, RiRegisterConfig registerConfig) {
    super(target);
    if (registerConfig != null) {
        this.frameRegister = registerConfig.getFrameRegister();
    } else {
        this.frameRegister = null;
    }
}
116
117 private static int encode(CiRegister r) {
118 assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding;
119 return r.encoding & 0x7;
120 }
121
122 private void emitArithB(int op1, int op2, CiRegister dst, int imm8) {
123 assert dst.isByte() : "must have byte register";
124 assert isUByte(op1) && isUByte(op2) : "wrong opcode";
125 assert isUByte(imm8) : "not a byte";
126 assert (op1 & 0x01) == 0 : "should be 8bit operation";
127 emitByte(op1);
128 emitByte(op2 | encode(dst));
129 emitByte(imm8);
130 }
131
132 private void emitArith(int op1, int op2, CiRegister dst, int imm32) {
133 assert isUByte(op1) && isUByte(op2) : "wrong opcode";
134 assert (op1 & 0x01) == 1 : "should be 32bit operation";
135 assert (op1 & 0x02) == 0 : "sign-extension bit should not be set";
136 if (isByte(imm32)) {
137 emitByte(op1 | 0x02); // set sign bit
138 emitByte(op2 | encode(dst));
139 emitByte(imm32 & 0xFF);
140 } else {
141 emitByte(op1);
142 emitByte(op2 | encode(dst));
143 emitInt(imm32);
144 }
145 }
146
147 // immediate-to-memory forms
148 private void emitArithOperand(int op1, CiRegister rm, CiAddress adr, int imm32) {
149 assert (op1 & 0x01) == 1 : "should be 32bit operation";
150 assert (op1 & 0x02) == 0 : "sign-extension bit should not be set";
151 if (isByte(imm32)) {
152 emitByte(op1 | 0x02); // set sign bit
153 emitOperandHelper(rm, adr);
154 emitByte(imm32 & 0xFF);
155 } else {
156 emitByte(op1);
157 emitOperandHelper(rm, adr);
158 emitInt(imm32);
159 }
160 }
161
162 private void emitArith(int op1, int op2, CiRegister dst, CiRegister src) {
163 assert isUByte(op1) && isUByte(op2) : "wrong opcode";
164 emitByte(op1);
165 emitByte(op2 | encode(dst) << 3 | encode(src));
166 }
167
168 private void emitOperandHelper(CiRegister reg, CiAddress addr) {
169 CiRegister base = addr.base();
170
171 CiRegister index = addr.index();
172 CiAddress.Scale scale = addr.scale;
173 int disp = addr.displacement;
174
175 if (base == CiRegister.Frame) {
176 assert frameRegister != null : "cannot use register " + CiRegister.Frame + " in assembler with null register configuration";
177 base = frameRegister;
178 // } else if (base == CiRegister.CallerFrame) {
179 // assert frameRegister != null : "cannot use register " + CiRegister.Frame + " in assembler with null register configuration";
180 // base = frameRegister;
181 // disp += targetMethod.frameSize() + 8;
182 }
183
184 // Encode the registers as needed in the fields they are used in
185
186 assert reg != CiRegister.None;
187 int regenc = encode(reg) << 3;
188
189 if (base == AMD64.rip) {
190 // [00 000 101] disp32
191 emitByte(0x05 | regenc);
192 emitInt(disp);
193 } else if (addr == CiAddress.Placeholder) {
194 // [00 000 101] disp32
195 emitByte(0x05 | regenc);
196 emitInt(0);
197
198 } else if (base.isValid()) {
199 int baseenc = base.isValid() ? encode(base) : 0;
200 if (index.isValid()) {
201 int indexenc = encode(index) << 3;
202 // [base + indexscale + disp]
203 if (disp == 0 && base != rbp && (base != r13)) {
204 // [base + indexscale]
205 // [00 reg 100][ss index base]
206 assert index != rsp : "illegal addressing mode";
207 emitByte(0x04 | regenc);
208 emitByte(scale.log2 << 6 | indexenc | baseenc);
209 } else if (isByte(disp)) {
210 // [base + indexscale + imm8]
211 // [01 reg 100][ss index base] imm8
212 assert index != rsp : "illegal addressing mode";
213 emitByte(0x44 | regenc);
214 emitByte(scale.log2 << 6 | indexenc | baseenc);
215 emitByte(disp & 0xFF);
216 } else {
217 // [base + indexscale + disp32]
218 // [10 reg 100][ss index base] disp32
219 assert index != rsp : "illegal addressing mode";
220 emitByte(0x84 | regenc);
221 emitByte(scale.log2 << 6 | indexenc | baseenc);
222 emitInt(disp);
223 }
224 } else if (base == rsp || (base == r12)) {
225 // [rsp + disp]
226 if (disp == 0) {
227 // [rsp]
228 // [00 reg 100][00 100 100]
229 emitByte(0x04 | regenc);
230 emitByte(0x24);
231 } else if (isByte(disp)) {
232 // [rsp + imm8]
233 // [01 reg 100][00 100 100] disp8
234 emitByte(0x44 | regenc);
235 emitByte(0x24);
236 emitByte(disp & 0xFF);
237 } else {
238 // [rsp + imm32]
239 // [10 reg 100][00 100 100] disp32
240 emitByte(0x84 | regenc);
241 emitByte(0x24);
242 emitInt(disp);
243 }
244 } else {
245 // [base + disp]
246 assert base != rsp && (base != r12) : "illegal addressing mode";
247 if (disp == 0 && base != rbp && (base != r13)) {
248 // [base]
249 // [00 reg base]
250 emitByte(0x00 | regenc | baseenc);
251 } else if (isByte(disp)) {
252 // [base + disp8]
253 // [01 reg base] disp8
254 emitByte(0x40 | regenc | baseenc);
255 emitByte(disp & 0xFF);
256 } else {
257 // [base + disp32]
258 // [10 reg base] disp32
259 emitByte(0x80 | regenc | baseenc);
260 emitInt(disp);
261 }
262 }
263 } else {
264 if (index.isValid()) {
265 int indexenc = encode(index) << 3;
266 // [indexscale + disp]
267 // [00 reg 100][ss index 101] disp32
268 assert index != rsp : "illegal addressing mode";
269 emitByte(0x04 | regenc);
270 emitByte(scale.log2 << 6 | indexenc | 0x05);
271 emitInt(disp);
272 } else {
273 // [disp] ABSOLUTE
274 // [00 reg 100][00 100 101] disp32
275 emitByte(0x04 | regenc);
276 emitByte(0x25);
277 emitInt(disp);
278 }
279 }
280 }
281
282 public final void addl(CiAddress dst, int imm32) {
283 prefix(dst);
284 emitArithOperand(0x81, rax, dst, imm32);
285 }
286
287 public final void addl(CiAddress dst, CiRegister src) {
288 prefix(dst, src);
289 emitByte(0x01);
290 emitOperandHelper(src, dst);
291 }
292
293 public final void addl(CiRegister dst, int imm32) {
294 prefix(dst);
295 emitArith(0x81, 0xC0, dst, imm32);
296 }
297
298 public final void addl(CiRegister dst, CiAddress src) {
299 prefix(src, dst);
300 emitByte(0x03);
301 emitOperandHelper(dst, src);
302 }
303
304 public final void addl(CiRegister dst, CiRegister src) {
305 prefixAndEncode(dst.encoding, src.encoding);
306 emitArith(0x03, 0xC0, dst, src);
307 }
308
309 private void addrNop4() {
310 // 4 bytes: NOP DWORD PTR [EAX+0]
311 emitByte(0x0F);
312 emitByte(0x1F);
313 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc);
314 emitByte(0); // 8-bits offset (1 byte)
315 }
316
317 private void addrNop5() {
318 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
319 emitByte(0x0F);
320 emitByte(0x1F);
321 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4);
322 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
323 emitByte(0); // 8-bits offset (1 byte)
324 }
325
326 private void addrNop7() {
327 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
328 emitByte(0x0F);
329 emitByte(0x1F);
330 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc);
331 emitInt(0); // 32-bits offset (4 bytes)
332 }
333
334 private void addrNop8() {
335 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
336 emitByte(0x0F);
337 emitByte(0x1F);
338 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4);
339 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc);
340 emitInt(0); // 32-bits offset (4 bytes)
341 }
342
343 public final void addsd(CiRegister dst, CiRegister src) {
344 assert dst.isFpu() && src.isFpu();
345 emitByte(0xF2);
346 int encode = prefixAndEncode(dst.encoding, src.encoding);
347 emitByte(0x0F);
348 emitByte(0x58);
349 emitByte(0xC0 | encode);
350 }
351
352 public final void addsd(CiRegister dst, CiAddress src) {
353 assert dst.isFpu();
354 emitByte(0xF2);
355 prefix(src, dst);
356 emitByte(0x0F);
357 emitByte(0x58);
358 emitOperandHelper(dst, src);
359 }
360
361 public final void addss(CiRegister dst, CiRegister src) {
362 assert dst.isFpu() && src.isFpu();
363 emitByte(0xF3);
364 int encode = prefixAndEncode(dst.encoding, src.encoding);
365 emitByte(0x0F);
366 emitByte(0x58);
367 emitByte(0xC0 | encode);
368 }
369
370 public final void addss(CiRegister dst, CiAddress src) {
371 assert dst.isFpu();
372 emitByte(0xF3);
373 prefix(src, dst);
374 emitByte(0x0F);
375 emitByte(0x58);
376 emitOperandHelper(dst, src);
377 }
378
379 public final void andl(CiRegister dst, int imm32) {
380 prefix(dst);
381 emitArith(0x81, 0xE0, dst, imm32);
382 }
383
384 public final void andl(CiRegister dst, CiAddress src) {
385 prefix(src, dst);
386 emitByte(0x23);
387 emitOperandHelper(dst, src);
388 }
389
390 public final void andl(CiRegister dst, CiRegister src) {
391 prefixAndEncode(dst.encoding, src.encoding);
392 emitArith(0x23, 0xC0, dst, src);
393 }
394
395 public final void bsfq(CiRegister dst, CiRegister src) {
396 int encode = prefixqAndEncode(dst.encoding, src.encoding);
397 emitByte(0x0F);
398 emitByte(0xBC);
399 emitByte(0xC0 | encode);
400 }
401
402 public final void bsfq(CiRegister dst, CiAddress src) {
403 prefixq(src, dst);
404 emitByte(0xBC);
405 emitOperandHelper(dst, src);
406 }
407
408 public final void bsrq(CiRegister dst, CiRegister src) {
409 int encode = prefixqAndEncode(dst.encoding, src.encoding);
410 emitByte(0x0F);
411 emitByte(0xBD);
412 emitByte(0xC0 | encode);
413 }
414
415
416 public final void bsrq(CiRegister dst, CiAddress src) {
417 prefixq(src, dst);
418 emitByte(0xBD);
419 emitOperandHelper(dst, src);
420 }
421
422 public final void bswapl(CiRegister reg) { // bswap
423 int encode = prefixAndEncode(reg.encoding);
424 emitByte(0x0F);
425 emitByte(0xC8 | encode);
426 }
427
428 public final void btli(CiAddress src, int imm8) {
429 prefixq(src);
430 emitByte(0x0F);
431 emitByte(0xBA);
432 emitOperandHelper(rsp, src);
433 emitByte(imm8);
434 }
435
436 public final void cdql() {
437 emitByte(0x99);
438 }
439
440 public final void cmovl(ConditionFlag cc, CiRegister dst, CiRegister src) {
441 int encode = prefixAndEncode(dst.encoding, src.encoding);
442 emitByte(0x0F);
443 emitByte(0x40 | cc.value);
444 emitByte(0xC0 | encode);
445 }
446
447 public final void cmovl(ConditionFlag cc, CiRegister dst, CiAddress src) {
448 prefix(src, dst);
449 emitByte(0x0F);
450 emitByte(0x40 | cc.value);
451 emitOperandHelper(dst, src);
452 }
453
454 public final void cmpb(CiAddress dst, int imm8) {
455 prefix(dst);
456 emitByte(0x80);
457 emitOperandHelper(rdi, dst);
458 emitByte(imm8);
459 }
460
461 public final void cmpl(CiAddress dst, int imm32) {
462 prefix(dst);
463 emitByte(0x81);
464 emitOperandHelper(rdi, dst);
465 emitInt(imm32);
466 }
467
468 public final void cmpl(CiRegister dst, int imm32) {
469 prefix(dst);
470 emitArith(0x81, 0xF8, dst, imm32);
471 }
472
473 public final void cmpl(CiRegister dst, CiRegister src) {
474 prefixAndEncode(dst.encoding, src.encoding);
475 emitArith(0x3B, 0xC0, dst, src);
476 }
477
478 public final void cmpl(CiRegister dst, CiAddress src) {
479 prefix(src, dst);
480 emitByte(0x3B);
481 emitOperandHelper(dst, src);
482 }
483
484 // The 32-bit cmpxchg compares the value at adr with the contents of X86.rax,
485 // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,.
486 // The ZF is set if the compared values were equal, and cleared otherwise.
487 public final void cmpxchgl(CiRegister reg, CiAddress adr) { // cmpxchg
488 if ((AsmOptions.Atomics & 2) != 0) {
489 // caveat: no instructionmark, so this isn't relocatable.
490 // Emit a synthetic, non-atomic, CAS equivalent.
491 // Beware. The synthetic form sets all ICCs, not just ZF.
492 // cmpxchg r,[m] is equivalent to X86.rax, = CAS (m, X86.rax, r)
493 cmpl(rax, adr);
494 movl(rax, adr);
495 if (reg != rax) {
496 Label l = new Label();
497 jcc(ConditionFlag.notEqual, l);
498 movl(adr, reg);
499 bind(l);
500 }
501 } else {
502
503 prefix(adr, reg);
504 emitByte(0x0F);
505 emitByte(0xB1);
506 emitOperandHelper(reg, adr);
507 }
508 }
509
510 public final void comisd(CiRegister dst, CiAddress src) {
511 assert dst.isFpu();
512 // NOTE: dbx seems to decode this as comiss even though the
513 // 0x66 is there. Strangly ucomisd comes out correct
514 emitByte(0x66);
515 comiss(dst, src);
516 }
517
518 public final void comiss(CiRegister dst, CiAddress src) {
519 assert dst.isFpu();
520
521 prefix(src, dst);
522 emitByte(0x0F);
523 emitByte(0x2F);
524 emitOperandHelper(dst, src);
525 }
526
527 public final void cvtdq2pd(CiRegister dst, CiRegister src) {
528 assert dst.isFpu();
529 assert src.isFpu();
530
531 emitByte(0xF3);
532 int encode = prefixAndEncode(dst.encoding, src.encoding);
533 emitByte(0x0F);
534 emitByte(0xE6);
535 emitByte(0xC0 | encode);
536 }
537
538 public final void cvtdq2ps(CiRegister dst, CiRegister src) {
539 assert dst.isFpu();
540 assert src.isFpu();
541 int encode = prefixAndEncode(dst.encoding, src.encoding);
542 emitByte(0x0F);
543 emitByte(0x5B);
544 emitByte(0xC0 | encode);
545 }
546
547 public final void cvtsd2ss(CiRegister dst, CiRegister src) {
548 assert dst.isFpu();
549 assert src.isFpu();
550 emitByte(0xF2);
551 int encode = prefixAndEncode(dst.encoding, src.encoding);
552 emitByte(0x0F);
553 emitByte(0x5A);
554 emitByte(0xC0 | encode);
555 }
556
557 public final void cvtsi2sdl(CiRegister dst, CiRegister src) {
558 assert dst.isFpu();
559 emitByte(0xF2);
560 int encode = prefixAndEncode(dst.encoding, src.encoding);
561 emitByte(0x0F);
562 emitByte(0x2A);
563 emitByte(0xC0 | encode);
564 }
565
566 public final void cvtsi2ssl(CiRegister dst, CiRegister src) {
567 assert dst.isFpu();
568 emitByte(0xF3);
569 int encode = prefixAndEncode(dst.encoding, src.encoding);
570 emitByte(0x0F);
571 emitByte(0x2A);
572 emitByte(0xC0 | encode);
573 }
574
575 public final void cvtss2sd(CiRegister dst, CiRegister src) {
576 assert dst.isFpu();
577 assert src.isFpu();
578 emitByte(0xF3);
579 int encode = prefixAndEncode(dst.encoding, src.encoding);
580 emitByte(0x0F);
581 emitByte(0x5A);
582 emitByte(0xC0 | encode);
583 }
584
585 public final void cvttsd2sil(CiRegister dst, CiRegister src) {
586 assert src.isFpu();
587 emitByte(0xF2);
588 int encode = prefixAndEncode(dst.encoding, src.encoding);
589 emitByte(0x0F);
590 emitByte(0x2C);
591 emitByte(0xC0 | encode);
592 }
593
594 public final void cvttss2sil(CiRegister dst, CiRegister src) {
595 assert src.isFpu();
596 emitByte(0xF3);
597 int encode = prefixAndEncode(dst.encoding, src.encoding);
598 emitByte(0x0F);
599 emitByte(0x2C);
600 emitByte(0xC0 | encode);
601 }
602
603 public final void decl(CiAddress dst) {
604 // Don't use it directly. Use Macrodecrement() instead.
605 prefix(dst);
606 emitByte(0xFF);
607 emitOperandHelper(rcx, dst);
608 }
609
610 public final void divsd(CiRegister dst, CiAddress src) {
611 assert dst.isFpu();
612 emitByte(0xF2);
613 prefix(src, dst);
614 emitByte(0x0F);
615 emitByte(0x5E);
616 emitOperandHelper(dst, src);
617 }
618
619 public final void divsd(CiRegister dst, CiRegister src) {
620 assert dst.isFpu();
621 assert src.isFpu();
622 emitByte(0xF2);
623 int encode = prefixAndEncode(dst.encoding, src.encoding);
624 emitByte(0x0F);
625 emitByte(0x5E);
626 emitByte(0xC0 | encode);
627 }
628
629 public final void divss(CiRegister dst, CiAddress src) {
630 assert dst.isFpu();
631 emitByte(0xF3);
632 prefix(src, dst);
633 emitByte(0x0F);
634 emitByte(0x5E);
635 emitOperandHelper(dst, src);
636 }
637
638 public final void divss(CiRegister dst, CiRegister src) {
639 assert dst.isFpu();
640 assert src.isFpu();
641 emitByte(0xF3);
642 int encode = prefixAndEncode(dst.encoding, src.encoding);
643 emitByte(0x0F);
644 emitByte(0x5E);
645 emitByte(0xC0 | encode);
646 }
647
648 public final void hlt() {
649 emitByte(0xF4);
650 }
651
652 public final void idivl(CiRegister src) {
653 int encode = prefixAndEncode(src.encoding);
654 emitByte(0xF7);
655 emitByte(0xF8 | encode);
656 }
657
658 public final void divl(CiRegister src) {
659 int encode = prefixAndEncode(src.encoding);
660 emitByte(0xF7);
661 emitByte(0xF0 | encode);
662 }
663
664 public final void imull(CiRegister dst, CiRegister src) {
665 int encode = prefixAndEncode(dst.encoding, src.encoding);
666 emitByte(0x0F);
667 emitByte(0xAF);
668 emitByte(0xC0 | encode);
669 }
670
671 public final void imull(CiRegister dst, CiRegister src, int value) {
672 int encode = prefixAndEncode(dst.encoding, src.encoding);
673 if (isByte(value)) {
674 emitByte(0x6B);
675 emitByte(0xC0 | encode);
676 emitByte(value & 0xFF);
677 } else {
678 emitByte(0x69);
679 emitByte(0xC0 | encode);
680 emitInt(value);
681 }
682 }
683
684 public final void incl(CiAddress dst) {
685 // Don't use it directly. Use Macroincrement() instead.
686 prefix(dst);
687 emitByte(0xFF);
688 emitOperandHelper(rax, dst);
689 }
690
691 public final void jcc(ConditionFlag cc, int target, boolean forceDisp32) {
692 int shortSize = 2;
693 int longSize = 6;
694 long disp = target - codeBuffer.position();
695 if (!forceDisp32 && isByte(disp - shortSize)) {
696 // 0111 tttn #8-bit disp
697 emitByte(0x70 | cc.value);
698 emitByte((int) ((disp - shortSize) & 0xFF));
699 } else {
700 // 0000 1111 1000 tttn #32-bit disp
701 assert isInt(disp - longSize) : "must be 32bit offset (call4)";
702 emitByte(0x0F);
703 emitByte(0x80 | cc.value);
704 emitInt((int) (disp - longSize));
705 }
706 }
707
708 public final void jcc(ConditionFlag cc, Label l) {
709 assert (0 <= cc.value) && (cc.value < 16) : "illegal cc";
710 if (l.isBound()) {
711 jcc(cc, l.position(), false);
712 } else {
713 // Note: could eliminate cond. jumps to this jump if condition
714 // is the same however, seems to be rather unlikely case.
715 // Note: use jccb() if label to be bound is very close to get
716 // an 8-bit displacement
717 l.addPatchAt(codeBuffer.position());
718 emitByte(0x0F);
719 emitByte(0x80 | cc.value);
720 emitInt(0);
721 }
722
723 }
724
725 public final void jccb(ConditionFlag cc, Label l) {
726 if (l.isBound()) {
727 int shortSize = 2;
728 int entry = l.position();
729 assert isByte(entry - (codeBuffer.position() + shortSize)) : "Dispacement too large for a short jmp";
730 long disp = entry - codeBuffer.position();
731 // 0111 tttn #8-bit disp
732 emitByte(0x70 | cc.value);
733 emitByte((int) ((disp - shortSize) & 0xFF));
734 } else {
735
736 l.addPatchAt(codeBuffer.position());
737 emitByte(0x70 | cc.value);
738 emitByte(0);
739 }
740 }
741
742 public final void jmp(CiAddress adr) {
743 prefix(adr);
744 emitByte(0xFF);
745 emitOperandHelper(rsp, adr);
746 }
747
748 public final void jmp(int target, boolean forceDisp32) {
749 int shortSize = 2;
750 int longSize = 5;
751 long disp = target - codeBuffer.position();
752 if (!forceDisp32 && isByte(disp - shortSize)) {
753 emitByte(0xEB);
754 emitByte((int) ((disp - shortSize) & 0xFF));
755 } else {
756 emitByte(0xE9);
757 emitInt((int) (disp - longSize));
758 }
759 }
760
761 public final void jmp(Label l) {
762 if (l.isBound()) {
763 jmp(l.position(), false);
764 } else {
765 // By default, forward jumps are always 32-bit displacements, since
766 // we can't yet know where the label will be bound. If you're sure that
767 // the forward jump will not run beyond 256 bytes, use jmpb to
768 // force an 8-bit displacement.
769
770 l.addPatchAt(codeBuffer.position());
771 emitByte(0xE9);
772 emitInt(0);
773 }
774 }
775
776 public final void jmp(CiRegister entry) {
777 int encode = prefixAndEncode(entry.encoding);
778 emitByte(0xFF);
779 emitByte(0xE0 | encode);
780 }
781
782 public final void jmpb(Label l) {
783 if (l.isBound()) {
784 int shortSize = 2;
785 int entry = l.position();
786 assert isByte((entry - codeBuffer.position()) + shortSize) : "Dispacement too large for a short jmp";
787 long offs = entry - codeBuffer.position();
788 emitByte(0xEB);
789 emitByte((int) ((offs - shortSize) & 0xFF));
790 } else {
791
792 l.addPatchAt(codeBuffer.position());
793 emitByte(0xEB);
794 emitByte(0);
795 }
796 }
797
798 public final void leaq(CiRegister dst, CiAddress src) {
799 prefixq(src, dst);
800 emitByte(0x8D);
801 emitOperandHelper(dst, src);
802 }
803
804 public final void enter(int imm16, int imm8) {
805 emitByte(0xC8);
806 emitShort(imm16);
807 emitByte(imm8);
808 }
809
810 public final void leave() {
811 emitByte(0xC9);
812 }
813
814 public final void lock() {
815 if ((AsmOptions.Atomics & 1) != 0) {
816 // Emit either nothing, a NOP, or a NOP: prefix
817 emitByte(0x90);
818 } else {
819 emitByte(0xF0);
820 }
821 }
822
823 // Emit mfence instruction
824 public final void mfence() {
825 emitByte(0x0F);
826 emitByte(0xAE);
827 emitByte(0xF0);
828 }
829
830 public final void mov(CiRegister dst, CiRegister src) {
831 movq(dst, src);
832 }
833
834 public final void movapd(CiRegister dst, CiRegister src) {
835 assert dst.isFpu();
836 assert src.isFpu();
837 int dstenc = dst.encoding;
838 int srcenc = src.encoding;
839 emitByte(0x66);
840 if (dstenc < 8) {
841 if (srcenc >= 8) {
842 emitByte(Prefix.REXB);
843 srcenc -= 8;
844 }
845 } else {
846 if (srcenc < 8) {
847 emitByte(Prefix.REXR);
848 } else {
849 emitByte(Prefix.REXRB);
850 srcenc -= 8;
851 }
852 dstenc -= 8;
853 }
854 emitByte(0x0F);
855 emitByte(0x28);
856 emitByte(0xC0 | dstenc << 3 | srcenc);
857 }
858
859 public final void movaps(CiRegister dst, CiRegister src) {
860 assert dst.isFpu();
861 assert src.isFpu();
862 int dstenc = dst.encoding;
863 int srcenc = src.encoding;
864 if (dstenc < 8) {
865 if (srcenc >= 8) {
866 emitByte(Prefix.REXB);
867 srcenc -= 8;
868 }
869 } else {
870 if (srcenc < 8) {
871 emitByte(Prefix.REXR);
872 } else {
873 emitByte(Prefix.REXRB);
874 srcenc -= 8;
875 }
876 dstenc -= 8;
877 }
878 emitByte(0x0F);
879 emitByte(0x28);
880 emitByte(0xC0 | dstenc << 3 | srcenc);
881 }
882
883 public final void movb(CiRegister dst, CiAddress src) {
884 prefix(src, dst); // , true)
885 emitByte(0x8A);
886 emitOperandHelper(dst, src);
887 }
888
889 public final void movb(CiAddress dst, int imm8) {
890 prefix(dst);
891 emitByte(0xC6);
892 emitOperandHelper(rax, dst);
893 emitByte(imm8);
894 }
895
896 public final void movb(CiAddress dst, CiRegister src) {
897 assert src.isByte() : "must have byte register";
898 prefix(dst, src); // , true)
899 emitByte(0x88);
900 emitOperandHelper(src, dst);
901 }
902
903 public final void movdl(CiRegister dst, CiRegister src) {
904 if (dst.isFpu()) {
905 assert !src.isFpu() : "does this hold?";
906 emitByte(0x66);
907 int encode = prefixAndEncode(dst.encoding, src.encoding);
908 emitByte(0x0F);
909 emitByte(0x6E);
910 emitByte(0xC0 | encode);
911 } else if (src.isFpu()) {
912 assert !dst.isFpu();
913 emitByte(0x66);
914 // swap src/dst to get correct prefix
915 int encode = prefixAndEncode(src.encoding, dst.encoding);
916 emitByte(0x0F);
917 emitByte(0x7E);
918 emitByte(0xC0 | encode);
919 }
920 }
921
922 public final void movdqa(CiRegister dst, CiAddress src) {
923 assert dst.isFpu();
924 emitByte(0x66);
925 prefix(src, dst);
926 emitByte(0x0F);
927 emitByte(0x6F);
928 emitOperandHelper(dst, src);
929 }
930
931 public final void movdqa(CiRegister dst, CiRegister src) {
932 assert dst.isFpu();
933 emitByte(0x66);
934 int encode = prefixqAndEncode(dst.encoding, src.encoding);
935 emitByte(0x0F);
936 emitByte(0x6F);
937 emitByte(0xC0 | encode);
938 }
939
940 public final void movdqa(CiAddress dst, CiRegister src) {
941 assert src.isFpu();
942 emitByte(0x66);
943 prefix(dst, src);
944 emitByte(0x0F);
945 emitByte(0x7F);
946 emitOperandHelper(src, dst);
947 }
948
949 public final void movdqu(CiRegister dst, CiAddress src) {
950 assert dst.isFpu();
951 emitByte(0xF3);
952 prefix(src, dst);
953 emitByte(0x0F);
954 emitByte(0x6F);
955 emitOperandHelper(dst, src);
956 }
957
958 public final void movdqu(CiRegister dst, CiRegister src) {
959 assert dst.isFpu();
960 assert src.isFpu();
961
962 emitByte(0xF3);
963 int encode = prefixqAndEncode(dst.encoding, src.encoding);
964 emitByte(0x0F);
965 emitByte(0x6F);
966 emitByte(0xC0 | encode);
967 }
968
969 public final void movdqu(CiAddress dst, CiRegister src) {
970 assert src.isFpu();
971
972 emitByte(0xF3);
973 prefix(dst, src);
974 emitByte(0x0F);
975 emitByte(0x7F);
976 emitOperandHelper(src, dst);
977 }
978
979 public final void movl(CiRegister dst, int imm32) {
980 int encode = prefixAndEncode(dst.encoding);
981 emitByte(0xB8 | encode);
982 emitInt(imm32);
983 }
984
985 public final void movl(CiRegister dst, CiRegister src) {
986 int encode = prefixAndEncode(dst.encoding, src.encoding);
987 emitByte(0x8B);
988 emitByte(0xC0 | encode);
989 }
990
991 public final void movl(CiRegister dst, CiAddress src) {
992 prefix(src, dst);
993 emitByte(0x8B);
994 emitOperandHelper(dst, src);
995 }
996
997 public final void movl(CiAddress dst, int imm32) {
998 prefix(dst);
999 emitByte(0xC7);
1000 emitOperandHelper(rax, dst);
1001 emitInt(imm32);
1002 }
1003
1004 public final void movl(CiAddress dst, CiRegister src) {
1005 prefix(dst, src);
1006 emitByte(0x89);
1007 emitOperandHelper(src, dst);
1008 }
1009
1010 /**
1011 * New CPUs require use of movsd and movss to avoid partial register stall
1012 * when loading from memory. But for old Opteron use movlpd instead of movsd.
1013 * The selection is done in {@link AMD64MacroAssembler#movdbl(CiRegister, CiAddress)}
1014 * and {@link AMD64MacroAssembler#movflt(CiRegister, CiRegister)}.
1015 */
1016 public final void movlpd(CiRegister dst, CiAddress src) {
1017 assert dst.isFpu();
1018 emitByte(0x66);
1019 prefix(src, dst);
1020 emitByte(0x0F);
1021 emitByte(0x12);
1022 emitOperandHelper(dst, src);
1023 }
1024
1025 public final void movlpd(CiAddress dst, CiRegister src) {
1026 assert src.isFpu();
1027 emitByte(0x66);
1028 prefix(dst, src);
1029 emitByte(0x0F);
1030 emitByte(0x13);
1031 emitOperandHelper(src, dst);
1032 }
1033
1034 public final void movq(CiRegister dst, CiAddress src) {
1035 if (dst.isFpu()) {
1036 emitByte(0xF3);
1037 prefixq(src, dst);
1038 emitByte(0x0F);
1039 emitByte(0x7E);
1040 emitOperandHelper(dst, src);
1041 } else {
1042 prefixq(src, dst);
1043 emitByte(0x8B);
1044 emitOperandHelper(dst, src);
1045 }
1046 }
1047
1048 public final void movq(CiRegister dst, CiRegister src) {
1049 int encode = prefixqAndEncode(dst.encoding, src.encoding);
1050 emitByte(0x8B);
1051 emitByte(0xC0 | encode);
1052 }
1053
1054 public final void movq(CiAddress dst, CiRegister src) {
1055 if (src.isFpu()) {
1056 emitByte(0x66);
1057 prefixq(dst, src);
1058 emitByte(0x0F);
1059 emitByte(0xD6);
1060 emitOperandHelper(src, dst);
1061 } else {
1062 prefixq(dst, src);
1063 emitByte(0x89);
1064 emitOperandHelper(src, dst);
1065 }
1066 }
1067
1068 public final void movsxb(CiRegister dst, CiAddress src) { // movsxb
1069 prefix(src, dst);
1070 emitByte(0x0F);
1071 emitByte(0xBE);
1072 emitOperandHelper(dst, src);
1073 }
1074
1075 public final void movsxb(CiRegister dst, CiRegister src) { // movsxb
1076 int encode = prefixAndEncode(dst.encoding, src.encoding, true);
1077 emitByte(0x0F);
1078 emitByte(0xBE);
1079 emitByte(0xC0 | encode);
1080 }
1081
1082 public final void movsd(CiRegister dst, CiRegister src) {
1083 assert dst.isFpu();
1084 assert src.isFpu();
1085 emitByte(0xF2);
1086 int encode = prefixAndEncode(dst.encoding, src.encoding);
1087 emitByte(0x0F);
1088 emitByte(0x10);
1089 emitByte(0xC0 | encode);
1090 }
1091
1092 public final void movsd(CiRegister dst, CiAddress src) {
1093 assert dst.isFpu();
1094 emitByte(0xF2);
1095 prefix(src, dst);
1096 emitByte(0x0F);
1097 emitByte(0x10);
1098 emitOperandHelper(dst, src);
1099 }
1100
1101 public final void movsd(CiAddress dst, CiRegister src) {
1102 assert src.isFpu();
1103 emitByte(0xF2);
1104 prefix(dst, src);
1105 emitByte(0x0F);
1106 emitByte(0x11);
1107 emitOperandHelper(src, dst);
1108 }
1109
1110 public final void movss(CiRegister dst, CiRegister src) {
1111 assert dst.isFpu();
1112 assert src.isFpu();
1113 emitByte(0xF3);
1114 int encode = prefixAndEncode(dst.encoding, src.encoding);
1115 emitByte(0x0F);
1116 emitByte(0x10);
1117 emitByte(0xC0 | encode);
1118 }
1119
1120 public final void movss(CiRegister dst, CiAddress src) {
1121 assert dst.isFpu();
1122 emitByte(0xF3);
1123 prefix(src, dst);
1124 emitByte(0x0F);
1125 emitByte(0x10);
1126 emitOperandHelper(dst, src);
1127 }
1128
1129 public final void movss(CiAddress dst, CiRegister src) {
1130 assert src.isFpu();
1131 emitByte(0xF3);
1132 prefix(dst, src);
1133 emitByte(0x0F);
1134 emitByte(0x11);
1135 emitOperandHelper(src, dst);
1136 }
1137
1138 public final void movswl(CiRegister dst, CiAddress src) {
1139 prefix(src, dst);
1140 emitByte(0x0F);
1141 emitByte(0xBF);
1142 emitOperandHelper(dst, src);
1143 }
1144
1145 public final void movsxw(CiRegister dst, CiRegister src) { // movsxw
1146 int encode = prefixAndEncode(dst.encoding, src.encoding);
1147 emitByte(0x0F);
1148 emitByte(0xBF);
1149 emitByte(0xC0 | encode);
1150 }
1151
1152 public final void movsxw(CiRegister dst, CiAddress src) { // movsxw
1153 prefix(src, dst);
1154 emitByte(0x0F);
1155 emitByte(0xBF);
1156 emitOperandHelper(dst, src);
1157 }
1158
1159 public final void movzxd(CiRegister dst, CiRegister src) { // movzxd
1160 int encode = prefixAndEncode(dst.encoding, src.encoding);
1161 emitByte(0x63);
1162 emitByte(0xC0 | encode);
1163 }
1164
1165 public final void movzxd(CiRegister dst, CiAddress src) { // movzxd
1166 prefix(src, dst);
1167 emitByte(0x63);
1168 emitOperandHelper(dst, src);
1169 }
1170
1171 public final void movw(CiAddress dst, int imm16) {
1172 emitByte(0x66); // switch to 16-bit mode
1173 prefix(dst);
1174 emitByte(0xC7);
1175 emitOperandHelper(rax, dst);
1176 emitShort(imm16);
1177 }
1178
1179 public final void movw(CiRegister dst, CiAddress src) {
1180 emitByte(0x66);
1181 prefix(src, dst);
1182 emitByte(0x8B);
1183 emitOperandHelper(dst, src);
1184 }
1185
1186 public final void movw(CiAddress dst, CiRegister src) {
1187 emitByte(0x66);
1188 prefix(dst, src);
1189 emitByte(0x89);
1190 emitOperandHelper(src, dst);
1191 }
1192
1193 public final void movzxb(CiRegister dst, CiAddress src) { // movzxb
1194 prefix(src, dst);
1195 emitByte(0x0F);
1196 emitByte(0xB6);
1197 emitOperandHelper(dst, src);
1198 }
1199
1200 public final void movzxb(CiRegister dst, CiRegister src) { // movzxb
1201 int encode = prefixAndEncode(dst.encoding, src.encoding, true);
1202 emitByte(0x0F);
1203 emitByte(0xB6);
1204 emitByte(0xC0 | encode);
1205 }
1206
1207 public final void movzxl(CiRegister dst, CiAddress src) { // movzxw
1208 prefix(src, dst);
1209 emitByte(0x0F);
1210 emitByte(0xB7);
1211 emitOperandHelper(dst, src);
1212 }
1213
1214 public final void movzxl(CiRegister dst, CiRegister src) { // movzxw
1215 int encode = prefixAndEncode(dst.encoding, src.encoding);
1216 emitByte(0x0F);
1217 emitByte(0xB7);
1218 emitByte(0xC0 | encode);
1219 }
1220
1221 public final void mull(CiAddress src) {
1222 prefix(src);
1223 emitByte(0xF7);
1224 emitOperandHelper(rsp, src);
1225 }
1226
1227 public final void mulsd(CiRegister dst, CiAddress src) {
1228 assert dst.isFpu();
1229 emitByte(0xF2);
1230 prefix(src, dst);
1231 emitByte(0x0F);
1232 emitByte(0x59);
1233 emitOperandHelper(dst, src);
1234 }
1235
1236 public final void mulsd(CiRegister dst, CiRegister src) {
1237 assert dst.isFpu();
1238 assert src.isFpu();
1239
1240 emitByte(0xF2);
1241 int encode = prefixAndEncode(dst.encoding, src.encoding);
1242 emitByte(0x0F);
1243 emitByte(0x59);
1244 emitByte(0xC0 | encode);
1245 }
1246
1247 public final void mulss(CiRegister dst, CiAddress src) {
1248 assert dst.isFpu();
1249
1250 emitByte(0xF3);
1251 prefix(src, dst);
1252 emitByte(0x0F);
1253 emitByte(0x59);
1254 emitOperandHelper(dst, src);
1255 }
1256
1257 public final void mulss(CiRegister dst, CiRegister src) {
1258 assert dst.isFpu();
1259 assert src.isFpu();
1260 emitByte(0xF3);
1261 int encode = prefixAndEncode(dst.encoding, src.encoding);
1262 emitByte(0x0F);
1263 emitByte(0x59);
1264 emitByte(0xC0 | encode);
1265 }
1266
1267 public final void negl(CiRegister dst) {
1268 int encode = prefixAndEncode(dst.encoding);
1269 emitByte(0xF7);
1270 emitByte(0xD8 | encode);
1271 }
1272
1273 public final void ensureUniquePC() {
1274 nop();
1275 }
1276
1277 public final void nop() {
1278 nop(1);
1279 }
1280
1281 public void nop(int i) {
1282 if (AsmOptions.UseNormalNop) {
1283 assert i > 0 : " ";
1284 // The fancy nops aren't currently recognized by debuggers making it a
1285 // pain to disassemble code while debugging. If assert are on clearly
1286 // speed is not an issue so simply use the single byte traditional nop
1287 // to do alignment.
1288
1289 for (; i > 0; i--) {
1290 emitByte(0x90);
1291 }
1292 return;
1293 }
1294
1295 if (AsmOptions.UseAddressNop) {
1296 //
1297 // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD.
1298 // 1: 0x90
1299 // 2: 0x66 0x90
1300 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
1301 // 4: 0x0F 0x1F 0x40 0x00
1302 // 5: 0x0F 0x1F 0x44 0x00 0x00
1303 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
1304 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1305 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1306 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1307 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1308 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1309
1310 // The rest coding is AMD specific - use consecutive Address nops
1311
1312 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
1313 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
1314 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1315 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
1316 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
1317 // Size prefixes (0x66) are added for larger sizes
1318
1319 while (i >= 22) {
1320 i -= 11;
1321 emitByte(0x66); // size prefix
1322 emitByte(0x66); // size prefix
1323 emitByte(0x66); // size prefix
1324 addrNop8();
1325 }
1326 // Generate first nop for size between 21-12
1327 switch (i) {
1328 case 21:
1329 i -= 1;
1330 emitByte(0x66); // size prefix
1331 // fall through
1332 case 20:
1333 // fall through
1334 case 19:
1335 i -= 1;
1336 emitByte(0x66); // size prefix
1337 // fall through
1338 case 18:
1339 // fall through
1340 case 17:
1341 i -= 1;
1342 emitByte(0x66); // size prefix
1343 // fall through
1344 case 16:
1345 // fall through
1346 case 15:
1347 i -= 8;
1348 addrNop8();
1349 break;
1350 case 14:
1351 case 13:
1352 i -= 7;
1353 addrNop7();
1354 break;
1355 case 12:
1356 i -= 6;
1357 emitByte(0x66); // size prefix
1358 addrNop5();
1359 break;
1360 default:
1361 assert i < 12;
1362 }
1363
1364 // Generate second nop for size between 11-1
1365 switch (i) {
1366 case 11:
1367 emitByte(0x66); // size prefix
1368 emitByte(0x66); // size prefix
1369 emitByte(0x66); // size prefix
1370 addrNop8();
1371 break;
1372 case 10:
1373 emitByte(0x66); // size prefix
1374 emitByte(0x66); // size prefix
1375 addrNop8();
1376 break;
1377 case 9:
1378 emitByte(0x66); // size prefix
1379 addrNop8();
1380 break;
1381 case 8:
1382 addrNop8();
1383 break;
1384 case 7:
1385 addrNop7();
1386 break;
1387 case 6:
1388 emitByte(0x66); // size prefix
1389 addrNop5();
1390 break;
1391 case 5:
1392 addrNop5();
1393 break;
1394 case 4:
1395 addrNop4();
1396 break;
1397 case 3:
1398 // Don't use "0x0F 0x1F 0x00" - need patching safe padding
1399 emitByte(0x66); // size prefix
1400 emitByte(0x66); // size prefix
1401 emitByte(0x90); // nop
1402 break;
1403 case 2:
1404 emitByte(0x66); // size prefix
1405 emitByte(0x90); // nop
1406 break;
1407 case 1:
1408 emitByte(0x90); // nop
1409 break;
1410 default:
1411 assert i == 0;
1412 }
1413 return;
1414 }
1415
1416 // Using nops with size prefixes "0x66 0x90".
1417 // From AMD Optimization Guide:
1418 // 1: 0x90
1419 // 2: 0x66 0x90
1420 // 3: 0x66 0x66 0x90
1421 // 4: 0x66 0x66 0x66 0x90
1422 // 5: 0x66 0x66 0x90 0x66 0x90
1423 // 6: 0x66 0x66 0x90 0x66 0x66 0x90
1424 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
1425 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
1426 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
1427 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
1428 //
1429 while (i > 12) {
1430 i -= 4;
1431 emitByte(0x66); // size prefix
1432 emitByte(0x66);
1433 emitByte(0x66);
1434 emitByte(0x90); // nop
1435 }
1436 // 1 - 12 nops
1437 if (i > 8) {
1438 if (i > 9) {
1439 i -= 1;
1440 emitByte(0x66);
1441 }
1442 i -= 3;
1443 emitByte(0x66);
1444 emitByte(0x66);
1445 emitByte(0x90);
1446 }
1447 // 1 - 8 nops
1448 if (i > 4) {
1449 if (i > 6) {
1450 i -= 1;
1451 emitByte(0x66);
1452 }
1453 i -= 3;
1454 emitByte(0x66);
1455 emitByte(0x66);
1456 emitByte(0x90);
1457 }
1458 switch (i) {
1459 case 4:
1460 emitByte(0x66);
1461 emitByte(0x66);
1462 emitByte(0x66);
1463 emitByte(0x90);
1464 break;
1465 case 3:
1466 emitByte(0x66);
1467 emitByte(0x66);
1468 emitByte(0x90);
1469 break;
1470 case 2:
1471 emitByte(0x66);
1472 emitByte(0x90);
1473 break;
1474 case 1:
1475 emitByte(0x90);
1476 break;
1477 default:
1478 assert i == 0;
1479 }
1480 }
1481
1482 public final void notl(CiRegister dst) {
1483 int encode = prefixAndEncode(dst.encoding);
1484 emitByte(0xF7);
1485 emitByte(0xD0 | encode);
1486 }
1487
1488 public final void orl(CiAddress dst, int imm32) {
1489 prefix(dst);
1490 emitByte(0x81);
1491 emitOperandHelper(rcx, dst);
1492 emitInt(imm32);
1493 }
1494
1495 public final void orl(CiRegister dst, int imm32) {
1496 prefix(dst);
1497 emitArith(0x81, 0xC8, dst, imm32);
1498 }
1499
1500 public final void orl(CiRegister dst, CiAddress src) {
1501 prefix(src, dst);
1502 emitByte(0x0B);
1503 emitOperandHelper(dst, src);
1504 }
1505
1506 public final void orl(CiRegister dst, CiRegister src) {
1507 prefixAndEncode(dst.encoding, src.encoding);
1508 emitArith(0x0B, 0xC0, dst, src);
1509 }
1510
1511 // generic
1512 public final void pop(CiRegister dst) {
1513 int encode = prefixAndEncode(dst.encoding);
1514 emitByte(0x58 | encode);
1515 }
1516
1517 public final void popl(CiAddress dst) {
1518 // NOTE: this will adjust stack by 8byte on 64bits
1519 prefix(dst);
1520 emitByte(0x8F);
1521 emitOperandHelper(rax, dst);
1522 }
1523
1524 public final void prefetchPrefix(CiAddress src) {
1525 prefix(src);
1526 emitByte(0x0F);
1527 }
1528
1529 public final void prefetchnta(CiAddress src) {
1530 prefetchPrefix(src);
1531 emitByte(0x18);
1532 emitOperandHelper(rax, src); // 0, src
1533 }
1534
1535 public final void prefetchr(CiAddress src) {
1536 prefetchPrefix(src);
1537 emitByte(0x0D);
1538 emitOperandHelper(rax, src); // 0, src
1539 }
1540
1541 public final void prefetcht0(CiAddress src) {
1542 prefetchPrefix(src);
1543 emitByte(0x18);
1544 emitOperandHelper(rcx, src); // 1, src
1545
1546 }
1547
1548 public final void prefetcht1(CiAddress src) {
1549 prefetchPrefix(src);
1550 emitByte(0x18);
1551 emitOperandHelper(rdx, src); // 2, src
1552 }
1553
1554 public final void prefetcht2(CiAddress src) {
1555 prefetchPrefix(src);
1556 emitByte(0x18);
1557 emitOperandHelper(rbx, src); // 3, src
1558 }
1559
1560 public final void prefetchw(CiAddress src) {
1561 prefetchPrefix(src);
1562 emitByte(0x0D);
1563 emitOperandHelper(rcx, src); // 1, src
1564 }
1565
1566 public final void pshufd(CiRegister dst, CiRegister src, int mode) {
1567 assert dst.isFpu();
1568 assert src.isFpu();
1569 assert isUByte(mode) : "invalid value";
1570
1571 emitByte(0x66);
1572 int encode = prefixAndEncode(dst.encoding, src.encoding);
1573 emitByte(0x0F);
1574 emitByte(0x70);
1575 emitByte(0xC0 | encode);
1576 emitByte(mode & 0xFF);
1577 }
1578
1579 public final void pshufd(CiRegister dst, CiAddress src, int mode) {
1580 assert dst.isFpu();
1581 assert isUByte(mode) : "invalid value";
1582
1583 emitByte(0x66);
1584 prefix(src, dst);
1585 emitByte(0x0F);
1586 emitByte(0x70);
1587 emitOperandHelper(dst, src);
1588 emitByte(mode & 0xFF);
1589
1590 }
1591
1592 public final void pshuflw(CiRegister dst, CiRegister src, int mode) {
1593 assert dst.isFpu();
1594 assert src.isFpu();
1595 assert isUByte(mode) : "invalid value";
1596
1597 emitByte(0xF2);
1598 int encode = prefixAndEncode(dst.encoding, src.encoding);
1599 emitByte(0x0F);
1600 emitByte(0x70);
1601 emitByte(0xC0 | encode);
1602 emitByte(mode & 0xFF);
1603 }
1604
1605 public final void pshuflw(CiRegister dst, CiAddress src, int mode) {
1606 assert dst.isFpu();
1607 assert isUByte(mode) : "invalid value";
1608
1609 emitByte(0xF2);
1610 prefix(src, dst); // QQ new
1611 emitByte(0x0F);
1612 emitByte(0x70);
1613 emitOperandHelper(dst, src);
1614 emitByte(mode & 0xFF);
1615 }
1616
1617 public final void psrlq(CiRegister dst, int shift) {
1618 assert dst.isFpu();
1619 // HMM Table D-1 says sse2 or mmx
1620
1621 int encode = prefixqAndEncode(xmm2.encoding, dst.encoding);
1622 emitByte(0x66);
1623 emitByte(0x0F);
1624 emitByte(0x73);
1625 emitByte(0xC0 | encode);
1626 emitByte(shift);
1627 }
1628
1629 public final void punpcklbw(CiRegister dst, CiRegister src) {
1630 assert dst.isFpu();
1631 assert src.isFpu();
1632 emitByte(0x66);
1633 int encode = prefixAndEncode(dst.encoding, src.encoding);
1634 emitByte(0x0F);
1635 emitByte(0x60);
1636 emitByte(0xC0 | encode);
1637 }
1638
1639 public final void push(int imm32) {
1640 // in 64bits we push 64bits onto the stack but only
1641 // take a 32bit immediate
1642 emitByte(0x68);
1643 emitInt(imm32);
1644 }
1645
1646 public final void push(CiRegister src) {
1647 int encode = prefixAndEncode(src.encoding);
1648 emitByte(0x50 | encode);
1649 }
1650
1651 public final void pushf() {
1652 emitByte(0x9C);
1653 }
1654
1655 public final void pushl(CiAddress src) {
1656 // Note this will push 64bit on 64bit
1657 prefix(src);
1658 emitByte(0xFF);
1659 emitOperandHelper(rsi, src);
1660 }
1661
1662 public final void pxor(CiRegister dst, CiAddress src) {
1663 assert dst.isFpu();
1664
1665 emitByte(0x66);
1666 prefix(src, dst);
1667 emitByte(0x0F);
1668 emitByte(0xEF);
1669 emitOperandHelper(dst, src);
1670 }
1671
1672 public final void pxor(CiRegister dst, CiRegister src) {
1673 assert dst.isFpu();
1674 assert src.isFpu();
1675
1676 emitByte(0x66);
1677 int encode = prefixAndEncode(dst.encoding, src.encoding);
1678 emitByte(0x0F);
1679 emitByte(0xEF);
1680 emitByte(0xC0 | encode);
1681
1682 }
1683
1684 public final void rcll(CiRegister dst, int imm8) {
1685 assert isShiftCount(imm8) : "illegal shift count";
1686 int encode = prefixAndEncode(dst.encoding);
1687 if (imm8 == 1) {
1688 emitByte(0xD1);
1689 emitByte(0xD0 | encode);
1690 } else {
1691 emitByte(0xC1);
1692 emitByte(0xD0 | encode);
1693 emitByte(imm8);
1694 }
1695 }
1696
1697 public final void pause() {
1698 emitByte(0xF3);
1699 emitByte(0x90);
1700 }
1701
1702 // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx heap words.
1703 public final void repeatMoveWords() {
1704 emitByte(0xF3);
1705 emitByte(Prefix.REXW);
1706 emitByte(0xA5);
1707 }
1708
1709 // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx bytes.
1710 public final void repeatMoveBytes() {
1711 emitByte(0xF3);
1712 emitByte(Prefix.REXW);
1713 emitByte(0xA4);
1714 }
1715
1716 // sets X86.rcx pointer sized words with X86.rax, value at [edi]
1717 // generic
1718 public final void repSet() { // repSet
1719 emitByte(0xF3);
1720 // STOSQ
1721 emitByte(Prefix.REXW);
1722 emitByte(0xAB);
1723 }
1724
1725 // scans X86.rcx pointer sized words at [edi] for occurance of X86.rax,
1726 // generic
1727 public final void repneScan() { // repneScan
1728 emitByte(0xF2);
1729 // SCASQ
1730 emitByte(Prefix.REXW);
1731 emitByte(0xAF);
1732 }
1733
1734 // scans X86.rcx 4 byte words at [edi] for occurance of X86.rax,
1735 // generic
1736 public final void repneScanl() { // repneScan
1737 emitByte(0xF2);
1738 // SCASL
1739 emitByte(0xAF);
1740 }
1741
1742 public final void ret(int imm16) {
1743 if (imm16 == 0) {
1744 emitByte(0xC3);
1745 } else {
1746 emitByte(0xC2);
1747 emitShort(imm16);
1748 }
1749 }
1750
1751 public final void sarl(CiRegister dst, int imm8) {
1752 int encode = prefixAndEncode(dst.encoding);
1753 assert isShiftCount(imm8) : "illegal shift count";
1754 if (imm8 == 1) {
1755 emitByte(0xD1);
1756 emitByte(0xF8 | encode);
1757 } else {
1758 emitByte(0xC1);
1759 emitByte(0xF8 | encode);
1760 emitByte(imm8);
1761 }
1762 }
1763
1764 public final void sarl(CiRegister dst) {
1765 int encode = prefixAndEncode(dst.encoding);
1766 emitByte(0xD3);
1767 emitByte(0xF8 | encode);
1768 }
1769
1770 public final void sbbl(CiAddress dst, int imm32) {
1771 prefix(dst);
1772 emitArithOperand(0x81, rbx, dst, imm32);
1773 }
1774
1775 public final void sbbl(CiRegister dst, int imm32) {
1776 prefix(dst);
1777 emitArith(0x81, 0xD8, dst, imm32);
1778 }
1779
1780 public final void sbbl(CiRegister dst, CiAddress src) {
1781 prefix(src, dst);
1782 emitByte(0x1B);
1783 emitOperandHelper(dst, src);
1784 }
1785
1786 public final void sbbl(CiRegister dst, CiRegister src) {
1787 prefixAndEncode(dst.encoding, src.encoding);
1788 emitArith(0x1B, 0xC0, dst, src);
1789 }
1790
1791 public final void setb(ConditionFlag cc, CiRegister dst) {
1792 assert 0 <= cc.value && cc.value < 16 : "illegal cc";
1793 int encode = prefixAndEncode(dst.encoding, true);
1794 emitByte(0x0F);
1795 emitByte(0x90 | cc.value);
1796 emitByte(0xC0 | encode);
1797 }
1798
1799 public final void shll(CiRegister dst, int imm8) {
1800 assert isShiftCount(imm8) : "illegal shift count";
1801 int encode = prefixAndEncode(dst.encoding);
1802 if (imm8 == 1) {
1803 emitByte(0xD1);
1804 emitByte(0xE0 | encode);
1805 } else {
1806 emitByte(0xC1);
1807 emitByte(0xE0 | encode);
1808 emitByte(imm8);
1809 }
1810 }
1811
1812 public final void shll(CiRegister dst) {
1813 int encode = prefixAndEncode(dst.encoding);
1814 emitByte(0xD3);
1815 emitByte(0xE0 | encode);
1816 }
1817
1818 public final void shrl(CiRegister dst, int imm8) {
1819 assert isShiftCount(imm8) : "illegal shift count";
1820 int encode = prefixAndEncode(dst.encoding);
1821 emitByte(0xC1);
1822 emitByte(0xE8 | encode);
1823 emitByte(imm8);
1824 }
1825
1826 public final void shrl(CiRegister dst) {
1827 int encode = prefixAndEncode(dst.encoding);
1828 emitByte(0xD3);
1829 emitByte(0xE8 | encode);
1830 }
1831
1832 // copies a single word from [esi] to [edi]
1833 public final void smovl() {
1834 emitByte(0xA5);
1835 }
1836
1837 public final void sqrtsd(CiRegister dst, CiRegister src) {
1838 assert dst.isFpu();
1839 assert src.isFpu();
1840 // HMM Table D-1 says sse2
1841 // assert is64 || target.supportsSSE();
1842 emitByte(0xF2);
1843 int encode = prefixAndEncode(dst.encoding, src.encoding);
1844 emitByte(0x0F);
1845 emitByte(0x51);
1846 emitByte(0xC0 | encode);
1847 }
1848
1849 public final void subl(CiAddress dst, int imm32) {
1850 prefix(dst);
1851 if (isByte(imm32)) {
1852 emitByte(0x83);
1853 emitOperandHelper(rbp, dst);
1854 emitByte(imm32 & 0xFF);
1855 } else {
1856 emitByte(0x81);
1857 emitOperandHelper(rbp, dst);
1858 emitInt(imm32);
1859 }
1860 }
1861
1862 public final void subl(CiRegister dst, int imm32) {
1863 prefix(dst);
1864 emitArith(0x81, 0xE8, dst, imm32);
1865 }
1866
1867 public final void subl(CiAddress dst, CiRegister src) {
1868 prefix(dst, src);
1869 emitByte(0x29);
1870 emitOperandHelper(src, dst);
1871 }
1872
1873 public final void subl(CiRegister dst, CiAddress src) {
1874 prefix(src, dst);
1875 emitByte(0x2B);
1876 emitOperandHelper(dst, src);
1877 }
1878
1879 public final void subl(CiRegister dst, CiRegister src) {
1880 prefixAndEncode(dst.encoding, src.encoding);
1881 emitArith(0x2B, 0xC0, dst, src);
1882 }
1883
1884 public final void subsd(CiRegister dst, CiRegister src) {
1885 assert dst.isFpu();
1886 assert src.isFpu();
1887 emitByte(0xF2);
1888 int encode = prefixAndEncode(dst.encoding, src.encoding);
1889 emitByte(0x0F);
1890 emitByte(0x5C);
1891 emitByte(0xC0 | encode);
1892 }
1893
1894 public final void subsd(CiRegister dst, CiAddress src) {
1895 assert dst.isFpu();
1896
1897 emitByte(0xF2);
1898 prefix(src, dst);
1899 emitByte(0x0F);
1900 emitByte(0x5C);
1901 emitOperandHelper(dst, src);
1902 }
1903
1904 public final void subss(CiRegister dst, CiRegister src) {
1905 assert dst.isFpu();
1906 assert src.isFpu();
1907 emitByte(0xF3);
1908 int encode = prefixAndEncode(dst.encoding, src.encoding);
1909 emitByte(0x0F);
1910 emitByte(0x5C);
1911 emitByte(0xC0 | encode);
1912 }
1913
1914 public final void subss(CiRegister dst, CiAddress src) {
1915 assert dst.isFpu();
1916
1917 emitByte(0xF3);
1918 prefix(src, dst);
1919 emitByte(0x0F);
1920 emitByte(0x5C);
1921 emitOperandHelper(dst, src);
1922 }
1923
1924 public final void testb(CiRegister dst, int imm8) {
1925 prefixAndEncode(dst.encoding, true);
1926 emitArithB(0xF6, 0xC0, dst, imm8);
1927 }
1928
1929 public final void testl(CiRegister dst, int imm32) {
1930 // not using emitArith because test
1931 // doesn't support sign-extension of
1932 // 8bit operands
1933 int encode = dst.encoding;
1934 if (encode == 0) {
1935 emitByte(0xA9);
1936 } else {
1937 encode = prefixAndEncode(encode);
1938 emitByte(0xF7);
1939 emitByte(0xC0 | encode);
1940 }
1941 emitInt(imm32);
1942 }
1943
1944 public final void testl(CiRegister dst, CiRegister src) {
1945 prefixAndEncode(dst.encoding, src.encoding);
1946 emitArith(0x85, 0xC0, dst, src);
1947 }
1948
1949 public final void testl(CiRegister dst, CiAddress src) {
1950 prefix(src, dst);
1951 emitByte(0x85);
1952 emitOperandHelper(dst, src);
1953 }
1954
1955 public final void ucomisd(CiRegister dst, CiAddress src) {
1956 assert dst.isFpu();
1957 emitByte(0x66);
1958 ucomiss(dst, src);
1959 }
1960
1961 public final void ucomisd(CiRegister dst, CiRegister src) {
1962 assert dst.isFpu();
1963 assert src.isFpu();
1964 emitByte(0x66);
1965 ucomiss(dst, src);
1966 }
1967
1968 public final void ucomiss(CiRegister dst, CiAddress src) {
1969 assert dst.isFpu();
1970
1971 prefix(src, dst);
1972 emitByte(0x0F);
1973 emitByte(0x2E);
1974 emitOperandHelper(dst, src);
1975 }
1976
1977 public final void ucomiss(CiRegister dst, CiRegister src) {
1978 assert dst.isFpu();
1979 assert src.isFpu();
1980 int encode = prefixAndEncode(dst.encoding, src.encoding);
1981 emitByte(0x0F);
1982 emitByte(0x2E);
1983 emitByte(0xC0 | encode);
1984 }
1985
1986 public final void xaddl(CiAddress dst, CiRegister src) {
1987 assert src.isFpu();
1988
1989 prefix(dst, src);
1990 emitByte(0x0F);
1991 emitByte(0xC1);
1992 emitOperandHelper(src, dst);
1993 }
1994
1995 public final void xchgl(CiRegister dst, CiAddress src) { // xchg
1996 prefix(src, dst);
1997 emitByte(0x87);
1998 emitOperandHelper(dst, src);
1999 }
2000
2001 public final void xchgl(CiRegister dst, CiRegister src) {
2002 int encode = prefixAndEncode(dst.encoding, src.encoding);
2003 emitByte(0x87);
2004 emitByte(0xc0 | encode);
2005 }
2006
2007 public final void xorl(CiRegister dst, int imm32) {
2008 prefix(dst);
2009 emitArith(0x81, 0xF0, dst, imm32);
2010 }
2011
2012 public final void xorl(CiRegister dst, CiAddress src) {
2013 prefix(src, dst);
2014 emitByte(0x33);
2015 emitOperandHelper(dst, src);
2016 }
2017
2018 public final void xorl(CiRegister dst, CiRegister src) {
2019 prefixAndEncode(dst.encoding, src.encoding);
2020 emitArith(0x33, 0xC0, dst, src);
2021 }
2022
2023 public final void andpd(CiRegister dst, CiRegister src) {
2024 emitByte(0x66);
2025 andps(dst, src);
2026 }
2027
2028 public final void andpd(CiRegister dst, CiAddress src) {
2029 emitByte(0x66);
2030 andps(dst, src);
2031 }
2032
2033 public final void andps(CiRegister dst, CiRegister src) {
2034 assert dst.isFpu() && src.isFpu();
2035 int encode = prefixAndEncode(dst.encoding, src.encoding);
2036 emitByte(0x0F);
2037 emitByte(0x54);
2038 emitByte(0xC0 | encode);
2039 }
2040
2041 public final void andps(CiRegister dst, CiAddress src) {
2042 assert dst.isFpu();
2043 prefix(src, dst);
2044 emitByte(0x0F);
2045 emitByte(0x54);
2046 emitOperandHelper(dst, src);
2047 }
2048
2049 public final void orpd(CiRegister dst, CiRegister src) {
2050 emitByte(0x66);
2051 orps(dst, src);
2052 }
2053
2054 public final void orpd(CiRegister dst, CiAddress src) {
2055 emitByte(0x66);
2056 orps(dst, src);
2057 }
2058
2059 public final void orps(CiRegister dst, CiRegister src) {
2060 assert dst.isFpu() && src.isFpu();
2061 int encode = prefixAndEncode(dst.encoding, src.encoding);
2062 emitByte(0x0F);
2063 emitByte(0x56);
2064 emitByte(0xC0 | encode);
2065 }
2066
2067 public final void orps(CiRegister dst, CiAddress src) {
2068 assert dst.isFpu();
2069 prefix(src, dst);
2070 emitByte(0x0F);
2071 emitByte(0x56);
2072 emitOperandHelper(dst, src);
2073 }
2074
2075 public final void xorpd(CiRegister dst, CiRegister src) {
2076 emitByte(0x66);
2077 xorps(dst, src);
2078 }
2079
2080 public final void xorpd(CiRegister dst, CiAddress src) {
2081 emitByte(0x66);
2082 xorps(dst, src);
2083 }
2084
2085 public final void xorps(CiRegister dst, CiRegister src) {
2086 assert dst.isFpu() && src.isFpu();
2087 int encode = prefixAndEncode(dst.encoding, src.encoding);
2088 emitByte(0x0F);
2089 emitByte(0x57);
2090 emitByte(0xC0 | encode);
2091 }
2092
2093 public final void xorps(CiRegister dst, CiAddress src) {
2094 assert dst.isFpu();
2095 prefix(src, dst);
2096 emitByte(0x0F);
2097 emitByte(0x57);
2098 emitOperandHelper(dst, src);
2099 }
2100
2101 // 32bit only pieces of the assembler
2102
2103 public final void decl(CiRegister dst) {
2104 // Don't use it directly. Use Macrodecrementl() instead.
2105 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)
2106 int encode = prefixAndEncode(dst.encoding);
2107 emitByte(0xFF);
2108 emitByte(0xC8 | encode);
2109 }
2110
2111 public final void incl(CiRegister dst) {
2112 // Don't use it directly. Use Macroincrementl() instead.
2113 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2114 int encode = prefixAndEncode(dst.encoding);
2115 emitByte(0xFF);
2116 emitByte(0xC0 | encode);
2117 }
2118
2119 int prefixAndEncode(int regEnc) {
2120 return prefixAndEncode(regEnc, false);
2121 }
2122
2123 int prefixAndEncode(int regEnc, boolean byteinst) {
2124 if (regEnc >= 8) {
2125 emitByte(Prefix.REXB);
2126 regEnc -= 8;
2127 } else if (byteinst && regEnc >= 4) {
2128 emitByte(Prefix.REX);
2129 }
2130 return regEnc;
2131 }
2132
2133 int prefixqAndEncode(int regEnc) {
2134 if (regEnc < 8) {
2135 emitByte(Prefix.REXW);
2136 } else {
2137 emitByte(Prefix.REXWB);
2138 regEnc -= 8;
2139 }
2140 return regEnc;
2141 }
2142
2143 int prefixAndEncode(int dstEnc, int srcEnc) {
2144 return prefixAndEncode(dstEnc, srcEnc, false);
2145 }
2146
2147 int prefixAndEncode(int dstEnc, int srcEnc, boolean byteinst) {
2148 if (dstEnc < 8) {
2149 if (srcEnc >= 8) {
2150 emitByte(Prefix.REXB);
2151 srcEnc -= 8;
2152 } else if (byteinst && srcEnc >= 4) {
2153 emitByte(Prefix.REX);
2154 }
2155 } else {
2156 if (srcEnc < 8) {
2157 emitByte(Prefix.REXR);
2158 } else {
2159 emitByte(Prefix.REXRB);
2160 srcEnc -= 8;
2161 }
2162 dstEnc -= 8;
2163 }
2164 return dstEnc << 3 | srcEnc;
2165 }
2166
2167 /**
2168 * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand prefix. If the given
2169 * operands exceed 3 bits, the 4th bit is encoded in the prefix.
2170 *
2171 * @param regEnc the encoding of the register part of the ModRM-Byte
2172 * @param rmEnc the encoding of the r/m part of the ModRM-Byte
2173 * @return the lower 6 bits of the ModRM-Byte that should be emitted
2174 */
2175 private int prefixqAndEncode(int regEnc, int rmEnc) {
2176 if (regEnc < 8) {
2177 if (rmEnc < 8) {
2178 emitByte(Prefix.REXW);
2179 } else {
2180 emitByte(Prefix.REXWB);
2181 rmEnc -= 8;
2182 }
2183 } else {
2184 if (rmEnc < 8) {
2185 emitByte(Prefix.REXWR);
2186 } else {
2187 emitByte(Prefix.REXWRB);
2188 rmEnc -= 8;
2189 }
2190 regEnc -= 8;
2191 }
2192 return regEnc << 3 | rmEnc;
2193 }
2194
2195 private void prefix(CiRegister reg) {
2196 if (reg.encoding >= 8) {
2197 emitByte(Prefix.REXB);
2198 }
2199 }
2200
2201 private void prefix(CiAddress adr) {
2202 if (adr.base().encoding >= MinEncodingNeedsRex) {
2203 if (adr.index().encoding >= MinEncodingNeedsRex) {
2204 emitByte(Prefix.REXXB);
2205 } else {
2206 emitByte(Prefix.REXB);
2207 }
2208 } else {
2209 if (adr.index().encoding >= MinEncodingNeedsRex) {
2210 emitByte(Prefix.REXX);
2211 }
2212 }
2213 }
2214
2215 private void prefixq(CiAddress adr) {
2216 if (adr.base().encoding >= MinEncodingNeedsRex) {
2217 if (adr.index().encoding >= MinEncodingNeedsRex) {
2218 emitByte(Prefix.REXWXB);
2219 } else {
2220 emitByte(Prefix.REXWB);
2221 }
2222 } else {
2223 if (adr.index().encoding >= MinEncodingNeedsRex) {
2224 emitByte(Prefix.REXWX);
2225 } else {
2226 emitByte(Prefix.REXW);
2227 }
2228 }
2229 }
2230
2231 private void prefix(CiAddress adr, CiRegister reg) {
2232 if (reg.encoding < 8) {
2233 if (adr.base().encoding >= MinEncodingNeedsRex) {
2234 if (adr.index().encoding >= MinEncodingNeedsRex) {
2235 emitByte(Prefix.REXXB);
2236 } else {
2237 emitByte(Prefix.REXB);
2238 }
2239 } else {
2240 if (adr.index().encoding >= MinEncodingNeedsRex) {
2241 emitByte(Prefix.REXX);
2242 } else if (reg.encoding >= 4) {
2243 emitByte(Prefix.REX);
2244 }
2245 }
2246 } else {
2247 if (adr.base().encoding >= MinEncodingNeedsRex) {
2248 if (adr.index().encoding >= MinEncodingNeedsRex) {
2249 emitByte(Prefix.REXRXB);
2250 } else {
2251 emitByte(Prefix.REXRB);
2252 }
2253 } else {
2254 if (adr.index().encoding >= MinEncodingNeedsRex) {
2255 emitByte(Prefix.REXRX);
2256 } else {
2257 emitByte(Prefix.REXR);
2258 }
2259 }
2260 }
2261 }
2262
2263 private void prefixq(CiAddress adr, CiRegister src) {
2264 if (src.encoding < 8) {
2265 if (adr.base().encoding >= MinEncodingNeedsRex) {
2266 if (adr.index().encoding >= MinEncodingNeedsRex) {
2267 emitByte(Prefix.REXWXB);
2268 } else {
2269 emitByte(Prefix.REXWB);
2270 }
2271 } else {
2272 if (adr.index().encoding >= MinEncodingNeedsRex) {
2273 emitByte(Prefix.REXWX);
2274 } else {
2275 emitByte(Prefix.REXW);
2276 }
2277 }
2278 } else {
2279 if (adr.base().encoding >= MinEncodingNeedsRex) {
2280 if (adr.index().encoding >= MinEncodingNeedsRex) {
2281 emitByte(Prefix.REXWRXB);
2282 } else {
2283 emitByte(Prefix.REXWRB);
2284 }
2285 } else {
2286 if (adr.index().encoding >= MinEncodingNeedsRex) {
2287 emitByte(Prefix.REXWRX);
2288 } else {
2289 emitByte(Prefix.REXWR);
2290 }
2291 }
2292 }
2293 }
2294
2295 public final void addq(CiAddress dst, int imm32) {
2296 prefixq(dst);
2297 emitArithOperand(0x81, rax, dst, imm32);
2298 }
2299
2300 public final void addq(CiAddress dst, CiRegister src) {
2301 prefixq(dst, src);
2302 emitByte(0x01);
2303 emitOperandHelper(src, dst);
2304 }
2305
2306 public final void addq(CiRegister dst, int imm32) {
2307 prefixqAndEncode(dst.encoding);
2308 emitArith(0x81, 0xC0, dst, imm32);
2309 }
2310
2311 public final void addq(CiRegister dst, CiAddress src) {
2312 prefixq(src, dst);
2313 emitByte(0x03);
2314 emitOperandHelper(dst, src);
2315 }
2316
2317 public final void addq(CiRegister dst, CiRegister src) {
2318 prefixqAndEncode(dst.encoding, src.encoding);
2319 emitArith(0x03, 0xC0, dst, src);
2320 }
2321
2322 public final void andq(CiRegister dst, int imm32) {
2323 prefixqAndEncode(dst.encoding);
2324 emitArith(0x81, 0xE0, dst, imm32);
2325 }
2326
2327 public final void andq(CiRegister dst, CiAddress src) {
2328 prefixq(src, dst);
2329 emitByte(0x23);
2330 emitOperandHelper(dst, src);
2331 }
2332
2333 public final void andq(CiRegister dst, CiRegister src) {
2334 prefixqAndEncode(dst.encoding, src.encoding);
2335 emitArith(0x23, 0xC0, dst, src);
2336 }
2337
2338 public final void bswapq(CiRegister reg) {
2339 int encode = prefixqAndEncode(reg.encoding);
2340 emitByte(0x0F);
2341 emitByte(0xC8 | encode);
2342 }
2343
2344 public final void cdqq() {
2345 emitByte(Prefix.REXW);
2346 emitByte(0x99);
2347 }
2348
2349 public final void cmovq(ConditionFlag cc, CiRegister dst, CiRegister src) {
2350 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2351 emitByte(0x0F);
2352 emitByte(0x40 | cc.value);
2353 emitByte(0xC0 | encode);
2354 }
2355
2356 public final void cmovq(ConditionFlag cc, CiRegister dst, CiAddress src) {
2357 prefixq(src, dst);
2358 emitByte(0x0F);
2359 emitByte(0x40 | cc.value);
2360 emitOperandHelper(dst, src);
2361 }
2362
2363 public final void cmpq(CiAddress dst, int imm32) {
2364 prefixq(dst);
2365 emitByte(0x81);
2366 emitOperandHelper(rdi, dst);
2367 emitInt(imm32);
2368 }
2369
2370 public final void cmpq(CiRegister dst, int imm32) {
2371 prefixqAndEncode(dst.encoding);
2372 emitArith(0x81, 0xF8, dst, imm32);
2373 }
2374
2375 public final void cmpq(CiAddress dst, CiRegister src) {
2376 prefixq(dst, src);
2377 emitByte(0x3B);
2378 emitOperandHelper(src, dst);
2379 }
2380
2381 public final void cmpq(CiRegister dst, CiRegister src) {
2382 prefixqAndEncode(dst.encoding, src.encoding);
2383 emitArith(0x3B, 0xC0, dst, src);
2384 }
2385
2386 public final void cmpq(CiRegister dst, CiAddress src) {
2387 prefixq(src, dst);
2388 emitByte(0x3B);
2389 emitOperandHelper(dst, src);
2390 }
2391
2392 public final void cmpxchgq(CiRegister reg, CiAddress adr) {
2393 prefixq(adr, reg);
2394 emitByte(0x0F);
2395 emitByte(0xB1);
2396 emitOperandHelper(reg, adr);
2397 }
2398
2399 public final void cvtsi2sdq(CiRegister dst, CiRegister src) {
2400 assert dst.isFpu();
2401 emitByte(0xF2);
2402 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2403 emitByte(0x0F);
2404 emitByte(0x2A);
2405 emitByte(0xC0 | encode);
2406 }
2407
2408 public final void cvtsi2ssq(CiRegister dst, CiRegister src) {
2409 assert dst.isFpu();
2410 emitByte(0xF3);
2411 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2412 emitByte(0x0F);
2413 emitByte(0x2A);
2414 emitByte(0xC0 | encode);
2415 }
2416
2417 public final void cvttsd2siq(CiRegister dst, CiRegister src) {
2418 assert src.isFpu();
2419 emitByte(0xF2);
2420 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2421 emitByte(0x0F);
2422 emitByte(0x2C);
2423 emitByte(0xC0 | encode);
2424 }
2425
2426 public final void cvttss2siq(CiRegister dst, CiRegister src) {
2427 assert src.isFpu();
2428 emitByte(0xF3);
2429 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2430 emitByte(0x0F);
2431 emitByte(0x2C);
2432 emitByte(0xC0 | encode);
2433 }
2434
2435 public final void decq(CiRegister dst) {
2436 // Don't use it directly. Use Macrodecrementq() instead.
2437 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2438 int encode = prefixqAndEncode(dst.encoding);
2439 emitByte(0xFF);
2440 emitByte(0xC8 | encode);
2441 }
2442
2443 public final void decq(CiAddress dst) {
2444 // Don't use it directly. Use Macrodecrementq() instead.
2445 prefixq(dst);
2446 emitByte(0xFF);
2447 emitOperandHelper(rcx, dst);
2448 }
2449
2450 public final void divq(CiRegister src) {
2451 int encode = prefixqAndEncode(src.encoding);
2452 emitByte(0xF7);
2453 emitByte(0xF0 | encode);
2454 }
2455
2456 public final void idivq(CiRegister src) {
2457 int encode = prefixqAndEncode(src.encoding);
2458 emitByte(0xF7);
2459 emitByte(0xF8 | encode);
2460 }
2461
2462 public final void imulq(CiRegister dst, CiRegister src) {
2463 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2464 emitByte(0x0F);
2465 emitByte(0xAF);
2466 emitByte(0xC0 | encode);
2467 }
2468
2469 public final void imulq(CiRegister dst, CiRegister src, int value) {
2470 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2471 if (isByte(value)) {
2472 emitByte(0x6B);
2473 emitByte(0xC0 | encode);
2474 emitByte(value);
2475 } else {
2476 emitByte(0x69);
2477 emitByte(0xC0 | encode);
2478 emitInt(value);
2479 }
2480 }
2481
2482 public final void incq(CiRegister dst) {
2483 // Don't use it directly. Use Macroincrementq() instead.
2484 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)
2485 int encode = prefixqAndEncode(dst.encoding);
2486 emitByte(0xFF);
2487 emitByte(0xC0 | encode);
2488 }
2489
2490 public final void incq(CiAddress dst) {
2491 // Don't use it directly. Use Macroincrementq() instead.
2492 prefixq(dst);
2493 emitByte(0xFF);
2494 emitOperandHelper(rax, dst);
2495 }
2496
2497 public final void movq(CiRegister dst, long imm64) {
2498 int encode = prefixqAndEncode(dst.encoding);
2499 emitByte(0xB8 | encode);
2500 emitLong(imm64);
2501 }
2502
2503 public final void movdq(CiRegister dst, CiRegister src) {
2504
2505 // table D-1 says MMX/SSE2
2506 emitByte(0x66);
2507
2508 if (dst.isFpu()) {
2509 assert dst.isFpu();
2510 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2511 emitByte(0x0F);
2512 emitByte(0x6E);
2513 emitByte(0xC0 | encode);
2514 } else if (src.isFpu()) {
2515
2516 // swap src/dst to get correct prefix
2517 int encode = prefixqAndEncode(src.encoding, dst.encoding);
2518 emitByte(0x0F);
2519 emitByte(0x7E);
2520 emitByte(0xC0 | encode);
2521 } else {
2522 throw new InternalError("should not reach here");
2523 }
2524 }
2525
2526 public final void movsbq(CiRegister dst, CiAddress src) {
2527 prefixq(src, dst);
2528 emitByte(0x0F);
2529 emitByte(0xBE);
2530 emitOperandHelper(dst, src);
2531 }
2532
2533 public final void movsbq(CiRegister dst, CiRegister src) {
2534 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2535 emitByte(0x0F);
2536 emitByte(0xBE);
2537 emitByte(0xC0 | encode);
2538 }
2539
2540 public final void movslq(CiRegister dst, int imm32) {
2541 int encode = prefixqAndEncode(dst.encoding);
2542 emitByte(0xC7 | encode);
2543 emitInt(imm32);
2544 // dbx shows movslq(X86.rcx, 3) as movq $0x0000000049000000,(%X86.rbx)
2545 // and movslq(X86.r8, 3); as movl $0x0000000048000000,(%X86.rbx)
2546 // as a result we shouldn't use until tested at runtime...
2547 throw new InternalError("untested");
2548 }
2549
2550 public final void movslq(CiAddress dst, int imm32) {
2551 prefixq(dst);
2552 emitByte(0xC7);
2553 emitOperandHelper(rax, dst);
2554 emitInt(imm32);
2555 }
2556
2557 public final void movslq(CiRegister dst, CiAddress src) {
2558 prefixq(src, dst);
2559 emitByte(0x63);
2560 emitOperandHelper(dst, src);
2561 }
2562
2563 public final void movslq(CiRegister dst, CiRegister src) {
2564 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2565 emitByte(0x63);
2566 emitByte(0xC0 | encode);
2567 }
2568
2569 public final void movswq(CiRegister dst, CiAddress src) {
2570 prefixq(src, dst);
2571 emitByte(0x0F);
2572 emitByte(0xBF);
2573 emitOperandHelper(dst, src);
2574 }
2575
2576 public final void movswq(CiRegister dst, CiRegister src) {
2577 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2578 emitByte(0x0F);
2579 emitByte(0xBF);
2580 emitByte(0xC0 | encode);
2581 }
2582
2583 public final void movzbq(CiRegister dst, CiAddress src) {
2584 prefixq(src, dst);
2585 emitByte(0x0F);
2586 emitByte(0xB6);
2587 emitOperandHelper(dst, src);
2588 }
2589
2590 public final void movzbq(CiRegister dst, CiRegister src) {
2591 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2592 emitByte(0x0F);
2593 emitByte(0xB6);
2594 emitByte(0xC0 | encode);
2595 }
2596
2597 public final void movzwq(CiRegister dst, CiAddress src) {
2598 prefixq(src, dst);
2599 emitByte(0x0F);
2600 emitByte(0xB7);
2601 emitOperandHelper(dst, src);
2602 }
2603
2604 public final void movzwq(CiRegister dst, CiRegister src) {
2605 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2606 emitByte(0x0F);
2607 emitByte(0xB7);
2608 emitByte(0xC0 | encode);
2609 }
2610
2611 public final void negq(CiRegister dst) {
2612 int encode = prefixqAndEncode(dst.encoding);
2613 emitByte(0xF7);
2614 emitByte(0xD8 | encode);
2615 }
2616
2617 public final void notq(CiRegister dst) {
2618 int encode = prefixqAndEncode(dst.encoding);
2619 emitByte(0xF7);
2620 emitByte(0xD0 | encode);
2621 }
2622
2623 public final void orq(CiAddress dst, int imm32) {
2624 prefixq(dst);
2625 emitByte(0x81);
2626 emitOperandHelper(rcx, dst);
2627 emitInt(imm32);
2628 }
2629
2630 public final void orq(CiRegister dst, int imm32) {
2631 prefixqAndEncode(dst.encoding);
2632 emitArith(0x81, 0xC8, dst, imm32);
2633 }
2634
2635 public final void orq(CiRegister dst, CiAddress src) {
2636 prefixq(src, dst);
2637 emitByte(0x0B);
2638 emitOperandHelper(dst, src);
2639 }
2640
2641 public final void orq(CiRegister dst, CiRegister src) {
2642 prefixqAndEncode(dst.encoding, src.encoding);
2643 emitArith(0x0B, 0xC0, dst, src);
2644 }
2645
2646 public final void popq(CiAddress dst) {
2647 prefixq(dst);
2648 emitByte(0x8F);
2649 emitOperandHelper(rax, dst);
2650 }
2651
2652 public final void pushq(CiAddress src) {
2653 prefixq(src);
2654 emitByte(0xFF);
2655 emitOperandHelper(rsi, src);
2656 }
2657
2658 public final void rclq(CiRegister dst, int imm8) {
2659 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2660 int encode = prefixqAndEncode(dst.encoding);
2661 if (imm8 == 1) {
2662 emitByte(0xD1);
2663 emitByte(0xD0 | encode);
2664 } else {
2665 emitByte(0xC1);
2666 emitByte(0xD0 | encode);
2667 emitByte(imm8);
2668 }
2669 }
2670
2671 public final void sarq(CiRegister dst, int imm8) {
2672 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2673 int encode = prefixqAndEncode(dst.encoding);
2674 if (imm8 == 1) {
2675 emitByte(0xD1);
2676 emitByte(0xF8 | encode);
2677 } else {
2678 emitByte(0xC1);
2679 emitByte(0xF8 | encode);
2680 emitByte(imm8);
2681 }
2682 }
2683
2684 public final void sarq(CiRegister dst) {
2685 int encode = prefixqAndEncode(dst.encoding);
2686 emitByte(0xD3);
2687 emitByte(0xF8 | encode);
2688 }
2689
2690 public final void shlq(CiRegister dst, int imm8) {
2691 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2692 int encode = prefixqAndEncode(dst.encoding);
2693 if (imm8 == 1) {
2694 emitByte(0xD1);
2695 emitByte(0xE0 | encode);
2696 } else {
2697 emitByte(0xC1);
2698 emitByte(0xE0 | encode);
2699 emitByte(imm8);
2700 }
2701 }
2702
2703 public final void shlq(CiRegister dst) {
2704 int encode = prefixqAndEncode(dst.encoding);
2705 emitByte(0xD3);
2706 emitByte(0xE0 | encode);
2707 }
2708
2709 public final void shrq(CiRegister dst, int imm8) {
2710 assert isShiftCount(imm8 >> 1) : "illegal shift count";
2711 int encode = prefixqAndEncode(dst.encoding);
2712 emitByte(0xC1);
2713 emitByte(0xE8 | encode);
2714 emitByte(imm8);
2715 }
2716
2717 public final void shrq(CiRegister dst) {
2718 int encode = prefixqAndEncode(dst.encoding);
2719 emitByte(0xD3);
2720 emitByte(0xE8 | encode);
2721 }
2722
2723 public final void sqrtsd(CiRegister dst, CiAddress src) {
2724 assert dst.isFpu();
2725
2726 emitByte(0xF2);
2727 prefix(src, dst);
2728 emitByte(0x0F);
2729 emitByte(0x51);
2730 emitOperandHelper(dst, src);
2731 }
2732
2733 public final void subq(CiAddress dst, int imm32) {
2734 prefixq(dst);
2735 if (isByte(imm32)) {
2736 emitByte(0x83);
2737 emitOperandHelper(rbp, dst);
2738 emitByte(imm32 & 0xFF);
2739 } else {
2740 emitByte(0x81);
2741 emitOperandHelper(rbp, dst);
2742 emitInt(imm32);
2743 }
2744 }
2745
2746 public final void subq(CiRegister dst, int imm32) {
2747 prefixqAndEncode(dst.encoding);
2748 emitArith(0x81, 0xE8, dst, imm32);
2749 }
2750
2751 public final void subq(CiAddress dst, CiRegister src) {
2752 prefixq(dst, src);
2753 emitByte(0x29);
2754 emitOperandHelper(src, dst);
2755 }
2756
2757 public final void subq(CiRegister dst, CiAddress src) {
2758 prefixq(src, dst);
2759 emitByte(0x2B);
2760 emitOperandHelper(dst, src);
2761 }
2762
2763 public final void subq(CiRegister dst, CiRegister src) {
2764 prefixqAndEncode(dst.encoding, src.encoding);
2765 emitArith(0x2B, 0xC0, dst, src);
2766 }
2767
2768 public final void testq(CiRegister dst, int imm32) {
2769 // not using emitArith because test
2770 // doesn't support sign-extension of
2771 // 8bit operands
2772 int encode = dst.encoding;
2773 if (encode == 0) {
2774 emitByte(Prefix.REXW);
2775 emitByte(0xA9);
2776 } else {
2777 encode = prefixqAndEncode(encode);
2778 emitByte(0xF7);
2779 emitByte(0xC0 | encode);
2780 }
2781 emitInt(imm32);
2782 }
2783
2784 public final void testq(CiRegister dst, CiRegister src) {
2785 prefixqAndEncode(dst.encoding, src.encoding);
2786 emitArith(0x85, 0xC0, dst, src);
2787 }
2788
2789 public final void xaddq(CiAddress dst, CiRegister src) {
2790 prefixq(dst, src);
2791 emitByte(0x0F);
2792 emitByte(0xC1);
2793 emitOperandHelper(src, dst);
2794 }
2795
2796 public final void xchgq(CiRegister dst, CiAddress src) {
2797 prefixq(src, dst);
2798 emitByte(0x87);
2799 emitOperandHelper(dst, src);
2800 }
2801
2802 public final void xchgq(CiRegister dst, CiRegister src) {
2803 int encode = prefixqAndEncode(dst.encoding, src.encoding);
2804 emitByte(0x87);
2805 emitByte(0xc0 | encode);
2806 }
2807
2808 public final void xorq(CiRegister dst, int imm32) {
2809 prefixqAndEncode(dst.encoding);
2810 emitArith(0x81, 0xF0, dst, imm32);
2811 }
2812
2813 public final void xorq(CiRegister dst, CiRegister src) {
2814 prefixqAndEncode(dst.encoding, src.encoding);
2815 emitArith(0x33, 0xC0, dst, src);
2816 }
2817
2818 public final void xorq(CiRegister dst, CiAddress src) {
2819
2820 prefixq(src, dst);
2821 emitByte(0x33);
2822 emitOperandHelper(dst, src);
2823
2824 }
2825
2826 public final void membar(int barriers) {
2827 if (target.isMP) {
2828 // We only have to handle StoreLoad
2829 if ((barriers & STORE_LOAD) != 0) {
2830 // All usable chips support "locked" instructions which suffice
2831 // as barriers, and are much faster than the alternative of
2832 // using cpuid instruction. We use here a locked add [rsp],0.
2833 // This is conveniently otherwise a no-op except for blowing
2834 // flags.
2835 // Any change to this code may need to revisit other places in
2836 // the code where this idiom is used, in particular the
2837 // orderAccess code.
2838 lock();
2839 addl(new CiAddress(Word, RSP, 0), 0); // Assert the lock# signal here
2840 }
2841 }
2842 }
2843
2844 @Override
2845 protected final void patchJumpTarget(int branch, int branchTarget) {
2846 int op = codeBuffer.getByte(branch);
2847 assert op == 0xE8 // call
2848 || op == 0x00 // jump table entry
2849 || op == 0xE9 // jmp
2850 || op == 0xEB // short jmp
2851 || (op & 0xF0) == 0x70 // short jcc
2852 || op == 0x0F && (codeBuffer.getByte(branch + 1) & 0xF0) == 0x80 // jcc
2853 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op;
2854
2855 if (op == 0x00) {
2856 int offsetToJumpTableBase = codeBuffer.getShort(branch + 1);
2857 int jumpTableBase = branch - offsetToJumpTableBase;
2858 int imm32 = branchTarget - jumpTableBase;
2859 codeBuffer.emitInt(imm32, branch);
2860 } else if (op == 0xEB || (op & 0xF0) == 0x70) {
2861
2862 // short offset operators (jmp and jcc)
2863 int imm8 = branchTarget - (branch + 2);
2864 codeBuffer.emitByte(imm8, branch + 1);
2865
2866 } else {
2867
2868 int off = 1;
2869 if (op == 0x0F) {
2870 off = 2;
2871 }
2872
2873 int imm32 = branchTarget - (branch + 4 + off);
2874 codeBuffer.emitInt(imm32, branch + off);
2875 }
2876 }
2877
2878 public void nullCheck(CiRegister r) {
2879 testl(AMD64.rax, new CiAddress(Word, r.asValue(Word), 0));
2880 }
2881
2882 public void align(int modulus) {
2883 if (codeBuffer.position() % modulus != 0) {
2884 nop(modulus - (codeBuffer.position() % modulus));
2885 }
2886 }
2887
2888 public void pushfq() {
2889 emitByte(0x9c);
2890 }
2891
2892 public void popfq() {
2893 emitByte(0x9D);
2894 }
2895
2896 /**
2897 * Makes sure that a subsequent {@linkplain #call} does not fail the alignment check.
2898 */
2899 public final void alignForPatchableDirectCall() {
2900 int dispStart = codeBuffer.position() + 1;
2901 int mask = target.wordSize - 1;
2902 if ((dispStart & ~mask) != ((dispStart + 3) & ~mask)) {
2903 nop(target.wordSize - (dispStart & mask));
2904 assert ((codeBuffer.position() + 1) & mask) == 0;
2905 }
2906 }
2907
2908 /**
2909 * Emits a direct call instruction. Note that the actual call target is not specified, because all calls
2910 * need patching anyway. Therefore, 0 is emitted as the call target, and the user is responsible
2911 * to add the call address to the appropriate patching tables.
2912 */
2913 public final void call() {
2914 emitByte(0xE8);
2915 emitInt(0);
2916 }
2917
2918 public final void call(CiRegister src) {
2919 int encode = prefixAndEncode(src.encoding);
2920 emitByte(0xFF);
2921 emitByte(0xD0 | encode);
2922 }
2923
2924 public void int3() {
2925 emitByte(0xCC);
2926 }
2927
2928 public void enter(short imm16, byte imm8) {
2929 emitByte(0xC8);
2930 // appended:
2931 emitByte(imm16 & 0xff);
2932 imm16 >>= 8;
2933 emitByte(imm16 & 0xff);
2934 emitByte(imm8);
2935 }
2936
2937 private void emitx87(int b1, int b2, int i) {
2938 assert 0 <= i && i < 8 : "illegal stack offset";
2939 emitByte(b1);
2940 emitByte(b2 + i);
2941 }
2942
2943 public void fld(CiAddress src) {
2944 emitByte(0xDD);
2945 emitOperandHelper(rax, src);
2946 }
2947
2948 public void fld(int i) {
2949 emitx87(0xD9, 0xC0, i);
2950 }
2951
2952 public void fldln2() {
2953 emitByte(0xD9);
2954 emitByte(0xED);
2955 }
2956
2957 public void fldlg2() {
2958 emitByte(0xD9);
2959 emitByte(0xEC);
2960 }
2961
2962 public void fyl2x() {
2963 emitByte(0xD9);
2964 emitByte(0xF1);
2965 }
2966
2967 public void fstp(CiAddress src) {
2968 emitByte(0xDD);
2969 emitOperandHelper(rbx, src);
2970 }
2971
2972 public void fsin() {
2973 emitByte(0xD9);
2974 emitByte(0xFE);
2975 }
2976
2977 public void fcos() {
2978 emitByte(0xD9);
2979 emitByte(0xFF);
2980 }
2981
2982 public void fptan() {
2983 emitByte(0xD9);
2984 emitByte(0xF2);
2985 }
2986
2987 public void fstp(int i) {
2988 emitx87(0xDD, 0xD8, i);
2989 }
2990 }