Mercurial > hg > graal-compiler
comparison graal/com.oracle.max.asm/src/com/oracle/max/asm/target/amd64/AMD64Assembler.java @ 3733:e233f5660da4
Added Java files from Maxine project.
author | Thomas Wuerthinger <thomas.wuerthinger@oracle.com> |
---|---|
date | Sat, 17 Dec 2011 19:59:18 +0100 |
parents | |
children | bc8527f3071c |
comparison
equal
deleted
inserted
replaced
3732:3e2e8b8abdaf | 3733:e233f5660da4 |
---|---|
1 /* | |
2 * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved. | |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |
20 * or visit www.oracle.com if you need additional information or have any | |
21 * questions. | |
22 */ | |
23 package com.oracle.max.asm.target.amd64; | |
24 | |
25 import static com.oracle.max.asm.NumUtil.*; | |
26 import static com.oracle.max.asm.target.amd64.AMD64.*; | |
27 import static com.oracle.max.cri.intrinsics.MemoryBarriers.*; | |
28 | |
29 import com.oracle.max.asm.*; | |
30 import com.sun.cri.ci.*; | |
31 import com.sun.cri.ri.*; | |
32 | |
33 /** | |
34 * This class implements an assembler that can encode most X86 instructions. | |
35 */ | |
36 public class AMD64Assembler extends AbstractAssembler { | |
37 /** | |
38 * The kind for pointers and raw registers. Since we know we are 64 bit here, we can hardcode it. | |
39 */ | |
40 private static final CiKind Word = CiKind.Long; | |
41 | |
42 private static final int MinEncodingNeedsRex = 8; | |
43 | |
/**
 * The x86 condition codes used for conditional jumps/moves.
 * <p>
 * Each constant carries the 4-bit code that is OR-ed into the opcode of a conditional
 * instruction. Several constants share one code (for example {@code zero} and
 * {@code equal} are both {@code 0x4}).
 */
public enum ConditionFlag {
    zero(0x4), notZero(0x5),
    equal(0x4), notEqual(0x5),
    less(0xc), lessEqual(0xe), greater(0xf), greaterEqual(0xd),
    below(0x2), belowEqual(0x6), above(0x7), aboveEqual(0x3),
    overflow(0x0), noOverflow(0x1),
    carrySet(0x2), carryClear(0x3),
    negative(0x8), positive(0x9),
    parity(0xa), noParity(0xb);

    /** The condition code value placed in the low bits of the opcode. */
    public final int value;

    ConditionFlag(int encoding) {
        this.value = encoding;
    }

    /** Cached result of {@link #values()}, in declaration order. */
    public static final ConditionFlag[] values = values();
}
77 | |
78 /** | |
79 * Constants for X86 prefix bytes. | |
80 */ | |
81 private class Prefix { | |
82 private static final int REX = 0x40; | |
83 private static final int REXB = 0x41; | |
84 private static final int REXX = 0x42; | |
85 private static final int REXXB = 0x43; | |
86 private static final int REXR = 0x44; | |
87 private static final int REXRB = 0x45; | |
88 private static final int REXRX = 0x46; | |
89 private static final int REXRXB = 0x47; | |
90 private static final int REXW = 0x48; | |
91 private static final int REXWB = 0x49; | |
92 private static final int REXWX = 0x4A; | |
93 private static final int REXWXB = 0x4B; | |
94 private static final int REXWR = 0x4C; | |
95 private static final int REXWRB = 0x4D; | |
96 private static final int REXWRX = 0x4E; | |
97 private static final int REXWRXB = 0x4F; | |
98 } | |
99 | |
/**
 * The register to which {@link CiRegister#Frame} and {@link CiRegister#CallerFrame} are bound.
 * This is {@code null} when the assembler was constructed without a register configuration.
 */
public final CiRegister frameRegister;

/**
 * Constructs an assembler for the AMD64 architecture.
 *
 * @param target the target description handed to the superclass constructor
 * @param registerConfig the register configuration used to bind {@link CiRegister#Frame} and
 *        {@link CiRegister#CallerFrame} to physical registers. This value can be null if this assembler
 *        instance will not be used to assemble instructions using these logical registers.
 */
public AMD64Assembler(CiTarget target, RiRegisterConfig registerConfig) {
    super(target);
    if (registerConfig != null) {
        this.frameRegister = registerConfig.getFrameRegister();
    } else {
        this.frameRegister = null;
    }
}
116 | |
117 private static int encode(CiRegister r) { | |
118 assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding; | |
119 return r.encoding & 0x7; | |
120 } | |
121 | |
122 private void emitArithB(int op1, int op2, CiRegister dst, int imm8) { | |
123 assert dst.isByte() : "must have byte register"; | |
124 assert isUByte(op1) && isUByte(op2) : "wrong opcode"; | |
125 assert isUByte(imm8) : "not a byte"; | |
126 assert (op1 & 0x01) == 0 : "should be 8bit operation"; | |
127 emitByte(op1); | |
128 emitByte(op2 | encode(dst)); | |
129 emitByte(imm8); | |
130 } | |
131 | |
132 private void emitArith(int op1, int op2, CiRegister dst, int imm32) { | |
133 assert isUByte(op1) && isUByte(op2) : "wrong opcode"; | |
134 assert (op1 & 0x01) == 1 : "should be 32bit operation"; | |
135 assert (op1 & 0x02) == 0 : "sign-extension bit should not be set"; | |
136 if (isByte(imm32)) { | |
137 emitByte(op1 | 0x02); // set sign bit | |
138 emitByte(op2 | encode(dst)); | |
139 emitByte(imm32 & 0xFF); | |
140 } else { | |
141 emitByte(op1); | |
142 emitByte(op2 | encode(dst)); | |
143 emitInt(imm32); | |
144 } | |
145 } | |
146 | |
147 // immediate-to-memory forms | |
148 private void emitArithOperand(int op1, CiRegister rm, CiAddress adr, int imm32) { | |
149 assert (op1 & 0x01) == 1 : "should be 32bit operation"; | |
150 assert (op1 & 0x02) == 0 : "sign-extension bit should not be set"; | |
151 if (isByte(imm32)) { | |
152 emitByte(op1 | 0x02); // set sign bit | |
153 emitOperandHelper(rm, adr); | |
154 emitByte(imm32 & 0xFF); | |
155 } else { | |
156 emitByte(op1); | |
157 emitOperandHelper(rm, adr); | |
158 emitInt(imm32); | |
159 } | |
160 } | |
161 | |
162 private void emitArith(int op1, int op2, CiRegister dst, CiRegister src) { | |
163 assert isUByte(op1) && isUByte(op2) : "wrong opcode"; | |
164 emitByte(op1); | |
165 emitByte(op2 | encode(dst) << 3 | encode(src)); | |
166 } | |
167 | |
168 private void emitOperandHelper(CiRegister reg, CiAddress addr) { | |
169 CiRegister base = addr.base(); | |
170 | |
171 CiRegister index = addr.index(); | |
172 CiAddress.Scale scale = addr.scale; | |
173 int disp = addr.displacement; | |
174 | |
175 if (base == CiRegister.Frame) { | |
176 assert frameRegister != null : "cannot use register " + CiRegister.Frame + " in assembler with null register configuration"; | |
177 base = frameRegister; | |
178 // } else if (base == CiRegister.CallerFrame) { | |
179 // assert frameRegister != null : "cannot use register " + CiRegister.Frame + " in assembler with null register configuration"; | |
180 // base = frameRegister; | |
181 // disp += targetMethod.frameSize() + 8; | |
182 } | |
183 | |
184 // Encode the registers as needed in the fields they are used in | |
185 | |
186 assert reg != CiRegister.None; | |
187 int regenc = encode(reg) << 3; | |
188 | |
189 if (base == AMD64.rip) { | |
190 // [00 000 101] disp32 | |
191 emitByte(0x05 | regenc); | |
192 emitInt(disp); | |
193 } else if (addr == CiAddress.Placeholder) { | |
194 // [00 000 101] disp32 | |
195 emitByte(0x05 | regenc); | |
196 emitInt(0); | |
197 | |
198 } else if (base.isValid()) { | |
199 int baseenc = base.isValid() ? encode(base) : 0; | |
200 if (index.isValid()) { | |
201 int indexenc = encode(index) << 3; | |
202 // [base + indexscale + disp] | |
203 if (disp == 0 && base != rbp && (base != r13)) { | |
204 // [base + indexscale] | |
205 // [00 reg 100][ss index base] | |
206 assert index != rsp : "illegal addressing mode"; | |
207 emitByte(0x04 | regenc); | |
208 emitByte(scale.log2 << 6 | indexenc | baseenc); | |
209 } else if (isByte(disp)) { | |
210 // [base + indexscale + imm8] | |
211 // [01 reg 100][ss index base] imm8 | |
212 assert index != rsp : "illegal addressing mode"; | |
213 emitByte(0x44 | regenc); | |
214 emitByte(scale.log2 << 6 | indexenc | baseenc); | |
215 emitByte(disp & 0xFF); | |
216 } else { | |
217 // [base + indexscale + disp32] | |
218 // [10 reg 100][ss index base] disp32 | |
219 assert index != rsp : "illegal addressing mode"; | |
220 emitByte(0x84 | regenc); | |
221 emitByte(scale.log2 << 6 | indexenc | baseenc); | |
222 emitInt(disp); | |
223 } | |
224 } else if (base == rsp || (base == r12)) { | |
225 // [rsp + disp] | |
226 if (disp == 0) { | |
227 // [rsp] | |
228 // [00 reg 100][00 100 100] | |
229 emitByte(0x04 | regenc); | |
230 emitByte(0x24); | |
231 } else if (isByte(disp)) { | |
232 // [rsp + imm8] | |
233 // [01 reg 100][00 100 100] disp8 | |
234 emitByte(0x44 | regenc); | |
235 emitByte(0x24); | |
236 emitByte(disp & 0xFF); | |
237 } else { | |
238 // [rsp + imm32] | |
239 // [10 reg 100][00 100 100] disp32 | |
240 emitByte(0x84 | regenc); | |
241 emitByte(0x24); | |
242 emitInt(disp); | |
243 } | |
244 } else { | |
245 // [base + disp] | |
246 assert base != rsp && (base != r12) : "illegal addressing mode"; | |
247 if (disp == 0 && base != rbp && (base != r13)) { | |
248 // [base] | |
249 // [00 reg base] | |
250 emitByte(0x00 | regenc | baseenc); | |
251 } else if (isByte(disp)) { | |
252 // [base + disp8] | |
253 // [01 reg base] disp8 | |
254 emitByte(0x40 | regenc | baseenc); | |
255 emitByte(disp & 0xFF); | |
256 } else { | |
257 // [base + disp32] | |
258 // [10 reg base] disp32 | |
259 emitByte(0x80 | regenc | baseenc); | |
260 emitInt(disp); | |
261 } | |
262 } | |
263 } else { | |
264 if (index.isValid()) { | |
265 int indexenc = encode(index) << 3; | |
266 // [indexscale + disp] | |
267 // [00 reg 100][ss index 101] disp32 | |
268 assert index != rsp : "illegal addressing mode"; | |
269 emitByte(0x04 | regenc); | |
270 emitByte(scale.log2 << 6 | indexenc | 0x05); | |
271 emitInt(disp); | |
272 } else { | |
273 // [disp] ABSOLUTE | |
274 // [00 reg 100][00 100 101] disp32 | |
275 emitByte(0x04 | regenc); | |
276 emitByte(0x25); | |
277 emitInt(disp); | |
278 } | |
279 } | |
280 } | |
281 | |
282 public final void addl(CiAddress dst, int imm32) { | |
283 prefix(dst); | |
284 emitArithOperand(0x81, rax, dst, imm32); | |
285 } | |
286 | |
287 public final void addl(CiAddress dst, CiRegister src) { | |
288 prefix(dst, src); | |
289 emitByte(0x01); | |
290 emitOperandHelper(src, dst); | |
291 } | |
292 | |
293 public final void addl(CiRegister dst, int imm32) { | |
294 prefix(dst); | |
295 emitArith(0x81, 0xC0, dst, imm32); | |
296 } | |
297 | |
298 public final void addl(CiRegister dst, CiAddress src) { | |
299 prefix(src, dst); | |
300 emitByte(0x03); | |
301 emitOperandHelper(dst, src); | |
302 } | |
303 | |
304 public final void addl(CiRegister dst, CiRegister src) { | |
305 prefixAndEncode(dst.encoding, src.encoding); | |
306 emitArith(0x03, 0xC0, dst, src); | |
307 } | |
308 | |
309 private void addrNop4() { | |
310 // 4 bytes: NOP DWORD PTR [EAX+0] | |
311 emitByte(0x0F); | |
312 emitByte(0x1F); | |
313 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc); | |
314 emitByte(0); // 8-bits offset (1 byte) | |
315 } | |
316 | |
317 private void addrNop5() { | |
318 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset | |
319 emitByte(0x0F); | |
320 emitByte(0x1F); | |
321 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4); | |
322 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); | |
323 emitByte(0); // 8-bits offset (1 byte) | |
324 } | |
325 | |
326 private void addrNop7() { | |
327 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset | |
328 emitByte(0x0F); | |
329 emitByte(0x1F); | |
330 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc); | |
331 emitInt(0); // 32-bits offset (4 bytes) | |
332 } | |
333 | |
334 private void addrNop8() { | |
335 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset | |
336 emitByte(0x0F); | |
337 emitByte(0x1F); | |
338 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4); | |
339 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); | |
340 emitInt(0); // 32-bits offset (4 bytes) | |
341 } | |
342 | |
343 public final void addsd(CiRegister dst, CiRegister src) { | |
344 assert dst.isFpu() && src.isFpu(); | |
345 emitByte(0xF2); | |
346 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
347 emitByte(0x0F); | |
348 emitByte(0x58); | |
349 emitByte(0xC0 | encode); | |
350 } | |
351 | |
352 public final void addsd(CiRegister dst, CiAddress src) { | |
353 assert dst.isFpu(); | |
354 emitByte(0xF2); | |
355 prefix(src, dst); | |
356 emitByte(0x0F); | |
357 emitByte(0x58); | |
358 emitOperandHelper(dst, src); | |
359 } | |
360 | |
361 public final void addss(CiRegister dst, CiRegister src) { | |
362 assert dst.isFpu() && src.isFpu(); | |
363 emitByte(0xF3); | |
364 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
365 emitByte(0x0F); | |
366 emitByte(0x58); | |
367 emitByte(0xC0 | encode); | |
368 } | |
369 | |
370 public final void addss(CiRegister dst, CiAddress src) { | |
371 assert dst.isFpu(); | |
372 emitByte(0xF3); | |
373 prefix(src, dst); | |
374 emitByte(0x0F); | |
375 emitByte(0x58); | |
376 emitOperandHelper(dst, src); | |
377 } | |
378 | |
379 public final void andl(CiRegister dst, int imm32) { | |
380 prefix(dst); | |
381 emitArith(0x81, 0xE0, dst, imm32); | |
382 } | |
383 | |
384 public final void andl(CiRegister dst, CiAddress src) { | |
385 prefix(src, dst); | |
386 emitByte(0x23); | |
387 emitOperandHelper(dst, src); | |
388 } | |
389 | |
390 public final void andl(CiRegister dst, CiRegister src) { | |
391 prefixAndEncode(dst.encoding, src.encoding); | |
392 emitArith(0x23, 0xC0, dst, src); | |
393 } | |
394 | |
395 public final void bsfq(CiRegister dst, CiRegister src) { | |
396 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
397 emitByte(0x0F); | |
398 emitByte(0xBC); | |
399 emitByte(0xC0 | encode); | |
400 } | |
401 | |
402 public final void bsfq(CiRegister dst, CiAddress src) { | |
403 prefixq(src, dst); | |
404 emitByte(0xBC); | |
405 emitOperandHelper(dst, src); | |
406 } | |
407 | |
408 public final void bsrq(CiRegister dst, CiRegister src) { | |
409 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
410 emitByte(0x0F); | |
411 emitByte(0xBD); | |
412 emitByte(0xC0 | encode); | |
413 } | |
414 | |
415 | |
416 public final void bsrq(CiRegister dst, CiAddress src) { | |
417 prefixq(src, dst); | |
418 emitByte(0xBD); | |
419 emitOperandHelper(dst, src); | |
420 } | |
421 | |
422 public final void bswapl(CiRegister reg) { // bswap | |
423 int encode = prefixAndEncode(reg.encoding); | |
424 emitByte(0x0F); | |
425 emitByte(0xC8 | encode); | |
426 } | |
427 | |
428 public final void btli(CiAddress src, int imm8) { | |
429 prefixq(src); | |
430 emitByte(0x0F); | |
431 emitByte(0xBA); | |
432 emitOperandHelper(rsp, src); | |
433 emitByte(imm8); | |
434 } | |
435 | |
436 public final void cdql() { | |
437 emitByte(0x99); | |
438 } | |
439 | |
440 public final void cmovl(ConditionFlag cc, CiRegister dst, CiRegister src) { | |
441 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
442 emitByte(0x0F); | |
443 emitByte(0x40 | cc.value); | |
444 emitByte(0xC0 | encode); | |
445 } | |
446 | |
447 public final void cmovl(ConditionFlag cc, CiRegister dst, CiAddress src) { | |
448 prefix(src, dst); | |
449 emitByte(0x0F); | |
450 emitByte(0x40 | cc.value); | |
451 emitOperandHelper(dst, src); | |
452 } | |
453 | |
454 public final void cmpb(CiAddress dst, int imm8) { | |
455 prefix(dst); | |
456 emitByte(0x80); | |
457 emitOperandHelper(rdi, dst); | |
458 emitByte(imm8); | |
459 } | |
460 | |
461 public final void cmpl(CiAddress dst, int imm32) { | |
462 prefix(dst); | |
463 emitByte(0x81); | |
464 emitOperandHelper(rdi, dst); | |
465 emitInt(imm32); | |
466 } | |
467 | |
468 public final void cmpl(CiRegister dst, int imm32) { | |
469 prefix(dst); | |
470 emitArith(0x81, 0xF8, dst, imm32); | |
471 } | |
472 | |
473 public final void cmpl(CiRegister dst, CiRegister src) { | |
474 prefixAndEncode(dst.encoding, src.encoding); | |
475 emitArith(0x3B, 0xC0, dst, src); | |
476 } | |
477 | |
478 public final void cmpl(CiRegister dst, CiAddress src) { | |
479 prefix(src, dst); | |
480 emitByte(0x3B); | |
481 emitOperandHelper(dst, src); | |
482 } | |
483 | |
484 // The 32-bit cmpxchg compares the value at adr with the contents of X86.rax, | |
485 // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,. | |
486 // The ZF is set if the compared values were equal, and cleared otherwise. | |
487 public final void cmpxchgl(CiRegister reg, CiAddress adr) { // cmpxchg | |
488 if ((AsmOptions.Atomics & 2) != 0) { | |
489 // caveat: no instructionmark, so this isn't relocatable. | |
490 // Emit a synthetic, non-atomic, CAS equivalent. | |
491 // Beware. The synthetic form sets all ICCs, not just ZF. | |
492 // cmpxchg r,[m] is equivalent to X86.rax, = CAS (m, X86.rax, r) | |
493 cmpl(rax, adr); | |
494 movl(rax, adr); | |
495 if (reg != rax) { | |
496 Label l = new Label(); | |
497 jcc(ConditionFlag.notEqual, l); | |
498 movl(adr, reg); | |
499 bind(l); | |
500 } | |
501 } else { | |
502 | |
503 prefix(adr, reg); | |
504 emitByte(0x0F); | |
505 emitByte(0xB1); | |
506 emitOperandHelper(reg, adr); | |
507 } | |
508 } | |
509 | |
510 public final void comisd(CiRegister dst, CiAddress src) { | |
511 assert dst.isFpu(); | |
512 // NOTE: dbx seems to decode this as comiss even though the | |
513 // 0x66 is there. Strangly ucomisd comes out correct | |
514 emitByte(0x66); | |
515 comiss(dst, src); | |
516 } | |
517 | |
518 public final void comiss(CiRegister dst, CiAddress src) { | |
519 assert dst.isFpu(); | |
520 | |
521 prefix(src, dst); | |
522 emitByte(0x0F); | |
523 emitByte(0x2F); | |
524 emitOperandHelper(dst, src); | |
525 } | |
526 | |
527 public final void cvtdq2pd(CiRegister dst, CiRegister src) { | |
528 assert dst.isFpu(); | |
529 assert src.isFpu(); | |
530 | |
531 emitByte(0xF3); | |
532 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
533 emitByte(0x0F); | |
534 emitByte(0xE6); | |
535 emitByte(0xC0 | encode); | |
536 } | |
537 | |
538 public final void cvtdq2ps(CiRegister dst, CiRegister src) { | |
539 assert dst.isFpu(); | |
540 assert src.isFpu(); | |
541 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
542 emitByte(0x0F); | |
543 emitByte(0x5B); | |
544 emitByte(0xC0 | encode); | |
545 } | |
546 | |
547 public final void cvtsd2ss(CiRegister dst, CiRegister src) { | |
548 assert dst.isFpu(); | |
549 assert src.isFpu(); | |
550 emitByte(0xF2); | |
551 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
552 emitByte(0x0F); | |
553 emitByte(0x5A); | |
554 emitByte(0xC0 | encode); | |
555 } | |
556 | |
557 public final void cvtsi2sdl(CiRegister dst, CiRegister src) { | |
558 assert dst.isFpu(); | |
559 emitByte(0xF2); | |
560 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
561 emitByte(0x0F); | |
562 emitByte(0x2A); | |
563 emitByte(0xC0 | encode); | |
564 } | |
565 | |
566 public final void cvtsi2ssl(CiRegister dst, CiRegister src) { | |
567 assert dst.isFpu(); | |
568 emitByte(0xF3); | |
569 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
570 emitByte(0x0F); | |
571 emitByte(0x2A); | |
572 emitByte(0xC0 | encode); | |
573 } | |
574 | |
575 public final void cvtss2sd(CiRegister dst, CiRegister src) { | |
576 assert dst.isFpu(); | |
577 assert src.isFpu(); | |
578 emitByte(0xF3); | |
579 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
580 emitByte(0x0F); | |
581 emitByte(0x5A); | |
582 emitByte(0xC0 | encode); | |
583 } | |
584 | |
585 public final void cvttsd2sil(CiRegister dst, CiRegister src) { | |
586 assert src.isFpu(); | |
587 emitByte(0xF2); | |
588 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
589 emitByte(0x0F); | |
590 emitByte(0x2C); | |
591 emitByte(0xC0 | encode); | |
592 } | |
593 | |
594 public final void cvttss2sil(CiRegister dst, CiRegister src) { | |
595 assert src.isFpu(); | |
596 emitByte(0xF3); | |
597 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
598 emitByte(0x0F); | |
599 emitByte(0x2C); | |
600 emitByte(0xC0 | encode); | |
601 } | |
602 | |
603 public final void decl(CiAddress dst) { | |
604 // Don't use it directly. Use Macrodecrement() instead. | |
605 prefix(dst); | |
606 emitByte(0xFF); | |
607 emitOperandHelper(rcx, dst); | |
608 } | |
609 | |
610 public final void divsd(CiRegister dst, CiAddress src) { | |
611 assert dst.isFpu(); | |
612 emitByte(0xF2); | |
613 prefix(src, dst); | |
614 emitByte(0x0F); | |
615 emitByte(0x5E); | |
616 emitOperandHelper(dst, src); | |
617 } | |
618 | |
619 public final void divsd(CiRegister dst, CiRegister src) { | |
620 assert dst.isFpu(); | |
621 assert src.isFpu(); | |
622 emitByte(0xF2); | |
623 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
624 emitByte(0x0F); | |
625 emitByte(0x5E); | |
626 emitByte(0xC0 | encode); | |
627 } | |
628 | |
629 public final void divss(CiRegister dst, CiAddress src) { | |
630 assert dst.isFpu(); | |
631 emitByte(0xF3); | |
632 prefix(src, dst); | |
633 emitByte(0x0F); | |
634 emitByte(0x5E); | |
635 emitOperandHelper(dst, src); | |
636 } | |
637 | |
638 public final void divss(CiRegister dst, CiRegister src) { | |
639 assert dst.isFpu(); | |
640 assert src.isFpu(); | |
641 emitByte(0xF3); | |
642 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
643 emitByte(0x0F); | |
644 emitByte(0x5E); | |
645 emitByte(0xC0 | encode); | |
646 } | |
647 | |
648 public final void hlt() { | |
649 emitByte(0xF4); | |
650 } | |
651 | |
652 public final void idivl(CiRegister src) { | |
653 int encode = prefixAndEncode(src.encoding); | |
654 emitByte(0xF7); | |
655 emitByte(0xF8 | encode); | |
656 } | |
657 | |
658 public final void divl(CiRegister src) { | |
659 int encode = prefixAndEncode(src.encoding); | |
660 emitByte(0xF7); | |
661 emitByte(0xF0 | encode); | |
662 } | |
663 | |
664 public final void imull(CiRegister dst, CiRegister src) { | |
665 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
666 emitByte(0x0F); | |
667 emitByte(0xAF); | |
668 emitByte(0xC0 | encode); | |
669 } | |
670 | |
671 public final void imull(CiRegister dst, CiRegister src, int value) { | |
672 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
673 if (isByte(value)) { | |
674 emitByte(0x6B); | |
675 emitByte(0xC0 | encode); | |
676 emitByte(value & 0xFF); | |
677 } else { | |
678 emitByte(0x69); | |
679 emitByte(0xC0 | encode); | |
680 emitInt(value); | |
681 } | |
682 } | |
683 | |
684 public final void incl(CiAddress dst) { | |
685 // Don't use it directly. Use Macroincrement() instead. | |
686 prefix(dst); | |
687 emitByte(0xFF); | |
688 emitOperandHelper(rax, dst); | |
689 } | |
690 | |
691 public final void jcc(ConditionFlag cc, int target, boolean forceDisp32) { | |
692 int shortSize = 2; | |
693 int longSize = 6; | |
694 long disp = target - codeBuffer.position(); | |
695 if (!forceDisp32 && isByte(disp - shortSize)) { | |
696 // 0111 tttn #8-bit disp | |
697 emitByte(0x70 | cc.value); | |
698 emitByte((int) ((disp - shortSize) & 0xFF)); | |
699 } else { | |
700 // 0000 1111 1000 tttn #32-bit disp | |
701 assert isInt(disp - longSize) : "must be 32bit offset (call4)"; | |
702 emitByte(0x0F); | |
703 emitByte(0x80 | cc.value); | |
704 emitInt((int) (disp - longSize)); | |
705 } | |
706 } | |
707 | |
708 public final void jcc(ConditionFlag cc, Label l) { | |
709 assert (0 <= cc.value) && (cc.value < 16) : "illegal cc"; | |
710 if (l.isBound()) { | |
711 jcc(cc, l.position(), false); | |
712 } else { | |
713 // Note: could eliminate cond. jumps to this jump if condition | |
714 // is the same however, seems to be rather unlikely case. | |
715 // Note: use jccb() if label to be bound is very close to get | |
716 // an 8-bit displacement | |
717 l.addPatchAt(codeBuffer.position()); | |
718 emitByte(0x0F); | |
719 emitByte(0x80 | cc.value); | |
720 emitInt(0); | |
721 } | |
722 | |
723 } | |
724 | |
725 public final void jccb(ConditionFlag cc, Label l) { | |
726 if (l.isBound()) { | |
727 int shortSize = 2; | |
728 int entry = l.position(); | |
729 assert isByte(entry - (codeBuffer.position() + shortSize)) : "Dispacement too large for a short jmp"; | |
730 long disp = entry - codeBuffer.position(); | |
731 // 0111 tttn #8-bit disp | |
732 emitByte(0x70 | cc.value); | |
733 emitByte((int) ((disp - shortSize) & 0xFF)); | |
734 } else { | |
735 | |
736 l.addPatchAt(codeBuffer.position()); | |
737 emitByte(0x70 | cc.value); | |
738 emitByte(0); | |
739 } | |
740 } | |
741 | |
742 public final void jmp(CiAddress adr) { | |
743 prefix(adr); | |
744 emitByte(0xFF); | |
745 emitOperandHelper(rsp, adr); | |
746 } | |
747 | |
748 public final void jmp(int target, boolean forceDisp32) { | |
749 int shortSize = 2; | |
750 int longSize = 5; | |
751 long disp = target - codeBuffer.position(); | |
752 if (!forceDisp32 && isByte(disp - shortSize)) { | |
753 emitByte(0xEB); | |
754 emitByte((int) ((disp - shortSize) & 0xFF)); | |
755 } else { | |
756 emitByte(0xE9); | |
757 emitInt((int) (disp - longSize)); | |
758 } | |
759 } | |
760 | |
761 public final void jmp(Label l) { | |
762 if (l.isBound()) { | |
763 jmp(l.position(), false); | |
764 } else { | |
765 // By default, forward jumps are always 32-bit displacements, since | |
766 // we can't yet know where the label will be bound. If you're sure that | |
767 // the forward jump will not run beyond 256 bytes, use jmpb to | |
768 // force an 8-bit displacement. | |
769 | |
770 l.addPatchAt(codeBuffer.position()); | |
771 emitByte(0xE9); | |
772 emitInt(0); | |
773 } | |
774 } | |
775 | |
776 public final void jmp(CiRegister entry) { | |
777 int encode = prefixAndEncode(entry.encoding); | |
778 emitByte(0xFF); | |
779 emitByte(0xE0 | encode); | |
780 } | |
781 | |
782 public final void jmpb(Label l) { | |
783 if (l.isBound()) { | |
784 int shortSize = 2; | |
785 int entry = l.position(); | |
786 assert isByte((entry - codeBuffer.position()) + shortSize) : "Dispacement too large for a short jmp"; | |
787 long offs = entry - codeBuffer.position(); | |
788 emitByte(0xEB); | |
789 emitByte((int) ((offs - shortSize) & 0xFF)); | |
790 } else { | |
791 | |
792 l.addPatchAt(codeBuffer.position()); | |
793 emitByte(0xEB); | |
794 emitByte(0); | |
795 } | |
796 } | |
797 | |
798 public final void leaq(CiRegister dst, CiAddress src) { | |
799 prefixq(src, dst); | |
800 emitByte(0x8D); | |
801 emitOperandHelper(dst, src); | |
802 } | |
803 | |
804 public final void enter(int imm16, int imm8) { | |
805 emitByte(0xC8); | |
806 emitShort(imm16); | |
807 emitByte(imm8); | |
808 } | |
809 | |
810 public final void leave() { | |
811 emitByte(0xC9); | |
812 } | |
813 | |
814 public final void lock() { | |
815 if ((AsmOptions.Atomics & 1) != 0) { | |
816 // Emit either nothing, a NOP, or a NOP: prefix | |
817 emitByte(0x90); | |
818 } else { | |
819 emitByte(0xF0); | |
820 } | |
821 } | |
822 | |
823 // Emit mfence instruction | |
824 public final void mfence() { | |
825 emitByte(0x0F); | |
826 emitByte(0xAE); | |
827 emitByte(0xF0); | |
828 } | |
829 | |
830 public final void mov(CiRegister dst, CiRegister src) { | |
831 movq(dst, src); | |
832 } | |
833 | |
834 public final void movapd(CiRegister dst, CiRegister src) { | |
835 assert dst.isFpu(); | |
836 assert src.isFpu(); | |
837 int dstenc = dst.encoding; | |
838 int srcenc = src.encoding; | |
839 emitByte(0x66); | |
840 if (dstenc < 8) { | |
841 if (srcenc >= 8) { | |
842 emitByte(Prefix.REXB); | |
843 srcenc -= 8; | |
844 } | |
845 } else { | |
846 if (srcenc < 8) { | |
847 emitByte(Prefix.REXR); | |
848 } else { | |
849 emitByte(Prefix.REXRB); | |
850 srcenc -= 8; | |
851 } | |
852 dstenc -= 8; | |
853 } | |
854 emitByte(0x0F); | |
855 emitByte(0x28); | |
856 emitByte(0xC0 | dstenc << 3 | srcenc); | |
857 } | |
858 | |
859 public final void movaps(CiRegister dst, CiRegister src) { | |
860 assert dst.isFpu(); | |
861 assert src.isFpu(); | |
862 int dstenc = dst.encoding; | |
863 int srcenc = src.encoding; | |
864 if (dstenc < 8) { | |
865 if (srcenc >= 8) { | |
866 emitByte(Prefix.REXB); | |
867 srcenc -= 8; | |
868 } | |
869 } else { | |
870 if (srcenc < 8) { | |
871 emitByte(Prefix.REXR); | |
872 } else { | |
873 emitByte(Prefix.REXRB); | |
874 srcenc -= 8; | |
875 } | |
876 dstenc -= 8; | |
877 } | |
878 emitByte(0x0F); | |
879 emitByte(0x28); | |
880 emitByte(0xC0 | dstenc << 3 | srcenc); | |
881 } | |
882 | |
883 public final void movb(CiRegister dst, CiAddress src) { | |
884 prefix(src, dst); // , true) | |
885 emitByte(0x8A); | |
886 emitOperandHelper(dst, src); | |
887 } | |
888 | |
889 public final void movb(CiAddress dst, int imm8) { | |
890 prefix(dst); | |
891 emitByte(0xC6); | |
892 emitOperandHelper(rax, dst); | |
893 emitByte(imm8); | |
894 } | |
895 | |
896 public final void movb(CiAddress dst, CiRegister src) { | |
897 assert src.isByte() : "must have byte register"; | |
898 prefix(dst, src); // , true) | |
899 emitByte(0x88); | |
900 emitOperandHelper(src, dst); | |
901 } | |
902 | |
903 public final void movdl(CiRegister dst, CiRegister src) { | |
904 if (dst.isFpu()) { | |
905 assert !src.isFpu() : "does this hold?"; | |
906 emitByte(0x66); | |
907 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
908 emitByte(0x0F); | |
909 emitByte(0x6E); | |
910 emitByte(0xC0 | encode); | |
911 } else if (src.isFpu()) { | |
912 assert !dst.isFpu(); | |
913 emitByte(0x66); | |
914 // swap src/dst to get correct prefix | |
915 int encode = prefixAndEncode(src.encoding, dst.encoding); | |
916 emitByte(0x0F); | |
917 emitByte(0x7E); | |
918 emitByte(0xC0 | encode); | |
919 } | |
920 } | |
921 | |
922 public final void movdqa(CiRegister dst, CiAddress src) { | |
923 assert dst.isFpu(); | |
924 emitByte(0x66); | |
925 prefix(src, dst); | |
926 emitByte(0x0F); | |
927 emitByte(0x6F); | |
928 emitOperandHelper(dst, src); | |
929 } | |
930 | |
931 public final void movdqa(CiRegister dst, CiRegister src) { | |
932 assert dst.isFpu(); | |
933 emitByte(0x66); | |
934 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
935 emitByte(0x0F); | |
936 emitByte(0x6F); | |
937 emitByte(0xC0 | encode); | |
938 } | |
939 | |
940 public final void movdqa(CiAddress dst, CiRegister src) { | |
941 assert src.isFpu(); | |
942 emitByte(0x66); | |
943 prefix(dst, src); | |
944 emitByte(0x0F); | |
945 emitByte(0x7F); | |
946 emitOperandHelper(src, dst); | |
947 } | |
948 | |
949 public final void movdqu(CiRegister dst, CiAddress src) { | |
950 assert dst.isFpu(); | |
951 emitByte(0xF3); | |
952 prefix(src, dst); | |
953 emitByte(0x0F); | |
954 emitByte(0x6F); | |
955 emitOperandHelper(dst, src); | |
956 } | |
957 | |
958 public final void movdqu(CiRegister dst, CiRegister src) { | |
959 assert dst.isFpu(); | |
960 assert src.isFpu(); | |
961 | |
962 emitByte(0xF3); | |
963 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
964 emitByte(0x0F); | |
965 emitByte(0x6F); | |
966 emitByte(0xC0 | encode); | |
967 } | |
968 | |
969 public final void movdqu(CiAddress dst, CiRegister src) { | |
970 assert src.isFpu(); | |
971 | |
972 emitByte(0xF3); | |
973 prefix(dst, src); | |
974 emitByte(0x0F); | |
975 emitByte(0x7F); | |
976 emitOperandHelper(src, dst); | |
977 } | |
978 | |
979 public final void movl(CiRegister dst, int imm32) { | |
980 int encode = prefixAndEncode(dst.encoding); | |
981 emitByte(0xB8 | encode); | |
982 emitInt(imm32); | |
983 } | |
984 | |
985 public final void movl(CiRegister dst, CiRegister src) { | |
986 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
987 emitByte(0x8B); | |
988 emitByte(0xC0 | encode); | |
989 } | |
990 | |
991 public final void movl(CiRegister dst, CiAddress src) { | |
992 prefix(src, dst); | |
993 emitByte(0x8B); | |
994 emitOperandHelper(dst, src); | |
995 } | |
996 | |
997 public final void movl(CiAddress dst, int imm32) { | |
998 prefix(dst); | |
999 emitByte(0xC7); | |
1000 emitOperandHelper(rax, dst); | |
1001 emitInt(imm32); | |
1002 } | |
1003 | |
1004 public final void movl(CiAddress dst, CiRegister src) { | |
1005 prefix(dst, src); | |
1006 emitByte(0x89); | |
1007 emitOperandHelper(src, dst); | |
1008 } | |
1009 | |
1010 /** | |
1011 * New CPUs require use of movsd and movss to avoid partial register stall | |
1012 * when loading from memory. But for old Opteron use movlpd instead of movsd. | |
1013 * The selection is done in {@link AMD64MacroAssembler#movdbl(CiRegister, CiAddress)} | |
1014 * and {@link AMD64MacroAssembler#movflt(CiRegister, CiRegister)}. | |
1015 */ | |
1016 public final void movlpd(CiRegister dst, CiAddress src) { | |
1017 assert dst.isFpu(); | |
1018 emitByte(0x66); | |
1019 prefix(src, dst); | |
1020 emitByte(0x0F); | |
1021 emitByte(0x12); | |
1022 emitOperandHelper(dst, src); | |
1023 } | |
1024 | |
1025 public final void movlpd(CiAddress dst, CiRegister src) { | |
1026 assert src.isFpu(); | |
1027 emitByte(0x66); | |
1028 prefix(dst, src); | |
1029 emitByte(0x0F); | |
1030 emitByte(0x13); | |
1031 emitOperandHelper(src, dst); | |
1032 } | |
1033 | |
1034 public final void movq(CiRegister dst, CiAddress src) { | |
1035 if (dst.isFpu()) { | |
1036 emitByte(0xF3); | |
1037 prefixq(src, dst); | |
1038 emitByte(0x0F); | |
1039 emitByte(0x7E); | |
1040 emitOperandHelper(dst, src); | |
1041 } else { | |
1042 prefixq(src, dst); | |
1043 emitByte(0x8B); | |
1044 emitOperandHelper(dst, src); | |
1045 } | |
1046 } | |
1047 | |
1048 public final void movq(CiRegister dst, CiRegister src) { | |
1049 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
1050 emitByte(0x8B); | |
1051 emitByte(0xC0 | encode); | |
1052 } | |
1053 | |
1054 public final void movq(CiAddress dst, CiRegister src) { | |
1055 if (src.isFpu()) { | |
1056 emitByte(0x66); | |
1057 prefixq(dst, src); | |
1058 emitByte(0x0F); | |
1059 emitByte(0xD6); | |
1060 emitOperandHelper(src, dst); | |
1061 } else { | |
1062 prefixq(dst, src); | |
1063 emitByte(0x89); | |
1064 emitOperandHelper(src, dst); | |
1065 } | |
1066 } | |
1067 | |
1068 public final void movsxb(CiRegister dst, CiAddress src) { // movsxb | |
1069 prefix(src, dst); | |
1070 emitByte(0x0F); | |
1071 emitByte(0xBE); | |
1072 emitOperandHelper(dst, src); | |
1073 } | |
1074 | |
1075 public final void movsxb(CiRegister dst, CiRegister src) { // movsxb | |
1076 int encode = prefixAndEncode(dst.encoding, src.encoding, true); | |
1077 emitByte(0x0F); | |
1078 emitByte(0xBE); | |
1079 emitByte(0xC0 | encode); | |
1080 } | |
1081 | |
1082 public final void movsd(CiRegister dst, CiRegister src) { | |
1083 assert dst.isFpu(); | |
1084 assert src.isFpu(); | |
1085 emitByte(0xF2); | |
1086 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1087 emitByte(0x0F); | |
1088 emitByte(0x10); | |
1089 emitByte(0xC0 | encode); | |
1090 } | |
1091 | |
1092 public final void movsd(CiRegister dst, CiAddress src) { | |
1093 assert dst.isFpu(); | |
1094 emitByte(0xF2); | |
1095 prefix(src, dst); | |
1096 emitByte(0x0F); | |
1097 emitByte(0x10); | |
1098 emitOperandHelper(dst, src); | |
1099 } | |
1100 | |
1101 public final void movsd(CiAddress dst, CiRegister src) { | |
1102 assert src.isFpu(); | |
1103 emitByte(0xF2); | |
1104 prefix(dst, src); | |
1105 emitByte(0x0F); | |
1106 emitByte(0x11); | |
1107 emitOperandHelper(src, dst); | |
1108 } | |
1109 | |
1110 public final void movss(CiRegister dst, CiRegister src) { | |
1111 assert dst.isFpu(); | |
1112 assert src.isFpu(); | |
1113 emitByte(0xF3); | |
1114 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1115 emitByte(0x0F); | |
1116 emitByte(0x10); | |
1117 emitByte(0xC0 | encode); | |
1118 } | |
1119 | |
1120 public final void movss(CiRegister dst, CiAddress src) { | |
1121 assert dst.isFpu(); | |
1122 emitByte(0xF3); | |
1123 prefix(src, dst); | |
1124 emitByte(0x0F); | |
1125 emitByte(0x10); | |
1126 emitOperandHelper(dst, src); | |
1127 } | |
1128 | |
1129 public final void movss(CiAddress dst, CiRegister src) { | |
1130 assert src.isFpu(); | |
1131 emitByte(0xF3); | |
1132 prefix(dst, src); | |
1133 emitByte(0x0F); | |
1134 emitByte(0x11); | |
1135 emitOperandHelper(src, dst); | |
1136 } | |
1137 | |
1138 public final void movswl(CiRegister dst, CiAddress src) { | |
1139 prefix(src, dst); | |
1140 emitByte(0x0F); | |
1141 emitByte(0xBF); | |
1142 emitOperandHelper(dst, src); | |
1143 } | |
1144 | |
1145 public final void movsxw(CiRegister dst, CiRegister src) { // movsxw | |
1146 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1147 emitByte(0x0F); | |
1148 emitByte(0xBF); | |
1149 emitByte(0xC0 | encode); | |
1150 } | |
1151 | |
1152 public final void movsxw(CiRegister dst, CiAddress src) { // movsxw | |
1153 prefix(src, dst); | |
1154 emitByte(0x0F); | |
1155 emitByte(0xBF); | |
1156 emitOperandHelper(dst, src); | |
1157 } | |
1158 | |
1159 public final void movzxd(CiRegister dst, CiRegister src) { // movzxd | |
1160 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1161 emitByte(0x63); | |
1162 emitByte(0xC0 | encode); | |
1163 } | |
1164 | |
1165 public final void movzxd(CiRegister dst, CiAddress src) { // movzxd | |
1166 prefix(src, dst); | |
1167 emitByte(0x63); | |
1168 emitOperandHelper(dst, src); | |
1169 } | |
1170 | |
1171 public final void movw(CiAddress dst, int imm16) { | |
1172 emitByte(0x66); // switch to 16-bit mode | |
1173 prefix(dst); | |
1174 emitByte(0xC7); | |
1175 emitOperandHelper(rax, dst); | |
1176 emitShort(imm16); | |
1177 } | |
1178 | |
1179 public final void movw(CiRegister dst, CiAddress src) { | |
1180 emitByte(0x66); | |
1181 prefix(src, dst); | |
1182 emitByte(0x8B); | |
1183 emitOperandHelper(dst, src); | |
1184 } | |
1185 | |
1186 public final void movw(CiAddress dst, CiRegister src) { | |
1187 emitByte(0x66); | |
1188 prefix(dst, src); | |
1189 emitByte(0x89); | |
1190 emitOperandHelper(src, dst); | |
1191 } | |
1192 | |
1193 public final void movzxb(CiRegister dst, CiAddress src) { // movzxb | |
1194 prefix(src, dst); | |
1195 emitByte(0x0F); | |
1196 emitByte(0xB6); | |
1197 emitOperandHelper(dst, src); | |
1198 } | |
1199 | |
1200 public final void movzxb(CiRegister dst, CiRegister src) { // movzxb | |
1201 int encode = prefixAndEncode(dst.encoding, src.encoding, true); | |
1202 emitByte(0x0F); | |
1203 emitByte(0xB6); | |
1204 emitByte(0xC0 | encode); | |
1205 } | |
1206 | |
1207 public final void movzxl(CiRegister dst, CiAddress src) { // movzxw | |
1208 prefix(src, dst); | |
1209 emitByte(0x0F); | |
1210 emitByte(0xB7); | |
1211 emitOperandHelper(dst, src); | |
1212 } | |
1213 | |
1214 public final void movzxl(CiRegister dst, CiRegister src) { // movzxw | |
1215 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1216 emitByte(0x0F); | |
1217 emitByte(0xB7); | |
1218 emitByte(0xC0 | encode); | |
1219 } | |
1220 | |
1221 public final void mull(CiAddress src) { | |
1222 prefix(src); | |
1223 emitByte(0xF7); | |
1224 emitOperandHelper(rsp, src); | |
1225 } | |
1226 | |
1227 public final void mulsd(CiRegister dst, CiAddress src) { | |
1228 assert dst.isFpu(); | |
1229 emitByte(0xF2); | |
1230 prefix(src, dst); | |
1231 emitByte(0x0F); | |
1232 emitByte(0x59); | |
1233 emitOperandHelper(dst, src); | |
1234 } | |
1235 | |
1236 public final void mulsd(CiRegister dst, CiRegister src) { | |
1237 assert dst.isFpu(); | |
1238 assert src.isFpu(); | |
1239 | |
1240 emitByte(0xF2); | |
1241 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1242 emitByte(0x0F); | |
1243 emitByte(0x59); | |
1244 emitByte(0xC0 | encode); | |
1245 } | |
1246 | |
1247 public final void mulss(CiRegister dst, CiAddress src) { | |
1248 assert dst.isFpu(); | |
1249 | |
1250 emitByte(0xF3); | |
1251 prefix(src, dst); | |
1252 emitByte(0x0F); | |
1253 emitByte(0x59); | |
1254 emitOperandHelper(dst, src); | |
1255 } | |
1256 | |
1257 public final void mulss(CiRegister dst, CiRegister src) { | |
1258 assert dst.isFpu(); | |
1259 assert src.isFpu(); | |
1260 emitByte(0xF3); | |
1261 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1262 emitByte(0x0F); | |
1263 emitByte(0x59); | |
1264 emitByte(0xC0 | encode); | |
1265 } | |
1266 | |
1267 public final void negl(CiRegister dst) { | |
1268 int encode = prefixAndEncode(dst.encoding); | |
1269 emitByte(0xF7); | |
1270 emitByte(0xD8 | encode); | |
1271 } | |
1272 | |
1273 public final void ensureUniquePC() { | |
1274 nop(); | |
1275 } | |
1276 | |
1277 public final void nop() { | |
1278 nop(1); | |
1279 } | |
1280 | |
/**
 * Emits {@code i} bytes of no-op padding, using one of three strategies:
 * <ol>
 * <li>{@code AsmOptions.UseNormalNop}: {@code i} single-byte 0x90 nops;</li>
 * <li>{@code AsmOptions.UseAddressNop}: multi-byte address nops (0x0F 0x1F ...), padded
 * with 0x66 size prefixes;</li>
 * <li>otherwise: 0x90 nops padded with up to three 0x66 size prefixes each.</li>
 * </ol>
 *
 * @param i number of padding bytes to emit; the first strategy asserts {@code i > 0}
 */
public void nop(int i) {
    if (AsmOptions.UseNormalNop) {
        assert i > 0 : " ";
        // The fancy nops aren't currently recognized by debuggers, making it a
        // pain to disassemble code while debugging. When this option is set, speed
        // is presumably not an issue, so simply use the single-byte traditional nop
        // to do alignment.

        for (; i > 0; i--) {
            emitByte(0x90);
        }
        return;
    }

    if (AsmOptions.UseAddressNop) {
        //
        // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD.
        // 1: 0x90
        // 2: 0x66 0x90
        // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
        // 4: 0x0F 0x1F 0x40 0x00
        // 5: 0x0F 0x1F 0x44 0x00 0x00
        // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
        // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
        // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
        // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
        // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
        // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00

        // The rest of the coding is AMD specific - use consecutive address nops

        // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
        // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00
        // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
        // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
        // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
        // Size prefixes (0x66) are added for larger sizes

        // Consume 11 bytes per iteration (three prefixes + 8-byte address nop)
        // until at most 21 bytes remain.
        while (i >= 22) {
            i -= 11;
            emitByte(0x66); // size prefix
            emitByte(0x66); // size prefix
            emitByte(0x66); // size prefix
            addrNop8();
        }
        // Generate first nop for size between 21-12.
        // The cases deliberately fall through: each level adds one more size prefix
        // before the shared 8-byte address nop, leaving 11 or fewer bytes for the
        // second switch below.
        switch (i) {
            case 21:
                i -= 1;
                emitByte(0x66); // size prefix
                // fall through
            case 20:
                // fall through
            case 19:
                i -= 1;
                emitByte(0x66); // size prefix
                // fall through
            case 18:
                // fall through
            case 17:
                i -= 1;
                emitByte(0x66); // size prefix
                // fall through
            case 16:
                // fall through
            case 15:
                i -= 8;
                addrNop8();
                break;
            case 14:
            case 13:
                i -= 7;
                addrNop7();
                break;
            case 12:
                i -= 6;
                emitByte(0x66); // size prefix
                addrNop5();
                break;
            default:
                assert i < 12;
        }

        // Generate second nop for size between 11-1
        switch (i) {
            case 11:
                emitByte(0x66); // size prefix
                emitByte(0x66); // size prefix
                emitByte(0x66); // size prefix
                addrNop8();
                break;
            case 10:
                emitByte(0x66); // size prefix
                emitByte(0x66); // size prefix
                addrNop8();
                break;
            case 9:
                emitByte(0x66); // size prefix
                addrNop8();
                break;
            case 8:
                addrNop8();
                break;
            case 7:
                addrNop7();
                break;
            case 6:
                emitByte(0x66); // size prefix
                addrNop5();
                break;
            case 5:
                addrNop5();
                break;
            case 4:
                addrNop4();
                break;
            case 3:
                // Don't use "0x0F 0x1F 0x00" - need patching safe padding
                emitByte(0x66); // size prefix
                emitByte(0x66); // size prefix
                emitByte(0x90); // nop
                break;
            case 2:
                emitByte(0x66); // size prefix
                emitByte(0x90); // nop
                break;
            case 1:
                emitByte(0x90); // nop
                break;
            default:
                assert i == 0;
        }
        return;
    }

    // Using nops with size prefixes "0x66 0x90".
    // From AMD Optimization Guide:
    // 1: 0x90
    // 2: 0x66 0x90
    // 3: 0x66 0x66 0x90
    // 4: 0x66 0x66 0x66 0x90
    // 5: 0x66 0x66 0x90 0x66 0x90
    // 6: 0x66 0x66 0x90 0x66 0x66 0x90
    // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90
    // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90
    // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
    // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90
    //
    // Emit 4-byte nops until at most 12 bytes remain.
    while (i > 12) {
        i -= 4;
        emitByte(0x66); // size prefix
        emitByte(0x66);
        emitByte(0x66);
        emitByte(0x90); // nop
    }
    // 1 - 12 nops: emit a 3-byte nop (4-byte when more than 9 remain)
    if (i > 8) {
        if (i > 9) {
            i -= 1;
            emitByte(0x66);
        }
        i -= 3;
        emitByte(0x66);
        emitByte(0x66);
        emitByte(0x90);
    }
    // 1 - 8 nops: emit a 3-byte nop (4-byte when more than 6 remain)
    if (i > 4) {
        if (i > 6) {
            i -= 1;
            emitByte(0x66);
        }
        i -= 3;
        emitByte(0x66);
        emitByte(0x66);
        emitByte(0x90);
    }
    // Final nop covering the remaining 0 - 4 bytes
    switch (i) {
        case 4:
            emitByte(0x66);
            emitByte(0x66);
            emitByte(0x66);
            emitByte(0x90);
            break;
        case 3:
            emitByte(0x66);
            emitByte(0x66);
            emitByte(0x90);
            break;
        case 2:
            emitByte(0x66);
            emitByte(0x90);
            break;
        case 1:
            emitByte(0x90);
            break;
        default:
            assert i == 0;
    }
}
1481 | |
1482 public final void notl(CiRegister dst) { | |
1483 int encode = prefixAndEncode(dst.encoding); | |
1484 emitByte(0xF7); | |
1485 emitByte(0xD0 | encode); | |
1486 } | |
1487 | |
1488 public final void orl(CiAddress dst, int imm32) { | |
1489 prefix(dst); | |
1490 emitByte(0x81); | |
1491 emitOperandHelper(rcx, dst); | |
1492 emitInt(imm32); | |
1493 } | |
1494 | |
1495 public final void orl(CiRegister dst, int imm32) { | |
1496 prefix(dst); | |
1497 emitArith(0x81, 0xC8, dst, imm32); | |
1498 } | |
1499 | |
1500 public final void orl(CiRegister dst, CiAddress src) { | |
1501 prefix(src, dst); | |
1502 emitByte(0x0B); | |
1503 emitOperandHelper(dst, src); | |
1504 } | |
1505 | |
1506 public final void orl(CiRegister dst, CiRegister src) { | |
1507 prefixAndEncode(dst.encoding, src.encoding); | |
1508 emitArith(0x0B, 0xC0, dst, src); | |
1509 } | |
1510 | |
1511 // generic | |
1512 public final void pop(CiRegister dst) { | |
1513 int encode = prefixAndEncode(dst.encoding); | |
1514 emitByte(0x58 | encode); | |
1515 } | |
1516 | |
1517 public final void popl(CiAddress dst) { | |
1518 // NOTE: this will adjust stack by 8byte on 64bits | |
1519 prefix(dst); | |
1520 emitByte(0x8F); | |
1521 emitOperandHelper(rax, dst); | |
1522 } | |
1523 | |
1524 public final void prefetchPrefix(CiAddress src) { | |
1525 prefix(src); | |
1526 emitByte(0x0F); | |
1527 } | |
1528 | |
1529 public final void prefetchnta(CiAddress src) { | |
1530 prefetchPrefix(src); | |
1531 emitByte(0x18); | |
1532 emitOperandHelper(rax, src); // 0, src | |
1533 } | |
1534 | |
1535 public final void prefetchr(CiAddress src) { | |
1536 prefetchPrefix(src); | |
1537 emitByte(0x0D); | |
1538 emitOperandHelper(rax, src); // 0, src | |
1539 } | |
1540 | |
1541 public final void prefetcht0(CiAddress src) { | |
1542 prefetchPrefix(src); | |
1543 emitByte(0x18); | |
1544 emitOperandHelper(rcx, src); // 1, src | |
1545 | |
1546 } | |
1547 | |
1548 public final void prefetcht1(CiAddress src) { | |
1549 prefetchPrefix(src); | |
1550 emitByte(0x18); | |
1551 emitOperandHelper(rdx, src); // 2, src | |
1552 } | |
1553 | |
1554 public final void prefetcht2(CiAddress src) { | |
1555 prefetchPrefix(src); | |
1556 emitByte(0x18); | |
1557 emitOperandHelper(rbx, src); // 3, src | |
1558 } | |
1559 | |
1560 public final void prefetchw(CiAddress src) { | |
1561 prefetchPrefix(src); | |
1562 emitByte(0x0D); | |
1563 emitOperandHelper(rcx, src); // 1, src | |
1564 } | |
1565 | |
1566 public final void pshufd(CiRegister dst, CiRegister src, int mode) { | |
1567 assert dst.isFpu(); | |
1568 assert src.isFpu(); | |
1569 assert isUByte(mode) : "invalid value"; | |
1570 | |
1571 emitByte(0x66); | |
1572 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1573 emitByte(0x0F); | |
1574 emitByte(0x70); | |
1575 emitByte(0xC0 | encode); | |
1576 emitByte(mode & 0xFF); | |
1577 } | |
1578 | |
1579 public final void pshufd(CiRegister dst, CiAddress src, int mode) { | |
1580 assert dst.isFpu(); | |
1581 assert isUByte(mode) : "invalid value"; | |
1582 | |
1583 emitByte(0x66); | |
1584 prefix(src, dst); | |
1585 emitByte(0x0F); | |
1586 emitByte(0x70); | |
1587 emitOperandHelper(dst, src); | |
1588 emitByte(mode & 0xFF); | |
1589 | |
1590 } | |
1591 | |
1592 public final void pshuflw(CiRegister dst, CiRegister src, int mode) { | |
1593 assert dst.isFpu(); | |
1594 assert src.isFpu(); | |
1595 assert isUByte(mode) : "invalid value"; | |
1596 | |
1597 emitByte(0xF2); | |
1598 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1599 emitByte(0x0F); | |
1600 emitByte(0x70); | |
1601 emitByte(0xC0 | encode); | |
1602 emitByte(mode & 0xFF); | |
1603 } | |
1604 | |
1605 public final void pshuflw(CiRegister dst, CiAddress src, int mode) { | |
1606 assert dst.isFpu(); | |
1607 assert isUByte(mode) : "invalid value"; | |
1608 | |
1609 emitByte(0xF2); | |
1610 prefix(src, dst); // QQ new | |
1611 emitByte(0x0F); | |
1612 emitByte(0x70); | |
1613 emitOperandHelper(dst, src); | |
1614 emitByte(mode & 0xFF); | |
1615 } | |
1616 | |
1617 public final void psrlq(CiRegister dst, int shift) { | |
1618 assert dst.isFpu(); | |
1619 // HMM Table D-1 says sse2 or mmx | |
1620 | |
1621 int encode = prefixqAndEncode(xmm2.encoding, dst.encoding); | |
1622 emitByte(0x66); | |
1623 emitByte(0x0F); | |
1624 emitByte(0x73); | |
1625 emitByte(0xC0 | encode); | |
1626 emitByte(shift); | |
1627 } | |
1628 | |
1629 public final void punpcklbw(CiRegister dst, CiRegister src) { | |
1630 assert dst.isFpu(); | |
1631 assert src.isFpu(); | |
1632 emitByte(0x66); | |
1633 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1634 emitByte(0x0F); | |
1635 emitByte(0x60); | |
1636 emitByte(0xC0 | encode); | |
1637 } | |
1638 | |
1639 public final void push(int imm32) { | |
1640 // in 64bits we push 64bits onto the stack but only | |
1641 // take a 32bit immediate | |
1642 emitByte(0x68); | |
1643 emitInt(imm32); | |
1644 } | |
1645 | |
1646 public final void push(CiRegister src) { | |
1647 int encode = prefixAndEncode(src.encoding); | |
1648 emitByte(0x50 | encode); | |
1649 } | |
1650 | |
1651 public final void pushf() { | |
1652 emitByte(0x9C); | |
1653 } | |
1654 | |
1655 public final void pushl(CiAddress src) { | |
1656 // Note this will push 64bit on 64bit | |
1657 prefix(src); | |
1658 emitByte(0xFF); | |
1659 emitOperandHelper(rsi, src); | |
1660 } | |
1661 | |
1662 public final void pxor(CiRegister dst, CiAddress src) { | |
1663 assert dst.isFpu(); | |
1664 | |
1665 emitByte(0x66); | |
1666 prefix(src, dst); | |
1667 emitByte(0x0F); | |
1668 emitByte(0xEF); | |
1669 emitOperandHelper(dst, src); | |
1670 } | |
1671 | |
1672 public final void pxor(CiRegister dst, CiRegister src) { | |
1673 assert dst.isFpu(); | |
1674 assert src.isFpu(); | |
1675 | |
1676 emitByte(0x66); | |
1677 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1678 emitByte(0x0F); | |
1679 emitByte(0xEF); | |
1680 emitByte(0xC0 | encode); | |
1681 | |
1682 } | |
1683 | |
1684 public final void rcll(CiRegister dst, int imm8) { | |
1685 assert isShiftCount(imm8) : "illegal shift count"; | |
1686 int encode = prefixAndEncode(dst.encoding); | |
1687 if (imm8 == 1) { | |
1688 emitByte(0xD1); | |
1689 emitByte(0xD0 | encode); | |
1690 } else { | |
1691 emitByte(0xC1); | |
1692 emitByte(0xD0 | encode); | |
1693 emitByte(imm8); | |
1694 } | |
1695 } | |
1696 | |
1697 public final void pause() { | |
1698 emitByte(0xF3); | |
1699 emitByte(0x90); | |
1700 } | |
1701 | |
1702 // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx heap words. | |
1703 public final void repeatMoveWords() { | |
1704 emitByte(0xF3); | |
1705 emitByte(Prefix.REXW); | |
1706 emitByte(0xA5); | |
1707 } | |
1708 | |
1709 // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx bytes. | |
1710 public final void repeatMoveBytes() { | |
1711 emitByte(0xF3); | |
1712 emitByte(Prefix.REXW); | |
1713 emitByte(0xA4); | |
1714 } | |
1715 | |
1716 // sets X86.rcx pointer sized words with X86.rax, value at [edi] | |
1717 // generic | |
1718 public final void repSet() { // repSet | |
1719 emitByte(0xF3); | |
1720 // STOSQ | |
1721 emitByte(Prefix.REXW); | |
1722 emitByte(0xAB); | |
1723 } | |
1724 | |
1725 // scans X86.rcx pointer sized words at [edi] for occurance of X86.rax, | |
1726 // generic | |
1727 public final void repneScan() { // repneScan | |
1728 emitByte(0xF2); | |
1729 // SCASQ | |
1730 emitByte(Prefix.REXW); | |
1731 emitByte(0xAF); | |
1732 } | |
1733 | |
1734 // scans X86.rcx 4 byte words at [edi] for occurance of X86.rax, | |
1735 // generic | |
1736 public final void repneScanl() { // repneScan | |
1737 emitByte(0xF2); | |
1738 // SCASL | |
1739 emitByte(0xAF); | |
1740 } | |
1741 | |
1742 public final void ret(int imm16) { | |
1743 if (imm16 == 0) { | |
1744 emitByte(0xC3); | |
1745 } else { | |
1746 emitByte(0xC2); | |
1747 emitShort(imm16); | |
1748 } | |
1749 } | |
1750 | |
1751 public final void sarl(CiRegister dst, int imm8) { | |
1752 int encode = prefixAndEncode(dst.encoding); | |
1753 assert isShiftCount(imm8) : "illegal shift count"; | |
1754 if (imm8 == 1) { | |
1755 emitByte(0xD1); | |
1756 emitByte(0xF8 | encode); | |
1757 } else { | |
1758 emitByte(0xC1); | |
1759 emitByte(0xF8 | encode); | |
1760 emitByte(imm8); | |
1761 } | |
1762 } | |
1763 | |
1764 public final void sarl(CiRegister dst) { | |
1765 int encode = prefixAndEncode(dst.encoding); | |
1766 emitByte(0xD3); | |
1767 emitByte(0xF8 | encode); | |
1768 } | |
1769 | |
1770 public final void sbbl(CiAddress dst, int imm32) { | |
1771 prefix(dst); | |
1772 emitArithOperand(0x81, rbx, dst, imm32); | |
1773 } | |
1774 | |
1775 public final void sbbl(CiRegister dst, int imm32) { | |
1776 prefix(dst); | |
1777 emitArith(0x81, 0xD8, dst, imm32); | |
1778 } | |
1779 | |
1780 public final void sbbl(CiRegister dst, CiAddress src) { | |
1781 prefix(src, dst); | |
1782 emitByte(0x1B); | |
1783 emitOperandHelper(dst, src); | |
1784 } | |
1785 | |
1786 public final void sbbl(CiRegister dst, CiRegister src) { | |
1787 prefixAndEncode(dst.encoding, src.encoding); | |
1788 emitArith(0x1B, 0xC0, dst, src); | |
1789 } | |
1790 | |
1791 public final void setb(ConditionFlag cc, CiRegister dst) { | |
1792 assert 0 <= cc.value && cc.value < 16 : "illegal cc"; | |
1793 int encode = prefixAndEncode(dst.encoding, true); | |
1794 emitByte(0x0F); | |
1795 emitByte(0x90 | cc.value); | |
1796 emitByte(0xC0 | encode); | |
1797 } | |
1798 | |
1799 public final void shll(CiRegister dst, int imm8) { | |
1800 assert isShiftCount(imm8) : "illegal shift count"; | |
1801 int encode = prefixAndEncode(dst.encoding); | |
1802 if (imm8 == 1) { | |
1803 emitByte(0xD1); | |
1804 emitByte(0xE0 | encode); | |
1805 } else { | |
1806 emitByte(0xC1); | |
1807 emitByte(0xE0 | encode); | |
1808 emitByte(imm8); | |
1809 } | |
1810 } | |
1811 | |
1812 public final void shll(CiRegister dst) { | |
1813 int encode = prefixAndEncode(dst.encoding); | |
1814 emitByte(0xD3); | |
1815 emitByte(0xE0 | encode); | |
1816 } | |
1817 | |
1818 public final void shrl(CiRegister dst, int imm8) { | |
1819 assert isShiftCount(imm8) : "illegal shift count"; | |
1820 int encode = prefixAndEncode(dst.encoding); | |
1821 emitByte(0xC1); | |
1822 emitByte(0xE8 | encode); | |
1823 emitByte(imm8); | |
1824 } | |
1825 | |
1826 public final void shrl(CiRegister dst) { | |
1827 int encode = prefixAndEncode(dst.encoding); | |
1828 emitByte(0xD3); | |
1829 emitByte(0xE8 | encode); | |
1830 } | |
1831 | |
1832 // copies a single word from [esi] to [edi] | |
1833 public final void smovl() { | |
1834 emitByte(0xA5); | |
1835 } | |
1836 | |
1837 public final void sqrtsd(CiRegister dst, CiRegister src) { | |
1838 assert dst.isFpu(); | |
1839 assert src.isFpu(); | |
1840 // HMM Table D-1 says sse2 | |
1841 // assert is64 || target.supportsSSE(); | |
1842 emitByte(0xF2); | |
1843 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1844 emitByte(0x0F); | |
1845 emitByte(0x51); | |
1846 emitByte(0xC0 | encode); | |
1847 } | |
1848 | |
1849 public final void subl(CiAddress dst, int imm32) { | |
1850 prefix(dst); | |
1851 if (isByte(imm32)) { | |
1852 emitByte(0x83); | |
1853 emitOperandHelper(rbp, dst); | |
1854 emitByte(imm32 & 0xFF); | |
1855 } else { | |
1856 emitByte(0x81); | |
1857 emitOperandHelper(rbp, dst); | |
1858 emitInt(imm32); | |
1859 } | |
1860 } | |
1861 | |
1862 public final void subl(CiRegister dst, int imm32) { | |
1863 prefix(dst); | |
1864 emitArith(0x81, 0xE8, dst, imm32); | |
1865 } | |
1866 | |
1867 public final void subl(CiAddress dst, CiRegister src) { | |
1868 prefix(dst, src); | |
1869 emitByte(0x29); | |
1870 emitOperandHelper(src, dst); | |
1871 } | |
1872 | |
1873 public final void subl(CiRegister dst, CiAddress src) { | |
1874 prefix(src, dst); | |
1875 emitByte(0x2B); | |
1876 emitOperandHelper(dst, src); | |
1877 } | |
1878 | |
1879 public final void subl(CiRegister dst, CiRegister src) { | |
1880 prefixAndEncode(dst.encoding, src.encoding); | |
1881 emitArith(0x2B, 0xC0, dst, src); | |
1882 } | |
1883 | |
1884 public final void subsd(CiRegister dst, CiRegister src) { | |
1885 assert dst.isFpu(); | |
1886 assert src.isFpu(); | |
1887 emitByte(0xF2); | |
1888 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1889 emitByte(0x0F); | |
1890 emitByte(0x5C); | |
1891 emitByte(0xC0 | encode); | |
1892 } | |
1893 | |
1894 public final void subsd(CiRegister dst, CiAddress src) { | |
1895 assert dst.isFpu(); | |
1896 | |
1897 emitByte(0xF2); | |
1898 prefix(src, dst); | |
1899 emitByte(0x0F); | |
1900 emitByte(0x5C); | |
1901 emitOperandHelper(dst, src); | |
1902 } | |
1903 | |
1904 public final void subss(CiRegister dst, CiRegister src) { | |
1905 assert dst.isFpu(); | |
1906 assert src.isFpu(); | |
1907 emitByte(0xF3); | |
1908 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1909 emitByte(0x0F); | |
1910 emitByte(0x5C); | |
1911 emitByte(0xC0 | encode); | |
1912 } | |
1913 | |
1914 public final void subss(CiRegister dst, CiAddress src) { | |
1915 assert dst.isFpu(); | |
1916 | |
1917 emitByte(0xF3); | |
1918 prefix(src, dst); | |
1919 emitByte(0x0F); | |
1920 emitByte(0x5C); | |
1921 emitOperandHelper(dst, src); | |
1922 } | |
1923 | |
1924 public final void testb(CiRegister dst, int imm8) { | |
1925 prefixAndEncode(dst.encoding, true); | |
1926 emitArithB(0xF6, 0xC0, dst, imm8); | |
1927 } | |
1928 | |
1929 public final void testl(CiRegister dst, int imm32) { | |
1930 // not using emitArith because test | |
1931 // doesn't support sign-extension of | |
1932 // 8bit operands | |
1933 int encode = dst.encoding; | |
1934 if (encode == 0) { | |
1935 emitByte(0xA9); | |
1936 } else { | |
1937 encode = prefixAndEncode(encode); | |
1938 emitByte(0xF7); | |
1939 emitByte(0xC0 | encode); | |
1940 } | |
1941 emitInt(imm32); | |
1942 } | |
1943 | |
1944 public final void testl(CiRegister dst, CiRegister src) { | |
1945 prefixAndEncode(dst.encoding, src.encoding); | |
1946 emitArith(0x85, 0xC0, dst, src); | |
1947 } | |
1948 | |
1949 public final void testl(CiRegister dst, CiAddress src) { | |
1950 prefix(src, dst); | |
1951 emitByte(0x85); | |
1952 emitOperandHelper(dst, src); | |
1953 } | |
1954 | |
1955 public final void ucomisd(CiRegister dst, CiAddress src) { | |
1956 assert dst.isFpu(); | |
1957 emitByte(0x66); | |
1958 ucomiss(dst, src); | |
1959 } | |
1960 | |
1961 public final void ucomisd(CiRegister dst, CiRegister src) { | |
1962 assert dst.isFpu(); | |
1963 assert src.isFpu(); | |
1964 emitByte(0x66); | |
1965 ucomiss(dst, src); | |
1966 } | |
1967 | |
1968 public final void ucomiss(CiRegister dst, CiAddress src) { | |
1969 assert dst.isFpu(); | |
1970 | |
1971 prefix(src, dst); | |
1972 emitByte(0x0F); | |
1973 emitByte(0x2E); | |
1974 emitOperandHelper(dst, src); | |
1975 } | |
1976 | |
1977 public final void ucomiss(CiRegister dst, CiRegister src) { | |
1978 assert dst.isFpu(); | |
1979 assert src.isFpu(); | |
1980 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1981 emitByte(0x0F); | |
1982 emitByte(0x2E); | |
1983 emitByte(0xC0 | encode); | |
1984 } | |
1985 | |
1986 public final void xaddl(CiAddress dst, CiRegister src) { | |
1987 assert src.isFpu(); | |
1988 | |
1989 prefix(dst, src); | |
1990 emitByte(0x0F); | |
1991 emitByte(0xC1); | |
1992 emitOperandHelper(src, dst); | |
1993 } | |
1994 | |
1995 public final void xchgl(CiRegister dst, CiAddress src) { // xchg | |
1996 prefix(src, dst); | |
1997 emitByte(0x87); | |
1998 emitOperandHelper(dst, src); | |
1999 } | |
2000 | |
2001 public final void xchgl(CiRegister dst, CiRegister src) { | |
2002 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2003 emitByte(0x87); | |
2004 emitByte(0xc0 | encode); | |
2005 } | |
2006 | |
2007 public final void xorl(CiRegister dst, int imm32) { | |
2008 prefix(dst); | |
2009 emitArith(0x81, 0xF0, dst, imm32); | |
2010 } | |
2011 | |
2012 public final void xorl(CiRegister dst, CiAddress src) { | |
2013 prefix(src, dst); | |
2014 emitByte(0x33); | |
2015 emitOperandHelper(dst, src); | |
2016 } | |
2017 | |
2018 public final void xorl(CiRegister dst, CiRegister src) { | |
2019 prefixAndEncode(dst.encoding, src.encoding); | |
2020 emitArith(0x33, 0xC0, dst, src); | |
2021 } | |
2022 | |
2023 public final void andpd(CiRegister dst, CiRegister src) { | |
2024 emitByte(0x66); | |
2025 andps(dst, src); | |
2026 } | |
2027 | |
2028 public final void andpd(CiRegister dst, CiAddress src) { | |
2029 emitByte(0x66); | |
2030 andps(dst, src); | |
2031 } | |
2032 | |
2033 public final void andps(CiRegister dst, CiRegister src) { | |
2034 assert dst.isFpu() && src.isFpu(); | |
2035 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2036 emitByte(0x0F); | |
2037 emitByte(0x54); | |
2038 emitByte(0xC0 | encode); | |
2039 } | |
2040 | |
2041 public final void andps(CiRegister dst, CiAddress src) { | |
2042 assert dst.isFpu(); | |
2043 prefix(src, dst); | |
2044 emitByte(0x0F); | |
2045 emitByte(0x54); | |
2046 emitOperandHelper(dst, src); | |
2047 } | |
2048 | |
2049 public final void orpd(CiRegister dst, CiRegister src) { | |
2050 emitByte(0x66); | |
2051 orps(dst, src); | |
2052 } | |
2053 | |
2054 public final void orpd(CiRegister dst, CiAddress src) { | |
2055 emitByte(0x66); | |
2056 orps(dst, src); | |
2057 } | |
2058 | |
2059 public final void orps(CiRegister dst, CiRegister src) { | |
2060 assert dst.isFpu() && src.isFpu(); | |
2061 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2062 emitByte(0x0F); | |
2063 emitByte(0x56); | |
2064 emitByte(0xC0 | encode); | |
2065 } | |
2066 | |
2067 public final void orps(CiRegister dst, CiAddress src) { | |
2068 assert dst.isFpu(); | |
2069 prefix(src, dst); | |
2070 emitByte(0x0F); | |
2071 emitByte(0x56); | |
2072 emitOperandHelper(dst, src); | |
2073 } | |
2074 | |
2075 public final void xorpd(CiRegister dst, CiRegister src) { | |
2076 emitByte(0x66); | |
2077 xorps(dst, src); | |
2078 } | |
2079 | |
2080 public final void xorpd(CiRegister dst, CiAddress src) { | |
2081 emitByte(0x66); | |
2082 xorps(dst, src); | |
2083 } | |
2084 | |
2085 public final void xorps(CiRegister dst, CiRegister src) { | |
2086 assert dst.isFpu() && src.isFpu(); | |
2087 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2088 emitByte(0x0F); | |
2089 emitByte(0x57); | |
2090 emitByte(0xC0 | encode); | |
2091 } | |
2092 | |
2093 public final void xorps(CiRegister dst, CiAddress src) { | |
2094 assert dst.isFpu(); | |
2095 prefix(src, dst); | |
2096 emitByte(0x0F); | |
2097 emitByte(0x57); | |
2098 emitOperandHelper(dst, src); | |
2099 } | |
2100 | |
2101 // 32bit only pieces of the assembler | |
2102 | |
2103 public final void decl(CiRegister dst) { | |
2104 // Don't use it directly. Use Macrodecrementl() instead. | |
2105 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) | |
2106 int encode = prefixAndEncode(dst.encoding); | |
2107 emitByte(0xFF); | |
2108 emitByte(0xC8 | encode); | |
2109 } | |
2110 | |
2111 public final void incl(CiRegister dst) { | |
2112 // Don't use it directly. Use Macroincrementl() instead. | |
2113 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) | |
2114 int encode = prefixAndEncode(dst.encoding); | |
2115 emitByte(0xFF); | |
2116 emitByte(0xC0 | encode); | |
2117 } | |
2118 | |
2119 int prefixAndEncode(int regEnc) { | |
2120 return prefixAndEncode(regEnc, false); | |
2121 } | |
2122 | |
2123 int prefixAndEncode(int regEnc, boolean byteinst) { | |
2124 if (regEnc >= 8) { | |
2125 emitByte(Prefix.REXB); | |
2126 regEnc -= 8; | |
2127 } else if (byteinst && regEnc >= 4) { | |
2128 emitByte(Prefix.REX); | |
2129 } | |
2130 return regEnc; | |
2131 } | |
2132 | |
2133 int prefixqAndEncode(int regEnc) { | |
2134 if (regEnc < 8) { | |
2135 emitByte(Prefix.REXW); | |
2136 } else { | |
2137 emitByte(Prefix.REXWB); | |
2138 regEnc -= 8; | |
2139 } | |
2140 return regEnc; | |
2141 } | |
2142 | |
2143 int prefixAndEncode(int dstEnc, int srcEnc) { | |
2144 return prefixAndEncode(dstEnc, srcEnc, false); | |
2145 } | |
2146 | |
2147 int prefixAndEncode(int dstEnc, int srcEnc, boolean byteinst) { | |
2148 if (dstEnc < 8) { | |
2149 if (srcEnc >= 8) { | |
2150 emitByte(Prefix.REXB); | |
2151 srcEnc -= 8; | |
2152 } else if (byteinst && srcEnc >= 4) { | |
2153 emitByte(Prefix.REX); | |
2154 } | |
2155 } else { | |
2156 if (srcEnc < 8) { | |
2157 emitByte(Prefix.REXR); | |
2158 } else { | |
2159 emitByte(Prefix.REXRB); | |
2160 srcEnc -= 8; | |
2161 } | |
2162 dstEnc -= 8; | |
2163 } | |
2164 return dstEnc << 3 | srcEnc; | |
2165 } | |
2166 | |
2167 /** | |
2168 * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand prefix. If the given | |
2169 * operands exceed 3 bits, the 4th bit is encoded in the prefix. | |
2170 * | |
2171 * @param regEnc the encoding of the register part of the ModRM-Byte | |
2172 * @param rmEnc the encoding of the r/m part of the ModRM-Byte | |
2173 * @return the lower 6 bits of the ModRM-Byte that should be emitted | |
2174 */ | |
2175 private int prefixqAndEncode(int regEnc, int rmEnc) { | |
2176 if (regEnc < 8) { | |
2177 if (rmEnc < 8) { | |
2178 emitByte(Prefix.REXW); | |
2179 } else { | |
2180 emitByte(Prefix.REXWB); | |
2181 rmEnc -= 8; | |
2182 } | |
2183 } else { | |
2184 if (rmEnc < 8) { | |
2185 emitByte(Prefix.REXWR); | |
2186 } else { | |
2187 emitByte(Prefix.REXWRB); | |
2188 rmEnc -= 8; | |
2189 } | |
2190 regEnc -= 8; | |
2191 } | |
2192 return regEnc << 3 | rmEnc; | |
2193 } | |
2194 | |
2195 private void prefix(CiRegister reg) { | |
2196 if (reg.encoding >= 8) { | |
2197 emitByte(Prefix.REXB); | |
2198 } | |
2199 } | |
2200 | |
2201 private void prefix(CiAddress adr) { | |
2202 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2203 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2204 emitByte(Prefix.REXXB); | |
2205 } else { | |
2206 emitByte(Prefix.REXB); | |
2207 } | |
2208 } else { | |
2209 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2210 emitByte(Prefix.REXX); | |
2211 } | |
2212 } | |
2213 } | |
2214 | |
2215 private void prefixq(CiAddress adr) { | |
2216 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2217 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2218 emitByte(Prefix.REXWXB); | |
2219 } else { | |
2220 emitByte(Prefix.REXWB); | |
2221 } | |
2222 } else { | |
2223 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2224 emitByte(Prefix.REXWX); | |
2225 } else { | |
2226 emitByte(Prefix.REXW); | |
2227 } | |
2228 } | |
2229 } | |
2230 | |
2231 private void prefix(CiAddress adr, CiRegister reg) { | |
2232 if (reg.encoding < 8) { | |
2233 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2234 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2235 emitByte(Prefix.REXXB); | |
2236 } else { | |
2237 emitByte(Prefix.REXB); | |
2238 } | |
2239 } else { | |
2240 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2241 emitByte(Prefix.REXX); | |
2242 } else if (reg.encoding >= 4) { | |
2243 emitByte(Prefix.REX); | |
2244 } | |
2245 } | |
2246 } else { | |
2247 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2248 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2249 emitByte(Prefix.REXRXB); | |
2250 } else { | |
2251 emitByte(Prefix.REXRB); | |
2252 } | |
2253 } else { | |
2254 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2255 emitByte(Prefix.REXRX); | |
2256 } else { | |
2257 emitByte(Prefix.REXR); | |
2258 } | |
2259 } | |
2260 } | |
2261 } | |
2262 | |
2263 private void prefixq(CiAddress adr, CiRegister src) { | |
2264 if (src.encoding < 8) { | |
2265 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2266 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2267 emitByte(Prefix.REXWXB); | |
2268 } else { | |
2269 emitByte(Prefix.REXWB); | |
2270 } | |
2271 } else { | |
2272 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2273 emitByte(Prefix.REXWX); | |
2274 } else { | |
2275 emitByte(Prefix.REXW); | |
2276 } | |
2277 } | |
2278 } else { | |
2279 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2280 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2281 emitByte(Prefix.REXWRXB); | |
2282 } else { | |
2283 emitByte(Prefix.REXWRB); | |
2284 } | |
2285 } else { | |
2286 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2287 emitByte(Prefix.REXWRX); | |
2288 } else { | |
2289 emitByte(Prefix.REXWR); | |
2290 } | |
2291 } | |
2292 } | |
2293 } | |
2294 | |
2295 public final void addq(CiAddress dst, int imm32) { | |
2296 prefixq(dst); | |
2297 emitArithOperand(0x81, rax, dst, imm32); | |
2298 } | |
2299 | |
2300 public final void addq(CiAddress dst, CiRegister src) { | |
2301 prefixq(dst, src); | |
2302 emitByte(0x01); | |
2303 emitOperandHelper(src, dst); | |
2304 } | |
2305 | |
2306 public final void addq(CiRegister dst, int imm32) { | |
2307 prefixqAndEncode(dst.encoding); | |
2308 emitArith(0x81, 0xC0, dst, imm32); | |
2309 } | |
2310 | |
2311 public final void addq(CiRegister dst, CiAddress src) { | |
2312 prefixq(src, dst); | |
2313 emitByte(0x03); | |
2314 emitOperandHelper(dst, src); | |
2315 } | |
2316 | |
2317 public final void addq(CiRegister dst, CiRegister src) { | |
2318 prefixqAndEncode(dst.encoding, src.encoding); | |
2319 emitArith(0x03, 0xC0, dst, src); | |
2320 } | |
2321 | |
2322 public final void andq(CiRegister dst, int imm32) { | |
2323 prefixqAndEncode(dst.encoding); | |
2324 emitArith(0x81, 0xE0, dst, imm32); | |
2325 } | |
2326 | |
2327 public final void andq(CiRegister dst, CiAddress src) { | |
2328 prefixq(src, dst); | |
2329 emitByte(0x23); | |
2330 emitOperandHelper(dst, src); | |
2331 } | |
2332 | |
2333 public final void andq(CiRegister dst, CiRegister src) { | |
2334 prefixqAndEncode(dst.encoding, src.encoding); | |
2335 emitArith(0x23, 0xC0, dst, src); | |
2336 } | |
2337 | |
2338 public final void bswapq(CiRegister reg) { | |
2339 int encode = prefixqAndEncode(reg.encoding); | |
2340 emitByte(0x0F); | |
2341 emitByte(0xC8 | encode); | |
2342 } | |
2343 | |
2344 public final void cdqq() { | |
2345 emitByte(Prefix.REXW); | |
2346 emitByte(0x99); | |
2347 } | |
2348 | |
2349 public final void cmovq(ConditionFlag cc, CiRegister dst, CiRegister src) { | |
2350 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2351 emitByte(0x0F); | |
2352 emitByte(0x40 | cc.value); | |
2353 emitByte(0xC0 | encode); | |
2354 } | |
2355 | |
2356 public final void cmovq(ConditionFlag cc, CiRegister dst, CiAddress src) { | |
2357 prefixq(src, dst); | |
2358 emitByte(0x0F); | |
2359 emitByte(0x40 | cc.value); | |
2360 emitOperandHelper(dst, src); | |
2361 } | |
2362 | |
2363 public final void cmpq(CiAddress dst, int imm32) { | |
2364 prefixq(dst); | |
2365 emitByte(0x81); | |
2366 emitOperandHelper(rdi, dst); | |
2367 emitInt(imm32); | |
2368 } | |
2369 | |
2370 public final void cmpq(CiRegister dst, int imm32) { | |
2371 prefixqAndEncode(dst.encoding); | |
2372 emitArith(0x81, 0xF8, dst, imm32); | |
2373 } | |
2374 | |
2375 public final void cmpq(CiAddress dst, CiRegister src) { | |
2376 prefixq(dst, src); | |
2377 emitByte(0x3B); | |
2378 emitOperandHelper(src, dst); | |
2379 } | |
2380 | |
2381 public final void cmpq(CiRegister dst, CiRegister src) { | |
2382 prefixqAndEncode(dst.encoding, src.encoding); | |
2383 emitArith(0x3B, 0xC0, dst, src); | |
2384 } | |
2385 | |
2386 public final void cmpq(CiRegister dst, CiAddress src) { | |
2387 prefixq(src, dst); | |
2388 emitByte(0x3B); | |
2389 emitOperandHelper(dst, src); | |
2390 } | |
2391 | |
2392 public final void cmpxchgq(CiRegister reg, CiAddress adr) { | |
2393 prefixq(adr, reg); | |
2394 emitByte(0x0F); | |
2395 emitByte(0xB1); | |
2396 emitOperandHelper(reg, adr); | |
2397 } | |
2398 | |
2399 public final void cvtsi2sdq(CiRegister dst, CiRegister src) { | |
2400 assert dst.isFpu(); | |
2401 emitByte(0xF2); | |
2402 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2403 emitByte(0x0F); | |
2404 emitByte(0x2A); | |
2405 emitByte(0xC0 | encode); | |
2406 } | |
2407 | |
2408 public final void cvtsi2ssq(CiRegister dst, CiRegister src) { | |
2409 assert dst.isFpu(); | |
2410 emitByte(0xF3); | |
2411 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2412 emitByte(0x0F); | |
2413 emitByte(0x2A); | |
2414 emitByte(0xC0 | encode); | |
2415 } | |
2416 | |
2417 public final void cvttsd2siq(CiRegister dst, CiRegister src) { | |
2418 assert src.isFpu(); | |
2419 emitByte(0xF2); | |
2420 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2421 emitByte(0x0F); | |
2422 emitByte(0x2C); | |
2423 emitByte(0xC0 | encode); | |
2424 } | |
2425 | |
2426 public final void cvttss2siq(CiRegister dst, CiRegister src) { | |
2427 assert src.isFpu(); | |
2428 emitByte(0xF3); | |
2429 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2430 emitByte(0x0F); | |
2431 emitByte(0x2C); | |
2432 emitByte(0xC0 | encode); | |
2433 } | |
2434 | |
2435 public final void decq(CiRegister dst) { | |
2436 // Don't use it directly. Use Macrodecrementq() instead. | |
2437 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) | |
2438 int encode = prefixqAndEncode(dst.encoding); | |
2439 emitByte(0xFF); | |
2440 emitByte(0xC8 | encode); | |
2441 } | |
2442 | |
2443 public final void decq(CiAddress dst) { | |
2444 // Don't use it directly. Use Macrodecrementq() instead. | |
2445 prefixq(dst); | |
2446 emitByte(0xFF); | |
2447 emitOperandHelper(rcx, dst); | |
2448 } | |
2449 | |
2450 public final void divq(CiRegister src) { | |
2451 int encode = prefixqAndEncode(src.encoding); | |
2452 emitByte(0xF7); | |
2453 emitByte(0xF0 | encode); | |
2454 } | |
2455 | |
2456 public final void idivq(CiRegister src) { | |
2457 int encode = prefixqAndEncode(src.encoding); | |
2458 emitByte(0xF7); | |
2459 emitByte(0xF8 | encode); | |
2460 } | |
2461 | |
2462 public final void imulq(CiRegister dst, CiRegister src) { | |
2463 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2464 emitByte(0x0F); | |
2465 emitByte(0xAF); | |
2466 emitByte(0xC0 | encode); | |
2467 } | |
2468 | |
2469 public final void imulq(CiRegister dst, CiRegister src, int value) { | |
2470 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2471 if (isByte(value)) { | |
2472 emitByte(0x6B); | |
2473 emitByte(0xC0 | encode); | |
2474 emitByte(value); | |
2475 } else { | |
2476 emitByte(0x69); | |
2477 emitByte(0xC0 | encode); | |
2478 emitInt(value); | |
2479 } | |
2480 } | |
2481 | |
2482 public final void incq(CiRegister dst) { | |
2483 // Don't use it directly. Use Macroincrementq() instead. | |
2484 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) | |
2485 int encode = prefixqAndEncode(dst.encoding); | |
2486 emitByte(0xFF); | |
2487 emitByte(0xC0 | encode); | |
2488 } | |
2489 | |
2490 public final void incq(CiAddress dst) { | |
2491 // Don't use it directly. Use Macroincrementq() instead. | |
2492 prefixq(dst); | |
2493 emitByte(0xFF); | |
2494 emitOperandHelper(rax, dst); | |
2495 } | |
2496 | |
2497 public final void movq(CiRegister dst, long imm64) { | |
2498 int encode = prefixqAndEncode(dst.encoding); | |
2499 emitByte(0xB8 | encode); | |
2500 emitLong(imm64); | |
2501 } | |
2502 | |
2503 public final void movdq(CiRegister dst, CiRegister src) { | |
2504 | |
2505 // table D-1 says MMX/SSE2 | |
2506 emitByte(0x66); | |
2507 | |
2508 if (dst.isFpu()) { | |
2509 assert dst.isFpu(); | |
2510 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2511 emitByte(0x0F); | |
2512 emitByte(0x6E); | |
2513 emitByte(0xC0 | encode); | |
2514 } else if (src.isFpu()) { | |
2515 | |
2516 // swap src/dst to get correct prefix | |
2517 int encode = prefixqAndEncode(src.encoding, dst.encoding); | |
2518 emitByte(0x0F); | |
2519 emitByte(0x7E); | |
2520 emitByte(0xC0 | encode); | |
2521 } else { | |
2522 throw new InternalError("should not reach here"); | |
2523 } | |
2524 } | |
2525 | |
2526 public final void movsbq(CiRegister dst, CiAddress src) { | |
2527 prefixq(src, dst); | |
2528 emitByte(0x0F); | |
2529 emitByte(0xBE); | |
2530 emitOperandHelper(dst, src); | |
2531 } | |
2532 | |
2533 public final void movsbq(CiRegister dst, CiRegister src) { | |
2534 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2535 emitByte(0x0F); | |
2536 emitByte(0xBE); | |
2537 emitByte(0xC0 | encode); | |
2538 } | |
2539 | |
2540 public final void movslq(CiRegister dst, int imm32) { | |
2541 int encode = prefixqAndEncode(dst.encoding); | |
2542 emitByte(0xC7 | encode); | |
2543 emitInt(imm32); | |
2544 // dbx shows movslq(X86.rcx, 3) as movq $0x0000000049000000,(%X86.rbx) | |
2545 // and movslq(X86.r8, 3); as movl $0x0000000048000000,(%X86.rbx) | |
2546 // as a result we shouldn't use until tested at runtime... | |
2547 throw new InternalError("untested"); | |
2548 } | |
2549 | |
2550 public final void movslq(CiAddress dst, int imm32) { | |
2551 prefixq(dst); | |
2552 emitByte(0xC7); | |
2553 emitOperandHelper(rax, dst); | |
2554 emitInt(imm32); | |
2555 } | |
2556 | |
2557 public final void movslq(CiRegister dst, CiAddress src) { | |
2558 prefixq(src, dst); | |
2559 emitByte(0x63); | |
2560 emitOperandHelper(dst, src); | |
2561 } | |
2562 | |
2563 public final void movslq(CiRegister dst, CiRegister src) { | |
2564 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2565 emitByte(0x63); | |
2566 emitByte(0xC0 | encode); | |
2567 } | |
2568 | |
2569 public final void movswq(CiRegister dst, CiAddress src) { | |
2570 prefixq(src, dst); | |
2571 emitByte(0x0F); | |
2572 emitByte(0xBF); | |
2573 emitOperandHelper(dst, src); | |
2574 } | |
2575 | |
2576 public final void movswq(CiRegister dst, CiRegister src) { | |
2577 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2578 emitByte(0x0F); | |
2579 emitByte(0xBF); | |
2580 emitByte(0xC0 | encode); | |
2581 } | |
2582 | |
2583 public final void movzbq(CiRegister dst, CiAddress src) { | |
2584 prefixq(src, dst); | |
2585 emitByte(0x0F); | |
2586 emitByte(0xB6); | |
2587 emitOperandHelper(dst, src); | |
2588 } | |
2589 | |
2590 public final void movzbq(CiRegister dst, CiRegister src) { | |
2591 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2592 emitByte(0x0F); | |
2593 emitByte(0xB6); | |
2594 emitByte(0xC0 | encode); | |
2595 } | |
2596 | |
2597 public final void movzwq(CiRegister dst, CiAddress src) { | |
2598 prefixq(src, dst); | |
2599 emitByte(0x0F); | |
2600 emitByte(0xB7); | |
2601 emitOperandHelper(dst, src); | |
2602 } | |
2603 | |
2604 public final void movzwq(CiRegister dst, CiRegister src) { | |
2605 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2606 emitByte(0x0F); | |
2607 emitByte(0xB7); | |
2608 emitByte(0xC0 | encode); | |
2609 } | |
2610 | |
2611 public final void negq(CiRegister dst) { | |
2612 int encode = prefixqAndEncode(dst.encoding); | |
2613 emitByte(0xF7); | |
2614 emitByte(0xD8 | encode); | |
2615 } | |
2616 | |
2617 public final void notq(CiRegister dst) { | |
2618 int encode = prefixqAndEncode(dst.encoding); | |
2619 emitByte(0xF7); | |
2620 emitByte(0xD0 | encode); | |
2621 } | |
2622 | |
2623 public final void orq(CiAddress dst, int imm32) { | |
2624 prefixq(dst); | |
2625 emitByte(0x81); | |
2626 emitOperandHelper(rcx, dst); | |
2627 emitInt(imm32); | |
2628 } | |
2629 | |
2630 public final void orq(CiRegister dst, int imm32) { | |
2631 prefixqAndEncode(dst.encoding); | |
2632 emitArith(0x81, 0xC8, dst, imm32); | |
2633 } | |
2634 | |
2635 public final void orq(CiRegister dst, CiAddress src) { | |
2636 prefixq(src, dst); | |
2637 emitByte(0x0B); | |
2638 emitOperandHelper(dst, src); | |
2639 } | |
2640 | |
2641 public final void orq(CiRegister dst, CiRegister src) { | |
2642 prefixqAndEncode(dst.encoding, src.encoding); | |
2643 emitArith(0x0B, 0xC0, dst, src); | |
2644 } | |
2645 | |
2646 public final void popq(CiAddress dst) { | |
2647 prefixq(dst); | |
2648 emitByte(0x8F); | |
2649 emitOperandHelper(rax, dst); | |
2650 } | |
2651 | |
2652 public final void pushq(CiAddress src) { | |
2653 prefixq(src); | |
2654 emitByte(0xFF); | |
2655 emitOperandHelper(rsi, src); | |
2656 } | |
2657 | |
2658 public final void rclq(CiRegister dst, int imm8) { | |
2659 assert isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2660 int encode = prefixqAndEncode(dst.encoding); | |
2661 if (imm8 == 1) { | |
2662 emitByte(0xD1); | |
2663 emitByte(0xD0 | encode); | |
2664 } else { | |
2665 emitByte(0xC1); | |
2666 emitByte(0xD0 | encode); | |
2667 emitByte(imm8); | |
2668 } | |
2669 } | |
2670 | |
2671 public final void sarq(CiRegister dst, int imm8) { | |
2672 assert isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2673 int encode = prefixqAndEncode(dst.encoding); | |
2674 if (imm8 == 1) { | |
2675 emitByte(0xD1); | |
2676 emitByte(0xF8 | encode); | |
2677 } else { | |
2678 emitByte(0xC1); | |
2679 emitByte(0xF8 | encode); | |
2680 emitByte(imm8); | |
2681 } | |
2682 } | |
2683 | |
2684 public final void sarq(CiRegister dst) { | |
2685 int encode = prefixqAndEncode(dst.encoding); | |
2686 emitByte(0xD3); | |
2687 emitByte(0xF8 | encode); | |
2688 } | |
2689 | |
2690 public final void shlq(CiRegister dst, int imm8) { | |
2691 assert isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2692 int encode = prefixqAndEncode(dst.encoding); | |
2693 if (imm8 == 1) { | |
2694 emitByte(0xD1); | |
2695 emitByte(0xE0 | encode); | |
2696 } else { | |
2697 emitByte(0xC1); | |
2698 emitByte(0xE0 | encode); | |
2699 emitByte(imm8); | |
2700 } | |
2701 } | |
2702 | |
2703 public final void shlq(CiRegister dst) { | |
2704 int encode = prefixqAndEncode(dst.encoding); | |
2705 emitByte(0xD3); | |
2706 emitByte(0xE0 | encode); | |
2707 } | |
2708 | |
2709 public final void shrq(CiRegister dst, int imm8) { | |
2710 assert isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2711 int encode = prefixqAndEncode(dst.encoding); | |
2712 emitByte(0xC1); | |
2713 emitByte(0xE8 | encode); | |
2714 emitByte(imm8); | |
2715 } | |
2716 | |
2717 public final void shrq(CiRegister dst) { | |
2718 int encode = prefixqAndEncode(dst.encoding); | |
2719 emitByte(0xD3); | |
2720 emitByte(0xE8 | encode); | |
2721 } | |
2722 | |
2723 public final void sqrtsd(CiRegister dst, CiAddress src) { | |
2724 assert dst.isFpu(); | |
2725 | |
2726 emitByte(0xF2); | |
2727 prefix(src, dst); | |
2728 emitByte(0x0F); | |
2729 emitByte(0x51); | |
2730 emitOperandHelper(dst, src); | |
2731 } | |
2732 | |
2733 public final void subq(CiAddress dst, int imm32) { | |
2734 prefixq(dst); | |
2735 if (isByte(imm32)) { | |
2736 emitByte(0x83); | |
2737 emitOperandHelper(rbp, dst); | |
2738 emitByte(imm32 & 0xFF); | |
2739 } else { | |
2740 emitByte(0x81); | |
2741 emitOperandHelper(rbp, dst); | |
2742 emitInt(imm32); | |
2743 } | |
2744 } | |
2745 | |
2746 public final void subq(CiRegister dst, int imm32) { | |
2747 prefixqAndEncode(dst.encoding); | |
2748 emitArith(0x81, 0xE8, dst, imm32); | |
2749 } | |
2750 | |
2751 public final void subq(CiAddress dst, CiRegister src) { | |
2752 prefixq(dst, src); | |
2753 emitByte(0x29); | |
2754 emitOperandHelper(src, dst); | |
2755 } | |
2756 | |
2757 public final void subq(CiRegister dst, CiAddress src) { | |
2758 prefixq(src, dst); | |
2759 emitByte(0x2B); | |
2760 emitOperandHelper(dst, src); | |
2761 } | |
2762 | |
2763 public final void subq(CiRegister dst, CiRegister src) { | |
2764 prefixqAndEncode(dst.encoding, src.encoding); | |
2765 emitArith(0x2B, 0xC0, dst, src); | |
2766 } | |
2767 | |
2768 public final void testq(CiRegister dst, int imm32) { | |
2769 // not using emitArith because test | |
2770 // doesn't support sign-extension of | |
2771 // 8bit operands | |
2772 int encode = dst.encoding; | |
2773 if (encode == 0) { | |
2774 emitByte(Prefix.REXW); | |
2775 emitByte(0xA9); | |
2776 } else { | |
2777 encode = prefixqAndEncode(encode); | |
2778 emitByte(0xF7); | |
2779 emitByte(0xC0 | encode); | |
2780 } | |
2781 emitInt(imm32); | |
2782 } | |
2783 | |
2784 public final void testq(CiRegister dst, CiRegister src) { | |
2785 prefixqAndEncode(dst.encoding, src.encoding); | |
2786 emitArith(0x85, 0xC0, dst, src); | |
2787 } | |
2788 | |
2789 public final void xaddq(CiAddress dst, CiRegister src) { | |
2790 prefixq(dst, src); | |
2791 emitByte(0x0F); | |
2792 emitByte(0xC1); | |
2793 emitOperandHelper(src, dst); | |
2794 } | |
2795 | |
2796 public final void xchgq(CiRegister dst, CiAddress src) { | |
2797 prefixq(src, dst); | |
2798 emitByte(0x87); | |
2799 emitOperandHelper(dst, src); | |
2800 } | |
2801 | |
2802 public final void xchgq(CiRegister dst, CiRegister src) { | |
2803 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2804 emitByte(0x87); | |
2805 emitByte(0xc0 | encode); | |
2806 } | |
2807 | |
2808 public final void xorq(CiRegister dst, int imm32) { | |
2809 prefixqAndEncode(dst.encoding); | |
2810 emitArith(0x81, 0xF0, dst, imm32); | |
2811 } | |
2812 | |
2813 public final void xorq(CiRegister dst, CiRegister src) { | |
2814 prefixqAndEncode(dst.encoding, src.encoding); | |
2815 emitArith(0x33, 0xC0, dst, src); | |
2816 } | |
2817 | |
2818 public final void xorq(CiRegister dst, CiAddress src) { | |
2819 | |
2820 prefixq(src, dst); | |
2821 emitByte(0x33); | |
2822 emitOperandHelper(dst, src); | |
2823 | |
2824 } | |
2825 | |
2826 public final void membar(int barriers) { | |
2827 if (target.isMP) { | |
2828 // We only have to handle StoreLoad | |
2829 if ((barriers & STORE_LOAD) != 0) { | |
2830 // All usable chips support "locked" instructions which suffice | |
2831 // as barriers, and are much faster than the alternative of | |
2832 // using cpuid instruction. We use here a locked add [rsp],0. | |
2833 // This is conveniently otherwise a no-op except for blowing | |
2834 // flags. | |
2835 // Any change to this code may need to revisit other places in | |
2836 // the code where this idiom is used, in particular the | |
2837 // orderAccess code. | |
2838 lock(); | |
2839 addl(new CiAddress(Word, RSP, 0), 0); // Assert the lock# signal here | |
2840 } | |
2841 } | |
2842 } | |
2843 | |
2844 @Override | |
2845 protected final void patchJumpTarget(int branch, int branchTarget) { | |
2846 int op = codeBuffer.getByte(branch); | |
2847 assert op == 0xE8 // call | |
2848 || op == 0x00 // jump table entry | |
2849 || op == 0xE9 // jmp | |
2850 || op == 0xEB // short jmp | |
2851 || (op & 0xF0) == 0x70 // short jcc | |
2852 || op == 0x0F && (codeBuffer.getByte(branch + 1) & 0xF0) == 0x80 // jcc | |
2853 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op; | |
2854 | |
2855 if (op == 0x00) { | |
2856 int offsetToJumpTableBase = codeBuffer.getShort(branch + 1); | |
2857 int jumpTableBase = branch - offsetToJumpTableBase; | |
2858 int imm32 = branchTarget - jumpTableBase; | |
2859 codeBuffer.emitInt(imm32, branch); | |
2860 } else if (op == 0xEB || (op & 0xF0) == 0x70) { | |
2861 | |
2862 // short offset operators (jmp and jcc) | |
2863 int imm8 = branchTarget - (branch + 2); | |
2864 codeBuffer.emitByte(imm8, branch + 1); | |
2865 | |
2866 } else { | |
2867 | |
2868 int off = 1; | |
2869 if (op == 0x0F) { | |
2870 off = 2; | |
2871 } | |
2872 | |
2873 int imm32 = branchTarget - (branch + 4 + off); | |
2874 codeBuffer.emitInt(imm32, branch + off); | |
2875 } | |
2876 } | |
2877 | |
2878 public void nullCheck(CiRegister r) { | |
2879 testl(AMD64.rax, new CiAddress(Word, r.asValue(Word), 0)); | |
2880 } | |
2881 | |
2882 public void align(int modulus) { | |
2883 if (codeBuffer.position() % modulus != 0) { | |
2884 nop(modulus - (codeBuffer.position() % modulus)); | |
2885 } | |
2886 } | |
2887 | |
2888 public void pushfq() { | |
2889 emitByte(0x9c); | |
2890 } | |
2891 | |
2892 public void popfq() { | |
2893 emitByte(0x9D); | |
2894 } | |
2895 | |
2896 /** | |
2897 * Makes sure that a subsequent {@linkplain #call} does not fail the alignment check. | |
2898 */ | |
2899 public final void alignForPatchableDirectCall() { | |
2900 int dispStart = codeBuffer.position() + 1; | |
2901 int mask = target.wordSize - 1; | |
2902 if ((dispStart & ~mask) != ((dispStart + 3) & ~mask)) { | |
2903 nop(target.wordSize - (dispStart & mask)); | |
2904 assert ((codeBuffer.position() + 1) & mask) == 0; | |
2905 } | |
2906 } | |
2907 | |
2908 /** | |
2909 * Emits a direct call instruction. Note that the actual call target is not specified, because all calls | |
2910 * need patching anyway. Therefore, 0 is emitted as the call target, and the user is responsible | |
2911 * to add the call address to the appropriate patching tables. | |
2912 */ | |
2913 public final void call() { | |
2914 emitByte(0xE8); | |
2915 emitInt(0); | |
2916 } | |
2917 | |
2918 public final void call(CiRegister src) { | |
2919 int encode = prefixAndEncode(src.encoding); | |
2920 emitByte(0xFF); | |
2921 emitByte(0xD0 | encode); | |
2922 } | |
2923 | |
2924 public void int3() { | |
2925 emitByte(0xCC); | |
2926 } | |
2927 | |
2928 public void enter(short imm16, byte imm8) { | |
2929 emitByte(0xC8); | |
2930 // appended: | |
2931 emitByte(imm16 & 0xff); | |
2932 imm16 >>= 8; | |
2933 emitByte(imm16 & 0xff); | |
2934 emitByte(imm8); | |
2935 } | |
2936 | |
2937 private void emitx87(int b1, int b2, int i) { | |
2938 assert 0 <= i && i < 8 : "illegal stack offset"; | |
2939 emitByte(b1); | |
2940 emitByte(b2 + i); | |
2941 } | |
2942 | |
2943 public void fld(CiAddress src) { | |
2944 emitByte(0xDD); | |
2945 emitOperandHelper(rax, src); | |
2946 } | |
2947 | |
2948 public void fld(int i) { | |
2949 emitx87(0xD9, 0xC0, i); | |
2950 } | |
2951 | |
2952 public void fldln2() { | |
2953 emitByte(0xD9); | |
2954 emitByte(0xED); | |
2955 } | |
2956 | |
2957 public void fldlg2() { | |
2958 emitByte(0xD9); | |
2959 emitByte(0xEC); | |
2960 } | |
2961 | |
2962 public void fyl2x() { | |
2963 emitByte(0xD9); | |
2964 emitByte(0xF1); | |
2965 } | |
2966 | |
2967 public void fstp(CiAddress src) { | |
2968 emitByte(0xDD); | |
2969 emitOperandHelper(rbx, src); | |
2970 } | |
2971 | |
2972 public void fsin() { | |
2973 emitByte(0xD9); | |
2974 emitByte(0xFE); | |
2975 } | |
2976 | |
2977 public void fcos() { | |
2978 emitByte(0xD9); | |
2979 emitByte(0xFF); | |
2980 } | |
2981 | |
2982 public void fptan() { | |
2983 emitByte(0xD9); | |
2984 emitByte(0xF2); | |
2985 } | |
2986 | |
2987 public void fstp(int i) { | |
2988 emitx87(0xDD, 0xD8, i); | |
2989 } | |
2990 } |