Mercurial > hg > graal-jvmci-8
comparison graal/com.oracle.graal.asm.amd64/src/com/oracle/max/asm/amd64/AMD64Assembler.java @ 6497:64b7dd2075c0
renamed projects com.oracle.max.asm* to com.oracle.graal.asm*
author | Doug Simon <doug.simon@oracle.com> |
---|---|
date | Wed, 03 Oct 2012 17:42:12 +0200 |
parents | graal/com.oracle.max.asm.amd64/src/com/oracle/max/asm/amd64/AMD64Assembler.java@85c1b84f8fd9 |
children | 6bc8aa568cb9 |
comparison
equal
deleted
inserted
replaced
6496:16d1411409b4 | 6497:64b7dd2075c0 |
---|---|
1 /* | |
2 * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved. | |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |
20 * or visit www.oracle.com if you need additional information or have any | |
21 * questions. | |
22 */ | |
23 package com.oracle.max.asm.amd64; | |
24 | |
25 import static com.oracle.graal.api.code.ValueUtil.*; | |
26 import static com.oracle.max.asm.NumUtil.*; | |
27 import static com.oracle.max.asm.amd64.AMD64.*; | |
28 import static com.oracle.max.asm.amd64.AMD64AsmOptions.*; | |
29 import static com.oracle.max.criutils.MemoryBarriers.*; | |
30 | |
31 import com.oracle.graal.api.code.*; | |
32 import com.oracle.graal.api.meta.*; | |
33 import com.oracle.max.asm.*; | |
34 | |
35 /** | |
36 * This class implements an assembler that can encode most X86 instructions. | |
37 */ | |
38 public class AMD64Assembler extends AbstractAssembler { | |
39 /** | |
40 * The kind for pointers and raw registers. Since we know we are 64 bit here, we can hardcode it. | |
41 */ | |
42 private static final Kind Word = Kind.Long; | |
43 | |
44 private static final int MinEncodingNeedsRex = 8; | |
45 | |
/**
 * Condition codes for x86 conditional jumps and moves.
 *
 * Each constant carries the 4-bit condition encoding that is OR-ed into the opcode and a
 * short textual operator. Some constants are aliases that share an encoding
 * (for example {@code zero}/{@code equal} and {@code below}/{@code carrySet}).
 */
public enum ConditionFlag {
    zero(0x4, "|zero|"),
    notZero(0x5, "|nzero|"),
    equal(0x4, "="),
    notEqual(0x5, "!="),
    less(0xc, "<"),
    lessEqual(0xe, "<="),
    greater(0xf, ">"),
    greaterEqual(0xd, ">="),
    below(0x2, "|<|"),
    belowEqual(0x6, "|<=|"),
    above(0x7, "|>|"),
    aboveEqual(0x3, "|>=|"),
    overflow(0x0, "|of|"),
    noOverflow(0x1, "|nof|"),
    carrySet(0x2, "|carry|"),
    carryClear(0x3, "|ncarry|"),
    negative(0x8, "|neg|"),
    positive(0x9, "|pos|"),
    parity(0xa, "|par|"),
    noParity(0xb, "|npar|");

    /** The condition encoding placed in the low four bits of the opcode. */
    public final int value;

    /** Human-readable operator for this condition. */
    public final String operator;

    private ConditionFlag(int value, String operator) {
        this.value = value;
        this.operator = operator;
    }

    /**
     * Returns the logically opposite condition. Aliases negate to the alias of the same
     * family (e.g. {@code zero} to {@code notZero}, {@code equal} to {@code notEqual}).
     */
    public ConditionFlag negate() {
        switch (this) {
            case zero:
                return notZero;
            case notZero:
                return zero;
            case equal:
                return notEqual;
            case notEqual:
                return equal;
            case less:
                return greaterEqual;
            case lessEqual:
                return greater;
            case greater:
                return lessEqual;
            case greaterEqual:
                return less;
            case below:
                return aboveEqual;
            case belowEqual:
                return above;
            case above:
                return belowEqual;
            case aboveEqual:
                return below;
            case overflow:
                return noOverflow;
            case noOverflow:
                return overflow;
            case carrySet:
                return carryClear;
            case carryClear:
                return carrySet;
            case negative:
                return positive;
            case positive:
                return negative;
            case parity:
                return noParity;
            case noParity:
                return parity;
            default:
                // Unreachable while every constant has a case above.
                throw new IllegalArgumentException();
        }
    }
}
105 | |
106 /** | |
107 * Constants for X86 prefix bytes. | |
108 */ | |
109 private static class Prefix { | |
110 private static final int REX = 0x40; | |
111 private static final int REXB = 0x41; | |
112 private static final int REXX = 0x42; | |
113 private static final int REXXB = 0x43; | |
114 private static final int REXR = 0x44; | |
115 private static final int REXRB = 0x45; | |
116 private static final int REXRX = 0x46; | |
117 private static final int REXRXB = 0x47; | |
118 private static final int REXW = 0x48; | |
119 private static final int REXWB = 0x49; | |
120 private static final int REXWX = 0x4A; | |
121 private static final int REXWXB = 0x4B; | |
122 private static final int REXWR = 0x4C; | |
123 private static final int REXWRB = 0x4D; | |
124 private static final int REXWRX = 0x4E; | |
125 private static final int REXWRXB = 0x4F; | |
126 } | |
127 | |
/**
 * The physical register substituted for {@link Register#Frame} when encoding addresses;
 * {@code null} when the assembler was created without a register configuration.
 */
public final Register frameRegister;

/**
 * Creates an assembler for the AMD64 architecture.
 *
 * @param target the target description handed to the superclass
 * @param registerConfig supplies the frame register bound to {@link Register#Frame}. May be
 *            {@code null} if this assembler will never encode addresses based on that
 *            logical register.
 */
public AMD64Assembler(TargetDescription target, RegisterConfig registerConfig) {
    super(target);
    if (registerConfig == null) {
        this.frameRegister = null;
    } else {
        this.frameRegister = registerConfig.getFrameRegister();
    }
}
144 | |
145 private static int encode(Register r) { | |
146 assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding; | |
147 return r.encoding & 0x7; | |
148 } | |
149 | |
150 private void emitArithB(int op1, int op2, Register dst, int imm8) { | |
151 assert dst.isByte() : "must have byte register"; | |
152 assert isUByte(op1) && isUByte(op2) : "wrong opcode"; | |
153 assert isUByte(imm8) : "not a byte"; | |
154 assert (op1 & 0x01) == 0 : "should be 8bit operation"; | |
155 emitByte(op1); | |
156 emitByte(op2 | encode(dst)); | |
157 emitByte(imm8); | |
158 } | |
159 | |
160 private void emitArith(int op1, int op2, Register dst, int imm32) { | |
161 assert isUByte(op1) && isUByte(op2) : "wrong opcode"; | |
162 assert (op1 & 0x01) == 1 : "should be 32bit operation"; | |
163 assert (op1 & 0x02) == 0 : "sign-extension bit should not be set"; | |
164 if (isByte(imm32)) { | |
165 emitByte(op1 | 0x02); // set sign bit | |
166 emitByte(op2 | encode(dst)); | |
167 emitByte(imm32 & 0xFF); | |
168 } else { | |
169 emitByte(op1); | |
170 emitByte(op2 | encode(dst)); | |
171 emitInt(imm32); | |
172 } | |
173 } | |
174 | |
175 // immediate-to-memory forms | |
176 private void emitArithOperand(int op1, Register rm, Address adr, int imm32) { | |
177 assert (op1 & 0x01) == 1 : "should be 32bit operation"; | |
178 assert (op1 & 0x02) == 0 : "sign-extension bit should not be set"; | |
179 if (isByte(imm32)) { | |
180 emitByte(op1 | 0x02); // set sign bit | |
181 emitOperandHelper(rm, adr); | |
182 emitByte(imm32 & 0xFF); | |
183 } else { | |
184 emitByte(op1); | |
185 emitOperandHelper(rm, adr); | |
186 emitInt(imm32); | |
187 } | |
188 } | |
189 | |
190 private void emitArith(int op1, int op2, Register dst, Register src) { | |
191 assert isUByte(op1) && isUByte(op2) : "wrong opcode"; | |
192 emitByte(op1); | |
193 emitByte(op2 | encode(dst) << 3 | encode(src)); | |
194 } | |
195 | |
196 private void emitOperandHelper(Register reg, Address addr) { | |
197 Register base = isLegal(addr.getBase()) ? asRegister(addr.getBase()) : Register.None; | |
198 Register index = isLegal(addr.getIndex()) ? asRegister(addr.getIndex()) : Register.None; | |
199 | |
200 Address.Scale scale = addr.getScale(); | |
201 int disp = addr.getDisplacement(); | |
202 | |
203 if (base == Register.Frame) { | |
204 assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration"; | |
205 base = frameRegister; | |
206 // } else if (base == Register.CallerFrame) { | |
207 // assert frameRegister != null : "cannot use register " + Register.Frame + " in assembler with null register configuration"; | |
208 // base = frameRegister; | |
209 // disp += targetMethod.frameSize() + 8; | |
210 } | |
211 | |
212 // Encode the registers as needed in the fields they are used in | |
213 | |
214 assert reg != Register.None; | |
215 int regenc = encode(reg) << 3; | |
216 | |
217 if (base == AMD64.rip) { | |
218 // [00 000 101] disp32 | |
219 emitByte(0x05 | regenc); | |
220 emitInt(disp); | |
221 } else if (addr == Address.Placeholder) { | |
222 // [00 000 101] disp32 | |
223 emitByte(0x05 | regenc); | |
224 emitInt(0); | |
225 | |
226 } else if (base.isValid()) { | |
227 int baseenc = base.isValid() ? encode(base) : 0; | |
228 if (index.isValid()) { | |
229 int indexenc = encode(index) << 3; | |
230 // [base + indexscale + disp] | |
231 if (disp == 0 && base != rbp && (base != r13)) { | |
232 // [base + indexscale] | |
233 // [00 reg 100][ss index base] | |
234 assert index != rsp : "illegal addressing mode"; | |
235 emitByte(0x04 | regenc); | |
236 emitByte(scale.log2 << 6 | indexenc | baseenc); | |
237 } else if (isByte(disp)) { | |
238 // [base + indexscale + imm8] | |
239 // [01 reg 100][ss index base] imm8 | |
240 assert index != rsp : "illegal addressing mode"; | |
241 emitByte(0x44 | regenc); | |
242 emitByte(scale.log2 << 6 | indexenc | baseenc); | |
243 emitByte(disp & 0xFF); | |
244 } else { | |
245 // [base + indexscale + disp32] | |
246 // [10 reg 100][ss index base] disp32 | |
247 assert index != rsp : "illegal addressing mode"; | |
248 emitByte(0x84 | regenc); | |
249 emitByte(scale.log2 << 6 | indexenc | baseenc); | |
250 emitInt(disp); | |
251 } | |
252 } else if (base == rsp || (base == r12)) { | |
253 // [rsp + disp] | |
254 if (disp == 0) { | |
255 // [rsp] | |
256 // [00 reg 100][00 100 100] | |
257 emitByte(0x04 | regenc); | |
258 emitByte(0x24); | |
259 } else if (isByte(disp)) { | |
260 // [rsp + imm8] | |
261 // [01 reg 100][00 100 100] disp8 | |
262 emitByte(0x44 | regenc); | |
263 emitByte(0x24); | |
264 emitByte(disp & 0xFF); | |
265 } else { | |
266 // [rsp + imm32] | |
267 // [10 reg 100][00 100 100] disp32 | |
268 emitByte(0x84 | regenc); | |
269 emitByte(0x24); | |
270 emitInt(disp); | |
271 } | |
272 } else { | |
273 // [base + disp] | |
274 assert base != rsp && (base != r12) : "illegal addressing mode"; | |
275 if (disp == 0 && base != rbp && (base != r13)) { | |
276 // [base] | |
277 // [00 reg base] | |
278 emitByte(0x00 | regenc | baseenc); | |
279 } else if (isByte(disp)) { | |
280 // [base + disp8] | |
281 // [01 reg base] disp8 | |
282 emitByte(0x40 | regenc | baseenc); | |
283 emitByte(disp & 0xFF); | |
284 } else { | |
285 // [base + disp32] | |
286 // [10 reg base] disp32 | |
287 emitByte(0x80 | regenc | baseenc); | |
288 emitInt(disp); | |
289 } | |
290 } | |
291 } else { | |
292 if (index.isValid()) { | |
293 int indexenc = encode(index) << 3; | |
294 // [indexscale + disp] | |
295 // [00 reg 100][ss index 101] disp32 | |
296 assert index != rsp : "illegal addressing mode"; | |
297 emitByte(0x04 | regenc); | |
298 emitByte(scale.log2 << 6 | indexenc | 0x05); | |
299 emitInt(disp); | |
300 } else { | |
301 // [disp] ABSOLUTE | |
302 // [00 reg 100][00 100 101] disp32 | |
303 emitByte(0x04 | regenc); | |
304 emitByte(0x25); | |
305 emitInt(disp); | |
306 } | |
307 } | |
308 } | |
309 | |
310 public final void addl(Address dst, int imm32) { | |
311 prefix(dst); | |
312 emitArithOperand(0x81, rax, dst, imm32); | |
313 } | |
314 | |
315 public final void addl(Address dst, Register src) { | |
316 prefix(dst, src); | |
317 emitByte(0x01); | |
318 emitOperandHelper(src, dst); | |
319 } | |
320 | |
321 public final void addl(Register dst, int imm32) { | |
322 prefix(dst); | |
323 emitArith(0x81, 0xC0, dst, imm32); | |
324 } | |
325 | |
326 public final void addl(Register dst, Address src) { | |
327 prefix(src, dst); | |
328 emitByte(0x03); | |
329 emitOperandHelper(dst, src); | |
330 } | |
331 | |
332 public final void addl(Register dst, Register src) { | |
333 prefixAndEncode(dst.encoding, src.encoding); | |
334 emitArith(0x03, 0xC0, dst, src); | |
335 } | |
336 | |
337 private void addrNop4() { | |
338 // 4 bytes: NOP DWORD PTR [EAX+0] | |
339 emitByte(0x0F); | |
340 emitByte(0x1F); | |
341 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc); | |
342 emitByte(0); // 8-bits offset (1 byte) | |
343 } | |
344 | |
345 private void addrNop5() { | |
346 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset | |
347 emitByte(0x0F); | |
348 emitByte(0x1F); | |
349 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4); | |
350 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); | |
351 emitByte(0); // 8-bits offset (1 byte) | |
352 } | |
353 | |
354 private void addrNop7() { | |
355 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset | |
356 emitByte(0x0F); | |
357 emitByte(0x1F); | |
358 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc); | |
359 emitInt(0); // 32-bits offset (4 bytes) | |
360 } | |
361 | |
362 private void addrNop8() { | |
363 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset | |
364 emitByte(0x0F); | |
365 emitByte(0x1F); | |
366 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4); | |
367 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); | |
368 emitInt(0); // 32-bits offset (4 bytes) | |
369 } | |
370 | |
371 public final void addsd(Register dst, Register src) { | |
372 assert dst.isFpu() && src.isFpu(); | |
373 emitByte(0xF2); | |
374 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
375 emitByte(0x0F); | |
376 emitByte(0x58); | |
377 emitByte(0xC0 | encode); | |
378 } | |
379 | |
380 public final void addsd(Register dst, Address src) { | |
381 assert dst.isFpu(); | |
382 emitByte(0xF2); | |
383 prefix(src, dst); | |
384 emitByte(0x0F); | |
385 emitByte(0x58); | |
386 emitOperandHelper(dst, src); | |
387 } | |
388 | |
389 public final void addss(Register dst, Register src) { | |
390 assert dst.isFpu() && src.isFpu(); | |
391 emitByte(0xF3); | |
392 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
393 emitByte(0x0F); | |
394 emitByte(0x58); | |
395 emitByte(0xC0 | encode); | |
396 } | |
397 | |
398 public final void addss(Register dst, Address src) { | |
399 assert dst.isFpu(); | |
400 emitByte(0xF3); | |
401 prefix(src, dst); | |
402 emitByte(0x0F); | |
403 emitByte(0x58); | |
404 emitOperandHelper(dst, src); | |
405 } | |
406 | |
407 public final void andl(Register dst, int imm32) { | |
408 prefix(dst); | |
409 emitArith(0x81, 0xE0, dst, imm32); | |
410 } | |
411 | |
412 public final void andl(Register dst, Address src) { | |
413 prefix(src, dst); | |
414 emitByte(0x23); | |
415 emitOperandHelper(dst, src); | |
416 } | |
417 | |
418 public final void andl(Register dst, Register src) { | |
419 prefixAndEncode(dst.encoding, src.encoding); | |
420 emitArith(0x23, 0xC0, dst, src); | |
421 } | |
422 | |
423 public final void bsfq(Register dst, Register src) { | |
424 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
425 emitByte(0x0F); | |
426 emitByte(0xBC); | |
427 emitByte(0xC0 | encode); | |
428 } | |
429 | |
430 public final void bsfq(Register dst, Address src) { | |
431 prefixq(src, dst); | |
432 emitByte(0xBC); | |
433 emitOperandHelper(dst, src); | |
434 } | |
435 | |
436 public final void bsrq(Register dst, Register src) { | |
437 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
438 emitByte(0x0F); | |
439 emitByte(0xBD); | |
440 emitByte(0xC0 | encode); | |
441 } | |
442 | |
443 | |
444 public final void bsrq(Register dst, Address src) { | |
445 prefixq(src, dst); | |
446 emitByte(0xBD); | |
447 emitOperandHelper(dst, src); | |
448 } | |
449 | |
450 public final void bsrl(Register dst, Register src) { | |
451 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
452 emitByte(0x0F); | |
453 emitByte(0xBD); | |
454 emitByte(0xC0 | encode); | |
455 } | |
456 | |
457 | |
458 public final void bsrl(Register dst, Address src) { | |
459 prefix(src, dst); | |
460 emitByte(0xBD); | |
461 emitOperandHelper(dst, src); | |
462 } | |
463 | |
464 public final void bswapl(Register reg) { // bswap | |
465 int encode = prefixAndEncode(reg.encoding); | |
466 emitByte(0x0F); | |
467 emitByte(0xC8 | encode); | |
468 } | |
469 | |
470 public final void btli(Address src, int imm8) { | |
471 prefixq(src); | |
472 emitByte(0x0F); | |
473 emitByte(0xBA); | |
474 emitOperandHelper(rsp, src); | |
475 emitByte(imm8); | |
476 } | |
477 | |
478 public final void cdql() { | |
479 emitByte(0x99); | |
480 } | |
481 | |
482 public final void cmovl(ConditionFlag cc, Register dst, Register src) { | |
483 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
484 emitByte(0x0F); | |
485 emitByte(0x40 | cc.value); | |
486 emitByte(0xC0 | encode); | |
487 } | |
488 | |
489 public final void cmovl(ConditionFlag cc, Register dst, Address src) { | |
490 prefix(src, dst); | |
491 emitByte(0x0F); | |
492 emitByte(0x40 | cc.value); | |
493 emitOperandHelper(dst, src); | |
494 } | |
495 | |
496 public final void cmpb(Address dst, int imm8) { | |
497 prefix(dst); | |
498 emitByte(0x80); | |
499 emitOperandHelper(rdi, dst); | |
500 emitByte(imm8); | |
501 } | |
502 | |
503 public final void cmpl(Address dst, int imm32) { | |
504 prefix(dst); | |
505 emitByte(0x81); | |
506 emitOperandHelper(rdi, dst); | |
507 emitInt(imm32); | |
508 } | |
509 | |
510 public final void cmpl(Register dst, int imm32) { | |
511 prefix(dst); | |
512 emitArith(0x81, 0xF8, dst, imm32); | |
513 } | |
514 | |
515 public final void cmpl(Register dst, Register src) { | |
516 prefixAndEncode(dst.encoding, src.encoding); | |
517 emitArith(0x3B, 0xC0, dst, src); | |
518 } | |
519 | |
520 public final void cmpl(Register dst, Address src) { | |
521 prefix(src, dst); | |
522 emitByte(0x3B); | |
523 emitOperandHelper(dst, src); | |
524 } | |
525 | |
526 // The 32-bit cmpxchg compares the value at adr with the contents of X86.rax, | |
527 // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,. | |
528 // The ZF is set if the compared values were equal, and cleared otherwise. | |
529 public final void cmpxchgl(Register reg, Address adr) { // cmpxchg | |
530 if ((Atomics & 2) != 0) { | |
531 // caveat: no instructionmark, so this isn't relocatable. | |
532 // Emit a synthetic, non-atomic, CAS equivalent. | |
533 // Beware. The synthetic form sets all ICCs, not just ZF. | |
534 // cmpxchg r,[m] is equivalent to X86.rax, = CAS (m, X86.rax, r) | |
535 cmpl(rax, adr); | |
536 movl(rax, adr); | |
537 if (reg != rax) { | |
538 Label l = new Label(); | |
539 jcc(ConditionFlag.notEqual, l); | |
540 movl(adr, reg); | |
541 bind(l); | |
542 } | |
543 } else { | |
544 | |
545 prefix(adr, reg); | |
546 emitByte(0x0F); | |
547 emitByte(0xB1); | |
548 emitOperandHelper(reg, adr); | |
549 } | |
550 } | |
551 | |
552 public final void comisd(Register dst, Address src) { | |
553 assert dst.isFpu(); | |
554 // NOTE: dbx seems to decode this as comiss even though the | |
555 // 0x66 is there. Strangly ucomisd comes out correct | |
556 emitByte(0x66); | |
557 comiss(dst, src); | |
558 } | |
559 | |
560 public final void comiss(Register dst, Address src) { | |
561 assert dst.isFpu(); | |
562 | |
563 prefix(src, dst); | |
564 emitByte(0x0F); | |
565 emitByte(0x2F); | |
566 emitOperandHelper(dst, src); | |
567 } | |
568 | |
569 public final void cvtdq2pd(Register dst, Register src) { | |
570 assert dst.isFpu(); | |
571 assert src.isFpu(); | |
572 | |
573 emitByte(0xF3); | |
574 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
575 emitByte(0x0F); | |
576 emitByte(0xE6); | |
577 emitByte(0xC0 | encode); | |
578 } | |
579 | |
580 public final void cvtdq2ps(Register dst, Register src) { | |
581 assert dst.isFpu(); | |
582 assert src.isFpu(); | |
583 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
584 emitByte(0x0F); | |
585 emitByte(0x5B); | |
586 emitByte(0xC0 | encode); | |
587 } | |
588 | |
589 public final void cvtsd2ss(Register dst, Register src) { | |
590 assert dst.isFpu(); | |
591 assert src.isFpu(); | |
592 emitByte(0xF2); | |
593 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
594 emitByte(0x0F); | |
595 emitByte(0x5A); | |
596 emitByte(0xC0 | encode); | |
597 } | |
598 | |
599 public final void cvtsi2sdl(Register dst, Register src) { | |
600 assert dst.isFpu(); | |
601 emitByte(0xF2); | |
602 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
603 emitByte(0x0F); | |
604 emitByte(0x2A); | |
605 emitByte(0xC0 | encode); | |
606 } | |
607 | |
608 public final void cvtsi2ssl(Register dst, Register src) { | |
609 assert dst.isFpu(); | |
610 emitByte(0xF3); | |
611 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
612 emitByte(0x0F); | |
613 emitByte(0x2A); | |
614 emitByte(0xC0 | encode); | |
615 } | |
616 | |
617 public final void cvtss2sd(Register dst, Register src) { | |
618 assert dst.isFpu(); | |
619 assert src.isFpu(); | |
620 emitByte(0xF3); | |
621 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
622 emitByte(0x0F); | |
623 emitByte(0x5A); | |
624 emitByte(0xC0 | encode); | |
625 } | |
626 | |
627 public final void cvttsd2sil(Register dst, Register src) { | |
628 assert src.isFpu(); | |
629 emitByte(0xF2); | |
630 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
631 emitByte(0x0F); | |
632 emitByte(0x2C); | |
633 emitByte(0xC0 | encode); | |
634 } | |
635 | |
636 public final void cvttss2sil(Register dst, Register src) { | |
637 assert src.isFpu(); | |
638 emitByte(0xF3); | |
639 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
640 emitByte(0x0F); | |
641 emitByte(0x2C); | |
642 emitByte(0xC0 | encode); | |
643 } | |
644 | |
645 public final void decl(Address dst) { | |
646 // Don't use it directly. Use Macrodecrement() instead. | |
647 prefix(dst); | |
648 emitByte(0xFF); | |
649 emitOperandHelper(rcx, dst); | |
650 } | |
651 | |
652 public final void divsd(Register dst, Address src) { | |
653 assert dst.isFpu(); | |
654 emitByte(0xF2); | |
655 prefix(src, dst); | |
656 emitByte(0x0F); | |
657 emitByte(0x5E); | |
658 emitOperandHelper(dst, src); | |
659 } | |
660 | |
661 public final void divsd(Register dst, Register src) { | |
662 assert dst.isFpu(); | |
663 assert src.isFpu(); | |
664 emitByte(0xF2); | |
665 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
666 emitByte(0x0F); | |
667 emitByte(0x5E); | |
668 emitByte(0xC0 | encode); | |
669 } | |
670 | |
671 public final void divss(Register dst, Address src) { | |
672 assert dst.isFpu(); | |
673 emitByte(0xF3); | |
674 prefix(src, dst); | |
675 emitByte(0x0F); | |
676 emitByte(0x5E); | |
677 emitOperandHelper(dst, src); | |
678 } | |
679 | |
680 public final void divss(Register dst, Register src) { | |
681 assert dst.isFpu(); | |
682 assert src.isFpu(); | |
683 emitByte(0xF3); | |
684 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
685 emitByte(0x0F); | |
686 emitByte(0x5E); | |
687 emitByte(0xC0 | encode); | |
688 } | |
689 | |
690 public final void hlt() { | |
691 emitByte(0xF4); | |
692 } | |
693 | |
694 public final void idivl(Register src) { | |
695 int encode = prefixAndEncode(src.encoding); | |
696 emitByte(0xF7); | |
697 emitByte(0xF8 | encode); | |
698 } | |
699 | |
700 public final void divl(Register src) { | |
701 int encode = prefixAndEncode(src.encoding); | |
702 emitByte(0xF7); | |
703 emitByte(0xF0 | encode); | |
704 } | |
705 | |
706 public final void imull(Register dst, Register src) { | |
707 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
708 emitByte(0x0F); | |
709 emitByte(0xAF); | |
710 emitByte(0xC0 | encode); | |
711 } | |
712 | |
713 public final void imull(Register dst, Register src, int value) { | |
714 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
715 if (isByte(value)) { | |
716 emitByte(0x6B); | |
717 emitByte(0xC0 | encode); | |
718 emitByte(value & 0xFF); | |
719 } else { | |
720 emitByte(0x69); | |
721 emitByte(0xC0 | encode); | |
722 emitInt(value); | |
723 } | |
724 } | |
725 | |
726 public final void incl(Address dst) { | |
727 // Don't use it directly. Use Macroincrement() instead. | |
728 prefix(dst); | |
729 emitByte(0xFF); | |
730 emitOperandHelper(rax, dst); | |
731 } | |
732 | |
733 public final void jcc(ConditionFlag cc, int jumpTarget, boolean forceDisp32) { | |
734 int shortSize = 2; | |
735 int longSize = 6; | |
736 long disp = jumpTarget - codeBuffer.position(); | |
737 if (!forceDisp32 && isByte(disp - shortSize)) { | |
738 // 0111 tttn #8-bit disp | |
739 emitByte(0x70 | cc.value); | |
740 emitByte((int) ((disp - shortSize) & 0xFF)); | |
741 } else { | |
742 // 0000 1111 1000 tttn #32-bit disp | |
743 assert isInt(disp - longSize) : "must be 32bit offset (call4)"; | |
744 emitByte(0x0F); | |
745 emitByte(0x80 | cc.value); | |
746 emitInt((int) (disp - longSize)); | |
747 } | |
748 } | |
749 | |
750 public final void jcc(ConditionFlag cc, Label l) { | |
751 assert (0 <= cc.value) && (cc.value < 16) : "illegal cc"; | |
752 if (l.isBound()) { | |
753 jcc(cc, l.position(), false); | |
754 } else { | |
755 // Note: could eliminate cond. jumps to this jump if condition | |
756 // is the same however, seems to be rather unlikely case. | |
757 // Note: use jccb() if label to be bound is very close to get | |
758 // an 8-bit displacement | |
759 l.addPatchAt(codeBuffer.position()); | |
760 emitByte(0x0F); | |
761 emitByte(0x80 | cc.value); | |
762 emitInt(0); | |
763 } | |
764 | |
765 } | |
766 | |
767 public final void jccb(ConditionFlag cc, Label l) { | |
768 if (l.isBound()) { | |
769 int shortSize = 2; | |
770 int entry = l.position(); | |
771 assert isByte(entry - (codeBuffer.position() + shortSize)) : "Dispacement too large for a short jmp"; | |
772 long disp = entry - codeBuffer.position(); | |
773 // 0111 tttn #8-bit disp | |
774 emitByte(0x70 | cc.value); | |
775 emitByte((int) ((disp - shortSize) & 0xFF)); | |
776 } else { | |
777 | |
778 l.addPatchAt(codeBuffer.position()); | |
779 emitByte(0x70 | cc.value); | |
780 emitByte(0); | |
781 } | |
782 } | |
783 | |
784 public final void jmp(Address adr) { | |
785 prefix(adr); | |
786 emitByte(0xFF); | |
787 emitOperandHelper(rsp, adr); | |
788 } | |
789 | |
790 public final void jmp(int jumpTarget, boolean forceDisp32) { | |
791 int shortSize = 2; | |
792 int longSize = 5; | |
793 long disp = jumpTarget - codeBuffer.position(); | |
794 if (!forceDisp32 && isByte(disp - shortSize)) { | |
795 emitByte(0xEB); | |
796 emitByte((int) ((disp - shortSize) & 0xFF)); | |
797 } else { | |
798 emitByte(0xE9); | |
799 emitInt((int) (disp - longSize)); | |
800 } | |
801 } | |
802 | |
803 @Override | |
804 public final void jmp(Label l) { | |
805 if (l.isBound()) { | |
806 jmp(l.position(), false); | |
807 } else { | |
808 // By default, forward jumps are always 32-bit displacements, since | |
809 // we can't yet know where the label will be bound. If you're sure that | |
810 // the forward jump will not run beyond 256 bytes, use jmpb to | |
811 // force an 8-bit displacement. | |
812 | |
813 l.addPatchAt(codeBuffer.position()); | |
814 emitByte(0xE9); | |
815 emitInt(0); | |
816 } | |
817 } | |
818 | |
819 public final void jmp(Register entry) { | |
820 int encode = prefixAndEncode(entry.encoding); | |
821 emitByte(0xFF); | |
822 emitByte(0xE0 | encode); | |
823 } | |
824 | |
825 public final void jmpb(Label l) { | |
826 if (l.isBound()) { | |
827 int shortSize = 2; | |
828 int entry = l.position(); | |
829 assert isByte((entry - codeBuffer.position()) + shortSize) : "Dispacement too large for a short jmp"; | |
830 long offs = entry - codeBuffer.position(); | |
831 emitByte(0xEB); | |
832 emitByte((int) ((offs - shortSize) & 0xFF)); | |
833 } else { | |
834 | |
835 l.addPatchAt(codeBuffer.position()); | |
836 emitByte(0xEB); | |
837 emitByte(0); | |
838 } | |
839 } | |
840 | |
841 public final void leaq(Register dst, Address src) { | |
842 prefixq(src, dst); | |
843 emitByte(0x8D); | |
844 emitOperandHelper(dst, src); | |
845 } | |
846 | |
847 public final void enter(int imm16, int imm8) { | |
848 emitByte(0xC8); | |
849 emitShort(imm16); | |
850 emitByte(imm8); | |
851 } | |
852 | |
853 public final void leave() { | |
854 emitByte(0xC9); | |
855 } | |
856 | |
857 public final void lock() { | |
858 if ((Atomics & 1) != 0) { | |
859 // Emit either nothing, a NOP, or a NOP: prefix | |
860 emitByte(0x90); | |
861 } else { | |
862 emitByte(0xF0); | |
863 } | |
864 } | |
865 | |
866 // Emit mfence instruction | |
867 public final void mfence() { | |
868 emitByte(0x0F); | |
869 emitByte(0xAE); | |
870 emitByte(0xF0); | |
871 } | |
872 | |
873 public final void mov(Register dst, Register src) { | |
874 movq(dst, src); | |
875 } | |
876 | |
877 public final void movapd(Register dst, Register src) { | |
878 assert dst.isFpu(); | |
879 assert src.isFpu(); | |
880 int dstenc = dst.encoding; | |
881 int srcenc = src.encoding; | |
882 emitByte(0x66); | |
883 if (dstenc < 8) { | |
884 if (srcenc >= 8) { | |
885 emitByte(Prefix.REXB); | |
886 srcenc -= 8; | |
887 } | |
888 } else { | |
889 if (srcenc < 8) { | |
890 emitByte(Prefix.REXR); | |
891 } else { | |
892 emitByte(Prefix.REXRB); | |
893 srcenc -= 8; | |
894 } | |
895 dstenc -= 8; | |
896 } | |
897 emitByte(0x0F); | |
898 emitByte(0x28); | |
899 emitByte(0xC0 | dstenc << 3 | srcenc); | |
900 } | |
901 | |
902 public final void movaps(Register dst, Register src) { | |
903 assert dst.isFpu(); | |
904 assert src.isFpu(); | |
905 int dstenc = dst.encoding; | |
906 int srcenc = src.encoding; | |
907 if (dstenc < 8) { | |
908 if (srcenc >= 8) { | |
909 emitByte(Prefix.REXB); | |
910 srcenc -= 8; | |
911 } | |
912 } else { | |
913 if (srcenc < 8) { | |
914 emitByte(Prefix.REXR); | |
915 } else { | |
916 emitByte(Prefix.REXRB); | |
917 srcenc -= 8; | |
918 } | |
919 dstenc -= 8; | |
920 } | |
921 emitByte(0x0F); | |
922 emitByte(0x28); | |
923 emitByte(0xC0 | dstenc << 3 | srcenc); | |
924 } | |
925 | |
926 public final void movb(Register dst, Address src) { | |
927 prefix(src, dst); // , true) | |
928 emitByte(0x8A); | |
929 emitOperandHelper(dst, src); | |
930 } | |
931 | |
932 public final void movb(Address dst, int imm8) { | |
933 prefix(dst); | |
934 emitByte(0xC6); | |
935 emitOperandHelper(rax, dst); | |
936 emitByte(imm8); | |
937 } | |
938 | |
939 public final void movb(Address dst, Register src) { | |
940 assert src.isByte() : "must have byte register"; | |
941 prefix(dst, src); // , true) | |
942 emitByte(0x88); | |
943 emitOperandHelper(src, dst); | |
944 } | |
945 | |
946 public final void movdl(Register dst, Register src) { | |
947 if (dst.isFpu()) { | |
948 assert !src.isFpu() : "does this hold?"; | |
949 emitByte(0x66); | |
950 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
951 emitByte(0x0F); | |
952 emitByte(0x6E); | |
953 emitByte(0xC0 | encode); | |
954 } else if (src.isFpu()) { | |
955 assert !dst.isFpu(); | |
956 emitByte(0x66); | |
957 // swap src/dst to get correct prefix | |
958 int encode = prefixAndEncode(src.encoding, dst.encoding); | |
959 emitByte(0x0F); | |
960 emitByte(0x7E); | |
961 emitByte(0xC0 | encode); | |
962 } | |
963 } | |
964 | |
965 public final void movdqa(Register dst, Address src) { | |
966 assert dst.isFpu(); | |
967 emitByte(0x66); | |
968 prefix(src, dst); | |
969 emitByte(0x0F); | |
970 emitByte(0x6F); | |
971 emitOperandHelper(dst, src); | |
972 } | |
973 | |
974 public final void movdqa(Register dst, Register src) { | |
975 assert dst.isFpu(); | |
976 emitByte(0x66); | |
977 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
978 emitByte(0x0F); | |
979 emitByte(0x6F); | |
980 emitByte(0xC0 | encode); | |
981 } | |
982 | |
983 public final void movdqa(Address dst, Register src) { | |
984 assert src.isFpu(); | |
985 emitByte(0x66); | |
986 prefix(dst, src); | |
987 emitByte(0x0F); | |
988 emitByte(0x7F); | |
989 emitOperandHelper(src, dst); | |
990 } | |
991 | |
992 public final void movdqu(Register dst, Address src) { | |
993 assert dst.isFpu(); | |
994 emitByte(0xF3); | |
995 prefix(src, dst); | |
996 emitByte(0x0F); | |
997 emitByte(0x6F); | |
998 emitOperandHelper(dst, src); | |
999 } | |
1000 | |
1001 public final void movdqu(Register dst, Register src) { | |
1002 assert dst.isFpu(); | |
1003 assert src.isFpu(); | |
1004 | |
1005 emitByte(0xF3); | |
1006 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
1007 emitByte(0x0F); | |
1008 emitByte(0x6F); | |
1009 emitByte(0xC0 | encode); | |
1010 } | |
1011 | |
1012 public final void movdqu(Address dst, Register src) { | |
1013 assert src.isFpu(); | |
1014 | |
1015 emitByte(0xF3); | |
1016 prefix(dst, src); | |
1017 emitByte(0x0F); | |
1018 emitByte(0x7F); | |
1019 emitOperandHelper(src, dst); | |
1020 } | |
1021 | |
1022 public final void movl(Register dst, int imm32) { | |
1023 int encode = prefixAndEncode(dst.encoding); | |
1024 emitByte(0xB8 | encode); | |
1025 emitInt(imm32); | |
1026 } | |
1027 | |
1028 public final void movl(Register dst, Register src) { | |
1029 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1030 emitByte(0x8B); | |
1031 emitByte(0xC0 | encode); | |
1032 } | |
1033 | |
1034 public final void movl(Register dst, Address src) { | |
1035 prefix(src, dst); | |
1036 emitByte(0x8B); | |
1037 emitOperandHelper(dst, src); | |
1038 } | |
1039 | |
1040 public final void movl(Address dst, int imm32) { | |
1041 prefix(dst); | |
1042 emitByte(0xC7); | |
1043 emitOperandHelper(rax, dst); | |
1044 emitInt(imm32); | |
1045 } | |
1046 | |
1047 public final void movl(Address dst, Register src) { | |
1048 prefix(dst, src); | |
1049 emitByte(0x89); | |
1050 emitOperandHelper(src, dst); | |
1051 } | |
1052 | |
1053 /** | |
1054 * New CPUs require use of movsd and movss to avoid partial register stall | |
1055 * when loading from memory. But for old Opteron use movlpd instead of movsd. | |
1056 * The selection is done in {@link AMD64MacroAssembler#movdbl(Register, Address)} | |
1057 * and {@link AMD64MacroAssembler#movflt(Register, Register)}. | |
1058 */ | |
1059 public final void movlpd(Register dst, Address src) { | |
1060 assert dst.isFpu(); | |
1061 emitByte(0x66); | |
1062 prefix(src, dst); | |
1063 emitByte(0x0F); | |
1064 emitByte(0x12); | |
1065 emitOperandHelper(dst, src); | |
1066 } | |
1067 | |
1068 public final void movlpd(Address dst, Register src) { | |
1069 assert src.isFpu(); | |
1070 emitByte(0x66); | |
1071 prefix(dst, src); | |
1072 emitByte(0x0F); | |
1073 emitByte(0x13); | |
1074 emitOperandHelper(src, dst); | |
1075 } | |
1076 | |
1077 public final void movq(Register dst, Address src) { | |
1078 if (dst.isFpu()) { | |
1079 emitByte(0xF3); | |
1080 prefixq(src, dst); | |
1081 emitByte(0x0F); | |
1082 emitByte(0x7E); | |
1083 emitOperandHelper(dst, src); | |
1084 } else { | |
1085 prefixq(src, dst); | |
1086 emitByte(0x8B); | |
1087 emitOperandHelper(dst, src); | |
1088 } | |
1089 } | |
1090 | |
1091 public final void movq(Register dst, Register src) { | |
1092 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
1093 emitByte(0x8B); | |
1094 emitByte(0xC0 | encode); | |
1095 } | |
1096 | |
1097 public final void movq(Address dst, Register src) { | |
1098 if (src.isFpu()) { | |
1099 emitByte(0x66); | |
1100 prefixq(dst, src); | |
1101 emitByte(0x0F); | |
1102 emitByte(0xD6); | |
1103 emitOperandHelper(src, dst); | |
1104 } else { | |
1105 prefixq(dst, src); | |
1106 emitByte(0x89); | |
1107 emitOperandHelper(src, dst); | |
1108 } | |
1109 } | |
1110 | |
1111 public final void movsxb(Register dst, Address src) { // movsxb | |
1112 prefix(src, dst); | |
1113 emitByte(0x0F); | |
1114 emitByte(0xBE); | |
1115 emitOperandHelper(dst, src); | |
1116 } | |
1117 | |
1118 public final void movsxb(Register dst, Register src) { // movsxb | |
1119 int encode = prefixAndEncode(dst.encoding, src.encoding, true); | |
1120 emitByte(0x0F); | |
1121 emitByte(0xBE); | |
1122 emitByte(0xC0 | encode); | |
1123 } | |
1124 | |
1125 public final void movsd(Register dst, Register src) { | |
1126 assert dst.isFpu(); | |
1127 assert src.isFpu(); | |
1128 emitByte(0xF2); | |
1129 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1130 emitByte(0x0F); | |
1131 emitByte(0x10); | |
1132 emitByte(0xC0 | encode); | |
1133 } | |
1134 | |
1135 public final void movsd(Register dst, Address src) { | |
1136 assert dst.isFpu(); | |
1137 emitByte(0xF2); | |
1138 prefix(src, dst); | |
1139 emitByte(0x0F); | |
1140 emitByte(0x10); | |
1141 emitOperandHelper(dst, src); | |
1142 } | |
1143 | |
1144 public final void movsd(Address dst, Register src) { | |
1145 assert src.isFpu(); | |
1146 emitByte(0xF2); | |
1147 prefix(dst, src); | |
1148 emitByte(0x0F); | |
1149 emitByte(0x11); | |
1150 emitOperandHelper(src, dst); | |
1151 } | |
1152 | |
1153 public final void movss(Register dst, Register src) { | |
1154 assert dst.isFpu(); | |
1155 assert src.isFpu(); | |
1156 emitByte(0xF3); | |
1157 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1158 emitByte(0x0F); | |
1159 emitByte(0x10); | |
1160 emitByte(0xC0 | encode); | |
1161 } | |
1162 | |
1163 public final void movss(Register dst, Address src) { | |
1164 assert dst.isFpu(); | |
1165 emitByte(0xF3); | |
1166 prefix(src, dst); | |
1167 emitByte(0x0F); | |
1168 emitByte(0x10); | |
1169 emitOperandHelper(dst, src); | |
1170 } | |
1171 | |
1172 public final void movss(Address dst, Register src) { | |
1173 assert src.isFpu(); | |
1174 emitByte(0xF3); | |
1175 prefix(dst, src); | |
1176 emitByte(0x0F); | |
1177 emitByte(0x11); | |
1178 emitOperandHelper(src, dst); | |
1179 } | |
1180 | |
1181 public final void movswl(Register dst, Address src) { | |
1182 prefix(src, dst); | |
1183 emitByte(0x0F); | |
1184 emitByte(0xBF); | |
1185 emitOperandHelper(dst, src); | |
1186 } | |
1187 | |
1188 public final void movsxw(Register dst, Register src) { // movsxw | |
1189 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1190 emitByte(0x0F); | |
1191 emitByte(0xBF); | |
1192 emitByte(0xC0 | encode); | |
1193 } | |
1194 | |
1195 public final void movsxw(Register dst, Address src) { // movsxw | |
1196 prefix(src, dst); | |
1197 emitByte(0x0F); | |
1198 emitByte(0xBF); | |
1199 emitOperandHelper(dst, src); | |
1200 } | |
1201 | |
1202 public final void movzxd(Register dst, Register src) { // movzxd | |
1203 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1204 emitByte(0x63); | |
1205 emitByte(0xC0 | encode); | |
1206 } | |
1207 | |
1208 public final void movzxd(Register dst, Address src) { // movzxd | |
1209 prefix(src, dst); | |
1210 emitByte(0x63); | |
1211 emitOperandHelper(dst, src); | |
1212 } | |
1213 | |
1214 public final void movw(Address dst, int imm16) { | |
1215 emitByte(0x66); // switch to 16-bit mode | |
1216 prefix(dst); | |
1217 emitByte(0xC7); | |
1218 emitOperandHelper(rax, dst); | |
1219 emitShort(imm16); | |
1220 } | |
1221 | |
1222 public final void movw(Register dst, Address src) { | |
1223 emitByte(0x66); | |
1224 prefix(src, dst); | |
1225 emitByte(0x8B); | |
1226 emitOperandHelper(dst, src); | |
1227 } | |
1228 | |
1229 public final void movw(Address dst, Register src) { | |
1230 emitByte(0x66); | |
1231 prefix(dst, src); | |
1232 emitByte(0x89); | |
1233 emitOperandHelper(src, dst); | |
1234 } | |
1235 | |
1236 public final void movzxb(Register dst, Address src) { // movzxb | |
1237 prefix(src, dst); | |
1238 emitByte(0x0F); | |
1239 emitByte(0xB6); | |
1240 emitOperandHelper(dst, src); | |
1241 } | |
1242 | |
1243 public final void movzxb(Register dst, Register src) { // movzxb | |
1244 int encode = prefixAndEncode(dst.encoding, src.encoding, true); | |
1245 emitByte(0x0F); | |
1246 emitByte(0xB6); | |
1247 emitByte(0xC0 | encode); | |
1248 } | |
1249 | |
1250 public final void movzxl(Register dst, Address src) { // movzxw | |
1251 prefix(src, dst); | |
1252 emitByte(0x0F); | |
1253 emitByte(0xB7); | |
1254 emitOperandHelper(dst, src); | |
1255 } | |
1256 | |
1257 public final void movzxl(Register dst, Register src) { // movzxw | |
1258 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1259 emitByte(0x0F); | |
1260 emitByte(0xB7); | |
1261 emitByte(0xC0 | encode); | |
1262 } | |
1263 | |
1264 public final void mull(Address src) { | |
1265 prefix(src); | |
1266 emitByte(0xF7); | |
1267 emitOperandHelper(rsp, src); | |
1268 } | |
1269 | |
1270 public final void mulsd(Register dst, Address src) { | |
1271 assert dst.isFpu(); | |
1272 emitByte(0xF2); | |
1273 prefix(src, dst); | |
1274 emitByte(0x0F); | |
1275 emitByte(0x59); | |
1276 emitOperandHelper(dst, src); | |
1277 } | |
1278 | |
1279 public final void mulsd(Register dst, Register src) { | |
1280 assert dst.isFpu(); | |
1281 assert src.isFpu(); | |
1282 | |
1283 emitByte(0xF2); | |
1284 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1285 emitByte(0x0F); | |
1286 emitByte(0x59); | |
1287 emitByte(0xC0 | encode); | |
1288 } | |
1289 | |
1290 public final void mulss(Register dst, Address src) { | |
1291 assert dst.isFpu(); | |
1292 | |
1293 emitByte(0xF3); | |
1294 prefix(src, dst); | |
1295 emitByte(0x0F); | |
1296 emitByte(0x59); | |
1297 emitOperandHelper(dst, src); | |
1298 } | |
1299 | |
1300 public final void mulss(Register dst, Register src) { | |
1301 assert dst.isFpu(); | |
1302 assert src.isFpu(); | |
1303 emitByte(0xF3); | |
1304 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1305 emitByte(0x0F); | |
1306 emitByte(0x59); | |
1307 emitByte(0xC0 | encode); | |
1308 } | |
1309 | |
1310 public final void negl(Register dst) { | |
1311 int encode = prefixAndEncode(dst.encoding); | |
1312 emitByte(0xF7); | |
1313 emitByte(0xD8 | encode); | |
1314 } | |
1315 | |
1316 public final void ensureUniquePC() { | |
1317 nop(); | |
1318 } | |
1319 | |
1320 public final void nop() { | |
1321 nop(1); | |
1322 } | |
1323 | |
1324 public void nop(int count) { | |
1325 int i = count; | |
1326 if (UseNormalNop) { | |
1327 assert i > 0 : " "; | |
1328 // The fancy nops aren't currently recognized by debuggers making it a | |
1329 // pain to disassemble code while debugging. If assert are on clearly | |
1330 // speed is not an issue so simply use the single byte traditional nop | |
1331 // to do alignment. | |
1332 | |
1333 for (; i > 0; i--) { | |
1334 emitByte(0x90); | |
1335 } | |
1336 return; | |
1337 } | |
1338 | |
1339 if (UseAddressNop) { | |
1340 // | |
1341 // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD. | |
1342 // 1: 0x90 | |
1343 // 2: 0x66 0x90 | |
1344 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) | |
1345 // 4: 0x0F 0x1F 0x40 0x00 | |
1346 // 5: 0x0F 0x1F 0x44 0x00 0x00 | |
1347 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 | |
1348 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 | |
1349 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 | |
1350 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 | |
1351 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 | |
1352 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 | |
1353 | |
1354 // The rest coding is AMD specific - use consecutive Address nops | |
1355 | |
1356 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 | |
1357 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 | |
1358 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 | |
1359 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 | |
1360 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 | |
1361 // Size prefixes (0x66) are added for larger sizes | |
1362 | |
1363 while (i >= 22) { | |
1364 i -= 11; | |
1365 emitByte(0x66); // size prefix | |
1366 emitByte(0x66); // size prefix | |
1367 emitByte(0x66); // size prefix | |
1368 addrNop8(); | |
1369 } | |
1370 // Generate first nop for size between 21-12 | |
1371 switch (i) { | |
1372 case 21: | |
1373 i -= 1; | |
1374 emitByte(0x66); // size prefix | |
1375 // fall through | |
1376 case 20: | |
1377 // fall through | |
1378 case 19: | |
1379 i -= 1; | |
1380 emitByte(0x66); // size prefix | |
1381 // fall through | |
1382 case 18: | |
1383 // fall through | |
1384 case 17: | |
1385 i -= 1; | |
1386 emitByte(0x66); // size prefix | |
1387 // fall through | |
1388 case 16: | |
1389 // fall through | |
1390 case 15: | |
1391 i -= 8; | |
1392 addrNop8(); | |
1393 break; | |
1394 case 14: | |
1395 case 13: | |
1396 i -= 7; | |
1397 addrNop7(); | |
1398 break; | |
1399 case 12: | |
1400 i -= 6; | |
1401 emitByte(0x66); // size prefix | |
1402 addrNop5(); | |
1403 break; | |
1404 default: | |
1405 assert i < 12; | |
1406 } | |
1407 | |
1408 // Generate second nop for size between 11-1 | |
1409 switch (i) { | |
1410 case 11: | |
1411 emitByte(0x66); // size prefix | |
1412 emitByte(0x66); // size prefix | |
1413 emitByte(0x66); // size prefix | |
1414 addrNop8(); | |
1415 break; | |
1416 case 10: | |
1417 emitByte(0x66); // size prefix | |
1418 emitByte(0x66); // size prefix | |
1419 addrNop8(); | |
1420 break; | |
1421 case 9: | |
1422 emitByte(0x66); // size prefix | |
1423 addrNop8(); | |
1424 break; | |
1425 case 8: | |
1426 addrNop8(); | |
1427 break; | |
1428 case 7: | |
1429 addrNop7(); | |
1430 break; | |
1431 case 6: | |
1432 emitByte(0x66); // size prefix | |
1433 addrNop5(); | |
1434 break; | |
1435 case 5: | |
1436 addrNop5(); | |
1437 break; | |
1438 case 4: | |
1439 addrNop4(); | |
1440 break; | |
1441 case 3: | |
1442 // Don't use "0x0F 0x1F 0x00" - need patching safe padding | |
1443 emitByte(0x66); // size prefix | |
1444 emitByte(0x66); // size prefix | |
1445 emitByte(0x90); // nop | |
1446 break; | |
1447 case 2: | |
1448 emitByte(0x66); // size prefix | |
1449 emitByte(0x90); // nop | |
1450 break; | |
1451 case 1: | |
1452 emitByte(0x90); // nop | |
1453 break; | |
1454 default: | |
1455 assert i == 0; | |
1456 } | |
1457 return; | |
1458 } | |
1459 | |
1460 // Using nops with size prefixes "0x66 0x90". | |
1461 // From AMD Optimization Guide: | |
1462 // 1: 0x90 | |
1463 // 2: 0x66 0x90 | |
1464 // 3: 0x66 0x66 0x90 | |
1465 // 4: 0x66 0x66 0x66 0x90 | |
1466 // 5: 0x66 0x66 0x90 0x66 0x90 | |
1467 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 | |
1468 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 | |
1469 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 | |
1470 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 | |
1471 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 | |
1472 // | |
1473 while (i > 12) { | |
1474 i -= 4; | |
1475 emitByte(0x66); // size prefix | |
1476 emitByte(0x66); | |
1477 emitByte(0x66); | |
1478 emitByte(0x90); // nop | |
1479 } | |
1480 // 1 - 12 nops | |
1481 if (i > 8) { | |
1482 if (i > 9) { | |
1483 i -= 1; | |
1484 emitByte(0x66); | |
1485 } | |
1486 i -= 3; | |
1487 emitByte(0x66); | |
1488 emitByte(0x66); | |
1489 emitByte(0x90); | |
1490 } | |
1491 // 1 - 8 nops | |
1492 if (i > 4) { | |
1493 if (i > 6) { | |
1494 i -= 1; | |
1495 emitByte(0x66); | |
1496 } | |
1497 i -= 3; | |
1498 emitByte(0x66); | |
1499 emitByte(0x66); | |
1500 emitByte(0x90); | |
1501 } | |
1502 switch (i) { | |
1503 case 4: | |
1504 emitByte(0x66); | |
1505 emitByte(0x66); | |
1506 emitByte(0x66); | |
1507 emitByte(0x90); | |
1508 break; | |
1509 case 3: | |
1510 emitByte(0x66); | |
1511 emitByte(0x66); | |
1512 emitByte(0x90); | |
1513 break; | |
1514 case 2: | |
1515 emitByte(0x66); | |
1516 emitByte(0x90); | |
1517 break; | |
1518 case 1: | |
1519 emitByte(0x90); | |
1520 break; | |
1521 default: | |
1522 assert i == 0; | |
1523 } | |
1524 } | |
1525 | |
1526 public final void notl(Register dst) { | |
1527 int encode = prefixAndEncode(dst.encoding); | |
1528 emitByte(0xF7); | |
1529 emitByte(0xD0 | encode); | |
1530 } | |
1531 | |
1532 public final void orl(Address dst, int imm32) { | |
1533 prefix(dst); | |
1534 emitByte(0x81); | |
1535 emitOperandHelper(rcx, dst); | |
1536 emitInt(imm32); | |
1537 } | |
1538 | |
1539 public final void orl(Register dst, int imm32) { | |
1540 prefix(dst); | |
1541 emitArith(0x81, 0xC8, dst, imm32); | |
1542 } | |
1543 | |
1544 public final void orl(Register dst, Address src) { | |
1545 prefix(src, dst); | |
1546 emitByte(0x0B); | |
1547 emitOperandHelper(dst, src); | |
1548 } | |
1549 | |
1550 public final void orl(Register dst, Register src) { | |
1551 prefixAndEncode(dst.encoding, src.encoding); | |
1552 emitArith(0x0B, 0xC0, dst, src); | |
1553 } | |
1554 | |
1555 // generic | |
1556 public final void pop(Register dst) { | |
1557 int encode = prefixAndEncode(dst.encoding); | |
1558 emitByte(0x58 | encode); | |
1559 } | |
1560 | |
1561 public final void prefetchPrefix(Address src) { | |
1562 prefix(src); | |
1563 emitByte(0x0F); | |
1564 } | |
1565 | |
1566 public final void prefetchnta(Address src) { | |
1567 prefetchPrefix(src); | |
1568 emitByte(0x18); | |
1569 emitOperandHelper(rax, src); // 0, src | |
1570 } | |
1571 | |
1572 public final void prefetchr(Address src) { | |
1573 prefetchPrefix(src); | |
1574 emitByte(0x0D); | |
1575 emitOperandHelper(rax, src); // 0, src | |
1576 } | |
1577 | |
1578 public final void prefetcht0(Address src) { | |
1579 prefetchPrefix(src); | |
1580 emitByte(0x18); | |
1581 emitOperandHelper(rcx, src); // 1, src | |
1582 | |
1583 } | |
1584 | |
1585 public final void prefetcht1(Address src) { | |
1586 prefetchPrefix(src); | |
1587 emitByte(0x18); | |
1588 emitOperandHelper(rdx, src); // 2, src | |
1589 } | |
1590 | |
1591 public final void prefetcht2(Address src) { | |
1592 prefetchPrefix(src); | |
1593 emitByte(0x18); | |
1594 emitOperandHelper(rbx, src); // 3, src | |
1595 } | |
1596 | |
1597 public final void prefetchw(Address src) { | |
1598 prefetchPrefix(src); | |
1599 emitByte(0x0D); | |
1600 emitOperandHelper(rcx, src); // 1, src | |
1601 } | |
1602 | |
1603 public final void pshufd(Register dst, Register src, int mode) { | |
1604 assert dst.isFpu(); | |
1605 assert src.isFpu(); | |
1606 assert isUByte(mode) : "invalid value"; | |
1607 | |
1608 emitByte(0x66); | |
1609 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1610 emitByte(0x0F); | |
1611 emitByte(0x70); | |
1612 emitByte(0xC0 | encode); | |
1613 emitByte(mode & 0xFF); | |
1614 } | |
1615 | |
1616 public final void pshufd(Register dst, Address src, int mode) { | |
1617 assert dst.isFpu(); | |
1618 assert isUByte(mode) : "invalid value"; | |
1619 | |
1620 emitByte(0x66); | |
1621 prefix(src, dst); | |
1622 emitByte(0x0F); | |
1623 emitByte(0x70); | |
1624 emitOperandHelper(dst, src); | |
1625 emitByte(mode & 0xFF); | |
1626 | |
1627 } | |
1628 | |
1629 public final void pshuflw(Register dst, Register src, int mode) { | |
1630 assert dst.isFpu(); | |
1631 assert src.isFpu(); | |
1632 assert isUByte(mode) : "invalid value"; | |
1633 | |
1634 emitByte(0xF2); | |
1635 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1636 emitByte(0x0F); | |
1637 emitByte(0x70); | |
1638 emitByte(0xC0 | encode); | |
1639 emitByte(mode & 0xFF); | |
1640 } | |
1641 | |
1642 public final void pshuflw(Register dst, Address src, int mode) { | |
1643 assert dst.isFpu(); | |
1644 assert isUByte(mode) : "invalid value"; | |
1645 | |
1646 emitByte(0xF2); | |
1647 prefix(src, dst); // QQ new | |
1648 emitByte(0x0F); | |
1649 emitByte(0x70); | |
1650 emitOperandHelper(dst, src); | |
1651 emitByte(mode & 0xFF); | |
1652 } | |
1653 | |
1654 public final void psrlq(Register dst, int shift) { | |
1655 assert dst.isFpu(); | |
1656 // HMM Table D-1 says sse2 or mmx | |
1657 | |
1658 int encode = prefixqAndEncode(xmm2.encoding, dst.encoding); | |
1659 emitByte(0x66); | |
1660 emitByte(0x0F); | |
1661 emitByte(0x73); | |
1662 emitByte(0xC0 | encode); | |
1663 emitByte(shift); | |
1664 } | |
1665 | |
1666 public final void punpcklbw(Register dst, Register src) { | |
1667 assert dst.isFpu(); | |
1668 assert src.isFpu(); | |
1669 emitByte(0x66); | |
1670 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1671 emitByte(0x0F); | |
1672 emitByte(0x60); | |
1673 emitByte(0xC0 | encode); | |
1674 } | |
1675 | |
1676 public final void push(int imm32) { | |
1677 // in 64bits we push 64bits onto the stack but only | |
1678 // take a 32bit immediate | |
1679 emitByte(0x68); | |
1680 emitInt(imm32); | |
1681 } | |
1682 | |
1683 public final void push(Register src) { | |
1684 int encode = prefixAndEncode(src.encoding); | |
1685 emitByte(0x50 | encode); | |
1686 } | |
1687 | |
1688 public final void pushf() { | |
1689 emitByte(0x9C); | |
1690 } | |
1691 | |
1692 public final void pxor(Register dst, Address src) { | |
1693 assert dst.isFpu(); | |
1694 | |
1695 emitByte(0x66); | |
1696 prefix(src, dst); | |
1697 emitByte(0x0F); | |
1698 emitByte(0xEF); | |
1699 emitOperandHelper(dst, src); | |
1700 } | |
1701 | |
1702 public final void pxor(Register dst, Register src) { | |
1703 assert dst.isFpu(); | |
1704 assert src.isFpu(); | |
1705 | |
1706 emitByte(0x66); | |
1707 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1708 emitByte(0x0F); | |
1709 emitByte(0xEF); | |
1710 emitByte(0xC0 | encode); | |
1711 | |
1712 } | |
1713 | |
1714 public final void rcll(Register dst, int imm8) { | |
1715 assert isShiftCount(imm8) : "illegal shift count"; | |
1716 int encode = prefixAndEncode(dst.encoding); | |
1717 if (imm8 == 1) { | |
1718 emitByte(0xD1); | |
1719 emitByte(0xD0 | encode); | |
1720 } else { | |
1721 emitByte(0xC1); | |
1722 emitByte(0xD0 | encode); | |
1723 emitByte(imm8); | |
1724 } | |
1725 } | |
1726 | |
1727 public final void pause() { | |
1728 emitByte(0xF3); | |
1729 emitByte(0x90); | |
1730 } | |
1731 | |
1732 // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx heap words. | |
1733 public final void repeatMoveWords() { | |
1734 emitByte(0xF3); | |
1735 emitByte(Prefix.REXW); | |
1736 emitByte(0xA5); | |
1737 } | |
1738 | |
1739 // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx bytes. | |
1740 public final void repeatMoveBytes() { | |
1741 emitByte(0xF3); | |
1742 emitByte(Prefix.REXW); | |
1743 emitByte(0xA4); | |
1744 } | |
1745 | |
1746 // sets X86.rcx pointer sized words with X86.rax, value at [edi] | |
1747 // generic | |
1748 public final void repSet() { // repSet | |
1749 emitByte(0xF3); | |
1750 // STOSQ | |
1751 emitByte(Prefix.REXW); | |
1752 emitByte(0xAB); | |
1753 } | |
1754 | |
1755 // scans X86.rcx pointer sized words at [edi] for occurance of X86.rax, | |
1756 // generic | |
1757 public final void repneScan() { // repneScan | |
1758 emitByte(0xF2); | |
1759 // SCASQ | |
1760 emitByte(Prefix.REXW); | |
1761 emitByte(0xAF); | |
1762 } | |
1763 | |
1764 // scans X86.rcx 4 byte words at [edi] for occurance of X86.rax, | |
1765 // generic | |
1766 public final void repneScanl() { // repneScan | |
1767 emitByte(0xF2); | |
1768 // SCASL | |
1769 emitByte(0xAF); | |
1770 } | |
1771 | |
1772 public final void ret(int imm16) { | |
1773 if (imm16 == 0) { | |
1774 emitByte(0xC3); | |
1775 } else { | |
1776 emitByte(0xC2); | |
1777 emitShort(imm16); | |
1778 } | |
1779 } | |
1780 | |
1781 public final void sarl(Register dst, int imm8) { | |
1782 int encode = prefixAndEncode(dst.encoding); | |
1783 assert isShiftCount(imm8) : "illegal shift count"; | |
1784 if (imm8 == 1) { | |
1785 emitByte(0xD1); | |
1786 emitByte(0xF8 | encode); | |
1787 } else { | |
1788 emitByte(0xC1); | |
1789 emitByte(0xF8 | encode); | |
1790 emitByte(imm8); | |
1791 } | |
1792 } | |
1793 | |
1794 public final void sarl(Register dst) { | |
1795 int encode = prefixAndEncode(dst.encoding); | |
1796 emitByte(0xD3); | |
1797 emitByte(0xF8 | encode); | |
1798 } | |
1799 | |
1800 public final void sbbl(Address dst, int imm32) { | |
1801 prefix(dst); | |
1802 emitArithOperand(0x81, rbx, dst, imm32); | |
1803 } | |
1804 | |
1805 public final void sbbl(Register dst, int imm32) { | |
1806 prefix(dst); | |
1807 emitArith(0x81, 0xD8, dst, imm32); | |
1808 } | |
1809 | |
1810 public final void sbbl(Register dst, Address src) { | |
1811 prefix(src, dst); | |
1812 emitByte(0x1B); | |
1813 emitOperandHelper(dst, src); | |
1814 } | |
1815 | |
1816 public final void sbbl(Register dst, Register src) { | |
1817 prefixAndEncode(dst.encoding, src.encoding); | |
1818 emitArith(0x1B, 0xC0, dst, src); | |
1819 } | |
1820 | |
1821 public final void setb(ConditionFlag cc, Register dst) { | |
1822 assert 0 <= cc.value && cc.value < 16 : "illegal cc"; | |
1823 int encode = prefixAndEncode(dst.encoding, true); | |
1824 emitByte(0x0F); | |
1825 emitByte(0x90 | cc.value); | |
1826 emitByte(0xC0 | encode); | |
1827 } | |
1828 | |
1829 public final void shll(Register dst, int imm8) { | |
1830 assert isShiftCount(imm8) : "illegal shift count"; | |
1831 int encode = prefixAndEncode(dst.encoding); | |
1832 if (imm8 == 1) { | |
1833 emitByte(0xD1); | |
1834 emitByte(0xE0 | encode); | |
1835 } else { | |
1836 emitByte(0xC1); | |
1837 emitByte(0xE0 | encode); | |
1838 emitByte(imm8); | |
1839 } | |
1840 } | |
1841 | |
1842 public final void shll(Register dst) { | |
1843 int encode = prefixAndEncode(dst.encoding); | |
1844 emitByte(0xD3); | |
1845 emitByte(0xE0 | encode); | |
1846 } | |
1847 | |
1848 public final void shrl(Register dst, int imm8) { | |
1849 assert isShiftCount(imm8) : "illegal shift count"; | |
1850 int encode = prefixAndEncode(dst.encoding); | |
1851 emitByte(0xC1); | |
1852 emitByte(0xE8 | encode); | |
1853 emitByte(imm8); | |
1854 } | |
1855 | |
1856 public final void shrl(Register dst) { | |
1857 int encode = prefixAndEncode(dst.encoding); | |
1858 emitByte(0xD3); | |
1859 emitByte(0xE8 | encode); | |
1860 } | |
1861 | |
1862 // copies a single word from [esi] to [edi] | |
1863 public final void smovl() { | |
1864 emitByte(0xA5); | |
1865 } | |
1866 | |
1867 public final void sqrtsd(Register dst, Register src) { | |
1868 assert dst.isFpu(); | |
1869 assert src.isFpu(); | |
1870 // HMM Table D-1 says sse2 | |
1871 // assert is64 || target.supportsSSE(); | |
1872 emitByte(0xF2); | |
1873 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1874 emitByte(0x0F); | |
1875 emitByte(0x51); | |
1876 emitByte(0xC0 | encode); | |
1877 } | |
1878 | |
1879 public final void subl(Address dst, int imm32) { | |
1880 prefix(dst); | |
1881 if (isByte(imm32)) { | |
1882 emitByte(0x83); | |
1883 emitOperandHelper(rbp, dst); | |
1884 emitByte(imm32 & 0xFF); | |
1885 } else { | |
1886 emitByte(0x81); | |
1887 emitOperandHelper(rbp, dst); | |
1888 emitInt(imm32); | |
1889 } | |
1890 } | |
1891 | |
1892 public final void subl(Register dst, int imm32) { | |
1893 prefix(dst); | |
1894 emitArith(0x81, 0xE8, dst, imm32); | |
1895 } | |
1896 | |
1897 public final void subl(Address dst, Register src) { | |
1898 prefix(dst, src); | |
1899 emitByte(0x29); | |
1900 emitOperandHelper(src, dst); | |
1901 } | |
1902 | |
1903 public final void subl(Register dst, Address src) { | |
1904 prefix(src, dst); | |
1905 emitByte(0x2B); | |
1906 emitOperandHelper(dst, src); | |
1907 } | |
1908 | |
1909 public final void subl(Register dst, Register src) { | |
1910 prefixAndEncode(dst.encoding, src.encoding); | |
1911 emitArith(0x2B, 0xC0, dst, src); | |
1912 } | |
1913 | |
1914 public final void subsd(Register dst, Register src) { | |
1915 assert dst.isFpu(); | |
1916 assert src.isFpu(); | |
1917 emitByte(0xF2); | |
1918 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1919 emitByte(0x0F); | |
1920 emitByte(0x5C); | |
1921 emitByte(0xC0 | encode); | |
1922 } | |
1923 | |
1924 public final void subsd(Register dst, Address src) { | |
1925 assert dst.isFpu(); | |
1926 | |
1927 emitByte(0xF2); | |
1928 prefix(src, dst); | |
1929 emitByte(0x0F); | |
1930 emitByte(0x5C); | |
1931 emitOperandHelper(dst, src); | |
1932 } | |
1933 | |
1934 public final void subss(Register dst, Register src) { | |
1935 assert dst.isFpu(); | |
1936 assert src.isFpu(); | |
1937 emitByte(0xF3); | |
1938 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1939 emitByte(0x0F); | |
1940 emitByte(0x5C); | |
1941 emitByte(0xC0 | encode); | |
1942 } | |
1943 | |
1944 public final void subss(Register dst, Address src) { | |
1945 assert dst.isFpu(); | |
1946 | |
1947 emitByte(0xF3); | |
1948 prefix(src, dst); | |
1949 emitByte(0x0F); | |
1950 emitByte(0x5C); | |
1951 emitOperandHelper(dst, src); | |
1952 } | |
1953 | |
1954 public final void testb(Register dst, int imm8) { | |
1955 prefixAndEncode(dst.encoding, true); | |
1956 emitArithB(0xF6, 0xC0, dst, imm8); | |
1957 } | |
1958 | |
1959 public final void testl(Register dst, int imm32) { | |
1960 // not using emitArith because test | |
1961 // doesn't support sign-extension of | |
1962 // 8bit operands | |
1963 int encode = dst.encoding; | |
1964 if (encode == 0) { | |
1965 emitByte(0xA9); | |
1966 } else { | |
1967 encode = prefixAndEncode(encode); | |
1968 emitByte(0xF7); | |
1969 emitByte(0xC0 | encode); | |
1970 } | |
1971 emitInt(imm32); | |
1972 } | |
1973 | |
1974 public final void testl(Register dst, Register src) { | |
1975 prefixAndEncode(dst.encoding, src.encoding); | |
1976 emitArith(0x85, 0xC0, dst, src); | |
1977 } | |
1978 | |
1979 public final void testl(Register dst, Address src) { | |
1980 prefix(src, dst); | |
1981 emitByte(0x85); | |
1982 emitOperandHelper(dst, src); | |
1983 } | |
1984 | |
1985 public final void ucomisd(Register dst, Address src) { | |
1986 assert dst.isFpu(); | |
1987 emitByte(0x66); | |
1988 ucomiss(dst, src); | |
1989 } | |
1990 | |
1991 public final void ucomisd(Register dst, Register src) { | |
1992 assert dst.isFpu(); | |
1993 assert src.isFpu(); | |
1994 emitByte(0x66); | |
1995 ucomiss(dst, src); | |
1996 } | |
1997 | |
1998 public final void ucomiss(Register dst, Address src) { | |
1999 assert dst.isFpu(); | |
2000 | |
2001 prefix(src, dst); | |
2002 emitByte(0x0F); | |
2003 emitByte(0x2E); | |
2004 emitOperandHelper(dst, src); | |
2005 } | |
2006 | |
2007 public final void ucomiss(Register dst, Register src) { | |
2008 assert dst.isFpu(); | |
2009 assert src.isFpu(); | |
2010 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2011 emitByte(0x0F); | |
2012 emitByte(0x2E); | |
2013 emitByte(0xC0 | encode); | |
2014 } | |
2015 | |
2016 public final void xaddl(Address dst, Register src) { | |
2017 assert src.isFpu(); | |
2018 | |
2019 prefix(dst, src); | |
2020 emitByte(0x0F); | |
2021 emitByte(0xC1); | |
2022 emitOperandHelper(src, dst); | |
2023 } | |
2024 | |
2025 public final void xchgl(Register dst, Address src) { // xchg | |
2026 prefix(src, dst); | |
2027 emitByte(0x87); | |
2028 emitOperandHelper(dst, src); | |
2029 } | |
2030 | |
2031 public final void xchgl(Register dst, Register src) { | |
2032 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2033 emitByte(0x87); | |
2034 emitByte(0xc0 | encode); | |
2035 } | |
2036 | |
2037 public final void xorl(Register dst, int imm32) { | |
2038 prefix(dst); | |
2039 emitArith(0x81, 0xF0, dst, imm32); | |
2040 } | |
2041 | |
2042 public final void xorl(Register dst, Address src) { | |
2043 prefix(src, dst); | |
2044 emitByte(0x33); | |
2045 emitOperandHelper(dst, src); | |
2046 } | |
2047 | |
2048 public final void xorl(Register dst, Register src) { | |
2049 prefixAndEncode(dst.encoding, src.encoding); | |
2050 emitArith(0x33, 0xC0, dst, src); | |
2051 } | |
2052 | |
2053 public final void andpd(Register dst, Register src) { | |
2054 emitByte(0x66); | |
2055 andps(dst, src); | |
2056 } | |
2057 | |
2058 public final void andpd(Register dst, Address src) { | |
2059 emitByte(0x66); | |
2060 andps(dst, src); | |
2061 } | |
2062 | |
2063 public final void andps(Register dst, Register src) { | |
2064 assert dst.isFpu() && src.isFpu(); | |
2065 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2066 emitByte(0x0F); | |
2067 emitByte(0x54); | |
2068 emitByte(0xC0 | encode); | |
2069 } | |
2070 | |
2071 public final void andps(Register dst, Address src) { | |
2072 assert dst.isFpu(); | |
2073 prefix(src, dst); | |
2074 emitByte(0x0F); | |
2075 emitByte(0x54); | |
2076 emitOperandHelper(dst, src); | |
2077 } | |
2078 | |
2079 public final void orpd(Register dst, Register src) { | |
2080 emitByte(0x66); | |
2081 orps(dst, src); | |
2082 } | |
2083 | |
2084 public final void orpd(Register dst, Address src) { | |
2085 emitByte(0x66); | |
2086 orps(dst, src); | |
2087 } | |
2088 | |
2089 public final void orps(Register dst, Register src) { | |
2090 assert dst.isFpu() && src.isFpu(); | |
2091 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2092 emitByte(0x0F); | |
2093 emitByte(0x56); | |
2094 emitByte(0xC0 | encode); | |
2095 } | |
2096 | |
2097 public final void orps(Register dst, Address src) { | |
2098 assert dst.isFpu(); | |
2099 prefix(src, dst); | |
2100 emitByte(0x0F); | |
2101 emitByte(0x56); | |
2102 emitOperandHelper(dst, src); | |
2103 } | |
2104 | |
2105 public final void xorpd(Register dst, Register src) { | |
2106 emitByte(0x66); | |
2107 xorps(dst, src); | |
2108 } | |
2109 | |
2110 public final void xorpd(Register dst, Address src) { | |
2111 emitByte(0x66); | |
2112 xorps(dst, src); | |
2113 } | |
2114 | |
2115 public final void xorps(Register dst, Register src) { | |
2116 assert dst.isFpu() && src.isFpu(); | |
2117 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2118 emitByte(0x0F); | |
2119 emitByte(0x57); | |
2120 emitByte(0xC0 | encode); | |
2121 } | |
2122 | |
2123 public final void xorps(Register dst, Address src) { | |
2124 assert dst.isFpu(); | |
2125 prefix(src, dst); | |
2126 emitByte(0x0F); | |
2127 emitByte(0x57); | |
2128 emitOperandHelper(dst, src); | |
2129 } | |
2130 | |
2131 // 32bit only pieces of the assembler | |
2132 | |
2133 public final void decl(Register dst) { | |
2134 // Don't use it directly. Use Macrodecrementl() instead. | |
2135 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) | |
2136 int encode = prefixAndEncode(dst.encoding); | |
2137 emitByte(0xFF); | |
2138 emitByte(0xC8 | encode); | |
2139 } | |
2140 | |
2141 public final void incl(Register dst) { | |
2142 // Don't use it directly. Use Macroincrementl() instead. | |
2143 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) | |
2144 int encode = prefixAndEncode(dst.encoding); | |
2145 emitByte(0xFF); | |
2146 emitByte(0xC0 | encode); | |
2147 } | |
2148 | |
2149 int prefixAndEncode(int regEnc) { | |
2150 return prefixAndEncode(regEnc, false); | |
2151 } | |
2152 | |
2153 int prefixAndEncode(int regEnc, boolean byteinst) { | |
2154 if (regEnc >= 8) { | |
2155 emitByte(Prefix.REXB); | |
2156 return regEnc - 8; | |
2157 } else if (byteinst && regEnc >= 4) { | |
2158 emitByte(Prefix.REX); | |
2159 } | |
2160 return regEnc; | |
2161 } | |
2162 | |
2163 int prefixqAndEncode(int regEnc) { | |
2164 if (regEnc < 8) { | |
2165 emitByte(Prefix.REXW); | |
2166 return regEnc; | |
2167 } else { | |
2168 emitByte(Prefix.REXWB); | |
2169 return regEnc - 8; | |
2170 } | |
2171 } | |
2172 | |
2173 int prefixAndEncode(int dstEnc, int srcEnc) { | |
2174 return prefixAndEncode(dstEnc, srcEnc, false); | |
2175 } | |
2176 | |
2177 int prefixAndEncode(int dstEncoding, int srcEncoding, boolean byteinst) { | |
2178 int srcEnc = srcEncoding; | |
2179 int dstEnc = dstEncoding; | |
2180 if (dstEnc < 8) { | |
2181 if (srcEnc >= 8) { | |
2182 emitByte(Prefix.REXB); | |
2183 srcEnc -= 8; | |
2184 } else if (byteinst && srcEnc >= 4) { | |
2185 emitByte(Prefix.REX); | |
2186 } | |
2187 } else { | |
2188 if (srcEnc < 8) { | |
2189 emitByte(Prefix.REXR); | |
2190 } else { | |
2191 emitByte(Prefix.REXRB); | |
2192 srcEnc -= 8; | |
2193 } | |
2194 dstEnc -= 8; | |
2195 } | |
2196 return dstEnc << 3 | srcEnc; | |
2197 } | |
2198 | |
2199 /** | |
2200 * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand prefix. If the given | |
2201 * operands exceed 3 bits, the 4th bit is encoded in the prefix. | |
2202 * | |
2203 * @param regEncoding the encoding of the register part of the ModRM-Byte | |
2204 * @param rmEncoding the encoding of the r/m part of the ModRM-Byte | |
2205 * @return the lower 6 bits of the ModRM-Byte that should be emitted | |
2206 */ | |
2207 private int prefixqAndEncode(int regEncoding, int rmEncoding) { | |
2208 int rmEnc = rmEncoding; | |
2209 int regEnc = regEncoding; | |
2210 if (regEnc < 8) { | |
2211 if (rmEnc < 8) { | |
2212 emitByte(Prefix.REXW); | |
2213 } else { | |
2214 emitByte(Prefix.REXWB); | |
2215 rmEnc -= 8; | |
2216 } | |
2217 } else { | |
2218 if (rmEnc < 8) { | |
2219 emitByte(Prefix.REXWR); | |
2220 } else { | |
2221 emitByte(Prefix.REXWRB); | |
2222 rmEnc -= 8; | |
2223 } | |
2224 regEnc -= 8; | |
2225 } | |
2226 return regEnc << 3 | rmEnc; | |
2227 } | |
2228 | |
2229 private void prefix(Register reg) { | |
2230 if (reg.encoding >= 8) { | |
2231 emitByte(Prefix.REXB); | |
2232 } | |
2233 } | |
2234 | |
2235 private static boolean needsRex(Value value) { | |
2236 return isRegister(value) && asRegister(value).encoding >= MinEncodingNeedsRex; | |
2237 } | |
2238 | |
2239 | |
2240 private void prefix(Address adr) { | |
2241 if (needsRex(adr.getBase())) { | |
2242 if (needsRex(adr.getIndex())) { | |
2243 emitByte(Prefix.REXXB); | |
2244 } else { | |
2245 emitByte(Prefix.REXB); | |
2246 } | |
2247 } else { | |
2248 if (needsRex(adr.getIndex())) { | |
2249 emitByte(Prefix.REXX); | |
2250 } | |
2251 } | |
2252 } | |
2253 | |
2254 private void prefixq(Address adr) { | |
2255 if (needsRex(adr.getBase())) { | |
2256 if (needsRex(adr.getIndex())) { | |
2257 emitByte(Prefix.REXWXB); | |
2258 } else { | |
2259 emitByte(Prefix.REXWB); | |
2260 } | |
2261 } else { | |
2262 if (needsRex(adr.getIndex())) { | |
2263 emitByte(Prefix.REXWX); | |
2264 } else { | |
2265 emitByte(Prefix.REXW); | |
2266 } | |
2267 } | |
2268 } | |
2269 | |
2270 private void prefix(Address adr, Register reg) { | |
2271 if (reg.encoding < 8) { | |
2272 if (needsRex(adr.getBase())) { | |
2273 if (needsRex(adr.getIndex())) { | |
2274 emitByte(Prefix.REXXB); | |
2275 } else { | |
2276 emitByte(Prefix.REXB); | |
2277 } | |
2278 } else { | |
2279 if (needsRex(adr.getIndex())) { | |
2280 emitByte(Prefix.REXX); | |
2281 } else if (reg.encoding >= 4) { | |
2282 emitByte(Prefix.REX); | |
2283 } | |
2284 } | |
2285 } else { | |
2286 if (needsRex(adr.getBase())) { | |
2287 if (needsRex(adr.getIndex())) { | |
2288 emitByte(Prefix.REXRXB); | |
2289 } else { | |
2290 emitByte(Prefix.REXRB); | |
2291 } | |
2292 } else { | |
2293 if (needsRex(adr.getIndex())) { | |
2294 emitByte(Prefix.REXRX); | |
2295 } else { | |
2296 emitByte(Prefix.REXR); | |
2297 } | |
2298 } | |
2299 } | |
2300 } | |
2301 | |
2302 private void prefixq(Address adr, Register src) { | |
2303 if (src.encoding < 8) { | |
2304 if (needsRex(adr.getBase())) { | |
2305 if (needsRex(adr.getIndex())) { | |
2306 emitByte(Prefix.REXWXB); | |
2307 } else { | |
2308 emitByte(Prefix.REXWB); | |
2309 } | |
2310 } else { | |
2311 if (needsRex(adr.getIndex())) { | |
2312 emitByte(Prefix.REXWX); | |
2313 } else { | |
2314 emitByte(Prefix.REXW); | |
2315 } | |
2316 } | |
2317 } else { | |
2318 if (needsRex(adr.getBase())) { | |
2319 if (needsRex(adr.getIndex())) { | |
2320 emitByte(Prefix.REXWRXB); | |
2321 } else { | |
2322 emitByte(Prefix.REXWRB); | |
2323 } | |
2324 } else { | |
2325 if (needsRex(adr.getIndex())) { | |
2326 emitByte(Prefix.REXWRX); | |
2327 } else { | |
2328 emitByte(Prefix.REXWR); | |
2329 } | |
2330 } | |
2331 } | |
2332 } | |
2333 | |
2334 public final void addq(Address dst, int imm32) { | |
2335 prefixq(dst); | |
2336 emitArithOperand(0x81, rax, dst, imm32); | |
2337 } | |
2338 | |
2339 public final void addq(Address dst, Register src) { | |
2340 prefixq(dst, src); | |
2341 emitByte(0x01); | |
2342 emitOperandHelper(src, dst); | |
2343 } | |
2344 | |
2345 public final void addq(Register dst, int imm32) { | |
2346 prefixqAndEncode(dst.encoding); | |
2347 emitArith(0x81, 0xC0, dst, imm32); | |
2348 } | |
2349 | |
2350 public final void addq(Register dst, Address src) { | |
2351 prefixq(src, dst); | |
2352 emitByte(0x03); | |
2353 emitOperandHelper(dst, src); | |
2354 } | |
2355 | |
2356 public final void addq(Register dst, Register src) { | |
2357 prefixqAndEncode(dst.encoding, src.encoding); | |
2358 emitArith(0x03, 0xC0, dst, src); | |
2359 } | |
2360 | |
2361 public final void andq(Register dst, int imm32) { | |
2362 prefixqAndEncode(dst.encoding); | |
2363 emitArith(0x81, 0xE0, dst, imm32); | |
2364 } | |
2365 | |
2366 public final void andq(Register dst, Address src) { | |
2367 prefixq(src, dst); | |
2368 emitByte(0x23); | |
2369 emitOperandHelper(dst, src); | |
2370 } | |
2371 | |
2372 public final void andq(Register dst, Register src) { | |
2373 prefixqAndEncode(dst.encoding, src.encoding); | |
2374 emitArith(0x23, 0xC0, dst, src); | |
2375 } | |
2376 | |
2377 public final void bswapq(Register reg) { | |
2378 int encode = prefixqAndEncode(reg.encoding); | |
2379 emitByte(0x0F); | |
2380 emitByte(0xC8 | encode); | |
2381 } | |
2382 | |
2383 public final void cdqq() { | |
2384 emitByte(Prefix.REXW); | |
2385 emitByte(0x99); | |
2386 } | |
2387 | |
2388 public final void cmovq(ConditionFlag cc, Register dst, Register src) { | |
2389 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2390 emitByte(0x0F); | |
2391 emitByte(0x40 | cc.value); | |
2392 emitByte(0xC0 | encode); | |
2393 } | |
2394 | |
2395 public final void cmovq(ConditionFlag cc, Register dst, Address src) { | |
2396 prefixq(src, dst); | |
2397 emitByte(0x0F); | |
2398 emitByte(0x40 | cc.value); | |
2399 emitOperandHelper(dst, src); | |
2400 } | |
2401 | |
2402 public final void cmpq(Address dst, int imm32) { | |
2403 prefixq(dst); | |
2404 emitByte(0x81); | |
2405 emitOperandHelper(rdi, dst); | |
2406 emitInt(imm32); | |
2407 } | |
2408 | |
2409 public final void cmpq(Register dst, int imm32) { | |
2410 prefixqAndEncode(dst.encoding); | |
2411 emitArith(0x81, 0xF8, dst, imm32); | |
2412 } | |
2413 | |
2414 public final void cmpq(Address dst, Register src) { | |
2415 prefixq(dst, src); | |
2416 emitByte(0x3B); | |
2417 emitOperandHelper(src, dst); | |
2418 } | |
2419 | |
2420 public final void cmpq(Register dst, Register src) { | |
2421 prefixqAndEncode(dst.encoding, src.encoding); | |
2422 emitArith(0x3B, 0xC0, dst, src); | |
2423 } | |
2424 | |
2425 public final void cmpq(Register dst, Address src) { | |
2426 prefixq(src, dst); | |
2427 emitByte(0x3B); | |
2428 emitOperandHelper(dst, src); | |
2429 } | |
2430 | |
2431 public final void cmpxchgq(Register reg, Address adr) { | |
2432 prefixq(adr, reg); | |
2433 emitByte(0x0F); | |
2434 emitByte(0xB1); | |
2435 emitOperandHelper(reg, adr); | |
2436 } | |
2437 | |
2438 public final void cvtsi2sdq(Register dst, Register src) { | |
2439 assert dst.isFpu(); | |
2440 emitByte(0xF2); | |
2441 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2442 emitByte(0x0F); | |
2443 emitByte(0x2A); | |
2444 emitByte(0xC0 | encode); | |
2445 } | |
2446 | |
2447 public final void cvtsi2ssq(Register dst, Register src) { | |
2448 assert dst.isFpu(); | |
2449 emitByte(0xF3); | |
2450 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2451 emitByte(0x0F); | |
2452 emitByte(0x2A); | |
2453 emitByte(0xC0 | encode); | |
2454 } | |
2455 | |
2456 public final void cvttsd2siq(Register dst, Register src) { | |
2457 assert src.isFpu(); | |
2458 emitByte(0xF2); | |
2459 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2460 emitByte(0x0F); | |
2461 emitByte(0x2C); | |
2462 emitByte(0xC0 | encode); | |
2463 } | |
2464 | |
2465 public final void cvttss2siq(Register dst, Register src) { | |
2466 assert src.isFpu(); | |
2467 emitByte(0xF3); | |
2468 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2469 emitByte(0x0F); | |
2470 emitByte(0x2C); | |
2471 emitByte(0xC0 | encode); | |
2472 } | |
2473 | |
2474 public final void decq(Register dst) { | |
2475 // Don't use it directly. Use Macrodecrementq() instead. | |
2476 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) | |
2477 int encode = prefixqAndEncode(dst.encoding); | |
2478 emitByte(0xFF); | |
2479 emitByte(0xC8 | encode); | |
2480 } | |
2481 | |
2482 public final void decq(Address dst) { | |
2483 // Don't use it directly. Use Macrodecrementq() instead. | |
2484 prefixq(dst); | |
2485 emitByte(0xFF); | |
2486 emitOperandHelper(rcx, dst); | |
2487 } | |
2488 | |
2489 public final void divq(Register src) { | |
2490 int encode = prefixqAndEncode(src.encoding); | |
2491 emitByte(0xF7); | |
2492 emitByte(0xF0 | encode); | |
2493 } | |
2494 | |
2495 public final void idivq(Register src) { | |
2496 int encode = prefixqAndEncode(src.encoding); | |
2497 emitByte(0xF7); | |
2498 emitByte(0xF8 | encode); | |
2499 } | |
2500 | |
2501 public final void imulq(Register dst, Register src) { | |
2502 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2503 emitByte(0x0F); | |
2504 emitByte(0xAF); | |
2505 emitByte(0xC0 | encode); | |
2506 } | |
2507 | |
2508 public final void imulq(Register dst, Register src, int value) { | |
2509 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2510 if (isByte(value)) { | |
2511 emitByte(0x6B); | |
2512 emitByte(0xC0 | encode); | |
2513 emitByte(value); | |
2514 } else { | |
2515 emitByte(0x69); | |
2516 emitByte(0xC0 | encode); | |
2517 emitInt(value); | |
2518 } | |
2519 } | |
2520 | |
2521 public final void incq(Register dst) { | |
2522 // Don't use it directly. Use Macroincrementq() instead. | |
2523 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) | |
2524 int encode = prefixqAndEncode(dst.encoding); | |
2525 emitByte(0xFF); | |
2526 emitByte(0xC0 | encode); | |
2527 } | |
2528 | |
2529 public final void incq(Address dst) { | |
2530 // Don't use it directly. Use Macroincrementq() instead. | |
2531 prefixq(dst); | |
2532 emitByte(0xFF); | |
2533 emitOperandHelper(rax, dst); | |
2534 } | |
2535 | |
2536 public final void movq(Register dst, long imm64) { | |
2537 int encode = prefixqAndEncode(dst.encoding); | |
2538 emitByte(0xB8 | encode); | |
2539 emitLong(imm64); | |
2540 } | |
2541 | |
2542 public final void movdq(Register dst, Register src) { | |
2543 | |
2544 // table D-1 says MMX/SSE2 | |
2545 emitByte(0x66); | |
2546 | |
2547 if (dst.isFpu()) { | |
2548 assert dst.isFpu(); | |
2549 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2550 emitByte(0x0F); | |
2551 emitByte(0x6E); | |
2552 emitByte(0xC0 | encode); | |
2553 } else if (src.isFpu()) { | |
2554 | |
2555 // swap src/dst to get correct prefix | |
2556 int encode = prefixqAndEncode(src.encoding, dst.encoding); | |
2557 emitByte(0x0F); | |
2558 emitByte(0x7E); | |
2559 emitByte(0xC0 | encode); | |
2560 } else { | |
2561 throw new InternalError("should not reach here"); | |
2562 } | |
2563 } | |
2564 | |
2565 public final void movsbq(Register dst, Address src) { | |
2566 prefixq(src, dst); | |
2567 emitByte(0x0F); | |
2568 emitByte(0xBE); | |
2569 emitOperandHelper(dst, src); | |
2570 } | |
2571 | |
2572 public final void movsbq(Register dst, Register src) { | |
2573 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2574 emitByte(0x0F); | |
2575 emitByte(0xBE); | |
2576 emitByte(0xC0 | encode); | |
2577 } | |
2578 | |
2579 public final void movslq(Register dst, int imm32) { | |
2580 int encode = prefixqAndEncode(dst.encoding); | |
2581 emitByte(0xC7 | encode); | |
2582 emitInt(imm32); | |
2583 // dbx shows movslq(X86.rcx, 3) as movq $0x0000000049000000,(%X86.rbx) | |
2584 // and movslq(X86.r8, 3); as movl $0x0000000048000000,(%X86.rbx) | |
2585 // as a result we shouldn't use until tested at runtime... | |
2586 throw new InternalError("untested"); | |
2587 } | |
2588 | |
2589 public final void movslq(Address dst, int imm32) { | |
2590 prefixq(dst); | |
2591 emitByte(0xC7); | |
2592 emitOperandHelper(rax, dst); | |
2593 emitInt(imm32); | |
2594 } | |
2595 | |
2596 public final void movslq(Register dst, Address src) { | |
2597 prefixq(src, dst); | |
2598 emitByte(0x63); | |
2599 emitOperandHelper(dst, src); | |
2600 } | |
2601 | |
2602 public final void movslq(Register dst, Register src) { | |
2603 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2604 emitByte(0x63); | |
2605 emitByte(0xC0 | encode); | |
2606 } | |
2607 | |
2608 public final void movswq(Register dst, Address src) { | |
2609 prefixq(src, dst); | |
2610 emitByte(0x0F); | |
2611 emitByte(0xBF); | |
2612 emitOperandHelper(dst, src); | |
2613 } | |
2614 | |
2615 public final void movswq(Register dst, Register src) { | |
2616 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2617 emitByte(0x0F); | |
2618 emitByte(0xBF); | |
2619 emitByte(0xC0 | encode); | |
2620 } | |
2621 | |
2622 public final void movzbq(Register dst, Address src) { | |
2623 prefixq(src, dst); | |
2624 emitByte(0x0F); | |
2625 emitByte(0xB6); | |
2626 emitOperandHelper(dst, src); | |
2627 } | |
2628 | |
2629 public final void movzbq(Register dst, Register src) { | |
2630 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2631 emitByte(0x0F); | |
2632 emitByte(0xB6); | |
2633 emitByte(0xC0 | encode); | |
2634 } | |
2635 | |
2636 public final void movzwq(Register dst, Address src) { | |
2637 prefixq(src, dst); | |
2638 emitByte(0x0F); | |
2639 emitByte(0xB7); | |
2640 emitOperandHelper(dst, src); | |
2641 } | |
2642 | |
2643 public final void movzwq(Register dst, Register src) { | |
2644 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2645 emitByte(0x0F); | |
2646 emitByte(0xB7); | |
2647 emitByte(0xC0 | encode); | |
2648 } | |
2649 | |
2650 public final void negq(Register dst) { | |
2651 int encode = prefixqAndEncode(dst.encoding); | |
2652 emitByte(0xF7); | |
2653 emitByte(0xD8 | encode); | |
2654 } | |
2655 | |
2656 public final void notq(Register dst) { | |
2657 int encode = prefixqAndEncode(dst.encoding); | |
2658 emitByte(0xF7); | |
2659 emitByte(0xD0 | encode); | |
2660 } | |
2661 | |
2662 public final void orq(Address dst, int imm32) { | |
2663 prefixq(dst); | |
2664 emitByte(0x81); | |
2665 emitOperandHelper(rcx, dst); | |
2666 emitInt(imm32); | |
2667 } | |
2668 | |
2669 public final void orq(Register dst, int imm32) { | |
2670 prefixqAndEncode(dst.encoding); | |
2671 emitArith(0x81, 0xC8, dst, imm32); | |
2672 } | |
2673 | |
2674 public final void orq(Register dst, Address src) { | |
2675 prefixq(src, dst); | |
2676 emitByte(0x0B); | |
2677 emitOperandHelper(dst, src); | |
2678 } | |
2679 | |
2680 public final void orq(Register dst, Register src) { | |
2681 prefixqAndEncode(dst.encoding, src.encoding); | |
2682 emitArith(0x0B, 0xC0, dst, src); | |
2683 } | |
2684 | |
2685 public final void popq(Address dst) { | |
2686 prefixq(dst); | |
2687 emitByte(0x8F); | |
2688 emitOperandHelper(rax, dst); | |
2689 } | |
2690 | |
2691 public final void pushq(Address src) { | |
2692 prefixq(src); | |
2693 emitByte(0xFF); | |
2694 emitOperandHelper(rsi, src); | |
2695 } | |
2696 | |
2697 public final void rclq(Register dst, int imm8) { | |
2698 assert isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2699 int encode = prefixqAndEncode(dst.encoding); | |
2700 if (imm8 == 1) { | |
2701 emitByte(0xD1); | |
2702 emitByte(0xD0 | encode); | |
2703 } else { | |
2704 emitByte(0xC1); | |
2705 emitByte(0xD0 | encode); | |
2706 emitByte(imm8); | |
2707 } | |
2708 } | |
2709 | |
2710 public final void sarq(Register dst, int imm8) { | |
2711 assert isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2712 int encode = prefixqAndEncode(dst.encoding); | |
2713 if (imm8 == 1) { | |
2714 emitByte(0xD1); | |
2715 emitByte(0xF8 | encode); | |
2716 } else { | |
2717 emitByte(0xC1); | |
2718 emitByte(0xF8 | encode); | |
2719 emitByte(imm8); | |
2720 } | |
2721 } | |
2722 | |
2723 public final void sarq(Register dst) { | |
2724 int encode = prefixqAndEncode(dst.encoding); | |
2725 emitByte(0xD3); | |
2726 emitByte(0xF8 | encode); | |
2727 } | |
2728 | |
2729 public final void shlq(Register dst, int imm8) { | |
2730 assert isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2731 int encode = prefixqAndEncode(dst.encoding); | |
2732 if (imm8 == 1) { | |
2733 emitByte(0xD1); | |
2734 emitByte(0xE0 | encode); | |
2735 } else { | |
2736 emitByte(0xC1); | |
2737 emitByte(0xE0 | encode); | |
2738 emitByte(imm8); | |
2739 } | |
2740 } | |
2741 | |
2742 public final void shlq(Register dst) { | |
2743 int encode = prefixqAndEncode(dst.encoding); | |
2744 emitByte(0xD3); | |
2745 emitByte(0xE0 | encode); | |
2746 } | |
2747 | |
2748 public final void shrq(Register dst, int imm8) { | |
2749 assert isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2750 int encode = prefixqAndEncode(dst.encoding); | |
2751 emitByte(0xC1); | |
2752 emitByte(0xE8 | encode); | |
2753 emitByte(imm8); | |
2754 } | |
2755 | |
2756 public final void shrq(Register dst) { | |
2757 int encode = prefixqAndEncode(dst.encoding); | |
2758 emitByte(0xD3); | |
2759 emitByte(0xE8 | encode); | |
2760 } | |
2761 | |
2762 public final void sqrtsd(Register dst, Address src) { | |
2763 assert dst.isFpu(); | |
2764 | |
2765 emitByte(0xF2); | |
2766 prefix(src, dst); | |
2767 emitByte(0x0F); | |
2768 emitByte(0x51); | |
2769 emitOperandHelper(dst, src); | |
2770 } | |
2771 | |
2772 public final void subq(Address dst, int imm32) { | |
2773 prefixq(dst); | |
2774 if (isByte(imm32)) { | |
2775 emitByte(0x83); | |
2776 emitOperandHelper(rbp, dst); | |
2777 emitByte(imm32 & 0xFF); | |
2778 } else { | |
2779 emitByte(0x81); | |
2780 emitOperandHelper(rbp, dst); | |
2781 emitInt(imm32); | |
2782 } | |
2783 } | |
2784 | |
2785 public final void subq(Register dst, int imm32) { | |
2786 prefixqAndEncode(dst.encoding); | |
2787 emitArith(0x81, 0xE8, dst, imm32); | |
2788 } | |
2789 | |
2790 public final void subq(Address dst, Register src) { | |
2791 prefixq(dst, src); | |
2792 emitByte(0x29); | |
2793 emitOperandHelper(src, dst); | |
2794 } | |
2795 | |
2796 public final void subq(Register dst, Address src) { | |
2797 prefixq(src, dst); | |
2798 emitByte(0x2B); | |
2799 emitOperandHelper(dst, src); | |
2800 } | |
2801 | |
2802 public final void subq(Register dst, Register src) { | |
2803 prefixqAndEncode(dst.encoding, src.encoding); | |
2804 emitArith(0x2B, 0xC0, dst, src); | |
2805 } | |
2806 | |
2807 public final void testq(Register dst, int imm32) { | |
2808 // not using emitArith because test | |
2809 // doesn't support sign-extension of | |
2810 // 8bit operands | |
2811 int encode = dst.encoding; | |
2812 if (encode == 0) { | |
2813 emitByte(Prefix.REXW); | |
2814 emitByte(0xA9); | |
2815 } else { | |
2816 encode = prefixqAndEncode(encode); | |
2817 emitByte(0xF7); | |
2818 emitByte(0xC0 | encode); | |
2819 } | |
2820 emitInt(imm32); | |
2821 } | |
2822 | |
2823 public final void testq(Register dst, Register src) { | |
2824 prefixqAndEncode(dst.encoding, src.encoding); | |
2825 emitArith(0x85, 0xC0, dst, src); | |
2826 } | |
2827 | |
2828 public final void xaddq(Address dst, Register src) { | |
2829 prefixq(dst, src); | |
2830 emitByte(0x0F); | |
2831 emitByte(0xC1); | |
2832 emitOperandHelper(src, dst); | |
2833 } | |
2834 | |
2835 public final void xchgq(Register dst, Address src) { | |
2836 prefixq(src, dst); | |
2837 emitByte(0x87); | |
2838 emitOperandHelper(dst, src); | |
2839 } | |
2840 | |
2841 public final void xchgq(Register dst, Register src) { | |
2842 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2843 emitByte(0x87); | |
2844 emitByte(0xc0 | encode); | |
2845 } | |
2846 | |
2847 public final void xorq(Register dst, int imm32) { | |
2848 prefixqAndEncode(dst.encoding); | |
2849 emitArith(0x81, 0xF0, dst, imm32); | |
2850 } | |
2851 | |
2852 public final void xorq(Register dst, Register src) { | |
2853 prefixqAndEncode(dst.encoding, src.encoding); | |
2854 emitArith(0x33, 0xC0, dst, src); | |
2855 } | |
2856 | |
2857 public final void xorq(Register dst, Address src) { | |
2858 | |
2859 prefixq(src, dst); | |
2860 emitByte(0x33); | |
2861 emitOperandHelper(dst, src); | |
2862 | |
2863 } | |
2864 | |
2865 public final void membar(int barriers) { | |
2866 if (target.isMP) { | |
2867 // We only have to handle StoreLoad | |
2868 if ((barriers & STORE_LOAD) != 0) { | |
2869 // All usable chips support "locked" instructions which suffice | |
2870 // as barriers, and are much faster than the alternative of | |
2871 // using cpuid instruction. We use here a locked add [rsp],0. | |
2872 // This is conveniently otherwise a no-op except for blowing | |
2873 // flags. | |
2874 // Any change to this code may need to revisit other places in | |
2875 // the code where this idiom is used, in particular the | |
2876 // orderAccess code. | |
2877 lock(); | |
2878 addl(new Address(Word, RSP, 0), 0); // Assert the lock# signal here | |
2879 } | |
2880 } | |
2881 } | |
2882 | |
2883 @Override | |
2884 protected final void patchJumpTarget(int branch, int branchTarget) { | |
2885 int op = codeBuffer.getByte(branch); | |
2886 assert op == 0xE8 // call | |
2887 || op == 0x00 // jump table entry | |
2888 || op == 0xE9 // jmp | |
2889 || op == 0xEB // short jmp | |
2890 || (op & 0xF0) == 0x70 // short jcc | |
2891 || op == 0x0F && (codeBuffer.getByte(branch + 1) & 0xF0) == 0x80 // jcc | |
2892 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op; | |
2893 | |
2894 if (op == 0x00) { | |
2895 int offsetToJumpTableBase = codeBuffer.getShort(branch + 1); | |
2896 int jumpTableBase = branch - offsetToJumpTableBase; | |
2897 int imm32 = branchTarget - jumpTableBase; | |
2898 codeBuffer.emitInt(imm32, branch); | |
2899 } else if (op == 0xEB || (op & 0xF0) == 0x70) { | |
2900 | |
2901 // short offset operators (jmp and jcc) | |
2902 int imm8 = branchTarget - (branch + 2); | |
2903 codeBuffer.emitByte(imm8, branch + 1); | |
2904 | |
2905 } else { | |
2906 | |
2907 int off = 1; | |
2908 if (op == 0x0F) { | |
2909 off = 2; | |
2910 } | |
2911 | |
2912 int imm32 = branchTarget - (branch + 4 + off); | |
2913 codeBuffer.emitInt(imm32, branch + off); | |
2914 } | |
2915 } | |
2916 | |
2917 public void nullCheck(Register r) { | |
2918 testl(AMD64.rax, new Address(Word, r.asValue(Word), 0)); | |
2919 } | |
2920 | |
2921 @Override | |
2922 public void align(int modulus) { | |
2923 if (codeBuffer.position() % modulus != 0) { | |
2924 nop(modulus - (codeBuffer.position() % modulus)); | |
2925 } | |
2926 } | |
2927 | |
2928 public void pushfq() { | |
2929 emitByte(0x9c); | |
2930 } | |
2931 | |
2932 public void popfq() { | |
2933 emitByte(0x9D); | |
2934 } | |
2935 | |
2936 /** | |
2937 * Makes sure that a subsequent {@linkplain #call} does not fail the alignment check. | |
2938 */ | |
2939 public final void alignForPatchableDirectCall() { | |
2940 int dispStart = codeBuffer.position() + 1; | |
2941 int mask = target.wordSize - 1; | |
2942 if ((dispStart & ~mask) != ((dispStart + 3) & ~mask)) { | |
2943 nop(target.wordSize - (dispStart & mask)); | |
2944 assert ((codeBuffer.position() + 1) & mask) == 0; | |
2945 } | |
2946 } | |
2947 | |
2948 /** | |
2949 * Emits a direct call instruction. Note that the actual call target is not specified, because all calls | |
2950 * need patching anyway. Therefore, 0 is emitted as the call target, and the user is responsible | |
2951 * to add the call address to the appropriate patching tables. | |
2952 */ | |
2953 public final void call() { | |
2954 emitByte(0xE8); | |
2955 emitInt(0); | |
2956 } | |
2957 | |
2958 public final void call(Register src) { | |
2959 int encode = prefixAndEncode(src.encoding); | |
2960 emitByte(0xFF); | |
2961 emitByte(0xD0 | encode); | |
2962 } | |
2963 | |
2964 public void int3() { | |
2965 emitByte(0xCC); | |
2966 } | |
2967 | |
2968 public void enter(short imm16, byte imm8) { | |
2969 emitByte(0xC8); | |
2970 // appended: | |
2971 emitByte(imm16 & 0xff); | |
2972 emitByte((imm16 >> 8) & 0xff); | |
2973 emitByte(imm8); | |
2974 } | |
2975 | |
2976 private void emitx87(int b1, int b2, int i) { | |
2977 assert 0 <= i && i < 8 : "illegal stack offset"; | |
2978 emitByte(b1); | |
2979 emitByte(b2 + i); | |
2980 } | |
2981 | |
2982 public void fld(Address src) { | |
2983 emitByte(0xDD); | |
2984 emitOperandHelper(rax, src); | |
2985 } | |
2986 | |
2987 public void fld(int i) { | |
2988 emitx87(0xD9, 0xC0, i); | |
2989 } | |
2990 | |
2991 public void fldln2() { | |
2992 emitByte(0xD9); | |
2993 emitByte(0xED); | |
2994 } | |
2995 | |
2996 public void fldlg2() { | |
2997 emitByte(0xD9); | |
2998 emitByte(0xEC); | |
2999 } | |
3000 | |
3001 public void fyl2x() { | |
3002 emitByte(0xD9); | |
3003 emitByte(0xF1); | |
3004 } | |
3005 | |
3006 public void fstp(Address src) { | |
3007 emitByte(0xDD); | |
3008 emitOperandHelper(rbx, src); | |
3009 } | |
3010 | |
3011 public void fsin() { | |
3012 emitByte(0xD9); | |
3013 emitByte(0xFE); | |
3014 } | |
3015 | |
3016 public void fcos() { | |
3017 emitByte(0xD9); | |
3018 emitByte(0xFF); | |
3019 } | |
3020 | |
3021 public void fptan() { | |
3022 emitByte(0xD9); | |
3023 emitByte(0xF2); | |
3024 } | |
3025 | |
3026 public void fstp(int i) { | |
3027 emitx87(0xDD, 0xD8, i); | |
3028 } | |
3029 | |
3030 @Override | |
3031 public void bangStack(int disp) { | |
3032 movq(new Address(target.wordKind, AMD64.RSP, -disp), AMD64.rax); | |
3033 } | |
3034 } |