Mercurial > hg > graal-jvmci-8
comparison graal/Compiler/src/com/sun/c1x/target/amd64/AMD64Assembler.java @ 2507:9ec15d6914ca
Pull over of compiler from maxine repository.
author | Thomas Wuerthinger <thomas@wuerthinger.net> |
---|---|
date | Wed, 27 Apr 2011 11:43:22 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2506:4a3bf8a5bf41 | 2507:9ec15d6914ca |
---|---|
1 /* | |
2 * Copyright (c) 2009, 2011, Oracle and/or its affiliates. All rights reserved. | |
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
4 * | |
5 * This code is free software; you can redistribute it and/or modify it | |
6 * under the terms of the GNU General Public License version 2 only, as | |
7 * published by the Free Software Foundation. | |
8 * | |
9 * This code is distributed in the hope that it will be useful, but WITHOUT | |
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
12 * version 2 for more details (a copy is included in the LICENSE file that | |
13 * accompanied this code). | |
14 * | |
15 * You should have received a copy of the GNU General Public License version | |
16 * 2 along with this work; if not, write to the Free Software Foundation, | |
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | |
18 * | |
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | |
20 * or visit www.oracle.com if you need additional information or have any | |
21 * questions. | |
22 */ | |
23 package com.sun.c1x.target.amd64; | |
24 | |
25 import static com.sun.c1x.target.amd64.AMD64.*; | |
26 import static com.sun.cri.bytecode.Bytecodes.MemoryBarriers.*; | |
27 import static com.sun.cri.ci.CiKind.*; | |
28 | |
29 import com.sun.c1x.*; | |
30 import com.sun.c1x.asm.*; | |
31 import com.sun.c1x.lir.*; | |
32 import com.sun.c1x.util.*; | |
33 import com.sun.cri.ci.*; | |
34 import com.sun.cri.ri.*; | |
35 | |
36 /** | |
37 * This class implements an assembler that can encode most X86 instructions. | |
38 * | |
39 * @author Thomas Wuerthinger | |
40 */ | |
41 public class AMD64Assembler extends AbstractAssembler { | |
42 | |
43 private static final int MinEncodingNeedsRex = 8; | |
44 | |
/**
 * The 4-bit x86 condition codes that are OR-ed into the opcodes of conditional jumps and
 * conditional moves. Several constants are aliases that share the same code.
 */
public enum ConditionFlag {
    // ZF-based conditions (the two pairs are aliases of each other)
    zero(0x4),
    notZero(0x5),
    equal(0x4),
    notEqual(0x5),

    // signed comparisons
    less(0xc),
    lessEqual(0xe),
    greater(0xf),
    greaterEqual(0xd),

    // unsigned comparisons
    below(0x2),
    belowEqual(0x6),
    above(0x7),
    aboveEqual(0x3),

    // individual flags
    overflow(0x0),
    noOverflow(0x1),
    carrySet(0x2),
    carryClear(0x3),
    negative(0x8),
    positive(0x9),
    parity(0xa),
    noParity(0xb);

    /** The condition code as it appears in the low nibble of the opcode. */
    public final int value;

    ConditionFlag(int code) {
        this.value = code;
    }

    /** All constants in declaration order, computed once when the enum is initialized. */
    public static final ConditionFlag[] values = values();
}
78 | |
79 /** | |
80 * Constants for X86 prefix bytes. | |
81 */ | |
82 private class Prefix { | |
83 private static final int REX = 0x40; | |
84 private static final int REXB = 0x41; | |
85 private static final int REXX = 0x42; | |
86 private static final int REXXB = 0x43; | |
87 private static final int REXR = 0x44; | |
88 private static final int REXRB = 0x45; | |
89 private static final int REXRX = 0x46; | |
90 private static final int REXRXB = 0x47; | |
91 private static final int REXW = 0x48; | |
92 private static final int REXWB = 0x49; | |
93 private static final int REXWX = 0x4A; | |
94 private static final int REXWXB = 0x4B; | |
95 private static final int REXWR = 0x4C; | |
96 private static final int REXWRB = 0x4D; | |
97 private static final int REXWRX = 0x4E; | |
98 private static final int REXWRXB = 0x4F; | |
99 } | |
100 | |
101 /** | |
102 * The register to which {@link CiRegister#Frame} and {@link CiRegister#CallerFrame} are bound. | |
103 */ | |
104 public final CiRegister frameRegister; | |
105 | |
/**
 * Creates an assembler for the AMD64 architecture.
 *
 * @param target the compilation target, handed to the superclass constructor
 * @param registerConfig supplies the physical register that {@link CiRegister#Frame} and
 *            {@link CiRegister#CallerFrame} are bound to. May be {@code null} if this assembler
 *            will never assemble instructions that use these logical registers; in that case
 *            {@link #frameRegister} is {@code null} as well.
 */
public AMD64Assembler(CiTarget target, RiRegisterConfig registerConfig) {
    super(target);
    if (registerConfig == null) {
        this.frameRegister = null;
    } else {
        this.frameRegister = registerConfig.getFrameRegister();
    }
}
117 | |
118 private static int encode(CiRegister r) { | |
119 assert r.encoding < 16 && r.encoding >= 0 : "encoding out of range: " + r.encoding; | |
120 return r.encoding & 0x7; | |
121 } | |
122 | |
123 private void emitArithB(int op1, int op2, CiRegister dst, int imm8) { | |
124 assert dst.isByte() : "must have byte register"; | |
125 assert Util.isUByte(op1) && Util.isUByte(op2) : "wrong opcode"; | |
126 assert Util.isUByte(imm8) : "not a byte"; | |
127 assert (op1 & 0x01) == 0 : "should be 8bit operation"; | |
128 emitByte(op1); | |
129 emitByte(op2 | encode(dst)); | |
130 emitByte(imm8); | |
131 } | |
132 | |
133 private void emitArith(int op1, int op2, CiRegister dst, int imm32) { | |
134 assert Util.isUByte(op1) && Util.isUByte(op2) : "wrong opcode"; | |
135 assert (op1 & 0x01) == 1 : "should be 32bit operation"; | |
136 assert (op1 & 0x02) == 0 : "sign-extension bit should not be set"; | |
137 if (Util.isByte(imm32)) { | |
138 emitByte(op1 | 0x02); // set sign bit | |
139 emitByte(op2 | encode(dst)); | |
140 emitByte(imm32 & 0xFF); | |
141 } else { | |
142 emitByte(op1); | |
143 emitByte(op2 | encode(dst)); | |
144 emitInt(imm32); | |
145 } | |
146 } | |
147 | |
148 // immediate-to-memory forms | |
149 private void emitArithOperand(int op1, CiRegister rm, CiAddress adr, int imm32) { | |
150 assert (op1 & 0x01) == 1 : "should be 32bit operation"; | |
151 assert (op1 & 0x02) == 0 : "sign-extension bit should not be set"; | |
152 if (Util.isByte(imm32)) { | |
153 emitByte(op1 | 0x02); // set sign bit | |
154 emitOperandHelper(rm, adr); | |
155 emitByte(imm32 & 0xFF); | |
156 } else { | |
157 emitByte(op1); | |
158 emitOperandHelper(rm, adr); | |
159 emitInt(imm32); | |
160 } | |
161 } | |
162 | |
163 private void emitArith(int op1, int op2, CiRegister dst, CiRegister src) { | |
164 assert Util.isUByte(op1) && Util.isUByte(op2) : "wrong opcode"; | |
165 emitByte(op1); | |
166 emitByte(op2 | encode(dst) << 3 | encode(src)); | |
167 } | |
168 | |
169 private void emitOperandHelper(CiRegister reg, CiAddress addr) { | |
170 CiRegister base = addr.base(); | |
171 | |
172 CiRegister index = addr.index(); | |
173 CiAddress.Scale scale = addr.scale; | |
174 int disp = addr.displacement; | |
175 | |
176 if (base == CiRegister.Frame) { | |
177 assert frameRegister != null : "cannot use register " + CiRegister.Frame + " in assembler with null register configuration"; | |
178 base = frameRegister; | |
179 } else if (base == CiRegister.CallerFrame) { | |
180 assert frameRegister != null : "cannot use register " + CiRegister.Frame + " in assembler with null register configuration"; | |
181 base = frameRegister; | |
182 disp += targetMethod.frameSize() + 8; | |
183 } | |
184 | |
185 // Encode the registers as needed in the fields they are used in | |
186 | |
187 assert reg != CiRegister.None; | |
188 | |
189 int regenc = encode(reg) << 3; | |
190 int indexenc = index.isValid() ? encode(index) << 3 : 0; | |
191 int baseenc = base.isValid() ? encode(base) : 0; | |
192 | |
193 if (base.isValid()) { | |
194 if (index.isValid()) { | |
195 // [base + indexscale + disp] | |
196 if (disp == 0 && base != rbp && (base != r13)) { | |
197 // [base + indexscale] | |
198 // [00 reg 100][ss index base] | |
199 assert index != rsp : "illegal addressing mode"; | |
200 emitByte(0x04 | regenc); | |
201 emitByte(scale.log2 << 6 | indexenc | baseenc); | |
202 } else if (Util.isByte(disp)) { | |
203 // [base + indexscale + imm8] | |
204 // [01 reg 100][ss index base] imm8 | |
205 assert index != rsp : "illegal addressing mode"; | |
206 emitByte(0x44 | regenc); | |
207 emitByte(scale.log2 << 6 | indexenc | baseenc); | |
208 emitByte(disp & 0xFF); | |
209 } else { | |
210 // [base + indexscale + disp32] | |
211 // [10 reg 100][ss index base] disp32 | |
212 assert index != rsp : "illegal addressing mode"; | |
213 emitByte(0x84 | regenc); | |
214 emitByte(scale.log2 << 6 | indexenc | baseenc); | |
215 emitInt(disp); | |
216 } | |
217 } else if (base == rsp || (base == r12)) { | |
218 // [rsp + disp] | |
219 if (disp == 0) { | |
220 // [rsp] | |
221 // [00 reg 100][00 100 100] | |
222 emitByte(0x04 | regenc); | |
223 emitByte(0x24); | |
224 } else if (Util.isByte(disp)) { | |
225 // [rsp + imm8] | |
226 // [01 reg 100][00 100 100] disp8 | |
227 emitByte(0x44 | regenc); | |
228 emitByte(0x24); | |
229 emitByte(disp & 0xFF); | |
230 } else { | |
231 // [rsp + imm32] | |
232 // [10 reg 100][00 100 100] disp32 | |
233 emitByte(0x84 | regenc); | |
234 emitByte(0x24); | |
235 emitInt(disp); | |
236 } | |
237 } else { | |
238 // [base + disp] | |
239 assert base != rsp && (base != r12) : "illegal addressing mode"; | |
240 if (disp == 0 && base != rbp && (base != r13)) { | |
241 // [base] | |
242 // [00 reg base] | |
243 emitByte(0x00 | regenc | baseenc); | |
244 } else if (Util.isByte(disp)) { | |
245 // [base + disp8] | |
246 // [01 reg base] disp8 | |
247 emitByte(0x40 | regenc | baseenc); | |
248 emitByte(disp & 0xFF); | |
249 } else { | |
250 // [base + disp32] | |
251 // [10 reg base] disp32 | |
252 emitByte(0x80 | regenc | baseenc); | |
253 emitInt(disp); | |
254 } | |
255 } | |
256 } else { | |
257 if (index.isValid()) { | |
258 // [indexscale + disp] | |
259 // [00 reg 100][ss index 101] disp32 | |
260 assert index != rsp : "illegal addressing mode"; | |
261 emitByte(0x04 | regenc); | |
262 emitByte(scale.log2 << 6 | indexenc | 0x05); | |
263 emitInt(disp); | |
264 } else if (base == CiRegister.InstructionRelative) { | |
265 // Adjust disp which is currently relative to the start of the instruction | |
266 int instrStart = codeBuffer.mark(); | |
267 assert instrStart >= 0; | |
268 int instrSize = (codeBuffer.position() - instrStart) + 5; | |
269 disp = disp - instrSize; | |
270 // [00 000 101] disp32 | |
271 emitByte(0x05 | regenc); | |
272 emitInt(disp); | |
273 } else if (addr == CiAddress.Placeholder) { | |
274 // [00 000 101] disp32 | |
275 emitByte(0x05 | regenc); | |
276 emitInt(0); | |
277 } else { | |
278 // [disp] ABSOLUTE | |
279 // [00 reg 100][00 100 101] disp32 | |
280 emitByte(0x04 | regenc); | |
281 emitByte(0x25); | |
282 emitInt(disp); | |
283 } | |
284 } | |
285 } | |
286 | |
287 public final void addl(CiAddress dst, int imm32) { | |
288 prefix(dst); | |
289 emitArithOperand(0x81, rax, dst, imm32); | |
290 } | |
291 | |
292 public final void addl(CiAddress dst, CiRegister src) { | |
293 prefix(dst, src); | |
294 emitByte(0x01); | |
295 emitOperandHelper(src, dst); | |
296 } | |
297 | |
298 public final void addl(CiRegister dst, int imm32) { | |
299 prefix(dst); | |
300 emitArith(0x81, 0xC0, dst, imm32); | |
301 } | |
302 | |
303 public final void addl(CiRegister dst, CiAddress src) { | |
304 prefix(src, dst); | |
305 emitByte(0x03); | |
306 emitOperandHelper(dst, src); | |
307 } | |
308 | |
309 public final void addl(CiRegister dst, CiRegister src) { | |
310 prefixAndEncode(dst.encoding, src.encoding); | |
311 emitArith(0x03, 0xC0, dst, src); | |
312 } | |
313 | |
314 private void addrNop4() { | |
315 // 4 bytes: NOP DWORD PTR [EAX+0] | |
316 emitByte(0x0F); | |
317 emitByte(0x1F); | |
318 emitByte(0x40); // emitRm(cbuf, 0x1, EAXEnc, EAXEnc); | |
319 emitByte(0); // 8-bits offset (1 byte) | |
320 } | |
321 | |
322 private void addrNop5() { | |
323 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset | |
324 emitByte(0x0F); | |
325 emitByte(0x1F); | |
326 emitByte(0x44); // emitRm(cbuf, 0x1, EAXEnc, 0x4); | |
327 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); | |
328 emitByte(0); // 8-bits offset (1 byte) | |
329 } | |
330 | |
331 private void addrNop7() { | |
332 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset | |
333 emitByte(0x0F); | |
334 emitByte(0x1F); | |
335 emitByte(0x80); // emitRm(cbuf, 0x2, EAXEnc, EAXEnc); | |
336 emitInt(0); // 32-bits offset (4 bytes) | |
337 } | |
338 | |
339 private void addrNop8() { | |
340 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset | |
341 emitByte(0x0F); | |
342 emitByte(0x1F); | |
343 emitByte(0x84); // emitRm(cbuf, 0x2, EAXEnc, 0x4); | |
344 emitByte(0x00); // emitRm(cbuf, 0x0, EAXEnc, EAXEnc); | |
345 emitInt(0); // 32-bits offset (4 bytes) | |
346 } | |
347 | |
348 public final void addsd(CiRegister dst, CiRegister src) { | |
349 assert dst.isFpu() && src.isFpu(); | |
350 emitByte(0xF2); | |
351 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
352 emitByte(0x0F); | |
353 emitByte(0x58); | |
354 emitByte(0xC0 | encode); | |
355 } | |
356 | |
357 public final void addsd(CiRegister dst, CiAddress src) { | |
358 assert dst.isFpu(); | |
359 emitByte(0xF2); | |
360 prefix(src, dst); | |
361 emitByte(0x0F); | |
362 emitByte(0x58); | |
363 emitOperandHelper(dst, src); | |
364 } | |
365 | |
366 public final void addss(CiRegister dst, CiRegister src) { | |
367 assert dst.isFpu() && src.isFpu(); | |
368 emitByte(0xF3); | |
369 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
370 emitByte(0x0F); | |
371 emitByte(0x58); | |
372 emitByte(0xC0 | encode); | |
373 } | |
374 | |
375 public final void addss(CiRegister dst, CiAddress src) { | |
376 assert dst.isFpu(); | |
377 emitByte(0xF3); | |
378 prefix(src, dst); | |
379 emitByte(0x0F); | |
380 emitByte(0x58); | |
381 emitOperandHelper(dst, src); | |
382 } | |
383 | |
384 public final void andl(CiRegister dst, int imm32) { | |
385 prefix(dst); | |
386 emitArith(0x81, 0xE0, dst, imm32); | |
387 } | |
388 | |
389 public final void andl(CiRegister dst, CiAddress src) { | |
390 prefix(src, dst); | |
391 emitByte(0x23); | |
392 emitOperandHelper(dst, src); | |
393 } | |
394 | |
395 public final void andl(CiRegister dst, CiRegister src) { | |
396 prefixAndEncode(dst.encoding, src.encoding); | |
397 emitArith(0x23, 0xC0, dst, src); | |
398 } | |
399 | |
400 public final void andpd(CiRegister dst, CiAddress src) { | |
401 assert dst.isFpu(); | |
402 emitByte(0x66); | |
403 prefix(src, dst); | |
404 emitByte(0x0F); | |
405 emitByte(0x54); | |
406 emitOperandHelper(dst, src); | |
407 } | |
408 | |
409 public final void bsfq(CiRegister dst, CiRegister src) { | |
410 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
411 emitByte(0x0F); | |
412 emitByte(0xBC); | |
413 emitByte(0xC0 | encode); | |
414 } | |
415 | |
416 public final void bsfq(CiRegister dst, CiAddress src) { | |
417 prefixq(src, dst); | |
418 emitByte(0xBC); | |
419 emitOperandHelper(dst, src); | |
420 } | |
421 | |
422 public final void bsrq(CiRegister dst, CiRegister src) { | |
423 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
424 emitByte(0x0F); | |
425 emitByte(0xBD); | |
426 emitByte(0xC0 | encode); | |
427 } | |
428 | |
429 | |
430 public final void bsrq(CiRegister dst, CiAddress src) { | |
431 prefixq(src, dst); | |
432 emitByte(0xBD); | |
433 emitOperandHelper(dst, src); | |
434 } | |
435 | |
436 public final void bswapl(CiRegister reg) { // bswap | |
437 int encode = prefixAndEncode(reg.encoding); | |
438 emitByte(0x0F); | |
439 emitByte(0xC8 | encode); | |
440 } | |
441 | |
442 public final void btli(CiAddress src, int imm8) { | |
443 prefixq(src); | |
444 emitByte(0x0F); | |
445 emitByte(0xBA); | |
446 emitOperandHelper(rsp, src); | |
447 emitByte(imm8); | |
448 } | |
449 | |
450 public final void nativeCall(CiRegister dst, String symbol, LIRDebugInfo info) { | |
451 int before = codeBuffer.position(); | |
452 int encode = prefixAndEncode(dst.encoding); | |
453 emitByte(0xFF); | |
454 emitByte(0xD0 | encode); | |
455 int after = codeBuffer.position(); | |
456 recordIndirectCall(before, after, symbol, info); | |
457 recordExceptionHandlers(after, info); | |
458 } | |
459 | |
460 public final int directCall(Object target, LIRDebugInfo info) { | |
461 int before = codeBuffer.position(); | |
462 emitByte(0xE8); | |
463 emitInt(0); | |
464 int after = codeBuffer.position(); | |
465 recordDirectCall(before, after, target, info); | |
466 recordExceptionHandlers(after, info); | |
467 return before; | |
468 } | |
469 | |
470 public final int directJmp(Object target) { | |
471 int before = codeBuffer.position(); | |
472 emitByte(0xE9); | |
473 emitInt(0); | |
474 int after = codeBuffer.position(); | |
475 recordDirectCall(before, after, target, null); | |
476 return before; | |
477 } | |
478 | |
479 public final int indirectCall(CiRegister dst, Object target, LIRDebugInfo info) { | |
480 int before = codeBuffer.position(); | |
481 int encode = prefixAndEncode(dst.encoding); | |
482 | |
483 emitByte(0xFF); | |
484 emitByte(0xD0 | encode); | |
485 int after = codeBuffer.position(); | |
486 recordIndirectCall(before, after, target, info); | |
487 recordExceptionHandlers(after, info); | |
488 return before; | |
489 } | |
490 | |
491 public final void cdql() { | |
492 emitByte(0x99); | |
493 } | |
494 | |
495 public final void cmovl(ConditionFlag cc, CiRegister dst, CiRegister src) { | |
496 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
497 emitByte(0x0F); | |
498 emitByte(0x40 | cc.value); | |
499 emitByte(0xC0 | encode); | |
500 } | |
501 | |
502 public final void cmovl(ConditionFlag cc, CiRegister dst, CiAddress src) { | |
503 prefix(src, dst); | |
504 emitByte(0x0F); | |
505 emitByte(0x40 | cc.value); | |
506 emitOperandHelper(dst, src); | |
507 } | |
508 | |
509 public final void cmpb(CiAddress dst, int imm8) { | |
510 prefix(dst); | |
511 emitByte(0x80); | |
512 emitOperandHelper(rdi, dst); | |
513 emitByte(imm8); | |
514 } | |
515 | |
516 public final void cmpl(CiAddress dst, int imm32) { | |
517 prefix(dst); | |
518 emitByte(0x81); | |
519 emitOperandHelper(rdi, dst); | |
520 emitInt(imm32); | |
521 } | |
522 | |
523 public final void cmpl(CiRegister dst, int imm32) { | |
524 prefix(dst); | |
525 emitArith(0x81, 0xF8, dst, imm32); | |
526 } | |
527 | |
528 public final void cmpl(CiRegister dst, CiRegister src) { | |
529 prefixAndEncode(dst.encoding, src.encoding); | |
530 emitArith(0x3B, 0xC0, dst, src); | |
531 } | |
532 | |
533 public final void cmpl(CiRegister dst, CiAddress src) { | |
534 prefix(src, dst); | |
535 emitByte(0x3B); | |
536 emitOperandHelper(dst, src); | |
537 } | |
538 | |
539 // The 32-bit cmpxchg compares the value at adr with the contents of X86.rax, | |
540 // and stores reg into adr if so; otherwise, the value at adr is loaded into X86.rax,. | |
541 // The ZF is set if the compared values were equal, and cleared otherwise. | |
542 public final void cmpxchgl(CiRegister reg, CiAddress adr) { // cmpxchg | |
543 if ((C1XOptions.Atomics & 2) != 0) { | |
544 // caveat: no instructionmark, so this isn't relocatable. | |
545 // Emit a synthetic, non-atomic, CAS equivalent. | |
546 // Beware. The synthetic form sets all ICCs, not just ZF. | |
547 // cmpxchg r,[m] is equivalent to X86.rax, = CAS (m, X86.rax, r) | |
548 cmpl(rax, adr); | |
549 movl(rax, adr); | |
550 if (reg != rax) { | |
551 Label l = new Label(); | |
552 jcc(ConditionFlag.notEqual, l); | |
553 movl(adr, reg); | |
554 bind(l); | |
555 } | |
556 } else { | |
557 | |
558 prefix(adr, reg); | |
559 emitByte(0x0F); | |
560 emitByte(0xB1); | |
561 emitOperandHelper(reg, adr); | |
562 } | |
563 } | |
564 | |
565 public final void comisd(CiRegister dst, CiAddress src) { | |
566 assert dst.isFpu(); | |
567 // NOTE: dbx seems to decode this as comiss even though the | |
568 // 0x66 is there. Strangly ucomisd comes out correct | |
569 emitByte(0x66); | |
570 comiss(dst, src); | |
571 } | |
572 | |
573 public final void comiss(CiRegister dst, CiAddress src) { | |
574 assert dst.isFpu(); | |
575 | |
576 prefix(src, dst); | |
577 emitByte(0x0F); | |
578 emitByte(0x2F); | |
579 emitOperandHelper(dst, src); | |
580 } | |
581 | |
582 public final void cvtdq2pd(CiRegister dst, CiRegister src) { | |
583 assert dst.isFpu(); | |
584 assert src.isFpu(); | |
585 | |
586 emitByte(0xF3); | |
587 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
588 emitByte(0x0F); | |
589 emitByte(0xE6); | |
590 emitByte(0xC0 | encode); | |
591 } | |
592 | |
593 public final void cvtdq2ps(CiRegister dst, CiRegister src) { | |
594 assert dst.isFpu(); | |
595 assert src.isFpu(); | |
596 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
597 emitByte(0x0F); | |
598 emitByte(0x5B); | |
599 emitByte(0xC0 | encode); | |
600 } | |
601 | |
602 public final void cvtsd2ss(CiRegister dst, CiRegister src) { | |
603 assert dst.isFpu(); | |
604 assert src.isFpu(); | |
605 emitByte(0xF2); | |
606 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
607 emitByte(0x0F); | |
608 emitByte(0x5A); | |
609 emitByte(0xC0 | encode); | |
610 } | |
611 | |
612 public final void cvtsi2sdl(CiRegister dst, CiRegister src) { | |
613 assert dst.isFpu(); | |
614 emitByte(0xF2); | |
615 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
616 emitByte(0x0F); | |
617 emitByte(0x2A); | |
618 emitByte(0xC0 | encode); | |
619 } | |
620 | |
621 public final void cvtsi2ssl(CiRegister dst, CiRegister src) { | |
622 assert dst.isFpu(); | |
623 emitByte(0xF3); | |
624 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
625 emitByte(0x0F); | |
626 emitByte(0x2A); | |
627 emitByte(0xC0 | encode); | |
628 } | |
629 | |
630 public final void cvtss2sd(CiRegister dst, CiRegister src) { | |
631 assert dst.isFpu(); | |
632 assert src.isFpu(); | |
633 emitByte(0xF3); | |
634 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
635 emitByte(0x0F); | |
636 emitByte(0x5A); | |
637 emitByte(0xC0 | encode); | |
638 } | |
639 | |
640 public final void cvttsd2sil(CiRegister dst, CiRegister src) { | |
641 assert src.isFpu(); | |
642 emitByte(0xF2); | |
643 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
644 emitByte(0x0F); | |
645 emitByte(0x2C); | |
646 emitByte(0xC0 | encode); | |
647 } | |
648 | |
649 public final void cvttss2sil(CiRegister dst, CiRegister src) { | |
650 assert src.isFpu(); | |
651 emitByte(0xF3); | |
652 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
653 emitByte(0x0F); | |
654 emitByte(0x2C); | |
655 emitByte(0xC0 | encode); | |
656 } | |
657 | |
658 public final void decl(CiAddress dst) { | |
659 // Don't use it directly. Use Macrodecrement() instead. | |
660 prefix(dst); | |
661 emitByte(0xFF); | |
662 emitOperandHelper(rcx, dst); | |
663 } | |
664 | |
665 public final void divsd(CiRegister dst, CiAddress src) { | |
666 assert dst.isFpu(); | |
667 emitByte(0xF2); | |
668 prefix(src, dst); | |
669 emitByte(0x0F); | |
670 emitByte(0x5E); | |
671 emitOperandHelper(dst, src); | |
672 } | |
673 | |
674 public final void divsd(CiRegister dst, CiRegister src) { | |
675 assert dst.isFpu(); | |
676 assert src.isFpu(); | |
677 emitByte(0xF2); | |
678 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
679 emitByte(0x0F); | |
680 emitByte(0x5E); | |
681 emitByte(0xC0 | encode); | |
682 } | |
683 | |
684 public final void divss(CiRegister dst, CiAddress src) { | |
685 assert dst.isFpu(); | |
686 emitByte(0xF3); | |
687 prefix(src, dst); | |
688 emitByte(0x0F); | |
689 emitByte(0x5E); | |
690 emitOperandHelper(dst, src); | |
691 } | |
692 | |
693 public final void divss(CiRegister dst, CiRegister src) { | |
694 assert dst.isFpu(); | |
695 assert src.isFpu(); | |
696 emitByte(0xF3); | |
697 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
698 emitByte(0x0F); | |
699 emitByte(0x5E); | |
700 emitByte(0xC0 | encode); | |
701 } | |
702 | |
703 public final void hlt() { | |
704 emitByte(0xF4); | |
705 } | |
706 | |
707 public final void idivl(CiRegister src) { | |
708 int encode = prefixAndEncode(src.encoding); | |
709 emitByte(0xF7); | |
710 emitByte(0xF8 | encode); | |
711 } | |
712 | |
713 public final void imull(CiRegister dst, CiRegister src) { | |
714 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
715 emitByte(0x0F); | |
716 emitByte(0xAF); | |
717 emitByte(0xC0 | encode); | |
718 } | |
719 | |
720 public final void imull(CiRegister dst, CiRegister src, int value) { | |
721 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
722 if (Util.isByte(value)) { | |
723 emitByte(0x6B); | |
724 emitByte(0xC0 | encode); | |
725 emitByte(value); | |
726 } else { | |
727 emitByte(0x69); | |
728 emitByte(0xC0 | encode); | |
729 emitInt(value); | |
730 } | |
731 } | |
732 | |
733 public final void incl(CiAddress dst) { | |
734 // Don't use it directly. Use Macroincrement() instead. | |
735 prefix(dst); | |
736 emitByte(0xFF); | |
737 emitOperandHelper(rax, dst); | |
738 } | |
739 | |
740 public final void jcc(ConditionFlag cc, int target, boolean forceDisp32) { | |
741 int shortSize = 2; | |
742 int longSize = 6; | |
743 long disp = target - codeBuffer.position(); | |
744 if (!forceDisp32 && Util.isByte(disp - shortSize)) { | |
745 // 0111 tttn #8-bit disp | |
746 emitByte(0x70 | cc.value); | |
747 emitByte((int) ((disp - shortSize) & 0xFF)); | |
748 } else { | |
749 // 0000 1111 1000 tttn #32-bit disp | |
750 assert Util.isInt(disp - longSize) : "must be 32bit offset (call4)"; | |
751 emitByte(0x0F); | |
752 emitByte(0x80 | cc.value); | |
753 emitInt((int) (disp - longSize)); | |
754 } | |
755 } | |
756 | |
757 public final void jcc(ConditionFlag cc, Label l) { | |
758 assert (0 <= cc.value) && (cc.value < 16) : "illegal cc"; | |
759 if (l.isBound()) { | |
760 jcc(cc, l.position(), false); | |
761 } else { | |
762 // Note: could eliminate cond. jumps to this jump if condition | |
763 // is the same however, seems to be rather unlikely case. | |
764 // Note: use jccb() if label to be bound is very close to get | |
765 // an 8-bit displacement | |
766 l.addPatchAt(codeBuffer.position()); | |
767 emitByte(0x0F); | |
768 emitByte(0x80 | cc.value); | |
769 emitInt(0); | |
770 } | |
771 | |
772 } | |
773 | |
774 public final void jccb(ConditionFlag cc, Label l) { | |
775 if (l.isBound()) { | |
776 int shortSize = 2; | |
777 int entry = l.position(); | |
778 assert Util.isByte(entry - (codeBuffer.position() + shortSize)) : "Dispacement too large for a short jmp"; | |
779 long disp = entry - codeBuffer.position(); | |
780 // 0111 tttn #8-bit disp | |
781 emitByte(0x70 | cc.value); | |
782 emitByte((int) ((disp - shortSize) & 0xFF)); | |
783 } else { | |
784 | |
785 l.addPatchAt(codeBuffer.position()); | |
786 emitByte(0x70 | cc.value); | |
787 emitByte(0); | |
788 } | |
789 } | |
790 | |
791 public final void jmp(CiAddress adr) { | |
792 prefix(adr); | |
793 emitByte(0xFF); | |
794 emitOperandHelper(rsp, adr); | |
795 } | |
796 | |
797 public final void jmp(int target, boolean forceDisp32) { | |
798 int shortSize = 2; | |
799 int longSize = 5; | |
800 long disp = target - codeBuffer.position(); | |
801 if (!forceDisp32 && Util.isByte(disp - shortSize)) { | |
802 emitByte(0xEB); | |
803 emitByte((int) ((disp - shortSize) & 0xFF)); | |
804 } else { | |
805 emitByte(0xE9); | |
806 emitInt((int) (disp - longSize)); | |
807 } | |
808 } | |
809 | |
810 public final void jmp(Label l) { | |
811 if (l.isBound()) { | |
812 jmp(l.position(), false); | |
813 } else { | |
814 // By default, forward jumps are always 32-bit displacements, since | |
815 // we can't yet know where the label will be bound. If you're sure that | |
816 // the forward jump will not run beyond 256 bytes, use jmpb to | |
817 // force an 8-bit displacement. | |
818 | |
819 l.addPatchAt(codeBuffer.position()); | |
820 emitByte(0xE9); | |
821 emitInt(0); | |
822 } | |
823 } | |
824 | |
825 public final void jmp(CiRegister entry) { | |
826 int encode = prefixAndEncode(entry.encoding); | |
827 emitByte(0xFF); | |
828 emitByte(0xE0 | encode); | |
829 } | |
830 | |
831 public final void jmpb(Label l) { | |
832 if (l.isBound()) { | |
833 int shortSize = 2; | |
834 int entry = l.position(); | |
835 assert Util.isByte((entry - codeBuffer.position()) + shortSize) : "Dispacement too large for a short jmp"; | |
836 long offs = entry - codeBuffer.position(); | |
837 emitByte(0xEB); | |
838 emitByte((int) ((offs - shortSize) & 0xFF)); | |
839 } else { | |
840 | |
841 l.addPatchAt(codeBuffer.position()); | |
842 emitByte(0xEB); | |
843 emitByte(0); | |
844 } | |
845 } | |
846 | |
847 public final void leaq(CiRegister dst, CiAddress src) { | |
848 prefixq(src, dst); | |
849 emitByte(0x8D); | |
850 emitOperandHelper(dst, src); | |
851 } | |
852 | |
853 public final void enter(int imm16, int imm8) { | |
854 emitByte(0xC8); | |
855 emitShort(imm16); | |
856 emitByte(imm8); | |
857 } | |
858 | |
859 public final void leave() { | |
860 emitByte(0xC9); | |
861 } | |
862 | |
863 public final void lock() { | |
864 if ((C1XOptions.Atomics & 1) != 0) { | |
865 // Emit either nothing, a NOP, or a NOP: prefix | |
866 emitByte(0x90); | |
867 } else { | |
868 emitByte(0xF0); | |
869 } | |
870 } | |
871 | |
872 // Emit mfence instruction | |
873 public final void mfence() { | |
874 emitByte(0x0F); | |
875 emitByte(0xAE); | |
876 emitByte(0xF0); | |
877 } | |
878 | |
879 public final void mov(CiRegister dst, CiRegister src) { | |
880 movq(dst, src); | |
881 } | |
882 | |
883 public final void movapd(CiRegister dst, CiRegister src) { | |
884 assert dst.isFpu(); | |
885 assert src.isFpu(); | |
886 int dstenc = dst.encoding; | |
887 int srcenc = src.encoding; | |
888 emitByte(0x66); | |
889 if (dstenc < 8) { | |
890 if (srcenc >= 8) { | |
891 emitByte(Prefix.REXB); | |
892 srcenc -= 8; | |
893 } | |
894 } else { | |
895 if (srcenc < 8) { | |
896 emitByte(Prefix.REXR); | |
897 } else { | |
898 emitByte(Prefix.REXRB); | |
899 srcenc -= 8; | |
900 } | |
901 dstenc -= 8; | |
902 } | |
903 emitByte(0x0F); | |
904 emitByte(0x28); | |
905 emitByte(0xC0 | dstenc << 3 | srcenc); | |
906 } | |
907 | |
908 public final void movaps(CiRegister dst, CiRegister src) { | |
909 assert dst.isFpu(); | |
910 assert src.isFpu(); | |
911 int dstenc = dst.encoding; | |
912 int srcenc = src.encoding; | |
913 if (dstenc < 8) { | |
914 if (srcenc >= 8) { | |
915 emitByte(Prefix.REXB); | |
916 srcenc -= 8; | |
917 } | |
918 } else { | |
919 if (srcenc < 8) { | |
920 emitByte(Prefix.REXR); | |
921 } else { | |
922 emitByte(Prefix.REXRB); | |
923 srcenc -= 8; | |
924 } | |
925 dstenc -= 8; | |
926 } | |
927 emitByte(0x0F); | |
928 emitByte(0x28); | |
929 emitByte(0xC0 | dstenc << 3 | srcenc); | |
930 } | |
931 | |
932 public final void movb(CiRegister dst, CiAddress src) { | |
933 prefix(src, dst); // , true) | |
934 emitByte(0x8A); | |
935 emitOperandHelper(dst, src); | |
936 } | |
937 | |
938 public final void movb(CiAddress dst, int imm8) { | |
939 prefix(dst); | |
940 emitByte(0xC6); | |
941 emitOperandHelper(rax, dst); | |
942 emitByte(imm8); | |
943 } | |
944 | |
945 public final void movb(CiAddress dst, CiRegister src) { | |
946 assert src.isByte() : "must have byte register"; | |
947 prefix(dst, src); // , true) | |
948 emitByte(0x88); | |
949 emitOperandHelper(src, dst); | |
950 } | |
951 | |
952 public final void movdl(CiRegister dst, CiRegister src) { | |
953 if (dst.isFpu()) { | |
954 assert !src.isFpu() : "does this hold?"; | |
955 emitByte(0x66); | |
956 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
957 emitByte(0x0F); | |
958 emitByte(0x6E); | |
959 emitByte(0xC0 | encode); | |
960 } else if (src.isFpu()) { | |
961 assert !dst.isFpu(); | |
962 emitByte(0x66); | |
963 // swap src/dst to get correct prefix | |
964 int encode = prefixAndEncode(src.encoding, dst.encoding); | |
965 emitByte(0x0F); | |
966 emitByte(0x7E); | |
967 emitByte(0xC0 | encode); | |
968 } | |
969 } | |
970 | |
971 public final void movdqa(CiRegister dst, CiAddress src) { | |
972 assert dst.isFpu(); | |
973 emitByte(0x66); | |
974 prefix(src, dst); | |
975 emitByte(0x0F); | |
976 emitByte(0x6F); | |
977 emitOperandHelper(dst, src); | |
978 } | |
979 | |
980 public final void movdqa(CiRegister dst, CiRegister src) { | |
981 assert dst.isFpu(); | |
982 emitByte(0x66); | |
983 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
984 emitByte(0x0F); | |
985 emitByte(0x6F); | |
986 emitByte(0xC0 | encode); | |
987 } | |
988 | |
989 public final void movdqa(CiAddress dst, CiRegister src) { | |
990 assert src.isFpu(); | |
991 emitByte(0x66); | |
992 prefix(dst, src); | |
993 emitByte(0x0F); | |
994 emitByte(0x7F); | |
995 emitOperandHelper(src, dst); | |
996 } | |
997 | |
998 public final void movdqu(CiRegister dst, CiAddress src) { | |
999 assert dst.isFpu(); | |
1000 emitByte(0xF3); | |
1001 prefix(src, dst); | |
1002 emitByte(0x0F); | |
1003 emitByte(0x6F); | |
1004 emitOperandHelper(dst, src); | |
1005 } | |
1006 | |
1007 public final void movdqu(CiRegister dst, CiRegister src) { | |
1008 assert dst.isFpu(); | |
1009 assert src.isFpu(); | |
1010 | |
1011 emitByte(0xF3); | |
1012 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
1013 emitByte(0x0F); | |
1014 emitByte(0x6F); | |
1015 emitByte(0xC0 | encode); | |
1016 } | |
1017 | |
1018 public final void movdqu(CiAddress dst, CiRegister src) { | |
1019 assert src.isFpu(); | |
1020 | |
1021 emitByte(0xF3); | |
1022 prefix(dst, src); | |
1023 emitByte(0x0F); | |
1024 emitByte(0x7F); | |
1025 emitOperandHelper(src, dst); | |
1026 } | |
1027 | |
1028 public final void movl(CiRegister dst, int imm32) { | |
1029 int encode = prefixAndEncode(dst.encoding); | |
1030 emitByte(0xB8 | encode); | |
1031 emitInt(imm32); | |
1032 } | |
1033 | |
1034 public final void movl(CiRegister dst, CiRegister src) { | |
1035 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1036 emitByte(0x8B); | |
1037 emitByte(0xC0 | encode); | |
1038 } | |
1039 | |
1040 public final void movl(CiRegister dst, CiAddress src) { | |
1041 prefix(src, dst); | |
1042 emitByte(0x8B); | |
1043 emitOperandHelper(dst, src); | |
1044 } | |
1045 | |
1046 public final void movl(CiAddress dst, int imm32) { | |
1047 prefix(dst); | |
1048 emitByte(0xC7); | |
1049 emitOperandHelper(rax, dst); | |
1050 emitInt(imm32); | |
1051 } | |
1052 | |
1053 public final void movl(CiAddress dst, CiRegister src) { | |
1054 prefix(dst, src); | |
1055 emitByte(0x89); | |
1056 emitOperandHelper(src, dst); | |
1057 } | |
1058 | |
1059 // New cpus require to use movsd and movss to avoid partial register stall | |
1060 // when loading from memory. But for old Opteron use movlpd instead of movsd. | |
1061 // The selection is done in Macromovdbl() and movflt(). | |
1062 public final void movlpd(CiRegister dst, CiAddress src) { | |
1063 assert dst.isFpu(); | |
1064 | |
1065 emitByte(0x66); | |
1066 prefix(src, dst); | |
1067 emitByte(0x0F); | |
1068 emitByte(0x12); | |
1069 emitOperandHelper(dst, src); | |
1070 | |
1071 } | |
1072 | |
1073 public final void movq(CiRegister dst, CiAddress src) { | |
1074 if (dst.isFpu()) { | |
1075 emitByte(0xF3); | |
1076 prefixq(src, dst); | |
1077 emitByte(0x0F); | |
1078 emitByte(0x7E); | |
1079 emitOperandHelper(dst, src); | |
1080 } else { | |
1081 prefixq(src, dst); | |
1082 emitByte(0x8B); | |
1083 emitOperandHelper(dst, src); | |
1084 } | |
1085 } | |
1086 | |
1087 public final void movq(CiRegister dst, CiRegister src) { | |
1088 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
1089 emitByte(0x8B); | |
1090 emitByte(0xC0 | encode); | |
1091 } | |
1092 | |
1093 public final void movq(CiAddress dst, CiRegister src) { | |
1094 if (src.isFpu()) { | |
1095 emitByte(0x66); | |
1096 prefixq(dst, src); | |
1097 emitByte(0x0F); | |
1098 emitByte(0xD6); | |
1099 emitOperandHelper(src, dst); | |
1100 } else { | |
1101 prefixq(dst, src); | |
1102 emitByte(0x89); | |
1103 emitOperandHelper(src, dst); | |
1104 } | |
1105 } | |
1106 | |
1107 public final void movsxb(CiRegister dst, CiAddress src) { // movsxb | |
1108 prefix(src, dst); | |
1109 emitByte(0x0F); | |
1110 emitByte(0xBE); | |
1111 emitOperandHelper(dst, src); | |
1112 } | |
1113 | |
1114 public final void movsxb(CiRegister dst, CiRegister src) { // movsxb | |
1115 int encode = prefixAndEncode(dst.encoding, src.encoding, true); | |
1116 emitByte(0x0F); | |
1117 emitByte(0xBE); | |
1118 emitByte(0xC0 | encode); | |
1119 } | |
1120 | |
1121 public final void movsd(CiRegister dst, CiRegister src) { | |
1122 assert dst.isFpu(); | |
1123 assert src.isFpu(); | |
1124 emitByte(0xF2); | |
1125 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1126 emitByte(0x0F); | |
1127 emitByte(0x10); | |
1128 emitByte(0xC0 | encode); | |
1129 } | |
1130 | |
1131 public final void movsd(CiRegister dst, CiAddress src) { | |
1132 assert dst.isFpu(); | |
1133 emitByte(0xF2); | |
1134 prefix(src, dst); | |
1135 emitByte(0x0F); | |
1136 emitByte(0x10); | |
1137 emitOperandHelper(dst, src); | |
1138 } | |
1139 | |
1140 public final void movsd(CiAddress dst, CiRegister src) { | |
1141 assert src.isFpu(); | |
1142 emitByte(0xF2); | |
1143 prefix(dst, src); | |
1144 emitByte(0x0F); | |
1145 emitByte(0x11); | |
1146 emitOperandHelper(src, dst); | |
1147 } | |
1148 | |
1149 public final void movss(CiRegister dst, CiRegister src) { | |
1150 assert dst.isFpu(); | |
1151 assert src.isFpu(); | |
1152 emitByte(0xF3); | |
1153 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1154 emitByte(0x0F); | |
1155 emitByte(0x10); | |
1156 emitByte(0xC0 | encode); | |
1157 } | |
1158 | |
1159 public final void movss(CiRegister dst, CiAddress src) { | |
1160 assert dst.isFpu(); | |
1161 emitByte(0xF3); | |
1162 prefix(src, dst); | |
1163 emitByte(0x0F); | |
1164 emitByte(0x10); | |
1165 emitOperandHelper(dst, src); | |
1166 } | |
1167 | |
1168 public final void movss(CiAddress dst, CiRegister src) { | |
1169 assert src.isFpu(); | |
1170 emitByte(0xF3); | |
1171 prefix(dst, src); | |
1172 emitByte(0x0F); | |
1173 emitByte(0x11); | |
1174 emitOperandHelper(src, dst); | |
1175 } | |
1176 | |
1177 public final void movswl(CiRegister dst, CiAddress src) { | |
1178 prefix(src, dst); | |
1179 emitByte(0x0F); | |
1180 emitByte(0xBF); | |
1181 emitOperandHelper(dst, src); | |
1182 } | |
1183 | |
1184 public final void movsxw(CiRegister dst, CiRegister src) { // movsxw | |
1185 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1186 emitByte(0x0F); | |
1187 emitByte(0xBF); | |
1188 emitByte(0xC0 | encode); | |
1189 } | |
1190 | |
1191 public final void movsxw(CiRegister dst, CiAddress src) { // movsxw | |
1192 prefix(src, dst); | |
1193 emitByte(0x0F); | |
1194 emitByte(0xBF); | |
1195 emitOperandHelper(dst, src); | |
1196 } | |
1197 | |
1198 public final void movzxd(CiRegister dst, CiRegister src) { // movzxd | |
1199 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1200 emitByte(0x63); | |
1201 emitByte(0xC0 | encode); | |
1202 } | |
1203 | |
1204 public final void movzxd(CiRegister dst, CiAddress src) { // movzxd | |
1205 prefix(src, dst); | |
1206 emitByte(0x63); | |
1207 emitOperandHelper(dst, src); | |
1208 } | |
1209 | |
1210 public final void movw(CiAddress dst, int imm16) { | |
1211 emitByte(0x66); // switch to 16-bit mode | |
1212 prefix(dst); | |
1213 emitByte(0xC7); | |
1214 emitOperandHelper(rax, dst); | |
1215 emitShort(imm16); | |
1216 } | |
1217 | |
1218 public final void movw(CiRegister dst, CiAddress src) { | |
1219 emitByte(0x66); | |
1220 prefix(src, dst); | |
1221 emitByte(0x8B); | |
1222 emitOperandHelper(dst, src); | |
1223 } | |
1224 | |
1225 public final void movw(CiAddress dst, CiRegister src) { | |
1226 emitByte(0x66); | |
1227 prefix(dst, src); | |
1228 emitByte(0x89); | |
1229 emitOperandHelper(src, dst); | |
1230 } | |
1231 | |
1232 public final void movzxb(CiRegister dst, CiAddress src) { // movzxb | |
1233 prefix(src, dst); | |
1234 emitByte(0x0F); | |
1235 emitByte(0xB6); | |
1236 emitOperandHelper(dst, src); | |
1237 } | |
1238 | |
1239 public final void movzxb(CiRegister dst, CiRegister src) { // movzxb | |
1240 int encode = prefixAndEncode(dst.encoding, src.encoding, true); | |
1241 emitByte(0x0F); | |
1242 emitByte(0xB6); | |
1243 emitByte(0xC0 | encode); | |
1244 } | |
1245 | |
1246 public final void movzxl(CiRegister dst, CiAddress src) { // movzxw | |
1247 prefix(src, dst); | |
1248 emitByte(0x0F); | |
1249 emitByte(0xB7); | |
1250 emitOperandHelper(dst, src); | |
1251 } | |
1252 | |
1253 public final void movzxl(CiRegister dst, CiRegister src) { // movzxw | |
1254 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1255 emitByte(0x0F); | |
1256 emitByte(0xB7); | |
1257 emitByte(0xC0 | encode); | |
1258 } | |
1259 | |
1260 public final void mull(CiAddress src) { | |
1261 prefix(src); | |
1262 emitByte(0xF7); | |
1263 emitOperandHelper(rsp, src); | |
1264 } | |
1265 | |
1266 public final void mulsd(CiRegister dst, CiAddress src) { | |
1267 assert dst.isFpu(); | |
1268 emitByte(0xF2); | |
1269 prefix(src, dst); | |
1270 emitByte(0x0F); | |
1271 emitByte(0x59); | |
1272 emitOperandHelper(dst, src); | |
1273 } | |
1274 | |
1275 public final void mulsd(CiRegister dst, CiRegister src) { | |
1276 assert dst.isFpu(); | |
1277 assert src.isFpu(); | |
1278 | |
1279 emitByte(0xF2); | |
1280 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1281 emitByte(0x0F); | |
1282 emitByte(0x59); | |
1283 emitByte(0xC0 | encode); | |
1284 } | |
1285 | |
1286 public final void mulss(CiRegister dst, CiAddress src) { | |
1287 assert dst.isFpu(); | |
1288 | |
1289 emitByte(0xF3); | |
1290 prefix(src, dst); | |
1291 emitByte(0x0F); | |
1292 emitByte(0x59); | |
1293 emitOperandHelper(dst, src); | |
1294 } | |
1295 | |
1296 public final void mulss(CiRegister dst, CiRegister src) { | |
1297 assert dst.isFpu(); | |
1298 assert src.isFpu(); | |
1299 emitByte(0xF3); | |
1300 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1301 emitByte(0x0F); | |
1302 emitByte(0x59); | |
1303 emitByte(0xC0 | encode); | |
1304 } | |
1305 | |
1306 public final void negl(CiRegister dst) { | |
1307 int encode = prefixAndEncode(dst.encoding); | |
1308 emitByte(0xF7); | |
1309 emitByte(0xD8 | encode); | |
1310 } | |
1311 | |
1312 @Override | |
1313 public final void nop() { | |
1314 nop(1); | |
1315 } | |
1316 | |
1317 public void nop(int i) { | |
1318 if (C1XOptions.UseNormalNop) { | |
1319 assert i > 0 : " "; | |
1320 // The fancy nops aren't currently recognized by debuggers making it a | |
1321 // pain to disassemble code while debugging. If assert are on clearly | |
1322 // speed is not an issue so simply use the single byte traditional nop | |
1323 // to do alignment. | |
1324 | |
1325 for (; i > 0; i--) { | |
1326 emitByte(0x90); | |
1327 } | |
1328 return; | |
1329 } | |
1330 | |
1331 if (C1XOptions.UseAddressNop) { | |
1332 // | |
1333 // Using multi-bytes nops "0x0F 0x1F [Address]" for AMD. | |
1334 // 1: 0x90 | |
1335 // 2: 0x66 0x90 | |
1336 // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding) | |
1337 // 4: 0x0F 0x1F 0x40 0x00 | |
1338 // 5: 0x0F 0x1F 0x44 0x00 0x00 | |
1339 // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00 | |
1340 // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 | |
1341 // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 | |
1342 // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 | |
1343 // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 | |
1344 // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 | |
1345 | |
1346 // The rest coding is AMD specific - use consecutive Address nops | |
1347 | |
1348 // 12: 0x66 0x0F 0x1F 0x44 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 | |
1349 // 13: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x66 0x0F 0x1F 0x44 0x00 0x00 | |
1350 // 14: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 | |
1351 // 15: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00 | |
1352 // 16: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 | |
1353 // Size prefixes (0x66) are added for larger sizes | |
1354 | |
1355 while (i >= 22) { | |
1356 i -= 11; | |
1357 emitByte(0x66); // size prefix | |
1358 emitByte(0x66); // size prefix | |
1359 emitByte(0x66); // size prefix | |
1360 addrNop8(); | |
1361 } | |
1362 // Generate first nop for size between 21-12 | |
1363 switch (i) { | |
1364 case 21: | |
1365 i -= 1; | |
1366 emitByte(0x66); // size prefix | |
1367 // fall through | |
1368 case 20: | |
1369 // fall through | |
1370 case 19: | |
1371 i -= 1; | |
1372 emitByte(0x66); // size prefix | |
1373 // fall through | |
1374 case 18: | |
1375 // fall through | |
1376 case 17: | |
1377 i -= 1; | |
1378 emitByte(0x66); // size prefix | |
1379 // fall through | |
1380 case 16: | |
1381 // fall through | |
1382 case 15: | |
1383 i -= 8; | |
1384 addrNop8(); | |
1385 break; | |
1386 case 14: | |
1387 case 13: | |
1388 i -= 7; | |
1389 addrNop7(); | |
1390 break; | |
1391 case 12: | |
1392 i -= 6; | |
1393 emitByte(0x66); // size prefix | |
1394 addrNop5(); | |
1395 break; | |
1396 default: | |
1397 assert i < 12; | |
1398 } | |
1399 | |
1400 // Generate second nop for size between 11-1 | |
1401 switch (i) { | |
1402 case 11: | |
1403 emitByte(0x66); // size prefix | |
1404 emitByte(0x66); // size prefix | |
1405 emitByte(0x66); // size prefix | |
1406 addrNop8(); | |
1407 break; | |
1408 case 10: | |
1409 emitByte(0x66); // size prefix | |
1410 emitByte(0x66); // size prefix | |
1411 addrNop8(); | |
1412 break; | |
1413 case 9: | |
1414 emitByte(0x66); // size prefix | |
1415 addrNop8(); | |
1416 break; | |
1417 case 8: | |
1418 addrNop8(); | |
1419 break; | |
1420 case 7: | |
1421 addrNop7(); | |
1422 break; | |
1423 case 6: | |
1424 emitByte(0x66); // size prefix | |
1425 addrNop5(); | |
1426 break; | |
1427 case 5: | |
1428 addrNop5(); | |
1429 break; | |
1430 case 4: | |
1431 addrNop4(); | |
1432 break; | |
1433 case 3: | |
1434 // Don't use "0x0F 0x1F 0x00" - need patching safe padding | |
1435 emitByte(0x66); // size prefix | |
1436 emitByte(0x66); // size prefix | |
1437 emitByte(0x90); // nop | |
1438 break; | |
1439 case 2: | |
1440 emitByte(0x66); // size prefix | |
1441 emitByte(0x90); // nop | |
1442 break; | |
1443 case 1: | |
1444 emitByte(0x90); // nop | |
1445 break; | |
1446 default: | |
1447 assert i == 0; | |
1448 } | |
1449 return; | |
1450 } | |
1451 | |
1452 // Using nops with size prefixes "0x66 0x90". | |
1453 // From AMD Optimization Guide: | |
1454 // 1: 0x90 | |
1455 // 2: 0x66 0x90 | |
1456 // 3: 0x66 0x66 0x90 | |
1457 // 4: 0x66 0x66 0x66 0x90 | |
1458 // 5: 0x66 0x66 0x90 0x66 0x90 | |
1459 // 6: 0x66 0x66 0x90 0x66 0x66 0x90 | |
1460 // 7: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 | |
1461 // 8: 0x66 0x66 0x66 0x90 0x66 0x66 0x66 0x90 | |
1462 // 9: 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 | |
1463 // 10: 0x66 0x66 0x66 0x90 0x66 0x66 0x90 0x66 0x66 0x90 | |
1464 // | |
1465 while (i > 12) { | |
1466 i -= 4; | |
1467 emitByte(0x66); // size prefix | |
1468 emitByte(0x66); | |
1469 emitByte(0x66); | |
1470 emitByte(0x90); // nop | |
1471 } | |
1472 // 1 - 12 nops | |
1473 if (i > 8) { | |
1474 if (i > 9) { | |
1475 i -= 1; | |
1476 emitByte(0x66); | |
1477 } | |
1478 i -= 3; | |
1479 emitByte(0x66); | |
1480 emitByte(0x66); | |
1481 emitByte(0x90); | |
1482 } | |
1483 // 1 - 8 nops | |
1484 if (i > 4) { | |
1485 if (i > 6) { | |
1486 i -= 1; | |
1487 emitByte(0x66); | |
1488 } | |
1489 i -= 3; | |
1490 emitByte(0x66); | |
1491 emitByte(0x66); | |
1492 emitByte(0x90); | |
1493 } | |
1494 switch (i) { | |
1495 case 4: | |
1496 emitByte(0x66); | |
1497 emitByte(0x66); | |
1498 emitByte(0x66); | |
1499 emitByte(0x90); | |
1500 break; | |
1501 case 3: | |
1502 emitByte(0x66); | |
1503 emitByte(0x66); | |
1504 emitByte(0x90); | |
1505 break; | |
1506 case 2: | |
1507 emitByte(0x66); | |
1508 emitByte(0x90); | |
1509 break; | |
1510 case 1: | |
1511 emitByte(0x90); | |
1512 break; | |
1513 default: | |
1514 assert i == 0; | |
1515 } | |
1516 } | |
1517 | |
1518 public final void notl(CiRegister dst) { | |
1519 int encode = prefixAndEncode(dst.encoding); | |
1520 emitByte(0xF7); | |
1521 emitByte(0xD0 | encode); | |
1522 } | |
1523 | |
1524 public final void orl(CiAddress dst, int imm32) { | |
1525 prefix(dst); | |
1526 emitByte(0x81); | |
1527 emitOperandHelper(rcx, dst); | |
1528 emitInt(imm32); | |
1529 } | |
1530 | |
1531 public final void orl(CiRegister dst, int imm32) { | |
1532 prefix(dst); | |
1533 emitArith(0x81, 0xC8, dst, imm32); | |
1534 } | |
1535 | |
1536 public final void orl(CiRegister dst, CiAddress src) { | |
1537 prefix(src, dst); | |
1538 emitByte(0x0B); | |
1539 emitOperandHelper(dst, src); | |
1540 } | |
1541 | |
1542 public final void orl(CiRegister dst, CiRegister src) { | |
1543 prefixAndEncode(dst.encoding, src.encoding); | |
1544 emitArith(0x0B, 0xC0, dst, src); | |
1545 } | |
1546 | |
1547 // generic | |
1548 public final void pop(CiRegister dst) { | |
1549 int encode = prefixAndEncode(dst.encoding); | |
1550 emitByte(0x58 | encode); | |
1551 } | |
1552 | |
1553 public final void popl(CiAddress dst) { | |
1554 // NOTE: this will adjust stack by 8byte on 64bits | |
1555 prefix(dst); | |
1556 emitByte(0x8F); | |
1557 emitOperandHelper(rax, dst); | |
1558 } | |
1559 | |
1560 public final void prefetchPrefix(CiAddress src) { | |
1561 prefix(src); | |
1562 emitByte(0x0F); | |
1563 } | |
1564 | |
1565 public final void prefetchnta(CiAddress src) { | |
1566 prefetchPrefix(src); | |
1567 emitByte(0x18); | |
1568 emitOperandHelper(rax, src); // 0, src | |
1569 } | |
1570 | |
1571 public final void prefetchr(CiAddress src) { | |
1572 prefetchPrefix(src); | |
1573 emitByte(0x0D); | |
1574 emitOperandHelper(rax, src); // 0, src | |
1575 } | |
1576 | |
1577 public final void prefetcht0(CiAddress src) { | |
1578 prefetchPrefix(src); | |
1579 emitByte(0x18); | |
1580 emitOperandHelper(rcx, src); // 1, src | |
1581 | |
1582 } | |
1583 | |
1584 public final void prefetcht1(CiAddress src) { | |
1585 prefetchPrefix(src); | |
1586 emitByte(0x18); | |
1587 emitOperandHelper(rdx, src); // 2, src | |
1588 } | |
1589 | |
1590 public final void prefetcht2(CiAddress src) { | |
1591 prefetchPrefix(src); | |
1592 emitByte(0x18); | |
1593 emitOperandHelper(rbx, src); // 3, src | |
1594 } | |
1595 | |
1596 public final void prefetchw(CiAddress src) { | |
1597 prefetchPrefix(src); | |
1598 emitByte(0x0D); | |
1599 emitOperandHelper(rcx, src); // 1, src | |
1600 } | |
1601 | |
1602 public final void pshufd(CiRegister dst, CiRegister src, int mode) { | |
1603 assert dst.isFpu(); | |
1604 assert src.isFpu(); | |
1605 assert Util.isUByte(mode) : "invalid value"; | |
1606 | |
1607 emitByte(0x66); | |
1608 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1609 emitByte(0x0F); | |
1610 emitByte(0x70); | |
1611 emitByte(0xC0 | encode); | |
1612 emitByte(mode & 0xFF); | |
1613 } | |
1614 | |
1615 public final void pshufd(CiRegister dst, CiAddress src, int mode) { | |
1616 assert dst.isFpu(); | |
1617 assert Util.isUByte(mode) : "invalid value"; | |
1618 | |
1619 emitByte(0x66); | |
1620 prefix(src, dst); | |
1621 emitByte(0x0F); | |
1622 emitByte(0x70); | |
1623 emitOperandHelper(dst, src); | |
1624 emitByte(mode & 0xFF); | |
1625 | |
1626 } | |
1627 | |
1628 public final void pshuflw(CiRegister dst, CiRegister src, int mode) { | |
1629 assert dst.isFpu(); | |
1630 assert src.isFpu(); | |
1631 assert Util.isUByte(mode) : "invalid value"; | |
1632 | |
1633 emitByte(0xF2); | |
1634 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1635 emitByte(0x0F); | |
1636 emitByte(0x70); | |
1637 emitByte(0xC0 | encode); | |
1638 emitByte(mode & 0xFF); | |
1639 } | |
1640 | |
1641 public final void pshuflw(CiRegister dst, CiAddress src, int mode) { | |
1642 assert dst.isFpu(); | |
1643 assert Util.isUByte(mode) : "invalid value"; | |
1644 | |
1645 emitByte(0xF2); | |
1646 prefix(src, dst); // QQ new | |
1647 emitByte(0x0F); | |
1648 emitByte(0x70); | |
1649 emitOperandHelper(dst, src); | |
1650 emitByte(mode & 0xFF); | |
1651 } | |
1652 | |
1653 public final void psrlq(CiRegister dst, int shift) { | |
1654 assert dst.isFpu(); | |
1655 // HMM Table D-1 says sse2 or mmx | |
1656 | |
1657 int encode = prefixqAndEncode(xmm2.encoding, dst.encoding); | |
1658 emitByte(0x66); | |
1659 emitByte(0x0F); | |
1660 emitByte(0x73); | |
1661 emitByte(0xC0 | encode); | |
1662 emitByte(shift); | |
1663 } | |
1664 | |
1665 public final void punpcklbw(CiRegister dst, CiRegister src) { | |
1666 assert dst.isFpu(); | |
1667 assert src.isFpu(); | |
1668 emitByte(0x66); | |
1669 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1670 emitByte(0x0F); | |
1671 emitByte(0x60); | |
1672 emitByte(0xC0 | encode); | |
1673 } | |
1674 | |
1675 public final void push(int imm32) { | |
1676 // in 64bits we push 64bits onto the stack but only | |
1677 // take a 32bit immediate | |
1678 emitByte(0x68); | |
1679 emitInt(imm32); | |
1680 } | |
1681 | |
1682 public final void push(CiRegister src) { | |
1683 int encode = prefixAndEncode(src.encoding); | |
1684 emitByte(0x50 | encode); | |
1685 } | |
1686 | |
1687 public final void pushf() { | |
1688 emitByte(0x9C); | |
1689 } | |
1690 | |
1691 public final void pushl(CiAddress src) { | |
1692 // Note this will push 64bit on 64bit | |
1693 prefix(src); | |
1694 emitByte(0xFF); | |
1695 emitOperandHelper(rsi, src); | |
1696 } | |
1697 | |
1698 public final void pxor(CiRegister dst, CiAddress src) { | |
1699 assert dst.isFpu(); | |
1700 | |
1701 emitByte(0x66); | |
1702 prefix(src, dst); | |
1703 emitByte(0x0F); | |
1704 emitByte(0xEF); | |
1705 emitOperandHelper(dst, src); | |
1706 } | |
1707 | |
1708 public final void pxor(CiRegister dst, CiRegister src) { | |
1709 assert dst.isFpu(); | |
1710 assert src.isFpu(); | |
1711 | |
1712 emitByte(0x66); | |
1713 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1714 emitByte(0x0F); | |
1715 emitByte(0xEF); | |
1716 emitByte(0xC0 | encode); | |
1717 | |
1718 } | |
1719 | |
1720 public final void rcll(CiRegister dst, int imm8) { | |
1721 assert Util.isShiftCount(imm8) : "illegal shift count"; | |
1722 int encode = prefixAndEncode(dst.encoding); | |
1723 if (imm8 == 1) { | |
1724 emitByte(0xD1); | |
1725 emitByte(0xD0 | encode); | |
1726 } else { | |
1727 emitByte(0xC1); | |
1728 emitByte(0xD0 | encode); | |
1729 emitByte(imm8); | |
1730 } | |
1731 } | |
1732 | |
1733 public final void pause() { | |
1734 emitByte(0xF3); | |
1735 emitByte(0x90); | |
1736 } | |
1737 | |
1738 // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx heap words. | |
1739 public final void repeatMoveWords() { | |
1740 emitByte(0xF3); | |
1741 emitByte(Prefix.REXW); | |
1742 emitByte(0xA5); | |
1743 } | |
1744 | |
1745 // Copies data from [X86.rsi] to [X86.rdi] using X86.rcx bytes. | |
1746 public final void repeatMoveBytes() { | |
1747 emitByte(0xF3); | |
1748 emitByte(Prefix.REXW); | |
1749 emitByte(0xA4); | |
1750 } | |
1751 | |
1752 // sets X86.rcx pointer sized words with X86.rax, value at [edi] | |
1753 // generic | |
1754 public final void repSet() { // repSet | |
1755 emitByte(0xF3); | |
1756 // STOSQ | |
1757 emitByte(Prefix.REXW); | |
1758 emitByte(0xAB); | |
1759 } | |
1760 | |
1761 // scans X86.rcx pointer sized words at [edi] for occurance of X86.rax, | |
1762 // generic | |
1763 public final void repneScan() { // repneScan | |
1764 emitByte(0xF2); | |
1765 // SCASQ | |
1766 emitByte(Prefix.REXW); | |
1767 emitByte(0xAF); | |
1768 } | |
1769 | |
1770 // scans X86.rcx 4 byte words at [edi] for occurance of X86.rax, | |
1771 // generic | |
1772 public final void repneScanl() { // repneScan | |
1773 emitByte(0xF2); | |
1774 // SCASL | |
1775 emitByte(0xAF); | |
1776 } | |
1777 | |
1778 public final void ret(int imm16) { | |
1779 if (imm16 == 0) { | |
1780 emitByte(0xC3); | |
1781 } else { | |
1782 emitByte(0xC2); | |
1783 emitShort(imm16); | |
1784 } | |
1785 } | |
1786 | |
1787 public final void sarl(CiRegister dst, int imm8) { | |
1788 int encode = prefixAndEncode(dst.encoding); | |
1789 assert Util.isShiftCount(imm8) : "illegal shift count"; | |
1790 if (imm8 == 1) { | |
1791 emitByte(0xD1); | |
1792 emitByte(0xF8 | encode); | |
1793 } else { | |
1794 emitByte(0xC1); | |
1795 emitByte(0xF8 | encode); | |
1796 emitByte(imm8); | |
1797 } | |
1798 } | |
1799 | |
1800 public final void sarl(CiRegister dst) { | |
1801 int encode = prefixAndEncode(dst.encoding); | |
1802 emitByte(0xD3); | |
1803 emitByte(0xF8 | encode); | |
1804 } | |
1805 | |
1806 public final void sbbl(CiAddress dst, int imm32) { | |
1807 prefix(dst); | |
1808 emitArithOperand(0x81, rbx, dst, imm32); | |
1809 } | |
1810 | |
1811 public final void sbbl(CiRegister dst, int imm32) { | |
1812 prefix(dst); | |
1813 emitArith(0x81, 0xD8, dst, imm32); | |
1814 } | |
1815 | |
1816 public final void sbbl(CiRegister dst, CiAddress src) { | |
1817 prefix(src, dst); | |
1818 emitByte(0x1B); | |
1819 emitOperandHelper(dst, src); | |
1820 } | |
1821 | |
1822 public final void sbbl(CiRegister dst, CiRegister src) { | |
1823 prefixAndEncode(dst.encoding, src.encoding); | |
1824 emitArith(0x1B, 0xC0, dst, src); | |
1825 } | |
1826 | |
1827 public final void setb(ConditionFlag cc, CiRegister dst) { | |
1828 assert 0 <= cc.value && cc.value < 16 : "illegal cc"; | |
1829 int encode = prefixAndEncode(dst.encoding, true); | |
1830 emitByte(0x0F); | |
1831 emitByte(0x90 | cc.value); | |
1832 emitByte(0xC0 | encode); | |
1833 } | |
1834 | |
1835 public final void shll(CiRegister dst, int imm8) { | |
1836 assert Util.isShiftCount(imm8) : "illegal shift count"; | |
1837 int encode = prefixAndEncode(dst.encoding); | |
1838 if (imm8 == 1) { | |
1839 emitByte(0xD1); | |
1840 emitByte(0xE0 | encode); | |
1841 } else { | |
1842 emitByte(0xC1); | |
1843 emitByte(0xE0 | encode); | |
1844 emitByte(imm8); | |
1845 } | |
1846 } | |
1847 | |
1848 public final void shll(CiRegister dst) { | |
1849 int encode = prefixAndEncode(dst.encoding); | |
1850 emitByte(0xD3); | |
1851 emitByte(0xE0 | encode); | |
1852 } | |
1853 | |
1854 public final void shrl(CiRegister dst, int imm8) { | |
1855 assert Util.isShiftCount(imm8) : "illegal shift count"; | |
1856 int encode = prefixAndEncode(dst.encoding); | |
1857 emitByte(0xC1); | |
1858 emitByte(0xE8 | encode); | |
1859 emitByte(imm8); | |
1860 } | |
1861 | |
1862 public final void shrl(CiRegister dst) { | |
1863 int encode = prefixAndEncode(dst.encoding); | |
1864 emitByte(0xD3); | |
1865 emitByte(0xE8 | encode); | |
1866 } | |
1867 | |
1868 // copies a single word from [esi] to [edi] | |
1869 public final void smovl() { | |
1870 emitByte(0xA5); | |
1871 } | |
1872 | |
1873 public final void sqrtsd(CiRegister dst, CiRegister src) { | |
1874 assert dst.isFpu(); | |
1875 assert src.isFpu(); | |
1876 // HMM Table D-1 says sse2 | |
1877 // assert is64 || target.supportsSSE(); | |
1878 emitByte(0xF2); | |
1879 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1880 emitByte(0x0F); | |
1881 emitByte(0x51); | |
1882 emitByte(0xC0 | encode); | |
1883 } | |
1884 | |
1885 public final void subl(CiAddress dst, int imm32) { | |
1886 prefix(dst); | |
1887 if (Util.isByte(imm32)) { | |
1888 emitByte(0x83); | |
1889 emitOperandHelper(rbp, dst); | |
1890 emitByte(imm32 & 0xFF); | |
1891 } else { | |
1892 emitByte(0x81); | |
1893 emitOperandHelper(rbp, dst); | |
1894 emitInt(imm32); | |
1895 } | |
1896 } | |
1897 | |
1898 public final void subl(CiRegister dst, int imm32) { | |
1899 prefix(dst); | |
1900 emitArith(0x81, 0xE8, dst, imm32); | |
1901 } | |
1902 | |
1903 public final void subl(CiAddress dst, CiRegister src) { | |
1904 prefix(dst, src); | |
1905 emitByte(0x29); | |
1906 emitOperandHelper(src, dst); | |
1907 } | |
1908 | |
1909 public final void subl(CiRegister dst, CiAddress src) { | |
1910 prefix(src, dst); | |
1911 emitByte(0x2B); | |
1912 emitOperandHelper(dst, src); | |
1913 } | |
1914 | |
1915 public final void subl(CiRegister dst, CiRegister src) { | |
1916 prefixAndEncode(dst.encoding, src.encoding); | |
1917 emitArith(0x2B, 0xC0, dst, src); | |
1918 } | |
1919 | |
1920 public final void subsd(CiRegister dst, CiRegister src) { | |
1921 assert dst.isFpu(); | |
1922 assert src.isFpu(); | |
1923 emitByte(0xF2); | |
1924 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1925 emitByte(0x0F); | |
1926 emitByte(0x5C); | |
1927 emitByte(0xC0 | encode); | |
1928 } | |
1929 | |
1930 public final void subsd(CiRegister dst, CiAddress src) { | |
1931 assert dst.isFpu(); | |
1932 | |
1933 emitByte(0xF2); | |
1934 prefix(src, dst); | |
1935 emitByte(0x0F); | |
1936 emitByte(0x5C); | |
1937 emitOperandHelper(dst, src); | |
1938 } | |
1939 | |
1940 public final void subss(CiRegister dst, CiRegister src) { | |
1941 assert dst.isFpu(); | |
1942 assert src.isFpu(); | |
1943 emitByte(0xF3); | |
1944 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
1945 emitByte(0x0F); | |
1946 emitByte(0x5C); | |
1947 emitByte(0xC0 | encode); | |
1948 } | |
1949 | |
1950 public final void subss(CiRegister dst, CiAddress src) { | |
1951 assert dst.isFpu(); | |
1952 | |
1953 emitByte(0xF3); | |
1954 prefix(src, dst); | |
1955 emitByte(0x0F); | |
1956 emitByte(0x5C); | |
1957 emitOperandHelper(dst, src); | |
1958 } | |
1959 | |
1960 public final void testb(CiRegister dst, int imm8) { | |
1961 prefixAndEncode(dst.encoding, true); | |
1962 emitArithB(0xF6, 0xC0, dst, imm8); | |
1963 } | |
1964 | |
1965 public final void testl(CiRegister dst, int imm32) { | |
1966 // not using emitArith because test | |
1967 // doesn't support sign-extension of | |
1968 // 8bit operands | |
1969 int encode = dst.encoding; | |
1970 if (encode == 0) { | |
1971 emitByte(0xA9); | |
1972 } else { | |
1973 encode = prefixAndEncode(encode); | |
1974 emitByte(0xF7); | |
1975 emitByte(0xC0 | encode); | |
1976 } | |
1977 emitInt(imm32); | |
1978 } | |
1979 | |
1980 public final void testl(CiRegister dst, CiRegister src) { | |
1981 prefixAndEncode(dst.encoding, src.encoding); | |
1982 emitArith(0x85, 0xC0, dst, src); | |
1983 } | |
1984 | |
1985 public final void testl(CiRegister dst, CiAddress src) { | |
1986 prefix(src, dst); | |
1987 emitByte(0x85); | |
1988 emitOperandHelper(dst, src); | |
1989 } | |
1990 | |
1991 public final void ucomisd(CiRegister dst, CiAddress src) { | |
1992 assert dst.isFpu(); | |
1993 emitByte(0x66); | |
1994 ucomiss(dst, src); | |
1995 } | |
1996 | |
1997 public final void ucomisd(CiRegister dst, CiRegister src) { | |
1998 assert dst.isFpu(); | |
1999 assert src.isFpu(); | |
2000 emitByte(0x66); | |
2001 ucomiss(dst, src); | |
2002 } | |
2003 | |
2004 public final void ucomiss(CiRegister dst, CiAddress src) { | |
2005 assert dst.isFpu(); | |
2006 | |
2007 prefix(src, dst); | |
2008 emitByte(0x0F); | |
2009 emitByte(0x2E); | |
2010 emitOperandHelper(dst, src); | |
2011 } | |
2012 | |
2013 public final void ucomiss(CiRegister dst, CiRegister src) { | |
2014 assert dst.isFpu(); | |
2015 assert src.isFpu(); | |
2016 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2017 emitByte(0x0F); | |
2018 emitByte(0x2E); | |
2019 emitByte(0xC0 | encode); | |
2020 } | |
2021 | |
2022 public final void xaddl(CiAddress dst, CiRegister src) { | |
2023 assert src.isFpu(); | |
2024 | |
2025 prefix(dst, src); | |
2026 emitByte(0x0F); | |
2027 emitByte(0xC1); | |
2028 emitOperandHelper(src, dst); | |
2029 } | |
2030 | |
2031 public final void xchgl(CiRegister dst, CiAddress src) { // xchg | |
2032 prefix(src, dst); | |
2033 emitByte(0x87); | |
2034 emitOperandHelper(dst, src); | |
2035 } | |
2036 | |
2037 public final void xchgl(CiRegister dst, CiRegister src) { | |
2038 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2039 emitByte(0x87); | |
2040 emitByte(0xc0 | encode); | |
2041 } | |
2042 | |
2043 public final void xorl(CiRegister dst, int imm32) { | |
2044 prefix(dst); | |
2045 emitArith(0x81, 0xF0, dst, imm32); | |
2046 } | |
2047 | |
2048 public final void xorl(CiRegister dst, CiAddress src) { | |
2049 prefix(src, dst); | |
2050 emitByte(0x33); | |
2051 emitOperandHelper(dst, src); | |
2052 } | |
2053 | |
2054 public final void xorl(CiRegister dst, CiRegister src) { | |
2055 prefixAndEncode(dst.encoding, src.encoding); | |
2056 emitArith(0x33, 0xC0, dst, src); | |
2057 } | |
2058 | |
2059 public final void xorpd(CiRegister dst, CiRegister src) { | |
2060 assert dst.isFpu(); | |
2061 assert src.isFpu(); | |
2062 emitByte(0x66); | |
2063 xorps(dst, src); | |
2064 } | |
2065 | |
2066 public final void xorpd(CiRegister dst, CiAddress src) { | |
2067 assert dst.isFpu(); | |
2068 | |
2069 emitByte(0x66); | |
2070 prefix(src, dst); | |
2071 emitByte(0x0F); | |
2072 emitByte(0x57); | |
2073 emitOperandHelper(dst, src); | |
2074 } | |
2075 | |
2076 public final void xorps(CiRegister dst, CiRegister src) { | |
2077 | |
2078 assert dst.isFpu(); | |
2079 int encode = prefixAndEncode(dst.encoding, src.encoding); | |
2080 emitByte(0x0F); | |
2081 emitByte(0x57); | |
2082 emitByte(0xC0 | encode); | |
2083 } | |
2084 | |
2085 public final void xorps(CiRegister dst, CiAddress src) { | |
2086 assert dst.isFpu(); | |
2087 | |
2088 prefix(src, dst); | |
2089 emitByte(0x0F); | |
2090 emitByte(0x57); | |
2091 emitOperandHelper(dst, src); | |
2092 } | |
2093 | |
2094 // 32bit only pieces of the assembler | |
2095 | |
2096 public final void decl(CiRegister dst) { | |
2097 // Don't use it directly. Use Macrodecrementl() instead. | |
2098 // Use two-byte form (one-byte form is a REX prefix in 64-bit mode) | |
2099 int encode = prefixAndEncode(dst.encoding); | |
2100 emitByte(0xFF); | |
2101 emitByte(0xC8 | encode); | |
2102 } | |
2103 | |
2104 public final void incl(CiRegister dst) { | |
2105 // Don't use it directly. Use Macroincrementl() instead. | |
2106 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) | |
2107 int encode = prefixAndEncode(dst.encoding); | |
2108 emitByte(0xFF); | |
2109 emitByte(0xC0 | encode); | |
2110 } | |
2111 | |
2112 int prefixAndEncode(int regEnc) { | |
2113 return prefixAndEncode(regEnc, false); | |
2114 } | |
2115 | |
2116 int prefixAndEncode(int regEnc, boolean byteinst) { | |
2117 if (regEnc >= 8) { | |
2118 emitByte(Prefix.REXB); | |
2119 regEnc -= 8; | |
2120 } else if (byteinst && regEnc >= 4) { | |
2121 emitByte(Prefix.REX); | |
2122 } | |
2123 return regEnc; | |
2124 } | |
2125 | |
2126 int prefixqAndEncode(int regEnc) { | |
2127 if (regEnc < 8) { | |
2128 emitByte(Prefix.REXW); | |
2129 } else { | |
2130 emitByte(Prefix.REXWB); | |
2131 regEnc -= 8; | |
2132 } | |
2133 return regEnc; | |
2134 } | |
2135 | |
2136 int prefixAndEncode(int dstEnc, int srcEnc) { | |
2137 return prefixAndEncode(dstEnc, srcEnc, false); | |
2138 } | |
2139 | |
2140 int prefixAndEncode(int dstEnc, int srcEnc, boolean byteinst) { | |
2141 if (dstEnc < 8) { | |
2142 if (srcEnc >= 8) { | |
2143 emitByte(Prefix.REXB); | |
2144 srcEnc -= 8; | |
2145 } else if (byteinst && srcEnc >= 4) { | |
2146 emitByte(Prefix.REX); | |
2147 } | |
2148 } else { | |
2149 if (srcEnc < 8) { | |
2150 emitByte(Prefix.REXR); | |
2151 } else { | |
2152 emitByte(Prefix.REXRB); | |
2153 srcEnc -= 8; | |
2154 } | |
2155 dstEnc -= 8; | |
2156 } | |
2157 return dstEnc << 3 | srcEnc; | |
2158 } | |
2159 | |
2160 /** | |
2161 * Creates prefix and the encoding of the lower 6 bits of the ModRM-Byte. It emits an operand prefix. If the given | |
2162 * operands exceed 3 bits, the 4th bit is encoded in the prefix. | |
2163 * | |
2164 * @param regEnc the encoding of the register part of the ModRM-Byte | |
2165 * @param rmEnc the encoding of the r/m part of the ModRM-Byte | |
2166 * @return the lower 6 bits of the ModRM-Byte that should be emitted | |
2167 */ | |
2168 private int prefixqAndEncode(int regEnc, int rmEnc) { | |
2169 if (regEnc < 8) { | |
2170 if (rmEnc < 8) { | |
2171 emitByte(Prefix.REXW); | |
2172 } else { | |
2173 emitByte(Prefix.REXWB); | |
2174 rmEnc -= 8; | |
2175 } | |
2176 } else { | |
2177 if (rmEnc < 8) { | |
2178 emitByte(Prefix.REXWR); | |
2179 } else { | |
2180 emitByte(Prefix.REXWRB); | |
2181 rmEnc -= 8; | |
2182 } | |
2183 regEnc -= 8; | |
2184 } | |
2185 return regEnc << 3 | rmEnc; | |
2186 } | |
2187 | |
2188 private void prefix(CiRegister reg) { | |
2189 if (reg.encoding >= 8) { | |
2190 emitByte(Prefix.REXB); | |
2191 } | |
2192 } | |
2193 | |
2194 private void prefix(CiAddress adr) { | |
2195 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2196 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2197 emitByte(Prefix.REXXB); | |
2198 } else { | |
2199 emitByte(Prefix.REXB); | |
2200 } | |
2201 } else { | |
2202 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2203 emitByte(Prefix.REXX); | |
2204 } | |
2205 } | |
2206 } | |
2207 | |
2208 private void prefixq(CiAddress adr) { | |
2209 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2210 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2211 emitByte(Prefix.REXWXB); | |
2212 } else { | |
2213 emitByte(Prefix.REXWB); | |
2214 } | |
2215 } else { | |
2216 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2217 emitByte(Prefix.REXWX); | |
2218 } else { | |
2219 emitByte(Prefix.REXW); | |
2220 } | |
2221 } | |
2222 } | |
2223 | |
2224 private void prefix(CiAddress adr, CiRegister reg) { | |
2225 if (reg.encoding < 8) { | |
2226 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2227 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2228 emitByte(Prefix.REXXB); | |
2229 } else { | |
2230 emitByte(Prefix.REXB); | |
2231 } | |
2232 } else { | |
2233 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2234 emitByte(Prefix.REXX); | |
2235 } else if (reg.encoding >= 4) { | |
2236 emitByte(Prefix.REX); | |
2237 } | |
2238 } | |
2239 } else { | |
2240 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2241 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2242 emitByte(Prefix.REXRXB); | |
2243 } else { | |
2244 emitByte(Prefix.REXRB); | |
2245 } | |
2246 } else { | |
2247 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2248 emitByte(Prefix.REXRX); | |
2249 } else { | |
2250 emitByte(Prefix.REXR); | |
2251 } | |
2252 } | |
2253 } | |
2254 } | |
2255 | |
2256 private void prefixq(CiAddress adr, CiRegister src) { | |
2257 if (src.encoding < 8) { | |
2258 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2259 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2260 emitByte(Prefix.REXWXB); | |
2261 } else { | |
2262 emitByte(Prefix.REXWB); | |
2263 } | |
2264 } else { | |
2265 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2266 emitByte(Prefix.REXWX); | |
2267 } else { | |
2268 emitByte(Prefix.REXW); | |
2269 } | |
2270 } | |
2271 } else { | |
2272 if (adr.base().encoding >= MinEncodingNeedsRex) { | |
2273 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2274 emitByte(Prefix.REXWRXB); | |
2275 } else { | |
2276 emitByte(Prefix.REXWRB); | |
2277 } | |
2278 } else { | |
2279 if (adr.index().encoding >= MinEncodingNeedsRex) { | |
2280 emitByte(Prefix.REXWRX); | |
2281 } else { | |
2282 emitByte(Prefix.REXWR); | |
2283 } | |
2284 } | |
2285 } | |
2286 } | |
2287 | |
2288 public final void addq(CiAddress dst, int imm32) { | |
2289 prefixq(dst); | |
2290 emitArithOperand(0x81, rax, dst, imm32); | |
2291 } | |
2292 | |
2293 public final void addq(CiAddress dst, CiRegister src) { | |
2294 prefixq(dst, src); | |
2295 emitByte(0x01); | |
2296 emitOperandHelper(src, dst); | |
2297 } | |
2298 | |
2299 public final void addq(CiRegister dst, int imm32) { | |
2300 prefixqAndEncode(dst.encoding); | |
2301 emitArith(0x81, 0xC0, dst, imm32); | |
2302 } | |
2303 | |
2304 public final void addq(CiRegister dst, CiAddress src) { | |
2305 prefixq(src, dst); | |
2306 emitByte(0x03); | |
2307 emitOperandHelper(dst, src); | |
2308 } | |
2309 | |
2310 public final void addq(CiRegister dst, CiRegister src) { | |
2311 prefixqAndEncode(dst.encoding, src.encoding); | |
2312 emitArith(0x03, 0xC0, dst, src); | |
2313 } | |
2314 | |
2315 public final void andq(CiRegister dst, int imm32) { | |
2316 prefixqAndEncode(dst.encoding); | |
2317 emitArith(0x81, 0xE0, dst, imm32); | |
2318 } | |
2319 | |
2320 public final void andq(CiRegister dst, CiAddress src) { | |
2321 prefixq(src, dst); | |
2322 emitByte(0x23); | |
2323 emitOperandHelper(dst, src); | |
2324 } | |
2325 | |
2326 public final void andq(CiRegister dst, CiRegister src) { | |
2327 prefixqAndEncode(dst.encoding, src.encoding); | |
2328 emitArith(0x23, 0xC0, dst, src); | |
2329 } | |
2330 | |
2331 public final void bswapq(CiRegister reg) { | |
2332 int encode = prefixqAndEncode(reg.encoding); | |
2333 emitByte(0x0F); | |
2334 emitByte(0xC8 | encode); | |
2335 } | |
2336 | |
2337 public final void cdqq() { | |
2338 emitByte(Prefix.REXW); | |
2339 emitByte(0x99); | |
2340 } | |
2341 | |
2342 public final void cmovq(ConditionFlag cc, CiRegister dst, CiRegister src) { | |
2343 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2344 emitByte(0x0F); | |
2345 emitByte(0x40 | cc.value); | |
2346 emitByte(0xC0 | encode); | |
2347 } | |
2348 | |
2349 public final void cmovq(ConditionFlag cc, CiRegister dst, CiAddress src) { | |
2350 prefixq(src, dst); | |
2351 emitByte(0x0F); | |
2352 emitByte(0x40 | cc.value); | |
2353 emitOperandHelper(dst, src); | |
2354 } | |
2355 | |
2356 public final void cmpq(CiAddress dst, int imm32) { | |
2357 prefixq(dst); | |
2358 emitByte(0x81); | |
2359 emitOperandHelper(rdi, dst); | |
2360 emitInt(imm32); | |
2361 } | |
2362 | |
2363 public final void cmpq(CiRegister dst, int imm32) { | |
2364 prefixqAndEncode(dst.encoding); | |
2365 emitArith(0x81, 0xF8, dst, imm32); | |
2366 } | |
2367 | |
2368 public final void cmpq(CiAddress dst, CiRegister src) { | |
2369 prefixq(dst, src); | |
2370 emitByte(0x3B); | |
2371 emitOperandHelper(src, dst); | |
2372 } | |
2373 | |
2374 public final void cmpq(CiRegister dst, CiRegister src) { | |
2375 prefixqAndEncode(dst.encoding, src.encoding); | |
2376 emitArith(0x3B, 0xC0, dst, src); | |
2377 } | |
2378 | |
2379 public final void cmpq(CiRegister dst, CiAddress src) { | |
2380 prefixq(src, dst); | |
2381 emitByte(0x3B); | |
2382 emitOperandHelper(dst, src); | |
2383 } | |
2384 | |
2385 public final void cmpxchgq(CiRegister reg, CiAddress adr) { | |
2386 prefixq(adr, reg); | |
2387 emitByte(0x0F); | |
2388 emitByte(0xB1); | |
2389 emitOperandHelper(reg, adr); | |
2390 } | |
2391 | |
2392 public final void cvtsi2sdq(CiRegister dst, CiRegister src) { | |
2393 assert dst.isFpu(); | |
2394 emitByte(0xF2); | |
2395 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2396 emitByte(0x0F); | |
2397 emitByte(0x2A); | |
2398 emitByte(0xC0 | encode); | |
2399 } | |
2400 | |
2401 public final void cvtsi2ssq(CiRegister dst, CiRegister src) { | |
2402 assert dst.isFpu(); | |
2403 emitByte(0xF3); | |
2404 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2405 emitByte(0x0F); | |
2406 emitByte(0x2A); | |
2407 emitByte(0xC0 | encode); | |
2408 } | |
2409 | |
2410 public final void cvttsd2siq(CiRegister dst, CiRegister src) { | |
2411 assert src.isFpu(); | |
2412 emitByte(0xF2); | |
2413 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2414 emitByte(0x0F); | |
2415 emitByte(0x2C); | |
2416 emitByte(0xC0 | encode); | |
2417 } | |
2418 | |
2419 public final void cvttss2siq(CiRegister dst, CiRegister src) { | |
2420 assert src.isFpu(); | |
2421 emitByte(0xF3); | |
2422 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2423 emitByte(0x0F); | |
2424 emitByte(0x2C); | |
2425 emitByte(0xC0 | encode); | |
2426 } | |
2427 | |
2428 public final void decq(CiRegister dst) { | |
2429 // Don't use it directly. Use Macrodecrementq() instead. | |
2430 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) | |
2431 int encode = prefixqAndEncode(dst.encoding); | |
2432 emitByte(0xFF); | |
2433 emitByte(0xC8 | encode); | |
2434 } | |
2435 | |
2436 public final void decq(CiAddress dst) { | |
2437 // Don't use it directly. Use Macrodecrementq() instead. | |
2438 prefixq(dst); | |
2439 emitByte(0xFF); | |
2440 emitOperandHelper(rcx, dst); | |
2441 } | |
2442 | |
2443 public final void divq(CiRegister src) { | |
2444 int encode = prefixqAndEncode(src.encoding); | |
2445 emitByte(0xF7); | |
2446 emitByte(0xF0 | encode); | |
2447 } | |
2448 | |
2449 public final void idivq(CiRegister src) { | |
2450 int encode = prefixqAndEncode(src.encoding); | |
2451 emitByte(0xF7); | |
2452 emitByte(0xF8 | encode); | |
2453 } | |
2454 | |
2455 public final void imulq(CiRegister dst, CiRegister src) { | |
2456 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2457 emitByte(0x0F); | |
2458 emitByte(0xAF); | |
2459 emitByte(0xC0 | encode); | |
2460 } | |
2461 | |
2462 public final void imulq(CiRegister dst, CiRegister src, int value) { | |
2463 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2464 if (Util.isByte(value)) { | |
2465 emitByte(0x6B); | |
2466 emitByte(0xC0 | encode); | |
2467 emitByte(value); | |
2468 } else { | |
2469 emitByte(0x69); | |
2470 emitByte(0xC0 | encode); | |
2471 emitInt(value); | |
2472 } | |
2473 } | |
2474 | |
2475 public final void incq(CiRegister dst) { | |
2476 // Don't use it directly. Use Macroincrementq() instead. | |
2477 // Use two-byte form (one-byte from is a REX prefix in 64-bit mode) | |
2478 int encode = prefixqAndEncode(dst.encoding); | |
2479 emitByte(0xFF); | |
2480 emitByte(0xC0 | encode); | |
2481 } | |
2482 | |
2483 public final void incq(CiAddress dst) { | |
2484 // Don't use it directly. Use Macroincrementq() instead. | |
2485 prefixq(dst); | |
2486 emitByte(0xFF); | |
2487 emitOperandHelper(rax, dst); | |
2488 } | |
2489 | |
2490 public final void movq(CiRegister dst, long imm64) { | |
2491 int encode = prefixqAndEncode(dst.encoding); | |
2492 emitByte(0xB8 | encode); | |
2493 emitLong(imm64); | |
2494 } | |
2495 | |
2496 public final void movdq(CiRegister dst, CiRegister src) { | |
2497 | |
2498 // table D-1 says MMX/SSE2 | |
2499 emitByte(0x66); | |
2500 | |
2501 if (dst.isFpu()) { | |
2502 assert dst.isFpu(); | |
2503 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2504 emitByte(0x0F); | |
2505 emitByte(0x6E); | |
2506 emitByte(0xC0 | encode); | |
2507 } else if (src.isFpu()) { | |
2508 | |
2509 // swap src/dst to get correct prefix | |
2510 int encode = prefixqAndEncode(src.encoding, dst.encoding); | |
2511 emitByte(0x0F); | |
2512 emitByte(0x7E); | |
2513 emitByte(0xC0 | encode); | |
2514 } else { | |
2515 Util.shouldNotReachHere(); | |
2516 } | |
2517 } | |
2518 | |
2519 public final void movsbq(CiRegister dst, CiAddress src) { | |
2520 prefixq(src, dst); | |
2521 emitByte(0x0F); | |
2522 emitByte(0xBE); | |
2523 emitOperandHelper(dst, src); | |
2524 } | |
2525 | |
2526 public final void movsbq(CiRegister dst, CiRegister src) { | |
2527 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2528 emitByte(0x0F); | |
2529 emitByte(0xBE); | |
2530 emitByte(0xC0 | encode); | |
2531 } | |
2532 | |
2533 public final void movslq(CiRegister dst, int imm32) { | |
2534 // dbx shows movslq(X86.rcx, 3) as movq $0x0000000049000000,(%X86.rbx) | |
2535 // and movslq(X86.r8, 3); as movl $0x0000000048000000,(%X86.rbx) | |
2536 // as a result we shouldn't use until tested at runtime... | |
2537 Util.shouldNotReachHere(); | |
2538 | |
2539 int encode = prefixqAndEncode(dst.encoding); | |
2540 emitByte(0xC7 | encode); | |
2541 emitInt(imm32); | |
2542 } | |
2543 | |
2544 public final void movslq(CiAddress dst, int imm32) { | |
2545 prefixq(dst); | |
2546 emitByte(0xC7); | |
2547 emitOperandHelper(rax, dst); | |
2548 emitInt(imm32); | |
2549 } | |
2550 | |
2551 public final void movslq(CiRegister dst, CiAddress src) { | |
2552 prefixq(src, dst); | |
2553 emitByte(0x63); | |
2554 emitOperandHelper(dst, src); | |
2555 } | |
2556 | |
2557 public final void movslq(CiRegister dst, CiRegister src) { | |
2558 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2559 emitByte(0x63); | |
2560 emitByte(0xC0 | encode); | |
2561 } | |
2562 | |
2563 public final void movswq(CiRegister dst, CiAddress src) { | |
2564 prefixq(src, dst); | |
2565 emitByte(0x0F); | |
2566 emitByte(0xBF); | |
2567 emitOperandHelper(dst, src); | |
2568 } | |
2569 | |
2570 public final void movswq(CiRegister dst, CiRegister src) { | |
2571 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2572 emitByte(0x0F); | |
2573 emitByte(0xBF); | |
2574 emitByte(0xC0 | encode); | |
2575 } | |
2576 | |
2577 public final void movzbq(CiRegister dst, CiAddress src) { | |
2578 prefixq(src, dst); | |
2579 emitByte(0x0F); | |
2580 emitByte(0xB6); | |
2581 emitOperandHelper(dst, src); | |
2582 } | |
2583 | |
2584 public final void movzbq(CiRegister dst, CiRegister src) { | |
2585 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2586 emitByte(0x0F); | |
2587 emitByte(0xB6); | |
2588 emitByte(0xC0 | encode); | |
2589 } | |
2590 | |
2591 public final void movzwq(CiRegister dst, CiAddress src) { | |
2592 prefixq(src, dst); | |
2593 emitByte(0x0F); | |
2594 emitByte(0xB7); | |
2595 emitOperandHelper(dst, src); | |
2596 } | |
2597 | |
2598 public final void movzwq(CiRegister dst, CiRegister src) { | |
2599 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2600 emitByte(0x0F); | |
2601 emitByte(0xB7); | |
2602 emitByte(0xC0 | encode); | |
2603 } | |
2604 | |
2605 public final void negq(CiRegister dst) { | |
2606 int encode = prefixqAndEncode(dst.encoding); | |
2607 emitByte(0xF7); | |
2608 emitByte(0xD8 | encode); | |
2609 } | |
2610 | |
2611 public final void notq(CiRegister dst) { | |
2612 int encode = prefixqAndEncode(dst.encoding); | |
2613 emitByte(0xF7); | |
2614 emitByte(0xD0 | encode); | |
2615 } | |
2616 | |
2617 public final void orq(CiAddress dst, int imm32) { | |
2618 prefixq(dst); | |
2619 emitByte(0x81); | |
2620 emitOperandHelper(rcx, dst); | |
2621 emitInt(imm32); | |
2622 } | |
2623 | |
2624 public final void orq(CiRegister dst, int imm32) { | |
2625 prefixqAndEncode(dst.encoding); | |
2626 emitArith(0x81, 0xC8, dst, imm32); | |
2627 } | |
2628 | |
2629 public final void orq(CiRegister dst, CiAddress src) { | |
2630 prefixq(src, dst); | |
2631 emitByte(0x0B); | |
2632 emitOperandHelper(dst, src); | |
2633 } | |
2634 | |
2635 public final void orq(CiRegister dst, CiRegister src) { | |
2636 prefixqAndEncode(dst.encoding, src.encoding); | |
2637 emitArith(0x0B, 0xC0, dst, src); | |
2638 } | |
2639 | |
2640 public final void popq(CiAddress dst) { | |
2641 prefixq(dst); | |
2642 emitByte(0x8F); | |
2643 emitOperandHelper(rax, dst); | |
2644 } | |
2645 | |
2646 public final void pushq(CiAddress src) { | |
2647 prefixq(src); | |
2648 emitByte(0xFF); | |
2649 emitOperandHelper(rsi, src); | |
2650 } | |
2651 | |
2652 public final void rclq(CiRegister dst, int imm8) { | |
2653 assert Util.isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2654 int encode = prefixqAndEncode(dst.encoding); | |
2655 if (imm8 == 1) { | |
2656 emitByte(0xD1); | |
2657 emitByte(0xD0 | encode); | |
2658 } else { | |
2659 emitByte(0xC1); | |
2660 emitByte(0xD0 | encode); | |
2661 emitByte(imm8); | |
2662 } | |
2663 } | |
2664 | |
2665 public final void sarq(CiRegister dst, int imm8) { | |
2666 assert Util.isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2667 int encode = prefixqAndEncode(dst.encoding); | |
2668 if (imm8 == 1) { | |
2669 emitByte(0xD1); | |
2670 emitByte(0xF8 | encode); | |
2671 } else { | |
2672 emitByte(0xC1); | |
2673 emitByte(0xF8 | encode); | |
2674 emitByte(imm8); | |
2675 } | |
2676 } | |
2677 | |
2678 public final void sarq(CiRegister dst) { | |
2679 int encode = prefixqAndEncode(dst.encoding); | |
2680 emitByte(0xD3); | |
2681 emitByte(0xF8 | encode); | |
2682 } | |
2683 | |
2684 public final void shlq(CiRegister dst, int imm8) { | |
2685 assert Util.isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2686 int encode = prefixqAndEncode(dst.encoding); | |
2687 if (imm8 == 1) { | |
2688 emitByte(0xD1); | |
2689 emitByte(0xE0 | encode); | |
2690 } else { | |
2691 emitByte(0xC1); | |
2692 emitByte(0xE0 | encode); | |
2693 emitByte(imm8); | |
2694 } | |
2695 } | |
2696 | |
2697 public final void shlq(CiRegister dst) { | |
2698 int encode = prefixqAndEncode(dst.encoding); | |
2699 emitByte(0xD3); | |
2700 emitByte(0xE0 | encode); | |
2701 } | |
2702 | |
2703 public final void shrq(CiRegister dst, int imm8) { | |
2704 assert Util.isShiftCount(imm8 >> 1) : "illegal shift count"; | |
2705 int encode = prefixqAndEncode(dst.encoding); | |
2706 emitByte(0xC1); | |
2707 emitByte(0xE8 | encode); | |
2708 emitByte(imm8); | |
2709 } | |
2710 | |
2711 public final void shrq(CiRegister dst) { | |
2712 int encode = prefixqAndEncode(dst.encoding); | |
2713 emitByte(0xD3); | |
2714 emitByte(0xE8 | encode); | |
2715 } | |
2716 | |
2717 public final void sqrtsd(CiRegister dst, CiAddress src) { | |
2718 assert dst.isFpu(); | |
2719 | |
2720 emitByte(0xF2); | |
2721 prefix(src, dst); | |
2722 emitByte(0x0F); | |
2723 emitByte(0x51); | |
2724 emitOperandHelper(dst, src); | |
2725 } | |
2726 | |
2727 public final void subq(CiAddress dst, int imm32) { | |
2728 prefixq(dst); | |
2729 if (Util.isByte(imm32)) { | |
2730 emitByte(0x83); | |
2731 emitOperandHelper(rbp, dst); | |
2732 emitByte(imm32 & 0xFF); | |
2733 } else { | |
2734 emitByte(0x81); | |
2735 emitOperandHelper(rbp, dst); | |
2736 emitInt(imm32); | |
2737 } | |
2738 } | |
2739 | |
2740 public final void subq(CiRegister dst, int imm32) { | |
2741 prefixqAndEncode(dst.encoding); | |
2742 emitArith(0x81, 0xE8, dst, imm32); | |
2743 } | |
2744 | |
2745 public final void subq(CiAddress dst, CiRegister src) { | |
2746 prefixq(dst, src); | |
2747 emitByte(0x29); | |
2748 emitOperandHelper(src, dst); | |
2749 } | |
2750 | |
2751 public final void subq(CiRegister dst, CiAddress src) { | |
2752 prefixq(src, dst); | |
2753 emitByte(0x2B); | |
2754 emitOperandHelper(dst, src); | |
2755 } | |
2756 | |
2757 public final void subq(CiRegister dst, CiRegister src) { | |
2758 prefixqAndEncode(dst.encoding, src.encoding); | |
2759 emitArith(0x2B, 0xC0, dst, src); | |
2760 } | |
2761 | |
2762 public final void testq(CiRegister dst, int imm32) { | |
2763 // not using emitArith because test | |
2764 // doesn't support sign-extension of | |
2765 // 8bit operands | |
2766 int encode = dst.encoding; | |
2767 if (encode == 0) { | |
2768 emitByte(Prefix.REXW); | |
2769 emitByte(0xA9); | |
2770 } else { | |
2771 encode = prefixqAndEncode(encode); | |
2772 emitByte(0xF7); | |
2773 emitByte(0xC0 | encode); | |
2774 } | |
2775 emitInt(imm32); | |
2776 } | |
2777 | |
2778 public final void testq(CiRegister dst, CiRegister src) { | |
2779 prefixqAndEncode(dst.encoding, src.encoding); | |
2780 emitArith(0x85, 0xC0, dst, src); | |
2781 } | |
2782 | |
2783 public final void xaddq(CiAddress dst, CiRegister src) { | |
2784 prefixq(dst, src); | |
2785 emitByte(0x0F); | |
2786 emitByte(0xC1); | |
2787 emitOperandHelper(src, dst); | |
2788 } | |
2789 | |
2790 public final void xchgq(CiRegister dst, CiAddress src) { | |
2791 prefixq(src, dst); | |
2792 emitByte(0x87); | |
2793 emitOperandHelper(dst, src); | |
2794 } | |
2795 | |
2796 public final void xchgq(CiRegister dst, CiRegister src) { | |
2797 int encode = prefixqAndEncode(dst.encoding, src.encoding); | |
2798 emitByte(0x87); | |
2799 emitByte(0xc0 | encode); | |
2800 } | |
2801 | |
2802 public final void xorq(CiRegister dst, CiRegister src) { | |
2803 prefixqAndEncode(dst.encoding, src.encoding); | |
2804 emitArith(0x33, 0xC0, dst, src); | |
2805 } | |
2806 | |
2807 public final void xorq(CiRegister dst, CiAddress src) { | |
2808 | |
2809 prefixq(src, dst); | |
2810 emitByte(0x33); | |
2811 emitOperandHelper(dst, src); | |
2812 | |
2813 } | |
2814 | |
2815 public final void membar(int barriers) { | |
2816 if (target.isMP) { | |
2817 // We only have to handle StoreLoad | |
2818 if ((barriers & STORE_LOAD) != 0) { | |
2819 // All usable chips support "locked" instructions which suffice | |
2820 // as barriers, and are much faster than the alternative of | |
2821 // using cpuid instruction. We use here a locked add [rsp],0. | |
2822 // This is conveniently otherwise a no-op except for blowing | |
2823 // flags. | |
2824 // Any change to this code may need to revisit other places in | |
2825 // the code where this idiom is used, in particular the | |
2826 // orderAccess code. | |
2827 lock(); | |
2828 addl(new CiAddress(CiKind.Word, RSP, 0), 0); // Assert the lock# signal here | |
2829 } | |
2830 } | |
2831 } | |
2832 | |
2833 @Override | |
2834 public final void patchJumpTarget(int branch, int branchTarget) { | |
2835 int op = codeBuffer.getByte(branch); | |
2836 assert op == 0xE8 // call | |
2837 || op == 0x00 // jump table entry | |
2838 || op == 0xE9 // jmp | |
2839 || op == 0xEB // short jmp | |
2840 || (op & 0xF0) == 0x70 // short jcc | |
2841 || op == 0x0F && (codeBuffer.getByte(branch + 1) & 0xF0) == 0x80 // jcc | |
2842 : "Invalid opcode at patch point branch=" + branch + ", branchTarget=" + branchTarget + ", op=" + op; | |
2843 | |
2844 if (op == 0x00) { | |
2845 int offsetToJumpTableBase = codeBuffer.getShort(branch + 1); | |
2846 int jumpTableBase = branch - offsetToJumpTableBase; | |
2847 int imm32 = branchTarget - jumpTableBase; | |
2848 codeBuffer.emitInt(imm32, branch); | |
2849 } else if (op == 0xEB || (op & 0xF0) == 0x70) { | |
2850 | |
2851 // short offset operators (jmp and jcc) | |
2852 int imm8 = branchTarget - (branch + 2); | |
2853 codeBuffer.emitByte(imm8, branch + 1); | |
2854 | |
2855 } else { | |
2856 | |
2857 int off = 1; | |
2858 if (op == 0x0F) { | |
2859 off = 2; | |
2860 } | |
2861 | |
2862 int imm32 = branchTarget - (branch + 4 + off); | |
2863 codeBuffer.emitInt(imm32, branch + off); | |
2864 } | |
2865 } | |
2866 | |
2867 @Override | |
2868 public void nullCheck(CiRegister r) { | |
2869 testl(AMD64.rax, new CiAddress(CiKind.Word, r.asValue(Word), 0)); | |
2870 } | |
2871 | |
2872 @Override | |
2873 public void align(int modulus) { | |
2874 if (codeBuffer.position() % modulus != 0) { | |
2875 nop(modulus - (codeBuffer.position() % modulus)); | |
2876 } | |
2877 } | |
2878 } |