Mercurial > hg > truffle
comparison src/cpu/x86/vm/assembler_x86.cpp @ 4947:fd8114661503
7125136: SIGILL on linux amd64 in gc/ArrayJuggle/Juggle29
Summary: For C2 moved saving EBP after ESP adjustment. For C1 generated 5 byte nop instruction first if needed.
Reviewed-by: never, twisti, azeemj
author | kvn |
---|---|
date | Wed, 15 Feb 2012 21:37:49 -0800 |
parents | 1cb50d7a9d95 |
children | 33df1aeaebbf fd09f2d8283e |
comparison
equal
deleted
inserted
replaced
4946:69333a2fbae2 | 4947:fd8114661503 |
---|---|
232 } else { | 232 } else { |
233 emit_byte(op1); | 233 emit_byte(op1); |
234 emit_byte(op2 | encode(dst)); | 234 emit_byte(op2 | encode(dst)); |
235 emit_long(imm32); | 235 emit_long(imm32); |
236 } | 236 } |
237 } | |
238 | |
239 // Force generation of a 4 byte immediate value even if it fits into 8bit | |
240 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) { | |
241 assert(isByte(op1) && isByte(op2), "wrong opcode"); | |
242 assert((op1 & 0x01) == 1, "should be 32bit operation"); | |
243 assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); | |
244 emit_byte(op1); | |
245 emit_byte(op2 | encode(dst)); | |
246 emit_long(imm32); | |
237 } | 247 } |
238 | 248 |
239 // immediate-to-memory forms | 249 // immediate-to-memory forms |
240 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { | 250 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { |
241 assert((op1 & 0x01) == 1, "should be 32bit operation"); | 251 assert((op1 & 0x01) == 1, "should be 32bit operation"); |
937 (void) prefix_and_encode(dst->encoding(), src->encoding()); | 947 (void) prefix_and_encode(dst->encoding(), src->encoding()); |
938 emit_arith(0x03, 0xC0, dst, src); | 948 emit_arith(0x03, 0xC0, dst, src); |
939 } | 949 } |
940 | 950 |
941 void Assembler::addr_nop_4() { | 951 void Assembler::addr_nop_4() { |
952 assert(UseAddressNop, "no CPU support"); | |
942 // 4 bytes: NOP DWORD PTR [EAX+0] | 953 // 4 bytes: NOP DWORD PTR [EAX+0] |
943 emit_byte(0x0F); | 954 emit_byte(0x0F); |
944 emit_byte(0x1F); | 955 emit_byte(0x1F); |
945 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); | 956 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); |
946 emit_byte(0); // 8-bits offset (1 byte) | 957 emit_byte(0); // 8-bits offset (1 byte) |
947 } | 958 } |
948 | 959 |
949 void Assembler::addr_nop_5() { | 960 void Assembler::addr_nop_5() { |
961 assert(UseAddressNop, "no CPU support"); | |
950 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset | 962 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset |
951 emit_byte(0x0F); | 963 emit_byte(0x0F); |
952 emit_byte(0x1F); | 964 emit_byte(0x1F); |
953 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); | 965 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); |
954 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); | 966 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); |
955 emit_byte(0); // 8-bits offset (1 byte) | 967 emit_byte(0); // 8-bits offset (1 byte) |
956 } | 968 } |
957 | 969 |
958 void Assembler::addr_nop_7() { | 970 void Assembler::addr_nop_7() { |
971 assert(UseAddressNop, "no CPU support"); | |
959 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset | 972 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset |
960 emit_byte(0x0F); | 973 emit_byte(0x0F); |
961 emit_byte(0x1F); | 974 emit_byte(0x1F); |
962 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); | 975 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); |
963 emit_long(0); // 32-bits offset (4 bytes) | 976 emit_long(0); // 32-bits offset (4 bytes) |
964 } | 977 } |
965 | 978 |
966 void Assembler::addr_nop_8() { | 979 void Assembler::addr_nop_8() { |
980 assert(UseAddressNop, "no CPU support"); | |
967 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset | 981 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset |
968 emit_byte(0x0F); | 982 emit_byte(0x0F); |
969 emit_byte(0x1F); | 983 emit_byte(0x1F); |
970 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); | 984 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); |
971 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); | 985 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); |
2767 void Assembler::subl(Register dst, int32_t imm32) { | 2781 void Assembler::subl(Register dst, int32_t imm32) { |
2768 prefix(dst); | 2782 prefix(dst); |
2769 emit_arith(0x81, 0xE8, dst, imm32); | 2783 emit_arith(0x81, 0xE8, dst, imm32); |
2770 } | 2784 } |
2771 | 2785 |
2786 // Force generation of a 4 byte immediate value even if it fits into 8bit | |
2787 void Assembler::subl_imm32(Register dst, int32_t imm32) { | |
2788 prefix(dst); | |
2789 emit_arith_imm32(0x81, 0xE8, dst, imm32); | |
2790 } | |
2791 | |
2772 void Assembler::subl(Register dst, Address src) { | 2792 void Assembler::subl(Register dst, Address src) { |
2773 InstructionMark im(this); | 2793 InstructionMark im(this); |
2774 prefix(src, dst); | 2794 prefix(src, dst); |
2775 emit_byte(0x2B); | 2795 emit_byte(0x2B); |
2776 emit_operand(dst, src); | 2796 emit_operand(dst, src); |
4758 void Assembler::subq(Register dst, int32_t imm32) { | 4778 void Assembler::subq(Register dst, int32_t imm32) { |
4759 (void) prefixq_and_encode(dst->encoding()); | 4779 (void) prefixq_and_encode(dst->encoding()); |
4760 emit_arith(0x81, 0xE8, dst, imm32); | 4780 emit_arith(0x81, 0xE8, dst, imm32); |
4761 } | 4781 } |
4762 | 4782 |
4783 // Force generation of a 4 byte immediate value even if it fits into 8bit | |
4784 void Assembler::subq_imm32(Register dst, int32_t imm32) { | |
4785 (void) prefixq_and_encode(dst->encoding()); | |
4786 emit_arith_imm32(0x81, 0xE8, dst, imm32); | |
4787 } | |
4788 | |
4763 void Assembler::subq(Register dst, Address src) { | 4789 void Assembler::subq(Register dst, Address src) { |
4764 InstructionMark im(this); | 4790 InstructionMark im(this); |
4765 prefixq(src, dst); | 4791 prefixq(src, dst); |
4766 emit_byte(0x2B); | 4792 emit_byte(0x2B); |
4767 emit_operand(dst, src); | 4793 emit_operand(dst, src); |
5097 cdql(); | 5123 cdql(); |
5098 } else { | 5124 } else { |
5099 movl(hi, lo); | 5125 movl(hi, lo); |
5100 sarl(hi, 31); | 5126 sarl(hi, 31); |
5101 } | 5127 } |
5102 } | |
5103 | |
5104 void MacroAssembler::fat_nop() { | |
5105 // A 5 byte nop that is safe for patching (see patch_verified_entry) | |
5106 emit_byte(0x26); // es: | |
5107 emit_byte(0x2e); // cs: | |
5108 emit_byte(0x64); // fs: | |
5109 emit_byte(0x65); // gs: | |
5110 emit_byte(0x90); | |
5111 } | 5128 } |
5112 | 5129 |
5113 void MacroAssembler::jC2(Register tmp, Label& L) { | 5130 void MacroAssembler::jC2(Register tmp, Label& L) { |
5114 // set parity bit if FPU flag C2 is set (via rax) | 5131 // set parity bit if FPU flag C2 is set (via rax) |
5115 save_rax(tmp); | 5132 save_rax(tmp); |
5700 if (value == min_jint) { subq(dst, value); return; } | 5717 if (value == min_jint) { subq(dst, value); return; } |
5701 if (value < 0) { incrementq(dst, -value); return; } | 5718 if (value < 0) { incrementq(dst, -value); return; } |
5702 if (value == 0) { ; return; } | 5719 if (value == 0) { ; return; } |
5703 if (value == 1 && UseIncDec) { decq(dst) ; return; } | 5720 if (value == 1 && UseIncDec) { decq(dst) ; return; } |
5704 /* else */ { subq(dst, value) ; return; } | 5721 /* else */ { subq(dst, value) ; return; } |
5705 } | |
5706 | |
5707 void MacroAssembler::fat_nop() { | |
5708 // A 5 byte nop that is safe for patching (see patch_verified_entry) | |
5709 // Recommended sequence from 'Software Optimization Guide for the AMD | |
5710 // Hammer Processor' | |
5711 emit_byte(0x66); | |
5712 emit_byte(0x66); | |
5713 emit_byte(0x90); | |
5714 emit_byte(0x66); | |
5715 emit_byte(0x90); | |
5716 } | 5722 } |
5717 | 5723 |
5718 void MacroAssembler::incrementq(Register reg, int value) { | 5724 void MacroAssembler::incrementq(Register reg, int value) { |
5719 if (value == min_jint) { addq(reg, value); return; } | 5725 if (value == min_jint) { addq(reg, value); return; } |
5720 if (value < 0) { decrementq(reg, -value); return; } | 5726 if (value < 0) { decrementq(reg, -value); return; } |
6764 void MacroAssembler::enter() { | 6770 void MacroAssembler::enter() { |
6765 push(rbp); | 6771 push(rbp); |
6766 mov(rbp, rsp); | 6772 mov(rbp, rsp); |
6767 } | 6773 } |
6768 | 6774 |
6775 // A 5 byte nop that is safe for patching (see patch_verified_entry) | |
6776 void MacroAssembler::fat_nop() { | |
6777 if (UseAddressNop) { | |
6778 addr_nop_5(); | |
6779 } else { | |
6780 emit_byte(0x26); // es: | |
6781 emit_byte(0x2e); // cs: | |
6782 emit_byte(0x64); // fs: | |
6783 emit_byte(0x65); // gs: | |
6784 emit_byte(0x90); | |
6785 } | |
6786 } | |
6787 | |
6769 void MacroAssembler::fcmp(Register tmp) { | 6788 void MacroAssembler::fcmp(Register tmp) { |
6770 fcmp(tmp, 1, true, true); | 6789 fcmp(tmp, 1, true, true); |
6771 } | 6790 } |
6772 | 6791 |
6773 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { | 6792 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { |
7821 } | 7840 } |
7822 } | 7841 } |
7823 | 7842 |
7824 void MacroAssembler::subptr(Register dst, int32_t imm32) { | 7843 void MacroAssembler::subptr(Register dst, int32_t imm32) { |
7825 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); | 7844 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); |
7845 } | |
7846 | |
7847 // Force generation of a 4 byte immediate value even if it fits into 8bit | |
7848 void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { | |
7849 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); | |
7826 } | 7850 } |
7827 | 7851 |
7828 void MacroAssembler::subptr(Register dst, Register src) { | 7852 void MacroAssembler::subptr(Register dst, Register src) { |
7829 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); | 7853 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); |
7830 } | 7854 } |
9290 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); | 9314 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); |
9291 } | 9315 } |
9292 } | 9316 } |
9293 #endif // _LP64 | 9317 #endif // _LP64 |
9294 | 9318 |
9319 | |
9320 // C2 compiled method's prolog code. | |
9321 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { | |
9322 | |
9323 // WARNING: Initial instruction MUST be 5 bytes or longer so that | |
9324 // NativeJump::patch_verified_entry will be able to patch out the entry | |
9325 // code safely. The push to verify stack depth is ok at 5 bytes, | |
9326 // the frame allocation can be either 3 or 6 bytes. So if we don't do | |
9327 // stack bang then we must use the 6 byte frame allocation even if | |
9328 // we have no frame. :-( | |
9329 | |
9330 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); | |
9331 // Remove word for return addr | |
9332 framesize -= wordSize; | |
9333 | |
9334 // Calls to C2R adapters often do not accept exceptional returns. | |
9335 // We require that their callers must bang for them. But be careful, because | |
9336 // some VM calls (such as call site linkage) can use several kilobytes of | |
9337 // stack. But the stack safety zone should account for that. | |
9338 // See bugs 4446381, 4468289, 4497237. | |
9339 if (stack_bang) { | |
9340 generate_stack_overflow_check(framesize); | |
9341 | |
9342 // We always push rbp, so that on return to interpreter rbp will be | |
9343 // restored correctly and we can correct the stack. | |
9344 push(rbp); | |
9345 // Remove word for ebp | |
9346 framesize -= wordSize; | |
9347 | |
9348 // Create frame | |
9349 if (framesize) { | |
9350 subptr(rsp, framesize); | |
9351 } | |
9352 } else { | |
9353 // Create frame (force generation of a 4 byte immediate value) | |
9354 subptr_imm32(rsp, framesize); | |
9355 | |
9356 // Save RBP register now. | |
9357 framesize -= wordSize; | |
9358 movptr(Address(rsp, framesize), rbp); | |
9359 } | |
9360 | |
9361 if (VerifyStackAtCalls) { // Majik cookie to verify stack depth | |
9362 framesize -= wordSize; | |
9363 movptr(Address(rsp, framesize), (int32_t)0xbadb100d); | |
9364 } | |
9365 | |
9366 #ifndef _LP64 | |
9367 // If method sets FPU control word do it now | |
9368 if (fp_mode_24b) { | |
9369 fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); | |
9370 } | |
9371 if (UseSSE >= 2 && VerifyFPU) { | |
9372 verify_FPU(0, "FPU stack must be clean on entry"); | |
9373 } | |
9374 #endif | |
9375 | |
9376 #ifdef ASSERT | |
9377 if (VerifyStackAtCalls) { | |
9378 Label L; | |
9379 push(rax); | |
9380 mov(rax, rsp); | |
9381 andptr(rax, StackAlignmentInBytes-1); | |
9382 cmpptr(rax, StackAlignmentInBytes-wordSize); | |
9383 pop(rax); | |
9384 jcc(Assembler::equal, L); | |
9385 stop("Stack is not properly aligned!"); | |
9386 bind(L); | |
9387 } | |
9388 #endif | |
9389 | |
9390 } | |
9391 | |
9392 | |
9295 // IndexOf for constant substrings with size >= 8 chars | 9393 // IndexOf for constant substrings with size >= 8 chars |
9296 // which don't need to be loaded through stack. | 9394 // which don't need to be loaded through stack. |
9297 void MacroAssembler::string_indexofC8(Register str1, Register str2, | 9395 void MacroAssembler::string_indexofC8(Register str1, Register str2, |
9298 Register cnt1, Register cnt2, | 9396 Register cnt1, Register cnt2, |
9299 int int_cnt2, Register result, | 9397 int int_cnt2, Register result, |