comparison src/cpu/x86/vm/assembler_x86.cpp @ 4947:fd8114661503

7125136: SIGILL on linux amd64 in gc/ArrayJuggle/Juggle29 Summary: For C2 moved saving EBP after ESP adjustment. For C1 generated 5 byte nop instruction first if needed. Reviewed-by: never, twisti, azeemj
author kvn
date Wed, 15 Feb 2012 21:37:49 -0800
parents 1cb50d7a9d95
children 33df1aeaebbf fd09f2d8283e
comparison
equal deleted inserted replaced
4946:69333a2fbae2 4947:fd8114661503
232 } else { 232 } else {
233 emit_byte(op1); 233 emit_byte(op1);
234 emit_byte(op2 | encode(dst)); 234 emit_byte(op2 | encode(dst));
235 emit_long(imm32); 235 emit_long(imm32);
236 } 236 }
237 }
238
239 // Force generation of a 4 byte immediate value even if it fits into 8bit
240 void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
241 assert(isByte(op1) && isByte(op2), "wrong opcode");
242 assert((op1 & 0x01) == 1, "should be 32bit operation");
243 assert((op1 & 0x02) == 0, "sign-extension bit should not be set");
244 emit_byte(op1);
245 emit_byte(op2 | encode(dst));
246 emit_long(imm32);
237 } 247 }
238 248
239 // immediate-to-memory forms 249 // immediate-to-memory forms
240 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) { 250 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
241 assert((op1 & 0x01) == 1, "should be 32bit operation"); 251 assert((op1 & 0x01) == 1, "should be 32bit operation");
937 (void) prefix_and_encode(dst->encoding(), src->encoding()); 947 (void) prefix_and_encode(dst->encoding(), src->encoding());
938 emit_arith(0x03, 0xC0, dst, src); 948 emit_arith(0x03, 0xC0, dst, src);
939 } 949 }
940 950
941 void Assembler::addr_nop_4() { 951 void Assembler::addr_nop_4() {
952 assert(UseAddressNop, "no CPU support");
942 // 4 bytes: NOP DWORD PTR [EAX+0] 953 // 4 bytes: NOP DWORD PTR [EAX+0]
943 emit_byte(0x0F); 954 emit_byte(0x0F);
944 emit_byte(0x1F); 955 emit_byte(0x1F);
945 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc); 956 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
946 emit_byte(0); // 8-bits offset (1 byte) 957 emit_byte(0); // 8-bits offset (1 byte)
947 } 958 }
948 959
949 void Assembler::addr_nop_5() { 960 void Assembler::addr_nop_5() {
961 assert(UseAddressNop, "no CPU support");
950 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset 962 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
951 emit_byte(0x0F); 963 emit_byte(0x0F);
952 emit_byte(0x1F); 964 emit_byte(0x1F);
953 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4); 965 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
954 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 966 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
955 emit_byte(0); // 8-bits offset (1 byte) 967 emit_byte(0); // 8-bits offset (1 byte)
956 } 968 }
957 969
958 void Assembler::addr_nop_7() { 970 void Assembler::addr_nop_7() {
971 assert(UseAddressNop, "no CPU support");
959 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset 972 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
960 emit_byte(0x0F); 973 emit_byte(0x0F);
961 emit_byte(0x1F); 974 emit_byte(0x1F);
962 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc); 975 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
963 emit_long(0); // 32-bits offset (4 bytes) 976 emit_long(0); // 32-bits offset (4 bytes)
964 } 977 }
965 978
966 void Assembler::addr_nop_8() { 979 void Assembler::addr_nop_8() {
980 assert(UseAddressNop, "no CPU support");
967 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset 981 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
968 emit_byte(0x0F); 982 emit_byte(0x0F);
969 emit_byte(0x1F); 983 emit_byte(0x1F);
970 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4); 984 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
971 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc); 985 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
2767 void Assembler::subl(Register dst, int32_t imm32) { 2781 void Assembler::subl(Register dst, int32_t imm32) {
2768 prefix(dst); 2782 prefix(dst);
2769 emit_arith(0x81, 0xE8, dst, imm32); 2783 emit_arith(0x81, 0xE8, dst, imm32);
2770 } 2784 }
2771 2785
2786 // Force generation of a 4 byte immediate value even if it fits into 8bit
2787 void Assembler::subl_imm32(Register dst, int32_t imm32) {
2788 prefix(dst);
2789 emit_arith_imm32(0x81, 0xE8, dst, imm32);
2790 }
2791
2772 void Assembler::subl(Register dst, Address src) { 2792 void Assembler::subl(Register dst, Address src) {
2773 InstructionMark im(this); 2793 InstructionMark im(this);
2774 prefix(src, dst); 2794 prefix(src, dst);
2775 emit_byte(0x2B); 2795 emit_byte(0x2B);
2776 emit_operand(dst, src); 2796 emit_operand(dst, src);
4758 void Assembler::subq(Register dst, int32_t imm32) { 4778 void Assembler::subq(Register dst, int32_t imm32) {
4759 (void) prefixq_and_encode(dst->encoding()); 4779 (void) prefixq_and_encode(dst->encoding());
4760 emit_arith(0x81, 0xE8, dst, imm32); 4780 emit_arith(0x81, 0xE8, dst, imm32);
4761 } 4781 }
4762 4782
4783 // Force generation of a 4 byte immediate value even if it fits into 8bit
4784 void Assembler::subq_imm32(Register dst, int32_t imm32) {
4785 (void) prefixq_and_encode(dst->encoding());
4786 emit_arith_imm32(0x81, 0xE8, dst, imm32);
4787 }
4788
4763 void Assembler::subq(Register dst, Address src) { 4789 void Assembler::subq(Register dst, Address src) {
4764 InstructionMark im(this); 4790 InstructionMark im(this);
4765 prefixq(src, dst); 4791 prefixq(src, dst);
4766 emit_byte(0x2B); 4792 emit_byte(0x2B);
4767 emit_operand(dst, src); 4793 emit_operand(dst, src);
5097 cdql(); 5123 cdql();
5098 } else { 5124 } else {
5099 movl(hi, lo); 5125 movl(hi, lo);
5100 sarl(hi, 31); 5126 sarl(hi, 31);
5101 } 5127 }
5102 }
5103
5104 void MacroAssembler::fat_nop() {
5105 // A 5 byte nop that is safe for patching (see patch_verified_entry)
5106 emit_byte(0x26); // es:
5107 emit_byte(0x2e); // cs:
5108 emit_byte(0x64); // fs:
5109 emit_byte(0x65); // gs:
5110 emit_byte(0x90);
5111 } 5128 }
5112 5129
5113 void MacroAssembler::jC2(Register tmp, Label& L) { 5130 void MacroAssembler::jC2(Register tmp, Label& L) {
5114 // set parity bit if FPU flag C2 is set (via rax) 5131 // set parity bit if FPU flag C2 is set (via rax)
5115 save_rax(tmp); 5132 save_rax(tmp);
5700 if (value == min_jint) { subq(dst, value); return; } 5717 if (value == min_jint) { subq(dst, value); return; }
5701 if (value < 0) { incrementq(dst, -value); return; } 5718 if (value < 0) { incrementq(dst, -value); return; }
5702 if (value == 0) { ; return; } 5719 if (value == 0) { ; return; }
5703 if (value == 1 && UseIncDec) { decq(dst) ; return; } 5720 if (value == 1 && UseIncDec) { decq(dst) ; return; }
5704 /* else */ { subq(dst, value) ; return; } 5721 /* else */ { subq(dst, value) ; return; }
5705 }
5706
5707 void MacroAssembler::fat_nop() {
5708 // A 5 byte nop that is safe for patching (see patch_verified_entry)
5709 // Recommended sequence from 'Software Optimization Guide for the AMD
5710 // Hammer Processor'
5711 emit_byte(0x66);
5712 emit_byte(0x66);
5713 emit_byte(0x90);
5714 emit_byte(0x66);
5715 emit_byte(0x90);
5716 } 5722 }
5717 5723
5718 void MacroAssembler::incrementq(Register reg, int value) { 5724 void MacroAssembler::incrementq(Register reg, int value) {
5719 if (value == min_jint) { addq(reg, value); return; } 5725 if (value == min_jint) { addq(reg, value); return; }
5720 if (value < 0) { decrementq(reg, -value); return; } 5726 if (value < 0) { decrementq(reg, -value); return; }
6764 void MacroAssembler::enter() { 6770 void MacroAssembler::enter() {
6765 push(rbp); 6771 push(rbp);
6766 mov(rbp, rsp); 6772 mov(rbp, rsp);
6767 } 6773 }
6768 6774
6775 // A 5 byte nop that is safe for patching (see patch_verified_entry)
6776 void MacroAssembler::fat_nop() {
6777 if (UseAddressNop) {
6778 addr_nop_5();
6779 } else {
6780 emit_byte(0x26); // es:
6781 emit_byte(0x2e); // cs:
6782 emit_byte(0x64); // fs:
6783 emit_byte(0x65); // gs:
6784 emit_byte(0x90);
6785 }
6786 }
6787
6769 void MacroAssembler::fcmp(Register tmp) { 6788 void MacroAssembler::fcmp(Register tmp) {
6770 fcmp(tmp, 1, true, true); 6789 fcmp(tmp, 1, true, true);
6771 } 6790 }
6772 6791
6773 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { 6792 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
7821 } 7840 }
7822 } 7841 }
7823 7842
7824 void MacroAssembler::subptr(Register dst, int32_t imm32) { 7843 void MacroAssembler::subptr(Register dst, int32_t imm32) {
7825 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); 7844 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32));
7845 }
7846
7847 // Force generation of a 4 byte immediate value even if it fits into 8bit
7848 void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
7849 LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32));
7826 } 7850 }
7827 7851
7828 void MacroAssembler::subptr(Register dst, Register src) { 7852 void MacroAssembler::subptr(Register dst, Register src) {
7829 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); 7853 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src));
7830 } 7854 }
9290 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr())); 9314 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
9291 } 9315 }
9292 } 9316 }
9293 #endif // _LP64 9317 #endif // _LP64
9294 9318
9319
9320 // C2 compiled method's prolog code.
9321 void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
9322
9323 // WARNING: Initial instruction MUST be 5 bytes or longer so that
9324 // NativeJump::patch_verified_entry will be able to patch out the entry
9325 // code safely. The push to verify stack depth is ok at 5 bytes,
9326 // the frame allocation can be either 3 or 6 bytes. So if we don't do
9327 // stack bang then we must use the 6 byte frame allocation even if
9328 // we have no frame. :-(
9329
9330 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
9331 // Remove word for return addr
9332 framesize -= wordSize;
9333
9334 // Calls to C2R adapters often do not accept exceptional returns.
9335 // We require that their callers must bang for them. But be careful, because
9336 // some VM calls (such as call site linkage) can use several kilobytes of
9337 // stack. But the stack safety zone should account for that.
9338 // See bugs 4446381, 4468289, 4497237.
9339 if (stack_bang) {
9340 generate_stack_overflow_check(framesize);
9341
9342 // We always push rbp, so that on return to the interpreter rbp will be
9343 // restored correctly and we can correct the stack.
9344 push(rbp);
9345 // Remove word for ebp
9346 framesize -= wordSize;
9347
9348 // Create frame
9349 if (framesize) {
9350 subptr(rsp, framesize);
9351 }
9352 } else {
9353 // Create frame (force generation of a 4 byte immediate value)
9354 subptr_imm32(rsp, framesize);
9355
9356 // Save RBP register now.
9357 framesize -= wordSize;
9358 movptr(Address(rsp, framesize), rbp);
9359 }
9360
9361 if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
9362 framesize -= wordSize;
9363 movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
9364 }
9365
9366 #ifndef _LP64
9367 // If method sets FPU control word do it now
9368 if (fp_mode_24b) {
9369 fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
9370 }
9371 if (UseSSE >= 2 && VerifyFPU) {
9372 verify_FPU(0, "FPU stack must be clean on entry");
9373 }
9374 #endif
9375
9376 #ifdef ASSERT
9377 if (VerifyStackAtCalls) {
9378 Label L;
9379 push(rax);
9380 mov(rax, rsp);
9381 andptr(rax, StackAlignmentInBytes-1);
9382 cmpptr(rax, StackAlignmentInBytes-wordSize);
9383 pop(rax);
9384 jcc(Assembler::equal, L);
9385 stop("Stack is not properly aligned!");
9386 bind(L);
9387 }
9388 #endif
9389
9390 }
9391
9392
9295 // IndexOf for constant substrings with size >= 8 chars 9393 // IndexOf for constant substrings with size >= 8 chars
9296 // which don't need to be loaded through stack. 9394 // which don't need to be loaded through stack.
9297 void MacroAssembler::string_indexofC8(Register str1, Register str2, 9395 void MacroAssembler::string_indexofC8(Register str1, Register str2,
9298 Register cnt1, Register cnt2, 9396 Register cnt1, Register cnt2,
9299 int int_cnt2, Register result, 9397 int int_cnt2, Register result,