truffle: src/cpu/x86/vm/assembler

comparison src/cpu/x86/vm/assembler_x86.cpp @ 4947:fd8114661503

7125136: SIGILL on linux amd64 in gc/ArrayJuggle/Juggle29 Summary: For C2 moved saving EBP after ESP adjustment. For C1 generated 5 byte nop instruction first if needed. Reviewed-by: never, twisti, azeemj

author	kvn
date	Wed, 15 Feb 2012 21:37:49 -0800
parents	1cb50d7a9d95
children	33df1aeaebbf fd09f2d8283e

comparison

equal deleted inserted replaced

-:69333a2fbae2
+:fd8114661503
 } else {
 emit_byte(op1);
 emit_byte(op2 | encode(dst));
 emit_long(imm32);
 }
+}
+// Force generation of a 4 byte immediate value even if it fits into 8bit: emits op1, then op2 | encode(dst), then imm32 via emit_long. No prefix is emitted here; subl_imm32/subq_imm32 emit theirs before calling this.
+void Assembler::emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32) {
+assert(isByte(op1) && isByte(op2), "wrong opcode"); // both opcode values must pass isByte()
+assert((op1 & 0x01) == 1, "should be 32bit operation"); // bit 0 of op1 must be set
+assert((op1 & 0x02) == 0, "sign-extension bit should not be set"); // bit 1 of op1 must be clear; the immediate is always emitted in full below
+emit_byte(op1); // first opcode byte, emitted unmodified
+emit_byte(op2 | encode(dst)); // second byte: op2 OR'ed with the encoding of dst
+emit_long(imm32); // immediate goes out through emit_long even when its value would fit in 8 bits
 }
 // immediate-to-memory forms
 void Assembler::emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32) {
 assert((op1 & 0x01) == 1, "should be 32bit operation");
 (void) prefix_and_encode(dst->encoding(), src->encoding());
 emit_arith(0x03, 0xC0, dst, src);
 }
 void Assembler::addr_nop_4() {
+assert(UseAddressNop, "no CPU support"); // precondition: UseAddressNop must be set before this encoding is emitted
 // 4 bytes: NOP DWORD PTR [EAX+0]
 emit_byte(0x0F); // 0x0F 0x1F: the two leading bytes of this NOP encoding
 emit_byte(0x1F);
 emit_byte(0x40); // emit_rm(cbuf, 0x1, EAX_enc, EAX_enc);
 emit_byte(0);    // 8-bits offset (1 byte); brings the total to 4 bytes
 }
 void Assembler::addr_nop_5() {
+assert(UseAddressNop, "no CPU support"); // precondition: UseAddressNop must be set; MacroAssembler::fat_nop tests the flag before calling this
 // 5 bytes: NOP DWORD PTR [EAX+EAX*0+0] 8-bits offset
 emit_byte(0x0F); // 0x0F 0x1F: the two leading bytes of this NOP encoding
 emit_byte(0x1F);
 emit_byte(0x44); // emit_rm(cbuf, 0x1, EAX_enc, 0x4);
 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
 emit_byte(0);    // 8-bits offset (1 byte); brings the total to 5 bytes
 }
 void Assembler::addr_nop_7() {
+assert(UseAddressNop, "no CPU support"); // precondition: UseAddressNop must be set before this encoding is emitted
 // 7 bytes: NOP DWORD PTR [EAX+0] 32-bits offset
 emit_byte(0x0F); // 0x0F 0x1F: the two leading bytes of this NOP encoding
 emit_byte(0x1F);
 emit_byte(0x80); // emit_rm(cbuf, 0x2, EAX_enc, EAX_enc);
 emit_long(0);    // 32-bits offset (4 bytes); brings the total to 7 bytes
 }
 void Assembler::addr_nop_8() {
+assert(UseAddressNop, "no CPU support");
 // 8 bytes: NOP DWORD PTR [EAX+EAX*0+0] 32-bits offset
 emit_byte(0x0F);
 emit_byte(0x1F);
 emit_byte(0x84); // emit_rm(cbuf, 0x2, EAX_enc, 0x4);
 emit_byte(0x00); // emit_rm(cbuf, 0x0, EAX_enc, EAX_enc);
 void Assembler::subl(Register dst, int32_t imm32) {
 prefix(dst); // prefix handling for dst comes first (helper defined elsewhere in the file)
 emit_arith(0x81, 0xE8, dst, imm32); // same 0x81/0xE8 opcode pair as subl_imm32, but via emit_arith; NOTE(review): emit_arith presumably may choose a shorter immediate form - confirm against its definition
 }
+// Force generation of a 4 byte immediate value even if it fits into 8bit. Same prefix call and opcode pair (0x81, 0xE8) as subl(Register, int32_t), but routed through emit_arith_imm32.
+void Assembler::subl_imm32(Register dst, int32_t imm32) {
+prefix(dst); // prefix handling for dst, exactly as in subl(Register, int32_t)
+emit_arith_imm32(0x81, 0xE8, dst, imm32); // 0x81 satisfies emit_arith_imm32's asserts: bit 0 set, bit 1 clear
+}
 void Assembler::subl(Register dst, Address src) {
 InstructionMark im(this);
 prefix(src, dst);
 emit_byte(0x2B);
 emit_operand(dst, src);
 void Assembler::subq(Register dst, int32_t imm32) {
 (void) prefixq_and_encode(dst->encoding()); // called for its prefix emission; the returned encoding is deliberately discarded
 emit_arith(0x81, 0xE8, dst, imm32); // same 0x81/0xE8 opcode pair as subq_imm32, but via emit_arith; NOTE(review): emit_arith presumably may choose a shorter immediate form - confirm against its definition
 }
+// Force generation of a 4 byte immediate value even if it fits into 8bit. Same prefix call and opcode pair (0x81, 0xE8) as subq(Register, int32_t), but routed through emit_arith_imm32.
+void Assembler::subq_imm32(Register dst, int32_t imm32) {
+(void) prefixq_and_encode(dst->encoding()); // called for its prefix emission; the returned encoding is deliberately discarded
+emit_arith_imm32(0x81, 0xE8, dst, imm32); // 0x81 satisfies emit_arith_imm32's asserts: bit 0 set, bit 1 clear
+}
 void Assembler::subq(Register dst, Address src) {
 InstructionMark im(this);
 prefixq(src, dst);
 emit_byte(0x2B);
 emit_operand(dst, src);
 cdql();
 } else {
 movl(hi, lo);
 sarl(hi, 31);
 }
-}
-void MacroAssembler::fat_nop() {
-// A 5 byte nop that is safe for patching (see patch_verified_entry)
-emit_byte(0x26); // es:
-emit_byte(0x2e); // cs:
-emit_byte(0x64); // fs:
-emit_byte(0x65); // gs:
-emit_byte(0x90); // nop opcode; the four prefix bytes above plus this byte make up the 5 bytes
 }
 void MacroAssembler::jC2(Register tmp, Label& L) {
 // set parity bit if FPU flag C2 is set (via rax)
 save_rax(tmp);
 if (value == min_jint) { subq(dst, value); return; }
 if (value <  0) { incrementq(dst, -value); return; }
 if (value == 0) {                        ; return; }
 if (value == 1 && UseIncDec) { decq(dst) ; return; }
 /* else */      { subq(dst, value)       ; return; }
-}
-void MacroAssembler::fat_nop() {
-// A 5 byte nop that is safe for patching (see patch_verified_entry)
-// Recommended sequence from 'Software Optimization Guide for the AMD
-// Hammer Processor'
-emit_byte(0x66); // bytes 1-3: 0x66 0x66 0x90
-emit_byte(0x66);
-emit_byte(0x90);
-emit_byte(0x66); // bytes 4-5: 0x66 0x90
-emit_byte(0x90); // five bytes emitted in total
 }
 void MacroAssembler::incrementq(Register reg, int value) {
 if (value == min_jint) { addq(reg, value); return; }
 if (value <  0) { decrementq(reg, -value); return; }
 void MacroAssembler::enter() {
 push(rbp); // save the incoming rbp value on the stack
 mov(rbp, rsp); // rbp now holds the stack pointer as it is after the push
 }
+// A 5 byte nop that is safe for patching (see patch_verified_entry). Emits addr_nop_5() when UseAddressNop is set, otherwise four segment-override prefix bytes followed by 0x90.
+void MacroAssembler::fat_nop() {
+if (UseAddressNop) {
+addr_nop_5(); // 5-byte sequence 0F 1F 44 00 00; addr_nop_5 itself asserts UseAddressNop
+} else {
+emit_byte(0x26); // es:
+emit_byte(0x2e); // cs:
+emit_byte(0x64); // fs:
+emit_byte(0x65); // gs:
+emit_byte(0x90); // nop opcode; fifth and last byte of the sequence
+}
+}
 void MacroAssembler::fcmp(Register tmp) {
 fcmp(tmp, 1, true, true); // forwards to the 4-argument overload with index = 1, pop_left = true, pop_right = true
 }
 void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) {
 }
 }
 void MacroAssembler::subptr(Register dst, int32_t imm32) {
 LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); // subq under LP64_ONLY, subl under NOT_LP64; the macros are defined outside this file section
+}
+// Force generation of a 4 byte immediate value even if it fits into 8bit. Counterpart of subptr(Register, int32_t) that dispatches to the *_imm32 variants.
+void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) {
+LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); // subq_imm32 under LP64_ONLY, subl_imm32 under NOT_LP64
 }
 void MacroAssembler::subptr(Register dst, Register src) {
 LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); // register-register form: subq under LP64_ONLY, subl under NOT_LP64
 }
 movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
 }
 }
 #endif // _LP64
+// C2 compiled method's prolog code. framesize: frame size including the return-address word, must be a multiple of StackAlignmentInBytes (asserted). stack_bang: emit the stack overflow check and push rbp before allocating the frame. fp_mode_24b: when _LP64 is not defined, load the 24-bit FPU control word.
+void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) {
+// WARNING: Initial instruction MUST be 5 bytes or longer so that
+// NativeJump::patch_verified_entry will be able to patch out the entry
+// code safely. The push to verify stack depth is ok at 5 bytes,
+// the frame allocation can be either 3 or 6 bytes. So if we don't do
+// stack bang then we must use the 6 byte frame allocation even if
+// we have no frame. :-(
+assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+// Remove word for return addr
+framesize -= wordSize;
+// Calls to C2R adapters often do not accept exceptional returns.
+// We require that their callers must bang for them.  But be careful, because
+// some VM calls (such as call site linkage) can use several kilobytes of
+// stack.  But the stack safety zone should account for that.
+// See bugs 4446381, 4468289, 4497237.
+if (stack_bang) {
+generate_stack_overflow_check(framesize); // receives framesize with the return-address word already removed
+// We always push rbp, so that on return to the interpreter rbp will be
+// restored correctly and we can correct the stack.
+push(rbp);
+// Remove word for ebp
+framesize -= wordSize;
+// Create frame
+if (framesize) {
+subptr(rsp, framesize); // ordinary subptr; per the WARNING above, the forced 6 byte form is only required when there is no stack bang
+}
+} else {
+// Create frame (force generation of a 4 byte immediate value)
+subptr_imm32(rsp, framesize); // first instruction on this path, emitted even when framesize is 0; framesize here still includes the rbp slot
+// Save RBP register now.
+framesize -= wordSize;
+movptr(Address(rsp, framesize), rbp); // top word of the area just allocated: the same slot, relative to the final rsp, where the push(rbp) path leaves rbp
+}
+if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
+framesize -= wordSize; // the cookie occupies the word directly below the saved rbp
+movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
+}
+#ifndef _LP64
+// If method sets FPU control word do it now
+if (fp_mode_24b) {
+fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+}
+if (UseSSE >= 2 && VerifyFPU) {
+verify_FPU(0, "FPU stack must be clean on entry");
+}
+#endif
+#ifdef ASSERT
+if (VerifyStackAtCalls) {
+Label L;
+push(rax); // preserve rax; rsp is one word lower from here until the pop below
+mov(rax, rsp);
+andptr(rax, StackAlignmentInBytes-1); // keep only the low alignment bits of rsp
+cmpptr(rax, StackAlignmentInBytes-wordSize); // expected value because of the extra push: rsp was aligned before it
+pop(rax); // restore rax; pop leaves the flags set by cmpptr intact for the jcc below
+jcc(Assembler::equal, L);
+stop("Stack is not properly aligned!");
+bind(L);
+}
+#endif
+}
 // IndexOf for constant substrings with size >= 8 chars
 // which don't need to be loaded through stack.
 void MacroAssembler::string_indexofC8(Register str1, Register str2,
 Register cnt1, Register cnt2,
 int int_cnt2,  Register result,

Mercurial > hg > truffle

comparison src/cpu/x86/vm/assembler_x86.cpp @ 4947:fd8114661503