Mercurial > hg > truffle
diff src/cpu/x86/vm/sharedRuntime_x86_32.cpp @ 6948:e522a00b91aa
Merge with http://hg.openjdk.java.net/hsx/hsx25/hotspot/ after NPG - C++ build works
author | Doug Simon <doug.simon@oracle.com> |
---|---|
date | Mon, 12 Nov 2012 23:14:12 +0100 |
parents | 2dfab5607b3d 18fb7da42534 |
children | 1baf7f1e3f23 |
line wrap: on
line diff
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Mon Nov 12 18:11:17 2012 +0100 +++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Mon Nov 12 23:14:12 2012 +0100 @@ -29,7 +29,7 @@ #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" #include "interpreter/interpreter.hpp" -#include "oops/compiledICHolderOop.hpp" +#include "oops/compiledICHolder.hpp" #include "prims/jvmtiRedefineClassesTrace.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/vframeArray.hpp" @@ -46,11 +46,11 @@ const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; class RegisterSaver { - enum { FPU_regs_live = 8 /*for the FPU stack*/+8/*eight more for XMM registers*/ }; // Capture info about frame layout +#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off enum layout { fpu_state_off = 0, - fpu_state_end = fpu_state_off+FPUStateSizeInWords-1, + fpu_state_end = fpu_state_off+FPUStateSizeInWords, st0_off, st0H_off, st1_off, st1H_off, st2_off, st2H_off, @@ -59,16 +59,16 @@ st5_off, st5H_off, st6_off, st6H_off, st7_off, st7H_off, - - xmm0_off, xmm0H_off, - xmm1_off, xmm1H_off, - xmm2_off, xmm2H_off, - xmm3_off, xmm3H_off, - xmm4_off, xmm4H_off, - xmm5_off, xmm5H_off, - xmm6_off, xmm6H_off, - xmm7_off, xmm7H_off, - flags_off, + xmm_off, + DEF_XMM_OFFS(0), + DEF_XMM_OFFS(1), + DEF_XMM_OFFS(2), + DEF_XMM_OFFS(3), + DEF_XMM_OFFS(4), + DEF_XMM_OFFS(5), + DEF_XMM_OFFS(6), + DEF_XMM_OFFS(7), + flags_off = xmm7_off + 16/BytesPerInt + 1, // 16-byte stack alignment fill word rdi_off, rsi_off, ignore_off, // extra copy of rbp, @@ -83,13 +83,13 @@ rbp_off, return_off, // slot for return address reg_save_size }; - + enum { FPU_regs_live = flags_off - fpu_state_end }; public: static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, - int* total_frame_words, bool verify_fpu = true); - static void restore_live_registers(MacroAssembler* masm); + int* total_frame_words, bool verify_fpu = true, bool save_vectors = false); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); static int rax_offset() { return rax_off; } static int rbx_offset() { return rbx_off; } @@ -113,9 +113,20 @@ }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, - int* total_frame_words, bool verify_fpu) { - - int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize; + int* total_frame_words, bool verify_fpu, bool save_vectors) { + int vect_words = 0; +#ifdef COMPILER2 + if (save_vectors) { + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); + assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); + // Save upper half of YMM registes + vect_words = 8 * 16 / wordSize; + additional_frame_words += vect_words; + } +#else + assert(!save_vectors, "vectors are generated only by C2"); +#endif + int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize; int frame_words = frame_size_in_bytes / wordSize; *total_frame_words = frame_words; @@ -129,7 +140,7 @@ __ enter(); __ pusha(); __ pushf(); - __ subptr(rsp,FPU_regs_live*sizeof(jdouble)); // Push FPU registers space + __ subptr(rsp,FPU_regs_live*wordSize); // Push FPU registers space __ push_FPU_state(); // Save FPU state & init if (verify_fpu) { @@ -183,14 +194,28 @@ __ movflt(Address(rsp,xmm6_off*wordSize),xmm6); __ movflt(Address(rsp,xmm7_off*wordSize),xmm7); } else if( UseSSE >= 2 ) { - __ movdbl(Address(rsp,xmm0_off*wordSize),xmm0); - __ movdbl(Address(rsp,xmm1_off*wordSize),xmm1); - __ movdbl(Address(rsp,xmm2_off*wordSize),xmm2); - __ movdbl(Address(rsp,xmm3_off*wordSize),xmm3); - __ movdbl(Address(rsp,xmm4_off*wordSize),xmm4); - __ movdbl(Address(rsp,xmm5_off*wordSize),xmm5); - __ movdbl(Address(rsp,xmm6_off*wordSize),xmm6); - __ movdbl(Address(rsp,xmm7_off*wordSize),xmm7); + // Save whole 128bit (16 bytes) XMM regiters + __ movdqu(Address(rsp,xmm0_off*wordSize),xmm0); + __ movdqu(Address(rsp,xmm1_off*wordSize),xmm1); + __ movdqu(Address(rsp,xmm2_off*wordSize),xmm2); + __ movdqu(Address(rsp,xmm3_off*wordSize),xmm3); + __ movdqu(Address(rsp,xmm4_off*wordSize),xmm4); + __ movdqu(Address(rsp,xmm5_off*wordSize),xmm5); + __ movdqu(Address(rsp,xmm6_off*wordSize),xmm6); + __ movdqu(Address(rsp,xmm7_off*wordSize),xmm7); + } + + if (vect_words > 0) { + assert(vect_words*wordSize == 128, ""); + __ subptr(rsp, 128); // Save upper half of YMM registes + __ vextractf128h(Address(rsp, 0),xmm0); + __ vextractf128h(Address(rsp, 16),xmm1); + __ vextractf128h(Address(rsp, 32),xmm2); + __ vextractf128h(Address(rsp, 48),xmm3); + __ vextractf128h(Address(rsp, 64),xmm4); + __ vextractf128h(Address(rsp, 80),xmm5); + __ vextractf128h(Address(rsp, 96),xmm6); + __ vextractf128h(Address(rsp,112),xmm7); } // Set an oopmap for the call site. This oopmap will map all @@ -253,10 +278,20 @@ } -void RegisterSaver::restore_live_registers(MacroAssembler* masm) { - +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { // Recover XMM & FPU state - if( UseSSE == 1 ) { + int additional_frame_bytes = 0; +#ifdef COMPILER2 + if (restore_vectors) { + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); + assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); + additional_frame_bytes = 128; + } +#else + assert(!restore_vectors, "vectors are generated only by C2"); +#endif + if (UseSSE == 1) { + assert(additional_frame_bytes == 0, ""); __ movflt(xmm0,Address(rsp,xmm0_off*wordSize)); __ movflt(xmm1,Address(rsp,xmm1_off*wordSize)); __ movflt(xmm2,Address(rsp,xmm2_off*wordSize)); @@ -265,18 +300,33 @@ __ movflt(xmm5,Address(rsp,xmm5_off*wordSize)); __ movflt(xmm6,Address(rsp,xmm6_off*wordSize)); __ movflt(xmm7,Address(rsp,xmm7_off*wordSize)); - } else if( UseSSE >= 2 ) { - __ movdbl(xmm0,Address(rsp,xmm0_off*wordSize)); - __ movdbl(xmm1,Address(rsp,xmm1_off*wordSize)); - __ movdbl(xmm2,Address(rsp,xmm2_off*wordSize)); - __ movdbl(xmm3,Address(rsp,xmm3_off*wordSize)); - __ movdbl(xmm4,Address(rsp,xmm4_off*wordSize)); - __ movdbl(xmm5,Address(rsp,xmm5_off*wordSize)); - __ movdbl(xmm6,Address(rsp,xmm6_off*wordSize)); - __ movdbl(xmm7,Address(rsp,xmm7_off*wordSize)); + } else if (UseSSE >= 2) { +#define STACK_ADDRESS(x) Address(rsp,(x)*wordSize + additional_frame_bytes) + __ movdqu(xmm0,STACK_ADDRESS(xmm0_off)); + __ movdqu(xmm1,STACK_ADDRESS(xmm1_off)); + __ movdqu(xmm2,STACK_ADDRESS(xmm2_off)); + __ movdqu(xmm3,STACK_ADDRESS(xmm3_off)); + __ movdqu(xmm4,STACK_ADDRESS(xmm4_off)); + __ movdqu(xmm5,STACK_ADDRESS(xmm5_off)); + __ movdqu(xmm6,STACK_ADDRESS(xmm6_off)); + __ movdqu(xmm7,STACK_ADDRESS(xmm7_off)); +#undef STACK_ADDRESS + } + if (restore_vectors) { + // Restore upper half of YMM registes. + assert(additional_frame_bytes == 128, ""); + __ vinsertf128h(xmm0, Address(rsp, 0)); + __ vinsertf128h(xmm1, Address(rsp, 16)); + __ vinsertf128h(xmm2, Address(rsp, 32)); + __ vinsertf128h(xmm3, Address(rsp, 48)); + __ vinsertf128h(xmm4, Address(rsp, 64)); + __ vinsertf128h(xmm5, Address(rsp, 80)); + __ vinsertf128h(xmm6, Address(rsp, 96)); + __ vinsertf128h(xmm7, Address(rsp,112)); + __ addptr(rsp, additional_frame_bytes); } __ pop_FPU_state(); - __ addptr(rsp, FPU_regs_live*sizeof(jdouble)); // Pop FPU registers + __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers __ popf(); __ popa(); @@ -308,6 +358,13 @@ __ addptr(rsp, return_off * wordSize); } +// Is vector's size (in bytes) bigger than a size saved by default? +// 16 bytes XMM registers are saved by default using SSE2 movdqu instructions. +// Note, MaxVectorSize == 0 with UseSSE < 2 and vectors are not generated. +bool SharedRuntime::is_wide_vector(int size) { + return size > 16; +} + // The java_calling_convention describes stack locations as ideal slots on // a frame with no abi restrictions. Since we must observe abi restrictions // (like the placement of the register window) the slots must be biased by @@ -453,8 +510,7 @@ // Patch the callers callsite with entry to compiled code if it exists. static void patch_callers_callsite(MacroAssembler *masm) { Label L; - __ verify_oop(rbx); - __ cmpptr(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int32_t)NULL_WORD); + __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD); __ jcc(Assembler::equal, L); // Schedule the branch target address early. // Call into the VM to patch the caller, then jump to compiled callee @@ -486,7 +542,6 @@ __ push(rax); // VM needs target method __ push(rbx); - __ verify_oop(rbx); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); __ addptr(rsp, 2*wordSize); @@ -631,7 +686,7 @@ } // Schedule the branch target address early. - __ movptr(rcx, Address(rbx, in_bytes(methodOopDesc::interpreter_entry_offset()))); + __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); // And repush original return address __ push(rax); __ jmp(rcx); @@ -746,7 +801,7 @@ // Will jump to the compiled code just as if compiled code was doing it. // Pre-load the register-jump target early, to schedule it better. - __ movptr(rdi, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset()))); + __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset()))); // Now generate the shuffle code. Pick up all register args and move the // rest through the floating point stack top. @@ -859,8 +914,8 @@ __ get_thread(rax); __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx); - // move methodOop to rax, in case we end up in an c2i adapter. - // the c2i adapters expect methodOop in rax, (c2) because c2's + // move Method* to rax, in case we end up in an c2i adapter. + // the c2i adapters expect Method* in rax, (c2) because c2's // resolve stubs return the result (the method) in rax,. // I'd love to fix this. __ mov(rax, rbx); @@ -880,7 +935,7 @@ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); // ------------------------------------------------------------------------- - // Generate a C2I adapter. On entry we know rbx, holds the methodOop during calls + // Generate a C2I adapter. On entry we know rbx, holds the Method* during calls // to the interpreter. The args start out packed in the compiled layout. They // need to be unpacked into the interpreter layout. This will almost always // require some stack space. We grow the current (compiled) stack, then repack @@ -898,18 +953,14 @@ { Label missed; - - __ verify_oop(holder); __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes())); - __ verify_oop(temp); - - __ cmpptr(temp, Address(holder, compiledICHolderOopDesc::holder_klass_offset())); - __ movptr(rbx, Address(holder, compiledICHolderOopDesc::holder_method_offset())); + __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset())); + __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset())); __ jcc(Assembler::notEqual, missed); // Method might have been compiled since the call site was patched to // interpreted if that is the case treat it as a miss so we can get // the call site corrected. - __ cmpptr(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int32_t)NULL_WORD); + __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD); __ jcc(Assembler::equal, skip_fixup); __ bind(missed); @@ -944,6 +995,7 @@ case T_OBJECT: case T_ARRAY: case T_ADDRESS: + case T_METADATA: regs[i].set1(VMRegImpl::stack2reg(stack++)); break; case T_LONG: @@ -1351,12 +1403,12 @@ } static void verify_oop_args(MacroAssembler* masm, - int total_args_passed, + methodHandle method, const BasicType* sig_bt, const VMRegPair* regs) { Register temp_reg = rbx; // not part of any compiled calling seq if (VerifyOops) { - for (int i = 0; i < total_args_passed; i++) { + for (int i = 0; i < method->size_of_parameters(); i++) { if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { VMReg r = regs[i].first(); @@ -1373,35 +1425,32 @@ } static void gen_special_dispatch(MacroAssembler* masm, - int total_args_passed, - int comp_args_on_stack, - vmIntrinsics::ID special_dispatch, + methodHandle method, const BasicType* sig_bt, const VMRegPair* regs) { - verify_oop_args(masm, total_args_passed, sig_bt, regs); + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); // Now write the args into the outgoing interpreter space bool has_receiver = false; Register receiver_reg = noreg; int member_arg_pos = -1; Register member_reg = noreg; - int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch); + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); if (ref_kind != 0) { - member_arg_pos = total_args_passed - 1; // trailing MemberName argument + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument member_reg = rbx; // known to be free at this point has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); - } else if (special_dispatch == vmIntrinsics::_invokeBasic) { + } else if (iid == vmIntrinsics::_invokeBasic) { has_receiver = true; } else { - guarantee(false, err_msg("special_dispatch=%d", special_dispatch)); + fatal(err_msg_res("unexpected intrinsic id %d", iid)); } if (member_reg != noreg) { // Load the member_arg into register, if necessary. - assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob"); - assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object"); + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); VMReg r = regs[member_arg_pos].first(); - assert(r->is_valid(), "bad member arg"); if (r->is_stack()) { __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); } else { @@ -1412,7 +1461,7 @@ if (has_receiver) { // Make sure the receiver is loaded into a register. - assert(total_args_passed > 0, "oob"); + assert(method->size_of_parameters() > 0, "oob"); assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); VMReg r = regs[0].first(); assert(r->is_valid(), "bad receiver arg"); @@ -1420,7 +1469,7 @@ // Porting note: This assumes that compiled calling conventions always // pass the receiver oop in a register. If this is not true on some // platform, pick a temp and load the receiver from stack. - assert(false, "receiver always in a register"); + fatal("receiver always in a register"); receiver_reg = rcx; // known to be free at this point __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); } else { @@ -1430,7 +1479,7 @@ } // Figure out which address we are really jumping to: - MethodHandles::generate_method_handle_dispatch(masm, special_dispatch, + MethodHandles::generate_method_handle_dispatch(masm, iid, receiver_reg, member_reg, /*for_compiler_entry:*/ true); } @@ -1466,8 +1515,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, methodHandle method, int compile_id, - int total_in_args, - int comp_args_on_stack, BasicType* in_sig_bt, VMRegPair* in_regs, BasicType ret_type) { @@ -1476,9 +1523,7 @@ intptr_t start = (intptr_t)__ pc(); int vep_offset = ((intptr_t)__ pc()) - start; gen_special_dispatch(masm, - total_in_args, - comp_args_on_stack, - method->intrinsic_id(), + method, in_sig_bt, in_regs); int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period @@ -1511,6 +1556,7 @@ // we convert the java signature to a C signature by inserting // the hidden arguments as arg[0] and possibly arg[1] (static method) + const int total_in_args = method->size_of_parameters(); int total_c_args = total_in_args; if (!is_critical_native) { total_c_args += 1; @@ -1890,7 +1936,7 @@ if (method->is_static() && !is_critical_native) { // load opp into a register - __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror())); + __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); // Now handlize the static class mirror it's known not-null. __ movptr(Address(rsp, klass_offset), oop_handle_reg); @@ -1918,7 +1964,7 @@ { SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); - __ movoop(rax, JNIHandles::make_local(method())); + __ mov_metadata(rax, method()); __ call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), thread, rax); @@ -1926,7 +1972,7 @@ // RedefineClasses() tracing support for obsolete method entry if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { - __ movoop(rax, JNIHandles::make_local(method())); + __ mov_metadata(rax, method()); __ call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), thread, rax); @@ -2184,7 +2230,7 @@ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); // Tell dtrace about this method exit save_native_result(masm, ret_type, stack_slots); - __ movoop(rax, JNIHandles::make_local(method())); + __ mov_metadata(rax, method()); __ call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), thread, rax); @@ -2743,7 +2789,6 @@ return 0; } - //------------------------------generate_deopt_blob---------------------------- void SharedRuntime::generate_deopt_blob() { // allocate space for the code @@ -3281,7 +3326,7 @@ // setup oopmap, and calls safepoint code to stop the compiled code for // a safepoint. // -SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) { +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { // Account for thread arg in our frame const int additional_words = 1; @@ -3301,17 +3346,18 @@ const Register java_thread = rdi; // callee-saved for VC++ address start = __ pc(); address call_pc = NULL; - + bool cause_return = (poll_type == POLL_AT_RETURN); + bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); // If cause_return is true we are at a poll_return and there is // the return address on the stack to the caller on the nmethod // that is safepoint. We can leave this return on the stack and // effectively complete the return and safepoint in the caller. // Otherwise we push space for a return address that the safepoint // handler will install later to make the stack walking sensible. - if( !cause_return ) - __ push(rbx); // Make room for return address (or push it again) - - map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false); + if (!cause_return) + __ push(rbx); // Make room for return address (or push it again) + + map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false, save_vectors); // The following is basically a call_VM. However, we need the precise // address of the call in order to generate an oopmap. Hence, we do all the @@ -3323,7 +3369,7 @@ __ set_last_Java_frame(java_thread, noreg, noreg, NULL); // if this was not a poll_return then we need to correct the return address now. - if( !cause_return ) { + if (!cause_return) { __ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset())); __ movptr(Address(rbp, wordSize), rax); } @@ -3351,15 +3397,14 @@ __ jcc(Assembler::equal, noException); // Exception pending - - RegisterSaver::restore_live_registers(masm); + RegisterSaver::restore_live_registers(masm, save_vectors); __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); __ bind(noException); // Normal exit, register restoring and exit - RegisterSaver::restore_live_registers(masm); + RegisterSaver::restore_live_registers(masm, save_vectors); __ ret(0); @@ -3427,8 +3472,8 @@ __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); __ jcc(Assembler::notEqual, pending); - // get the returned methodOop - __ movptr(rbx, Address(thread, JavaThread::vm_result_offset())); + // get the returned Method* + __ get_vm_result_2(rbx, thread); __ movptr(Address(rsp, RegisterSaver::rbx_offset() * wordSize), rbx); __ movptr(Address(rsp, RegisterSaver::rax_offset() * wordSize), rax);