Mercurial > hg > truffle
diff src/cpu/x86/vm/sharedRuntime_x86_64.cpp @ 6792:137868b7aa6f
7196199: java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect
Summary: Save whole XMM/YMM registers in safepoint interrupt handler.
Reviewed-by: roland, twisti
author | kvn |
---|---|
date | Mon, 17 Sep 2012 19:39:07 -0700 |
parents | 2cb2f30450c7 |
children | 18fb7da42534 |
line wrap: on
line diff
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Mon Sep 17 17:02:10 2012 -0700 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Mon Sep 17 19:39:07 2012 -0700 @@ -116,8 +116,8 @@ }; public: - static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); - static void restore_live_registers(MacroAssembler* masm); + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); // Offsets into the register save area // Used by deoptimization when it is managing result register @@ -134,7 +134,19 @@ static void restore_result_registers(MacroAssembler* masm); }; -OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { + int vect_words = 0; +#ifdef COMPILER2 + if (save_vectors) { + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); + assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); + // Save upper half of YMM registes + vect_words = 16 * 16 / wordSize; + additional_frame_words += vect_words; + } +#else + assert(!save_vectors, "vectors are generated only by C2"); +#endif // Always make the frame size 16-byte aligned int frame_size_in_bytes = round_to(additional_frame_words*wordSize + @@ -155,6 +167,27 @@ __ enter(); // rsp becomes 16-byte aligned here __ push_CPU_state(); // Push a multiple of 16 bytes + + if (vect_words > 0) { + assert(vect_words*wordSize == 256, ""); + __ subptr(rsp, 256); // Save upper half of YMM registes + __ vextractf128h(Address(rsp, 0),xmm0); + __ vextractf128h(Address(rsp, 16),xmm1); + __ vextractf128h(Address(rsp, 32),xmm2); + __ vextractf128h(Address(rsp, 48),xmm3); + __ vextractf128h(Address(rsp, 64),xmm4); + __ vextractf128h(Address(rsp, 80),xmm5); + __ vextractf128h(Address(rsp, 96),xmm6); + __ vextractf128h(Address(rsp,112),xmm7); + __ vextractf128h(Address(rsp,128),xmm8); + __ vextractf128h(Address(rsp,144),xmm9); + __ vextractf128h(Address(rsp,160),xmm10); + __ vextractf128h(Address(rsp,176),xmm11); + __ vextractf128h(Address(rsp,192),xmm12); + __ vextractf128h(Address(rsp,208),xmm13); + __ vextractf128h(Address(rsp,224),xmm14); + __ vextractf128h(Address(rsp,240),xmm15); + } if (frame::arg_reg_save_area_bytes != 0) { // Allocate argument register save area __ subptr(rsp, frame::arg_reg_save_area_bytes); @@ -167,112 +200,111 @@ OopMapSet *oop_maps = new OopMapSet(); OopMap* map = new OopMap(frame_size_in_slots, 0); - map->set_callee_saved(VMRegImpl::stack2reg( rax_off + additional_frame_slots), rax->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( rcx_off + additional_frame_slots), rcx->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( rdx_off + additional_frame_slots), rdx->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( rbx_off + additional_frame_slots), rbx->as_VMReg()); + +#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) + + map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg()); // rbp location is known implicitly by the frame sender code, needs no oopmap // and the location where rbp was saved by is ignored - map->set_callee_saved(VMRegImpl::stack2reg( rsi_off + additional_frame_slots), rsi->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( rdi_off + additional_frame_slots), rdi->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r8_off + additional_frame_slots), r8->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r9_off + additional_frame_slots), r9->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r10_off + additional_frame_slots), r10->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r11_off + additional_frame_slots), r11->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r12_off + additional_frame_slots), r12->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r13_off + additional_frame_slots), r13->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r14_off + additional_frame_slots), r14->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r15_off + additional_frame_slots), r15->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm0_off + additional_frame_slots), xmm0->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm1_off + additional_frame_slots), xmm1->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm2_off + additional_frame_slots), xmm2->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm3_off + additional_frame_slots), xmm3->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm4_off + additional_frame_slots), xmm4->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm5_off + additional_frame_slots), xmm5->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm6_off + additional_frame_slots), xmm6->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm7_off + additional_frame_slots), xmm7->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm8_off + additional_frame_slots), xmm8->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm9_off + additional_frame_slots), xmm9->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm10_off + additional_frame_slots), xmm10->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm11_off + additional_frame_slots), xmm11->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm12_off + additional_frame_slots), xmm12->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm13_off + additional_frame_slots), xmm13->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm14_off + additional_frame_slots), xmm14->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm15_off + additional_frame_slots), xmm15->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm0_off ), xmm0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm1_off ), xmm1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm2_off ), xmm2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm3_off ), xmm3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm4_off ), xmm4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm5_off ), xmm5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm6_off ), xmm6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm7_off ), xmm7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm8_off ), xmm8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm9_off ), xmm9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm10_off), xmm10->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm11_off), xmm11->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm12_off), xmm12->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm13_off), xmm13->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg()); // %%% These should all be a waste but we'll keep things as they were for now if (true) { - map->set_callee_saved(VMRegImpl::stack2reg( raxH_off + additional_frame_slots), - rax->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( rcxH_off + additional_frame_slots), - rcx->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( rdxH_off + additional_frame_slots), - rdx->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( rbxH_off + additional_frame_slots), - rbx->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next()); // rbp location is known implicitly by the frame sender code, needs no oopmap - map->set_callee_saved(VMRegImpl::stack2reg( rsiH_off + additional_frame_slots), - rsi->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( rdiH_off + additional_frame_slots), - rdi->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r8H_off + additional_frame_slots), - r8->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r9H_off + additional_frame_slots), - r9->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r10H_off + additional_frame_slots), - r10->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r11H_off + additional_frame_slots), - r11->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r12H_off + additional_frame_slots), - r12->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r13H_off + additional_frame_slots), - r13->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r14H_off + additional_frame_slots), - r14->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r15H_off + additional_frame_slots), - r15->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm0H_off + additional_frame_slots), - xmm0->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm1H_off + additional_frame_slots), - xmm1->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm2H_off + additional_frame_slots), - xmm2->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm3H_off + additional_frame_slots), - xmm3->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm4H_off + additional_frame_slots), - xmm4->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm5H_off + additional_frame_slots), - xmm5->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm6H_off + additional_frame_slots), - xmm6->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm7H_off + additional_frame_slots), - xmm7->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm8H_off + additional_frame_slots), - xmm8->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm9H_off + additional_frame_slots), - xmm9->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm10H_off + additional_frame_slots), - xmm10->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm11H_off + additional_frame_slots), - xmm11->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm12H_off + additional_frame_slots), - xmm12->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm13H_off + additional_frame_slots), - xmm13->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm14H_off + additional_frame_slots), - xmm14->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm15H_off + additional_frame_slots), - xmm15->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm0H_off ), xmm0->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm1H_off ), xmm1->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm2H_off ), xmm2->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm3H_off ), xmm3->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm4H_off ), xmm4->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm5H_off ), xmm5->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm6H_off ), xmm6->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm7H_off ), xmm7->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm8H_off ), xmm8->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm9H_off ), xmm9->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm10H_off), xmm10->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm11H_off), xmm11->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm12H_off), xmm12->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm13H_off), xmm13->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next()); } return map; } -void RegisterSaver::restore_live_registers(MacroAssembler* masm) { +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { if (frame::arg_reg_save_area_bytes != 0) { // Pop arg register save area __ addptr(rsp, frame::arg_reg_save_area_bytes); } +#ifdef COMPILER2 + if (restore_vectors) { + // Restore upper half of YMM registes. + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); + assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); + __ vinsertf128h(xmm0, Address(rsp, 0)); + __ vinsertf128h(xmm1, Address(rsp, 16)); + __ vinsertf128h(xmm2, Address(rsp, 32)); + __ vinsertf128h(xmm3, Address(rsp, 48)); + __ vinsertf128h(xmm4, Address(rsp, 64)); + __ vinsertf128h(xmm5, Address(rsp, 80)); + __ vinsertf128h(xmm6, Address(rsp, 96)); + __ vinsertf128h(xmm7, Address(rsp,112)); + __ vinsertf128h(xmm8, Address(rsp,128)); + __ vinsertf128h(xmm9, Address(rsp,144)); + __ vinsertf128h(xmm10, Address(rsp,160)); + __ vinsertf128h(xmm11, Address(rsp,176)); + __ vinsertf128h(xmm12, Address(rsp,192)); + __ vinsertf128h(xmm13, Address(rsp,208)); + __ vinsertf128h(xmm14, Address(rsp,224)); + __ vinsertf128h(xmm15, Address(rsp,240)); + __ addptr(rsp, 256); + } +#else + assert(!restore_vectors, "vectors are generated only by C2"); +#endif // Recover CPU state __ pop_CPU_state(); // Get the rbp described implicitly by the calling convention (no oopMap) @@ -297,6 +329,12 @@ __ addptr(rsp, return_offset_in_bytes()); } +// Is vector's size (in bytes) bigger than a size saved by default? +// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 16; +} + // The java_calling_convention describes stack locations as ideal slots on // a frame with no abi restrictions. Since we must observe abi restrictions // (like the placement of the register window) the slots must be biased by @@ -3235,7 +3273,6 @@ return 0; } - //------------------------------generate_deopt_blob---------------------------- void SharedRuntime::generate_deopt_blob() { // Allocate space for the code @@ -3740,7 +3777,7 @@ // Generate a special Compile2Runtime blob that saves all registers, // and setup oopmap. // -SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) { +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); @@ -3755,6 +3792,8 @@ address start = __ pc(); address call_pc = NULL; int frame_size_in_words; + bool cause_return = (poll_type == POLL_AT_RETURN); + bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); // Make room for return address (or push it again) if (!cause_return) { @@ -3762,7 +3801,7 @@ } // Save registers, fpu state, and flags - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors); // The following is basically a call_VM. However, we need the precise // address of the call in order to generate an oopmap. Hence, we do all the @@ -3799,7 +3838,7 @@ // Exception pending - RegisterSaver::restore_live_registers(masm); + RegisterSaver::restore_live_registers(masm, save_vectors); __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); @@ -3807,7 +3846,7 @@ __ bind(noException); // Normal exit, restore registers and exit. - RegisterSaver::restore_live_registers(masm); + RegisterSaver::restore_live_registers(masm, save_vectors); __ ret(0);