# HG changeset patch # User kvn # Date 1348523166 25200 # Node ID c92f43386117fafbfd76b8100a563cfb5b197540 # Parent 04ed664b7e30a8cc0d007acda3a0856653effc1e# Parent f7c1f489db55c60491ec010f76d5b3d49776c4d6 Merge diff -r 04ed664b7e30 -r c92f43386117 src/cpu/sparc/vm/assembler_sparc.cpp --- a/src/cpu/sparc/vm/assembler_sparc.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -725,24 +725,6 @@ } -// Convert to C varargs format -void MacroAssembler::set_varargs( Argument inArg, Register d ) { - // spill register-resident args to their memory slots - // (SPARC calling convention requires callers to have already preallocated these) - // Note that the inArg might in fact be an outgoing argument, - // if a leaf routine or stub does some tricky argument shuffling. - // This routine must work even though one of the saved arguments - // is in the d register (e.g., set_varargs(Argument(0, false), O0)). - for (Argument savePtr = inArg; - savePtr.is_register(); - savePtr = savePtr.successor()) { - st_ptr(savePtr.as_register(), savePtr.address_in_frame()); - } - // return the address of the first memory slot - Address a = inArg.address_in_frame(); - add(a.base(), a.disp(), d); -} - // Conditional breakpoint (for assertion checks in assembly code) void MacroAssembler::breakpoint_trap(Condition c, CC cc) { trap(c, cc, G0, ST_RESERVED_FOR_USER_0); @@ -2943,6 +2925,20 @@ assert(itable_index.is_constant() || itable_index.as_register() == method_result, "caller must use same register for non-constant itable index as for method"); + Label L_no_such_interface_restore; + bool did_save = false; + if (scan_temp == noreg || sethi_temp == noreg) { + Register recv_2 = recv_klass->is_global() ? recv_klass : L0; + Register intf_2 = intf_klass->is_global() ? intf_klass : L1; + assert(method_result->is_global(), "must be able to return value"); + scan_temp = L2; + sethi_temp = L3; + save_frame_and_mov(0, recv_klass, recv_2, intf_klass, intf_2); + recv_klass = recv_2; + intf_klass = intf_2; + did_save = true; + } + // Compute start of first itableOffsetEntry (which is at the end of the vtable) int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; int scan_step = itableOffsetEntry::size() * wordSize; @@ -2981,7 +2977,7 @@ // result = (klass + scan->offset() + itable_index); // } // } - Label search, found_method; + Label L_search, L_found_method; for (int peel = 1; peel >= 0; peel--) { // %%%% Could load both offset and interface in one ldx, if they were @@ -2991,23 +2987,23 @@ // Check that this entry is non-null. A null entry means that // the receiver class doesn't implement the interface, and wasn't the // same as when the caller was compiled. - bpr(Assembler::rc_z, false, Assembler::pn, method_result, L_no_such_interface); + bpr(Assembler::rc_z, false, Assembler::pn, method_result, did_save ? L_no_such_interface_restore : L_no_such_interface); delayed()->cmp(method_result, intf_klass); if (peel) { - brx(Assembler::equal, false, Assembler::pt, found_method); + brx(Assembler::equal, false, Assembler::pt, L_found_method); } else { - brx(Assembler::notEqual, false, Assembler::pn, search); + brx(Assembler::notEqual, false, Assembler::pn, L_search); // (invert the test to fall through to found_method...) } delayed()->add(scan_temp, scan_step, scan_temp); if (!peel) break; - bind(search); + bind(L_search); } - bind(found_method); + bind(L_found_method); // Got a hit. int ito_offset = itableOffsetEntry::offset_offset_in_bytes(); @@ -3015,6 +3011,18 @@ ito_offset -= scan_step; lduw(scan_temp, ito_offset, scan_temp); ld_ptr(recv_klass, scan_temp, method_result); + + if (did_save) { + Label L_done; + ba(L_done); + delayed()->restore(); + + bind(L_no_such_interface_restore); + ba(L_no_such_interface); + delayed()->restore(); + + bind(L_done); + } } diff -r 04ed664b7e30 -r c92f43386117 src/cpu/sparc/vm/assembler_sparc.hpp --- a/src/cpu/sparc/vm/assembler_sparc.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -2428,9 +2428,6 @@ static void test(); #endif - // convert an incoming arglist to varargs format; put the pointer in d - void set_varargs( Argument a, Register d ); - int total_frame_size_in_bytes(int extraWords); // used when extraWords known statically diff -r 04ed664b7e30 -r c92f43386117 src/cpu/sparc/vm/assembler_sparc.inline.hpp --- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -347,7 +347,11 @@ inline void Assembler::swap( Register s1, Register s2, Register d) { v9_dep(); emit_long( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | rs2(s2) ); } inline void Assembler::swap( Register s1, int simm13a, Register d) { v9_dep(); emit_data( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } -inline void Assembler::swap( Address& a, Register d, int offset ) { relocate(a.rspec(offset)); swap( a.base(), a.disp() + offset, d ); } +inline void Assembler::swap( Address& a, Register d, int offset ) { + relocate(a.rspec(offset)); + if (a.has_index()) { assert(offset == 0, ""); swap( a.base(), a.index(), d ); } + else { swap( a.base(), a.disp() + offset, d ); } +} // Use the right loads/stores for the platform diff -r 04ed664b7e30 -r c92f43386117 src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -1315,7 +1315,13 @@ Address LIR_Assembler::as_Address(LIR_Address* addr) { Register reg = addr->base()->as_register(); - return Address(reg, addr->disp()); + LIR_Opr index = addr->index(); + if (index->is_illegal()) { + return Address(reg, addr->disp()); + } else { + assert (addr->disp() == 0, "unsupported address mode"); + return Address(reg, index->as_pointer_register()); + } } @@ -3438,7 +3444,28 @@ } } - - +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) { + LIR_Address* addr = src->as_address_ptr(); + + assert(data == dest, "swap uses only 2 operands"); + assert (code == lir_xchg, "no xadd on sparc"); + + if (data->type() == T_INT) { + __ swap(as_Address(addr), data->as_register()); + } else if (data->is_oop()) { + Register obj = data->as_register(); + Register narrow = tmp->as_register(); +#ifdef _LP64 + assert(UseCompressedOops, "swap is 32bit only"); + __ encode_heap_oop(obj, narrow); + __ swap(as_Address(addr), narrow); + __ decode_heap_oop(narrow, obj); +#else + __ swap(as_Address(addr), obj); +#endif + } else { + ShouldNotReachHere(); + } +} #undef __ diff -r 04ed664b7e30 -r c92f43386117 src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -1204,3 +1204,58 @@ __ load(addr, dst); } } + +void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { + BasicType type = x->basic_type(); + LIRItem src(x->object(), this); + LIRItem off(x->offset(), this); + LIRItem value(x->value(), this); + + src.load_item(); + value.load_item(); + off.load_nonconstant(); + + LIR_Opr dst = rlock_result(x, type); + LIR_Opr data = value.result(); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + LIR_Opr offset = off.result(); + + if (data != dst) { + __ move(data, dst); + data = dst; + } + + assert (!x->is_add() && (type == T_INT || (is_obj LP64_ONLY(&& UseCompressedOops))), "unexpected type"); + LIR_Address* addr; + if (offset->is_constant()) { + +#ifdef _LP64 + jlong l = offset->as_jlong(); + assert((jlong)((jint)l) == l, "offset too large for constant"); + jint c = (jint)l; +#else + jint c = offset->as_jint(); +#endif + addr = new LIR_Address(src.result(), c, type); + } else { + addr = new LIR_Address(src.result(), offset, type); + } + + LIR_Opr tmp = LIR_OprFact::illegalOpr; + LIR_Opr ptr = LIR_OprFact::illegalOpr; + + if (is_obj) { + // Do the pre-write barrier, if any. + // barriers on sparc don't work with a base + index address + tmp = FrameMap::G3_opr; + ptr = new_pointer_register(); + __ add(src.result(), off.result(), ptr); + pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + __ xchg(LIR_OprFact::address(addr), data, dst, tmp); + if (is_obj) { + // Seems to be a precise address + post_barrier(ptr, data); + } +} diff -r 04ed664b7e30 -r c92f43386117 src/cpu/sparc/vm/methodHandles_sparc.cpp --- a/src/cpu/sparc/vm/methodHandles_sparc.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -121,6 +121,7 @@ void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, Register temp, bool for_compiler_entry) { assert(method == G5_method, "interpreter calling convention"); + assert_different_registers(method, target, temp); if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { Label run_compiled_code; @@ -153,19 +154,19 @@ BLOCK_COMMENT("jump_to_lambda_form {"); // This is the initial entry point of a lazy method handle. // After type checking, it picks up the invoker from the LambdaForm. - assert_different_registers(recv, method_temp, temp2, temp3); + assert_different_registers(recv, method_temp, temp2); // temp3 is only passed on assert(method_temp == G5_method, "required register for loading method"); //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); // Load the invoker, as MH -> MH.form -> LF.vmentry __ verify_oop(recv); - __ load_heap_oop(Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), method_temp); + __ load_heap_oop(Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), method_temp); __ verify_oop(method_temp); - __ load_heap_oop(Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), method_temp); + __ load_heap_oop(Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), method_temp); __ verify_oop(method_temp); // the following assumes that a Method* is normally compressed in the vmtarget field: - __ ld_ptr(Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())), method_temp); + __ ld_ptr( Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())), method_temp); if (VerifyMethodHandles && !for_compiler_entry) { // make sure recv is already on stack @@ -303,25 +304,25 @@ Register member_reg, bool for_compiler_entry) { assert(is_signature_polymorphic(iid), "expected invoke iid"); - // temps used in this code are not used in *either* compiled or interpreted calling sequences Register temp1 = (for_compiler_entry ? G1_scratch : O1); - Register temp2 = (for_compiler_entry ? G4_scratch : O4); - Register temp3 = G3_scratch; - Register temp4 = (for_compiler_entry ? noreg : O2); + Register temp2 = (for_compiler_entry ? G3_scratch : O2); + Register temp3 = (for_compiler_entry ? G4_scratch : O3); + Register temp4 = (for_compiler_entry ? noreg : O4); if (for_compiler_entry) { assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : O0), "only valid assignment"); - assert_different_registers(temp1, O0, O1, O2, O3, O4, O5); - assert_different_registers(temp2, O0, O1, O2, O3, O4, O5); - assert_different_registers(temp3, O0, O1, O2, O3, O4, O5); - assert_different_registers(temp4, O0, O1, O2, O3, O4, O5); + assert_different_registers(temp1, O0, O1, O2, O3, O4, O5); + assert_different_registers(temp2, O0, O1, O2, O3, O4, O5); + assert_different_registers(temp3, O0, O1, O2, O3, O4, O5); + assert_different_registers(temp4, O0, O1, O2, O3, O4, O5); + } else { + assert_different_registers(temp1, temp2, temp3, temp4, O5_savedSP); // don't trash lastSP } if (receiver_reg != noreg) assert_different_registers(temp1, temp2, temp3, temp4, receiver_reg); if (member_reg != noreg) assert_different_registers(temp1, temp2, temp3, temp4, member_reg); - if (!for_compiler_entry) assert_different_registers(temp1, temp2, temp3, temp4, O5_savedSP); // don't trash lastSP if (iid == vmIntrinsics::_invokeBasic) { // indirect through MH.form.vmentry.vmtarget - jump_to_lambda_form(_masm, receiver_reg, G5_method, temp2, temp3, for_compiler_entry); + jump_to_lambda_form(_masm, receiver_reg, G5_method, temp1, temp2, for_compiler_entry); } else { // The method is a member invoker used by direct method handles. @@ -378,24 +379,22 @@ // member_reg - MemberName that was the trailing argument // temp1_recv_klass - klass of stacked receiver, if needed // O5_savedSP - interpreter linkage (if interpreted) - // O0..O7,G1,G4 - compiler arguments (if compiled) + // O0..O5 - compiler arguments (if compiled) - bool method_is_live = false; + Label L_incompatible_class_change_error; switch (iid) { case vmIntrinsics::_linkToSpecial: if (VerifyMethodHandles) { - verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp2); } __ ld_ptr(member_vmtarget, G5_method); - method_is_live = true; break; case vmIntrinsics::_linkToStatic: if (VerifyMethodHandles) { - verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp2); } __ ld_ptr(member_vmtarget, G5_method); - method_is_live = true; break; case vmIntrinsics::_linkToVirtual: @@ -404,7 +403,7 @@ // minus the CP setup and profiling: if (VerifyMethodHandles) { - verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp2); } // pick out the vtable index from the MemberName, and then we can discard it: @@ -423,7 +422,6 @@ // get target Method* & entry point __ lookup_virtual_method(temp1_recv_klass, temp2_index, G5_method); - method_is_live = true; break; } @@ -432,13 +430,13 @@ // same as TemplateTable::invokeinterface // (minus the CP setup and profiling, with different argument motion) if (VerifyMethodHandles) { - verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp2); } - Register temp3_intf = temp3; - __ load_heap_oop(member_clazz, temp3_intf); - load_klass_from_Class(_masm, temp3_intf, temp2, temp4); - __ verify_klass_ptr(temp3_intf); + Register temp2_intf = temp2; + __ load_heap_oop(member_clazz, temp2_intf); + load_klass_from_Class(_masm, temp2_intf, temp3, temp4); + __ verify_klass_ptr(temp2_intf); Register G5_index = G5_method; __ ld_ptr(member_vmindex, G5_index); @@ -450,37 +448,34 @@ } // given intf, index, and recv klass, dispatch to the implementation method - Label L_no_such_interface; - Register no_sethi_temp = noreg; - __ lookup_interface_method(temp1_recv_klass, temp3_intf, + __ lookup_interface_method(temp1_recv_klass, temp2_intf, // note: next two args must be the same: G5_index, G5_method, - temp2, no_sethi_temp, - L_no_such_interface); - - __ verify_method_ptr(G5_method); - jump_from_method_handle(_masm, G5_method, temp2, temp3, for_compiler_entry); - - __ bind(L_no_such_interface); - AddressLiteral icce(StubRoutines::throw_IncompatibleClassChangeError_entry()); - __ jump_to(icce, temp3); - __ delayed()->nop(); + temp3, temp4, + L_incompatible_class_change_error); break; } default: - fatal(err_msg("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); break; } - if (method_is_live) { - // live at this point: G5_method, O5_savedSP (if interpreted) + // Live at this point: + // G5_method + // O5_savedSP (if interpreted) - // After figuring out which concrete method to call, jump into it. - // Note that this works in the interpreter with no data motion. - // But the compiled version will require that rcx_recv be shifted out. - __ verify_method_ptr(G5_method); - jump_from_method_handle(_masm, G5_method, temp1, temp3, for_compiler_entry); + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that rcx_recv be shifted out. + __ verify_method_ptr(G5_method); + jump_from_method_handle(_masm, G5_method, temp1, temp2, for_compiler_entry); + + if (iid == vmIntrinsics::_linkToInterface) { + __ BIND(L_incompatible_class_change_error); + AddressLiteral icce(StubRoutines::throw_IncompatibleClassChangeError_entry()); + __ jump_to(icce, temp1); + __ delayed()->nop(); } } } diff -r 04ed664b7e30 -r c92f43386117 src/cpu/sparc/vm/sharedRuntime_sparc.cpp --- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -313,6 +313,14 @@ } +// Is vector's size (in bytes) bigger than a size saved by default? +// 8 bytes FP registers are saved by default on SPARC. +bool SharedRuntime::is_wide_vector(int size) { + // Note, MaxVectorSize == 8 on SPARC. + assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size)); + return size > 8; +} + // The java_calling_convention describes stack locations as ideal slots on // a frame with no abi restrictions. Since we must observe abi restrictions // (like the placement of the register window) the slots must be biased by @@ -364,9 +372,9 @@ // --------------------------------------------------------------------------- // The compiled Java calling convention. The Java convention always passes // 64-bit values in adjacent aligned locations (either registers or stack), -// floats in float registers and doubles in aligned float pairs. Values are -// packed in the registers. There is no backing varargs store for values in -// registers. In the 32-bit build, longs are passed in G1 and G4 (cannot be +// floats in float registers and doubles in aligned float pairs. There is +// no backing varargs store for values in registers. +// In the 32-bit build, longs are passed on the stack (cannot be // passed in I's, because longs in I's get their heads chopped off at // interrupt). int SharedRuntime::java_calling_convention(const BasicType *sig_bt, @@ -375,76 +383,13 @@ int is_outgoing) { assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers"); - // Convention is to pack the first 6 int/oop args into the first 6 registers - // (I0-I5), extras spill to the stack. Then pack the first 8 float args - // into F0-F7, extras spill to the stack. Then pad all register sets to - // align. Then put longs and doubles into the same registers as they fit, - // else spill to the stack. const int int_reg_max = SPARC_ARGS_IN_REGS_NUM; const int flt_reg_max = 8; - // - // Where 32-bit 1-reg longs start being passed - // In tiered we must pass on stack because c1 can't use a "pair" in a single reg. - // So make it look like we've filled all the G regs that c2 wants to use. - Register g_reg = TieredCompilation ? noreg : G1; - - // Count int/oop and float args. See how many stack slots we'll need and - // where the longs & doubles will go. - int int_reg_cnt = 0; - int flt_reg_cnt = 0; - // int stk_reg_pairs = frame::register_save_words*(wordSize>>2); - // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots(); - int stk_reg_pairs = 0; - for (int i = 0; i < total_args_passed; i++) { - switch (sig_bt[i]) { - case T_LONG: // LP64, longs compete with int args - assert(sig_bt[i+1] == T_VOID, ""); -#ifdef _LP64 - if (int_reg_cnt < int_reg_max) int_reg_cnt++; -#endif - break; - case T_OBJECT: - case T_ARRAY: - case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address - if (int_reg_cnt < int_reg_max) int_reg_cnt++; -#ifndef _LP64 - else stk_reg_pairs++; -#endif - break; - case T_INT: - case T_SHORT: - case T_CHAR: - case T_BYTE: - case T_BOOLEAN: - if (int_reg_cnt < int_reg_max) int_reg_cnt++; - else stk_reg_pairs++; - break; - case T_FLOAT: - if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++; - else stk_reg_pairs++; - break; - case T_DOUBLE: - assert(sig_bt[i+1] == T_VOID, ""); - break; - case T_VOID: - break; - default: - ShouldNotReachHere(); - } - } - - // This is where the longs/doubles start on the stack. - stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round - - int flt_reg_pairs = (flt_reg_cnt+1) & ~1; - - // int stk_reg = frame::register_save_words*(wordSize>>2); - // int stk_reg = SharedRuntime::out_preserve_stack_slots(); - int stk_reg = 0; + int int_reg = 0; int flt_reg = 0; - - // Now do the signature layout + int slot = 0; + for (int i = 0; i < total_args_passed; i++) { switch (sig_bt[i]) { case T_INT: @@ -461,11 +406,14 @@ Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++); regs[i].set1(r->as_VMReg()); } else { - regs[i].set1(VMRegImpl::stack2reg(stk_reg++)); + regs[i].set1(VMRegImpl::stack2reg(slot++)); } break; #ifdef _LP64 + case T_LONG: + assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half"); + // fall-through case T_OBJECT: case T_ARRAY: case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address @@ -473,78 +421,57 @@ Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++); regs[i].set2(r->as_VMReg()); } else { - regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); - stk_reg_pairs += 2; + slot = round_to(slot, 2); // align + regs[i].set2(VMRegImpl::stack2reg(slot)); + slot += 2; } break; -#endif // _LP64 - +#else case T_LONG: assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half"); -#ifdef _LP64 - if (int_reg < int_reg_max) { - Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++); - regs[i].set2(r->as_VMReg()); - } else { - regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); - stk_reg_pairs += 2; - } -#else -#ifdef COMPILER2 - // For 32-bit build, can't pass longs in O-regs because they become - // I-regs and get trashed. Use G-regs instead. G1 and G4 are almost - // spare and available. This convention isn't used by the Sparc ABI or - // anywhere else. If we're tiered then we don't use G-regs because c1 - // can't deal with them as a "pair". (Tiered makes this code think g's are filled) - // G0: zero - // G1: 1st Long arg - // G2: global allocated to TLS - // G3: used in inline cache check - // G4: 2nd Long arg - // G5: used in inline cache check - // G6: used by OS - // G7: used by OS - - if (g_reg == G1) { - regs[i].set2(G1->as_VMReg()); // This long arg in G1 - g_reg = G4; // Where the next arg goes - } else if (g_reg == G4) { - regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4 - g_reg = noreg; // No more longs in registers - } else { - regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); - stk_reg_pairs += 2; - } -#else // COMPILER2 - regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); - stk_reg_pairs += 2; -#endif // COMPILER2 -#endif // _LP64 + // On 32-bit SPARC put longs always on the stack to keep the pressure off + // integer argument registers. They should be used for oops. + slot = round_to(slot, 2); // align + regs[i].set2(VMRegImpl::stack2reg(slot)); + slot += 2; +#endif break; case T_FLOAT: - if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg()); - else regs[i].set1(VMRegImpl::stack2reg(stk_reg++)); + if (flt_reg < flt_reg_max) { + FloatRegister r = as_FloatRegister(flt_reg++); + regs[i].set1(r->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(slot++)); + } break; + case T_DOUBLE: assert(sig_bt[i+1] == T_VOID, "expecting half"); - if (flt_reg_pairs + 1 < flt_reg_max) { - regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg()); - flt_reg_pairs += 2; + if (round_to(flt_reg, 2) + 1 < flt_reg_max) { + flt_reg = round_to(flt_reg, 2); // align + FloatRegister r = as_FloatRegister(flt_reg); + regs[i].set2(r->as_VMReg()); + flt_reg += 2; } else { - regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs)); - stk_reg_pairs += 2; + slot = round_to(slot, 2); // align + regs[i].set2(VMRegImpl::stack2reg(slot)); + slot += 2; } break; - case T_VOID: regs[i].set_bad(); break; // Halves of longs & doubles + + case T_VOID: + regs[i].set_bad(); // Halves of longs & doubles + break; + default: - ShouldNotReachHere(); + fatal(err_msg_res("unknown basic type %d", sig_bt[i])); + break; } } // retun the amount of stack space these arguments will need. - return stk_reg_pairs; - + return slot; } // Helper class mostly to avoid passing masm everywhere, and handle @@ -601,8 +528,7 @@ Label L; __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch); __ br_null(G3_scratch, false, Assembler::pt, L); - // Schedule the branch target address early. - __ delayed()->ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch); + __ delayed()->nop(); // Call into the VM to patch the caller, then jump to compiled callee __ save_frame(4); // Args in compiled layout; do not blow them @@ -645,7 +571,6 @@ __ ldx(FP, -8 + STACK_BIAS, G1); __ ldx(FP, -16 + STACK_BIAS, G4); __ mov(L5, G5_method); - __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch); #endif /* _LP64 */ __ restore(); // Restore args @@ -726,7 +651,7 @@ int comp_args_on_stack, // VMRegStackSlots const BasicType *sig_bt, const VMRegPair *regs, - Label& skip_fixup) { + Label& L_skip_fixup) { // Before we get into the guts of the C2I adapter, see if we should be here // at all. We've come from compiled code and are attempting to jump to the @@ -747,7 +672,7 @@ patch_callers_callsite(); - __ bind(skip_fixup); + __ bind(L_skip_fixup); // Since all args are passed on the stack, total_args_passed*wordSize is the // space we need. Add in varargs area needed by the interpreter. Round up @@ -757,46 +682,18 @@ (frame::varargs_offset - frame::register_save_words)*wordSize; const int extraspace = round_to(arg_size + varargs_area, 2*wordSize); - int bias = STACK_BIAS; + const int bias = STACK_BIAS; const int interp_arg_offset = frame::varargs_offset*wordSize + (total_args_passed-1)*Interpreter::stackElementSize; - Register base = SP; - -#ifdef _LP64 - // In the 64bit build because of wider slots and STACKBIAS we can run - // out of bits in the displacement to do loads and stores. Use g3 as - // temporary displacement. - if (!Assembler::is_simm13(extraspace)) { - __ set(extraspace, G3_scratch); - __ sub(SP, G3_scratch, SP); - } else { - __ sub(SP, extraspace, SP); - } + const Register base = SP; + + // Make some extra space on the stack. + __ sub(SP, __ ensure_simm13_or_reg(extraspace, G3_scratch), SP); set_Rdisp(G3_scratch); -#else - __ sub(SP, extraspace, SP); -#endif // _LP64 - - // First write G1 (if used) to where ever it must go - for (int i=0; ias_VMReg()) { - if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { - store_c2i_object(G1_scratch, base, st_off); - } else if (sig_bt[i] == T_LONG) { - assert(!TieredCompilation, "should not use register args for longs"); - store_c2i_long(G1_scratch, base, st_off, false); - } else { - store_c2i_int(G1_scratch, base, st_off); - } - } - } - - // Now write the args into the outgoing interpreter space - for (int i=0; iis_valid(), ""); continue; } - // Skip G1 if found as we did it first in order to free it up - if (r_1 == G1_scratch->as_VMReg()) { - continue; - } -#ifdef ASSERT - bool G1_forced = false; -#endif // ASSERT if (r_1->is_stack()) { // Pretend stack targets are loaded into G1 -#ifdef _LP64 - Register ld_off = Rdisp; - __ set(reg2offset(r_1) + extraspace + bias, ld_off); -#else - int ld_off = reg2offset(r_1) + extraspace + bias; -#endif // _LP64 -#ifdef ASSERT - G1_forced = true; -#endif // ASSERT + RegisterOrConstant ld_off = reg2offset(r_1) + extraspace + bias; + ld_off = __ ensure_simm13_or_reg(ld_off, Rdisp); r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch); else __ ldx(base, ld_off, G1_scratch); @@ -831,11 +714,6 @@ if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { store_c2i_object(r, base, st_off); } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -#ifndef _LP64 - if (TieredCompilation) { - assert(G1_forced || sig_bt[i] != T_LONG, "should not use register args for longs"); - } -#endif // _LP64 store_c2i_long(r, base, st_off, r_2->is_stack()); } else { store_c2i_int(r, base, st_off); @@ -851,19 +729,12 @@ } } -#ifdef _LP64 - // Need to reload G3_scratch, used for temporary displacements. + // Load the interpreter entry point. __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch); // Pass O5_savedSP as an argument to the interpreter. // The interpreter will restore SP to this value before returning. - __ set(extraspace, G1); - __ add(SP, G1, O5_savedSP); -#else - // Pass O5_savedSP as an argument to the interpreter. - // The interpreter will restore SP to this value before returning. - __ add(SP, extraspace, O5_savedSP); -#endif // _LP64 + __ add(SP, __ ensure_simm13_or_reg(extraspace, G1), O5_savedSP); __ mov((frame::varargs_offset)*wordSize - 1*Interpreter::stackElementSize+bias+BytesPerWord, G1); @@ -971,7 +842,6 @@ // Outputs: // G2_thread - TLS - // G1, G4 - Outgoing long args in 32-bit build // O0-O5 - Outgoing args in compiled layout // O6 - Adjusted or restored SP // O7 - Valid return address @@ -1016,10 +886,10 @@ // +--------------+ <--- start of outgoing args // | pad, align | | // +--------------+ | - // | ints, floats | |---Outgoing stack args, packed low. - // +--------------+ | First few args in registers. - // : doubles : | - // | longs | | + // | ints, longs, | | + // | floats, | |---Outgoing stack args. + // : doubles : | First few args in registers. + // | | | // +--------------+ <--- SP' + 16*wordsize // | | // : window : @@ -1033,7 +903,6 @@ // Cut-out for having no stack args. Since up to 6 args are passed // in registers, we will commonly have no stack args. if (comp_args_on_stack > 0) { - // Convert VMReg stack slots to words. int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; // Round up to miminum stack alignment, in wordSize @@ -1044,13 +913,9 @@ __ sub(SP, (comp_words_on_stack)*wordSize, SP); } - // Will jump to the compiled code just as if compiled code was doing it. - // Pre-load the register-jump target early, to schedule it better. - __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3); - // Now generate the shuffle code. Pick up all register args and move the // rest through G1_scratch. - for (int i=0; iis_FloatRegister(), ""); if (!r_2->is_valid()) { - __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister()); + __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister()); } else { #ifdef _LP64 // In V9, doubles are given 2 64-bit slots in the interpreter, but the @@ -1104,11 +968,11 @@ // spare float register. RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? next_arg_slot(ld_off) : arg_slot(ld_off); - __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister()); + __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister()); #else // Need to marshal 64-bit value from misaligned Lesp loads __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister()); - __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister()); + __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister()); #endif } } @@ -1124,76 +988,35 @@ else __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot); } } - bool made_space = false; -#ifndef _LP64 - // May need to pick up a few long args in G1/G4 - bool g4_crushed = false; - bool g3_crushed = false; - for (int i=0; iis_Register() && regs[i].second()->is_valid()) { - // Load in argument order going down - int ld_off = (total_args_passed-i)*Interpreter::stackElementSize; - // Need to marshal 64-bit value from misaligned Lesp loads - Register r = regs[i].first()->as_Register()->after_restore(); - if (r == G1 || r == G4) { - assert(!g4_crushed, "ordering problem"); - if (r == G4){ - g4_crushed = true; - __ lduw(Gargs, arg_slot(ld_off) , G3_scratch); // Load lo bits - __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits - } else { - // better schedule this way - __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits - __ lduw(Gargs, arg_slot(ld_off) , G3_scratch); // Load lo bits - } - g3_crushed = true; - __ sllx(r, 32, r); - __ or3(G3_scratch, r, r); - } else { - assert(r->is_out(), "longs passed in two O registers"); - __ ld (Gargs, arg_slot(ld_off) , r->successor()); // Load lo bits - __ ld (Gargs, next_arg_slot(ld_off), r); // Load hi bits - } - } - } -#endif // Jump to the compiled code just as if compiled code was doing it. - // -#ifndef _LP64 - if (g3_crushed) { - // Rats load was wasted, at least it is in cache... - __ ld_ptr(G5_method, Method::from_compiled_offset(), G3); - } -#endif /* _LP64 */ - - // 6243940 We might end up in handle_wrong_method if - // the callee is deoptimized as we race thru here. If that - // happens we don't want to take a safepoint because the - // caller frame will look interpreted and arguments are now - // "compiled" so it is much better to make this transition - // invisible to the stack walking code. Unfortunately if - // we try and find the callee by normal means a safepoint - // is possible. So we stash the desired callee in the thread - // and the vm will find there should this case occur. - Address callee_target_addr(G2_thread, JavaThread::callee_target_offset()); - __ st_ptr(G5_method, callee_target_addr); - - if (StressNonEntrant) { - // Open a big window for deopt failure - __ save_frame(0); - __ mov(G0, L0); - Label loop; - __ bind(loop); - __ sub(L0, 1, L0); - __ br_null_short(L0, Assembler::pt, loop); - - __ restore(); - } - - - __ jmpl(G3, 0, G0); - __ delayed()->nop(); + __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3); + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. + Address callee_target_addr(G2_thread, JavaThread::callee_target_offset()); + __ st_ptr(G5_method, callee_target_addr); + + if (StressNonEntrant) { + // Open a big window for deopt failure + __ save_frame(0); + __ mov(G0, L0); + Label loop; + __ bind(loop); + __ sub(L0, 1, L0); + __ br_null_short(L0, Assembler::pt, loop); + __ restore(); + } + + __ jmpl(G3, 0, G0); + __ delayed()->nop(); } // --------------------------------------------------------------- @@ -1221,28 +1044,17 @@ // compiled code, which relys solely on SP and not FP, get sick). address c2i_unverified_entry = __ pc(); - Label skip_fixup; + Label L_skip_fixup; { -#if !defined(_LP64) && defined(COMPILER2) - Register R_temp = L0; // another scratch register -#else - Register R_temp = G1; // another scratch register -#endif + Register R_temp = G1; // another scratch register AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); __ verify_oop(O0); __ load_klass(O0, G3_scratch); -#if !defined(_LP64) && defined(COMPILER2) - __ save(SP, -frame::register_save_words*wordSize, SP); __ ld_ptr(G5_method, CompiledICHolder::holder_klass_offset(), R_temp); __ cmp(G3_scratch, R_temp); - __ restore(); -#else - __ ld_ptr(G5_method, CompiledICHolder::holder_klass_offset(), R_temp); - __ cmp(G3_scratch, R_temp); -#endif Label ok, ok2; __ brx(Assembler::equal, false, Assembler::pt, ok); @@ -1256,8 +1068,8 @@ // the call site corrected. __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch); __ bind(ok2); - __ br_null(G3_scratch, false, Assembler::pt, skip_fixup); - __ delayed()->ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch); + __ br_null(G3_scratch, false, Assembler::pt, L_skip_fixup); + __ delayed()->nop(); __ jump_to(ic_miss, G3_scratch); __ delayed()->nop(); @@ -1265,7 +1077,7 @@ address c2i_entry = __ pc(); - agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, L_skip_fixup); __ flush(); return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); @@ -1985,12 +1797,12 @@ } static void verify_oop_args(MacroAssembler* masm, - int total_args_passed, + methodHandle method, const BasicType* sig_bt, const VMRegPair* regs) { Register temp_reg = G5_method; // not part of any compiled calling seq if (VerifyOops) { - for (int i = 0; i < total_args_passed; i++) { + for (int i = 0; i < method->size_of_parameters(); i++) { if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { VMReg r = regs[i].first(); @@ -2009,35 +1821,32 @@ } static void gen_special_dispatch(MacroAssembler* masm, - int total_args_passed, - int comp_args_on_stack, - vmIntrinsics::ID special_dispatch, + methodHandle method, const BasicType* sig_bt, const VMRegPair* regs) { - verify_oop_args(masm, total_args_passed, sig_bt, regs); + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); // Now write the args into the outgoing interpreter space bool has_receiver = false; Register receiver_reg = noreg; int member_arg_pos = -1; Register member_reg = noreg; - int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch); + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); if (ref_kind != 0) { - member_arg_pos = total_args_passed - 1; // trailing MemberName argument + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument member_reg = G5_method; // known to be free at this point has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); - } else if (special_dispatch == vmIntrinsics::_invokeBasic) { + } else if (iid == vmIntrinsics::_invokeBasic) { has_receiver = true; } else { - fatal(err_msg("special_dispatch=%d", special_dispatch)); + fatal(err_msg_res("unexpected intrinsic id %d", iid)); } if (member_reg != noreg) { // Load the member_arg into register, if necessary. - assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob"); - assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object"); + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); VMReg r = regs[member_arg_pos].first(); - assert(r->is_valid(), "bad member arg"); if (r->is_stack()) { RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; ld_off = __ ensure_simm13_or_reg(ld_off, member_reg); @@ -2050,7 +1859,7 @@ if (has_receiver) { // Make sure the receiver is loaded into a register. - assert(total_args_passed > 0, "oob"); + assert(method->size_of_parameters() > 0, "oob"); assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); VMReg r = regs[0].first(); assert(r->is_valid(), "bad receiver arg"); @@ -2058,7 +1867,7 @@ // Porting note: This assumes that compiled calling conventions always // pass the receiver oop in a register. If this is not true on some // platform, pick a temp and load the receiver from stack. - assert(false, "receiver always in a register"); + fatal("receiver always in a register"); receiver_reg = G3_scratch; // known to be free at this point RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; ld_off = __ ensure_simm13_or_reg(ld_off, member_reg); @@ -2070,7 +1879,7 @@ } // Figure out which address we are really jumping to: - MethodHandles::generate_method_handle_dispatch(masm, special_dispatch, + MethodHandles::generate_method_handle_dispatch(masm, iid, receiver_reg, member_reg, /*for_compiler_entry:*/ true); } @@ -2103,11 +1912,9 @@ // transition back to thread_in_Java // return to caller // -nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, +nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, methodHandle method, int compile_id, - int total_in_args, - int comp_args_on_stack, // in VMRegStackSlots BasicType* in_sig_bt, VMRegPair* in_regs, BasicType ret_type) { @@ -2116,9 +1923,7 @@ intptr_t start = (intptr_t)__ pc(); int vep_offset = ((intptr_t)__ pc()) - start; gen_special_dispatch(masm, - total_in_args, - comp_args_on_stack, - method->intrinsic_id(), + method, in_sig_bt, in_regs); int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period @@ -2220,6 +2025,7 @@ // we convert the java signature to a C signature by inserting // the hidden arguments as arg[0] and possibly arg[1] (static method) + const int total_in_args = method->size_of_parameters(); int total_c_args = total_in_args; int total_save_slots = 6 * VMRegImpl::slots_per_word; if (!is_critical_native) { @@ -3936,7 +3742,7 @@ // the 64-bit %o's, then do a save, then fixup the caller's SP (our FP). // Tricky, tricky, tricky... -SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) { +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); // allocate space for the code @@ -3954,6 +3760,7 @@ int start = __ offset(); + bool cause_return = (poll_type == POLL_AT_RETURN); // If this causes a return before the processing, then do a "restore" if (cause_return) { __ restore(); diff -r 04ed664b7e30 -r c92f43386117 src/cpu/sparc/vm/sparc.ad --- a/src/cpu/sparc/vm/sparc.ad Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/sparc/vm/sparc.ad Mon Sep 24 14:46:06 2012 -0700 @@ -1838,6 +1838,12 @@ case Op_PopCountL: if (!UsePopCountInstruction) return false; + case Op_CompareAndSwapL: +#ifdef _LP64 + case Op_CompareAndSwapP: +#endif + if (!VM_Version::supports_cx8()) + return false; break; } @@ -7199,6 +7205,7 @@ // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them instruct compareAndSwapL_bool(iRegP mem_ptr, iRegL oldval, iRegL newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{ + predicate(VM_Version::supports_cx8()); match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); effect( USE mem_ptr, KILL ccr, KILL tmp1); format %{ @@ -7230,6 +7237,9 @@ %} instruct compareAndSwapP_bool(iRegP mem_ptr, iRegP oldval, iRegP newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{ +#ifdef _LP64 + predicate(VM_Version::supports_cx8()); +#endif match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); effect( USE mem_ptr, KILL ccr, KILL tmp1); format %{ @@ -7264,6 +7274,38 @@ ins_pipe( long_memory_op ); %} +instruct xchgI( memory mem, iRegI newval) %{ + match(Set newval (GetAndSetI mem newval)); + format %{ "SWAP [$mem],$newval" %} + size(4); + ins_encode %{ + __ swap($mem$$Address, $newval$$Register); + %} + ins_pipe( long_memory_op ); +%} + +#ifndef _LP64 +instruct xchgP( memory mem, iRegP newval) %{ + match(Set newval (GetAndSetP mem newval)); + format %{ "SWAP [$mem],$newval" %} + size(4); + ins_encode %{ + __ swap($mem$$Address, $newval$$Register); + %} + ins_pipe( long_memory_op ); +%} +#endif + +instruct xchgN( memory mem, iRegN newval) %{ + match(Set newval (GetAndSetN mem newval)); + format %{ "SWAP [$mem],$newval" %} + size(4); + ins_encode %{ + __ swap($mem$$Address, $newval$$Register); + %} + ins_pipe( long_memory_op ); +%} + //--------------------- // Subtraction Instructions // Register Subtraction diff -r 04ed664b7e30 -r c92f43386117 src/cpu/sparc/vm/vm_version_sparc.cpp --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -96,6 +96,7 @@ UseSSE = 0; // Only on x86 and x64 _supports_cx8 = has_v9(); + _supports_atomic_getset4 = true; // swap instruction if (is_niagara()) { // Indirect branch is the same cost as direct @@ -338,7 +339,11 @@ unsigned int VM_Version::calc_parallel_worker_threads() { unsigned int result; - if (is_niagara_plus()) { + if (is_M_series()) { + // for now, use same gc thread calculation for M-series as for niagara-plus + // in future, we may want to tweak parameters for nof_parallel_worker_thread + result = nof_parallel_worker_threads(5, 16, 8); + } else if (is_niagara_plus()) { result = nof_parallel_worker_threads(5, 16, 8); } else { result = nof_parallel_worker_threads(5, 8, 8); diff -r 04ed664b7e30 -r c92f43386117 src/cpu/sparc/vm/vm_version_sparc.hpp --- a/src/cpu/sparc/vm/vm_version_sparc.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -124,6 +124,8 @@ // Returns true if the platform is in the niagara line (T series) // and newer than the niagara1. static bool is_niagara_plus() { return is_T_family(_features) && !is_T1_model(_features); } + + static bool is_M_series() { return is_M_family(_features); } static bool is_T4() { return is_T_family(_features) && has_cbcond(); } // Fujitsu SPARC64 diff -r 04ed664b7e30 -r c92f43386117 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/x86/vm/assembler_x86.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -3496,6 +3496,33 @@ emit_byte(0x01); } +void Assembler::vinsertf128h(XMMRegister dst, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + bool vector256 = true; + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding + vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); + emit_byte(0x18); + emit_operand(dst, src); + // 0x01 - insert into upper 128 bits + emit_byte(0x01); +} + +void Assembler::vextractf128h(Address dst, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + bool vector256 = true; + assert(src != xnoreg, "sanity"); + int src_enc = src->encoding(); + vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); + emit_byte(0x19); + emit_operand(src, dst); + // 0x01 - extract from upper 128 bits + emit_byte(0x01); +} + void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { assert(VM_Version::supports_avx2(), ""); bool vector256 = true; @@ -3507,6 +3534,33 @@ emit_byte(0x01); } +void Assembler::vinserti128h(XMMRegister dst, Address src) { + assert(VM_Version::supports_avx2(), ""); + InstructionMark im(this); + bool vector256 = true; + assert(dst != xnoreg, "sanity"); + int dst_enc = dst->encoding(); + // swap src<->dst for encoding + vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); + emit_byte(0x38); + emit_operand(dst, src); + // 0x01 - insert into upper 128 bits + emit_byte(0x01); +} + +void Assembler::vextracti128h(Address dst, XMMRegister src) { + assert(VM_Version::supports_avx2(), ""); + InstructionMark im(this); + bool vector256 = true; + assert(src != xnoreg, "sanity"); + int src_enc = src->encoding(); + vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256); + emit_byte(0x39); + emit_operand(src, dst); + // 0x01 - extract from upper 128 bits + emit_byte(0x01); +} + void Assembler::vzeroupper() { assert(VM_Version::supports_avx(), ""); (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE); @@ -8907,11 +8961,9 @@ pusha(); // if we are coming from c1, xmm registers may be live - if (UseSSE >= 1) { - subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); - } int off = 0; if (UseSSE == 1) { + subptr(rsp, sizeof(jdouble)*8); movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); @@ -8921,23 +8973,50 @@ movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); } else if (UseSSE >= 2) { - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7); +#ifdef COMPILER2 + if (MaxVectorSize > 16) { + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); + // Save upper half of YMM registes + subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); + vextractf128h(Address(rsp, 0),xmm0); + vextractf128h(Address(rsp, 16),xmm1); + vextractf128h(Address(rsp, 32),xmm2); + vextractf128h(Address(rsp, 48),xmm3); + vextractf128h(Address(rsp, 64),xmm4); + vextractf128h(Address(rsp, 80),xmm5); + vextractf128h(Address(rsp, 96),xmm6); + vextractf128h(Address(rsp,112),xmm7); #ifdef _LP64 - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14); - movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15); + vextractf128h(Address(rsp,128),xmm8); + vextractf128h(Address(rsp,144),xmm9); + vextractf128h(Address(rsp,160),xmm10); + vextractf128h(Address(rsp,176),xmm11); + vextractf128h(Address(rsp,192),xmm12); + vextractf128h(Address(rsp,208),xmm13); + vextractf128h(Address(rsp,224),xmm14); + vextractf128h(Address(rsp,240),xmm15); +#endif + } +#endif + // Save whole 128bit (16 bytes) XMM regiters + subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); + movdqu(Address(rsp,off++*16),xmm0); + movdqu(Address(rsp,off++*16),xmm1); + movdqu(Address(rsp,off++*16),xmm2); + movdqu(Address(rsp,off++*16),xmm3); + movdqu(Address(rsp,off++*16),xmm4); + movdqu(Address(rsp,off++*16),xmm5); + movdqu(Address(rsp,off++*16),xmm6); + movdqu(Address(rsp,off++*16),xmm7); +#ifdef _LP64 + movdqu(Address(rsp,off++*16),xmm8); + movdqu(Address(rsp,off++*16),xmm9); + movdqu(Address(rsp,off++*16),xmm10); + movdqu(Address(rsp,off++*16),xmm11); + movdqu(Address(rsp,off++*16),xmm12); + movdqu(Address(rsp,off++*16),xmm13); + movdqu(Address(rsp,off++*16),xmm14); + movdqu(Address(rsp,off++*16),xmm15); #endif } @@ -9015,28 +9094,52 @@ movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); + addptr(rsp, sizeof(jdouble)*8); } else if (UseSSE >= 2) { - movdbl(xmm0, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm1, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm2, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm3, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm4, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm5, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm6, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm7, Address(rsp,off++*sizeof(jdouble))); + // Restore whole 128bit (16 bytes) XMM regiters + movdqu(xmm0, Address(rsp,off++*16)); + movdqu(xmm1, Address(rsp,off++*16)); + movdqu(xmm2, Address(rsp,off++*16)); + movdqu(xmm3, Address(rsp,off++*16)); + movdqu(xmm4, Address(rsp,off++*16)); + movdqu(xmm5, Address(rsp,off++*16)); + movdqu(xmm6, Address(rsp,off++*16)); + movdqu(xmm7, Address(rsp,off++*16)); #ifdef _LP64 - movdbl(xmm8, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm9, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm10, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm11, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm12, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm13, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm14, Address(rsp,off++*sizeof(jdouble))); - movdbl(xmm15, Address(rsp,off++*sizeof(jdouble))); + movdqu(xmm8, Address(rsp,off++*16)); + movdqu(xmm9, Address(rsp,off++*16)); + movdqu(xmm10, Address(rsp,off++*16)); + movdqu(xmm11, Address(rsp,off++*16)); + movdqu(xmm12, Address(rsp,off++*16)); + movdqu(xmm13, Address(rsp,off++*16)); + movdqu(xmm14, Address(rsp,off++*16)); + movdqu(xmm15, Address(rsp,off++*16)); #endif - } - if (UseSSE >= 1) { - addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8)); + addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); +#ifdef COMPILER2 + if (MaxVectorSize > 16) { + // Restore upper half of YMM registes. + vinsertf128h(xmm0, Address(rsp, 0)); + vinsertf128h(xmm1, Address(rsp, 16)); + vinsertf128h(xmm2, Address(rsp, 32)); + vinsertf128h(xmm3, Address(rsp, 48)); + vinsertf128h(xmm4, Address(rsp, 64)); + vinsertf128h(xmm5, Address(rsp, 80)); + vinsertf128h(xmm6, Address(rsp, 96)); + vinsertf128h(xmm7, Address(rsp,112)); +#ifdef _LP64 + vinsertf128h(xmm8, Address(rsp,128)); + vinsertf128h(xmm9, Address(rsp,144)); + vinsertf128h(xmm10, Address(rsp,160)); + vinsertf128h(xmm11, Address(rsp,176)); + vinsertf128h(xmm12, Address(rsp,192)); + vinsertf128h(xmm13, Address(rsp,208)); + vinsertf128h(xmm14, Address(rsp,224)); + vinsertf128h(xmm15, Address(rsp,240)); +#endif + addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); + } +#endif } popa(); } diff -r 04ed664b7e30 -r c92f43386117 src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/x86/vm/assembler_x86.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -1743,6 +1743,12 @@ void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src); void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src); + // Load/store high 128bit of YMM registers which does not destroy other half. + void vinsertf128h(XMMRegister dst, Address src); + void vinserti128h(XMMRegister dst, Address src); + void vextractf128h(Address dst, XMMRegister src); + void vextracti128h(Address dst, XMMRegister src); + // AVX instruction which is used to clear upper 128 bits of YMM registers and // to avoid transaction penalty between AVX and SSE states. There is no // penalty if legacy SSE instructions are encoded using VEX prefix because diff -r 04ed664b7e30 -r c92f43386117 src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -3794,5 +3794,49 @@ // do nothing for now } +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) { + assert(data == dest, "xchg/xadd uses only 2 operands"); + + if (data->type() == T_INT) { + if (code == lir_xadd) { + if (os::is_MP()) { + __ lock(); + } + __ xaddl(as_Address(src->as_address_ptr()), data->as_register()); + } else { + __ xchgl(data->as_register(), as_Address(src->as_address_ptr())); + } + } else if (data->is_oop()) { + assert (code == lir_xchg, "xadd for oops"); + Register obj = data->as_register(); +#ifdef _LP64 + if (UseCompressedOops) { + __ encode_heap_oop(obj); + __ xchgl(obj, as_Address(src->as_address_ptr())); + __ decode_heap_oop(obj); + } else { + __ xchgptr(obj, as_Address(src->as_address_ptr())); + } +#else + __ xchgl(obj, as_Address(src->as_address_ptr())); +#endif + } else if (data->type() == T_LONG) { +#ifdef _LP64 + assert(data->as_register_lo() == data->as_register_hi(), "should be a single register"); + if (code == lir_xadd) { + if (os::is_MP()) { + __ lock(); + } + __ xaddq(as_Address(src->as_address_ptr()), data->as_register_lo()); + } else { + __ xchgq(data->as_register_lo(), as_Address(src->as_address_ptr())); + } +#else + ShouldNotReachHere(); +#endif + } else { + ShouldNotReachHere(); + } +} #undef __ diff -r 04ed664b7e30 -r c92f43386117 src/cpu/x86/vm/c1_LIRGenerator_x86.cpp --- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -753,9 +753,24 @@ LIR_Opr addr = new_pointer_register(); LIR_Address* a; if(offset.result()->is_constant()) { +#ifdef _LP64 + jlong c = offset.result()->as_jlong(); + if ((jlong)((jint)c) == c) { + a = new LIR_Address(obj.result(), + (jint)c, + as_BasicType(type)); + } else { + LIR_Opr tmp = new_register(T_LONG); + __ move(offset.result(), tmp); + a = new LIR_Address(obj.result(), + tmp, + as_BasicType(type)); + } +#else a = new LIR_Address(obj.result(), - NOT_LP64(offset.result()->as_constant_ptr()->as_jint()) LP64_ONLY((int)offset.result()->as_constant_ptr()->as_jlong()), + offset.result()->as_jint(), as_BasicType(type)); +#endif } else { a = new LIR_Address(obj.result(), offset.result(), @@ -1345,3 +1360,57 @@ } } } + +void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { + BasicType type = x->basic_type(); + LIRItem src(x->object(), this); + LIRItem off(x->offset(), this); + LIRItem value(x->value(), this); + + src.load_item(); + value.load_item(); + off.load_nonconstant(); + + LIR_Opr dst = rlock_result(x, type); + LIR_Opr data = value.result(); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + LIR_Opr offset = off.result(); + + assert (type == T_INT || (!x->is_add() && is_obj) LP64_ONLY( || type == T_LONG ), "unexpected type"); + LIR_Address* addr; + if (offset->is_constant()) { +#ifdef _LP64 + jlong c = offset->as_jlong(); + if ((jlong)((jint)c) == c) { + addr = new LIR_Address(src.result(), (jint)c, type); + } else { + LIR_Opr tmp = new_register(T_LONG); + __ move(offset, tmp); + addr = new LIR_Address(src.result(), tmp, type); + } +#else + addr = new LIR_Address(src.result(), offset->as_jint(), type); +#endif + } else { + addr = new LIR_Address(src.result(), offset, type); + } + + if (data != dst) { + __ move(data, dst); + data = dst; + } + if (x->is_add()) { + __ xadd(LIR_OprFact::address(addr), data, dst, LIR_OprFact::illegalOpr); + } else { + if (is_obj) { + // Do the pre-write barrier, if any. + pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + __ xchg(LIR_OprFact::address(addr), data, dst, LIR_OprFact::illegalOpr); + if (is_obj) { + // Seems to be a precise address + post_barrier(LIR_OprFact::address(addr), data); + } + } +} diff -r 04ed664b7e30 -r c92f43386117 src/cpu/x86/vm/methodHandles_x86.cpp --- a/src/cpu/x86/vm/methodHandles_x86.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/x86/vm/methodHandles_x86.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -327,10 +327,11 @@ assert_different_registers(temp3, rcx, rdx); } #endif + else { + assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP + } assert_different_registers(temp1, temp2, temp3, receiver_reg); assert_different_registers(temp1, temp2, temp3, member_reg); - if (!for_compiler_entry) - assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP if (iid == vmIntrinsics::_invokeBasic) { // indirect through MH.form.vmentry.vmtarget @@ -392,14 +393,13 @@ // rsi/r13 - interpreter linkage (if interpreted) // rcx, rdx, rsi, rdi, r8, r8 - compiler arguments (if compiled) - bool method_is_live = false; + Label L_incompatible_class_change_error; switch (iid) { case vmIntrinsics::_linkToSpecial: if (VerifyMethodHandles) { verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); } __ movptr(rbx_method, member_vmtarget); - method_is_live = true; break; case vmIntrinsics::_linkToStatic: @@ -407,7 +407,6 @@ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); } __ movptr(rbx_method, member_vmtarget); - method_is_live = true; break; case vmIntrinsics::_linkToVirtual: @@ -436,7 +435,6 @@ // get target Method* & entry point __ lookup_virtual_method(temp1_recv_klass, temp2_index, rbx_method); - method_is_live = true; break; } @@ -464,35 +462,32 @@ } // given intf, index, and recv klass, dispatch to the implementation method - Label L_no_such_interface; __ lookup_interface_method(temp1_recv_klass, temp3_intf, // note: next two args must be the same: rbx_index, rbx_method, temp2, - L_no_such_interface); - - __ verify_method_ptr(rbx_method); - jump_from_method_handle(_masm, rbx_method, temp2, for_compiler_entry); - __ hlt(); - - __ bind(L_no_such_interface); - __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); + L_incompatible_class_change_error); break; } default: - fatal(err_msg("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); break; } - if (method_is_live) { - // live at this point: rbx_method, rsi/r13 (if interpreted) + // Live at this point: + // rbx_method + // rsi/r13 (if interpreted) - // After figuring out which concrete method to call, jump into it. - // Note that this works in the interpreter with no data motion. - // But the compiled version will require that rcx_recv be shifted out. - __ verify_method_ptr(rbx_method); - jump_from_method_handle(_masm, rbx_method, temp1, for_compiler_entry); + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that rcx_recv be shifted out. + __ verify_method_ptr(rbx_method); + jump_from_method_handle(_masm, rbx_method, temp1, for_compiler_entry); + + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); } } } diff -r 04ed664b7e30 -r c92f43386117 src/cpu/x86/vm/sharedRuntime_x86_32.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -46,11 +46,11 @@ const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; class RegisterSaver { - enum { FPU_regs_live = 8 /*for the FPU stack*/+8/*eight more for XMM registers*/ }; // Capture info about frame layout +#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off enum layout { fpu_state_off = 0, - fpu_state_end = fpu_state_off+FPUStateSizeInWords-1, + fpu_state_end = fpu_state_off+FPUStateSizeInWords, st0_off, st0H_off, st1_off, st1H_off, st2_off, st2H_off, @@ -59,16 +59,16 @@ st5_off, st5H_off, st6_off, st6H_off, st7_off, st7H_off, - - xmm0_off, xmm0H_off, - xmm1_off, xmm1H_off, - xmm2_off, xmm2H_off, - xmm3_off, xmm3H_off, - xmm4_off, xmm4H_off, - xmm5_off, xmm5H_off, - xmm6_off, xmm6H_off, - xmm7_off, xmm7H_off, - flags_off, + xmm_off, + DEF_XMM_OFFS(0), + DEF_XMM_OFFS(1), + DEF_XMM_OFFS(2), + DEF_XMM_OFFS(3), + DEF_XMM_OFFS(4), + DEF_XMM_OFFS(5), + DEF_XMM_OFFS(6), + DEF_XMM_OFFS(7), + flags_off = xmm7_off + 16/BytesPerInt + 1, // 16-byte stack alignment fill word rdi_off, rsi_off, ignore_off, // extra copy of rbp, @@ -83,13 +83,13 @@ rbp_off, return_off, // slot for return address reg_save_size }; - + enum { FPU_regs_live = flags_off - fpu_state_end }; public: static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, - int* total_frame_words, bool verify_fpu = true); - static void restore_live_registers(MacroAssembler* masm); + int* total_frame_words, bool verify_fpu = true, bool save_vectors = false); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); static int rax_offset() { return rax_off; } static int rbx_offset() { return rbx_off; } @@ -113,9 +113,20 @@ }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, - int* total_frame_words, bool verify_fpu) { - - int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize; + int* total_frame_words, bool verify_fpu, bool save_vectors) { + int vect_words = 0; +#ifdef COMPILER2 + if (save_vectors) { + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); + assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); + // Save upper half of YMM registes + vect_words = 8 * 16 / wordSize; + additional_frame_words += vect_words; + } +#else + assert(!save_vectors, "vectors are generated only by C2"); +#endif + int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize; int frame_words = frame_size_in_bytes / wordSize; *total_frame_words = frame_words; @@ -129,7 +140,7 @@ __ enter(); __ pusha(); __ pushf(); - __ subptr(rsp,FPU_regs_live*sizeof(jdouble)); // Push FPU registers space + __ subptr(rsp,FPU_regs_live*wordSize); // Push FPU registers space __ push_FPU_state(); // Save FPU state & init if (verify_fpu) { @@ -183,14 +194,28 @@ __ movflt(Address(rsp,xmm6_off*wordSize),xmm6); __ movflt(Address(rsp,xmm7_off*wordSize),xmm7); } else if( UseSSE >= 2 ) { - __ movdbl(Address(rsp,xmm0_off*wordSize),xmm0); - __ movdbl(Address(rsp,xmm1_off*wordSize),xmm1); - __ movdbl(Address(rsp,xmm2_off*wordSize),xmm2); - __ movdbl(Address(rsp,xmm3_off*wordSize),xmm3); - __ movdbl(Address(rsp,xmm4_off*wordSize),xmm4); - __ movdbl(Address(rsp,xmm5_off*wordSize),xmm5); - __ movdbl(Address(rsp,xmm6_off*wordSize),xmm6); - __ movdbl(Address(rsp,xmm7_off*wordSize),xmm7); + // Save whole 128bit (16 bytes) XMM regiters + __ movdqu(Address(rsp,xmm0_off*wordSize),xmm0); + __ movdqu(Address(rsp,xmm1_off*wordSize),xmm1); + __ movdqu(Address(rsp,xmm2_off*wordSize),xmm2); + __ movdqu(Address(rsp,xmm3_off*wordSize),xmm3); + __ movdqu(Address(rsp,xmm4_off*wordSize),xmm4); + __ movdqu(Address(rsp,xmm5_off*wordSize),xmm5); + __ movdqu(Address(rsp,xmm6_off*wordSize),xmm6); + __ movdqu(Address(rsp,xmm7_off*wordSize),xmm7); + } + + if (vect_words > 0) { + assert(vect_words*wordSize == 128, ""); + __ subptr(rsp, 128); // Save upper half of YMM registes + __ vextractf128h(Address(rsp, 0),xmm0); + __ vextractf128h(Address(rsp, 16),xmm1); + __ vextractf128h(Address(rsp, 32),xmm2); + __ vextractf128h(Address(rsp, 48),xmm3); + __ vextractf128h(Address(rsp, 64),xmm4); + __ vextractf128h(Address(rsp, 80),xmm5); + __ vextractf128h(Address(rsp, 96),xmm6); + __ vextractf128h(Address(rsp,112),xmm7); } // Set an oopmap for the call site. This oopmap will map all @@ -253,10 +278,20 @@ } -void RegisterSaver::restore_live_registers(MacroAssembler* masm) { - +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { // Recover XMM & FPU state - if( UseSSE == 1 ) { + int additional_frame_bytes = 0; +#ifdef COMPILER2 + if (restore_vectors) { + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); + assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); + additional_frame_bytes = 128; + } +#else + assert(!restore_vectors, "vectors are generated only by C2"); +#endif + if (UseSSE == 1) { + assert(additional_frame_bytes == 0, ""); __ movflt(xmm0,Address(rsp,xmm0_off*wordSize)); __ movflt(xmm1,Address(rsp,xmm1_off*wordSize)); __ movflt(xmm2,Address(rsp,xmm2_off*wordSize)); @@ -265,18 +300,33 @@ __ movflt(xmm5,Address(rsp,xmm5_off*wordSize)); __ movflt(xmm6,Address(rsp,xmm6_off*wordSize)); __ movflt(xmm7,Address(rsp,xmm7_off*wordSize)); - } else if( UseSSE >= 2 ) { - __ movdbl(xmm0,Address(rsp,xmm0_off*wordSize)); - __ movdbl(xmm1,Address(rsp,xmm1_off*wordSize)); - __ movdbl(xmm2,Address(rsp,xmm2_off*wordSize)); - __ movdbl(xmm3,Address(rsp,xmm3_off*wordSize)); - __ movdbl(xmm4,Address(rsp,xmm4_off*wordSize)); - __ movdbl(xmm5,Address(rsp,xmm5_off*wordSize)); - __ movdbl(xmm6,Address(rsp,xmm6_off*wordSize)); - __ movdbl(xmm7,Address(rsp,xmm7_off*wordSize)); + } else if (UseSSE >= 2) { +#define STACK_ADDRESS(x) Address(rsp,(x)*wordSize + additional_frame_bytes) + __ movdqu(xmm0,STACK_ADDRESS(xmm0_off)); + __ movdqu(xmm1,STACK_ADDRESS(xmm1_off)); + __ movdqu(xmm2,STACK_ADDRESS(xmm2_off)); + __ movdqu(xmm3,STACK_ADDRESS(xmm3_off)); + __ movdqu(xmm4,STACK_ADDRESS(xmm4_off)); + __ movdqu(xmm5,STACK_ADDRESS(xmm5_off)); + __ movdqu(xmm6,STACK_ADDRESS(xmm6_off)); + __ movdqu(xmm7,STACK_ADDRESS(xmm7_off)); +#undef STACK_ADDRESS + } + if (restore_vectors) { + // Restore upper half of YMM registes. + assert(additional_frame_bytes == 128, ""); + __ vinsertf128h(xmm0, Address(rsp, 0)); + __ vinsertf128h(xmm1, Address(rsp, 16)); + __ vinsertf128h(xmm2, Address(rsp, 32)); + __ vinsertf128h(xmm3, Address(rsp, 48)); + __ vinsertf128h(xmm4, Address(rsp, 64)); + __ vinsertf128h(xmm5, Address(rsp, 80)); + __ vinsertf128h(xmm6, Address(rsp, 96)); + __ vinsertf128h(xmm7, Address(rsp,112)); + __ addptr(rsp, additional_frame_bytes); } __ pop_FPU_state(); - __ addptr(rsp, FPU_regs_live*sizeof(jdouble)); // Pop FPU registers + __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers __ popf(); __ popa(); @@ -308,6 +358,13 @@ __ addptr(rsp, return_off * wordSize); } +// Is vector's size (in bytes) bigger than a size saved by default? +// 16 bytes XMM registers are saved by default using SSE2 movdqu instructions. +// Note, MaxVectorSize == 0 with UseSSE < 2 and vectors are not generated. +bool SharedRuntime::is_wide_vector(int size) { + return size > 16; +} + // The java_calling_convention describes stack locations as ideal slots on // a frame with no abi restrictions. Since we must observe abi restrictions // (like the placement of the register window) the slots must be biased by @@ -1346,12 +1403,12 @@ } static void verify_oop_args(MacroAssembler* masm, - int total_args_passed, + methodHandle method, const BasicType* sig_bt, const VMRegPair* regs) { Register temp_reg = rbx; // not part of any compiled calling seq if (VerifyOops) { - for (int i = 0; i < total_args_passed; i++) { + for (int i = 0; i < method->size_of_parameters(); i++) { if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { VMReg r = regs[i].first(); @@ -1368,35 +1425,32 @@ } static void gen_special_dispatch(MacroAssembler* masm, - int total_args_passed, - int comp_args_on_stack, - vmIntrinsics::ID special_dispatch, + methodHandle method, const BasicType* sig_bt, const VMRegPair* regs) { - verify_oop_args(masm, total_args_passed, sig_bt, regs); + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); // Now write the args into the outgoing interpreter space bool has_receiver = false; Register receiver_reg = noreg; int member_arg_pos = -1; Register member_reg = noreg; - int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch); + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); if (ref_kind != 0) { - member_arg_pos = total_args_passed - 1; // trailing MemberName argument + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument member_reg = rbx; // known to be free at this point has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); - } else if (special_dispatch == vmIntrinsics::_invokeBasic) { + } else if (iid == vmIntrinsics::_invokeBasic) { has_receiver = true; } else { - guarantee(false, err_msg("special_dispatch=%d", special_dispatch)); + fatal(err_msg_res("unexpected intrinsic id %d", iid)); } if (member_reg != noreg) { // Load the member_arg into register, if necessary. - assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob"); - assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object"); + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); VMReg r = regs[member_arg_pos].first(); - assert(r->is_valid(), "bad member arg"); if (r->is_stack()) { __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); } else { @@ -1407,7 +1461,7 @@ if (has_receiver) { // Make sure the receiver is loaded into a register. - assert(total_args_passed > 0, "oob"); + assert(method->size_of_parameters() > 0, "oob"); assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); VMReg r = regs[0].first(); assert(r->is_valid(), "bad receiver arg"); @@ -1415,7 +1469,7 @@ // Porting note: This assumes that compiled calling conventions always // pass the receiver oop in a register. If this is not true on some // platform, pick a temp and load the receiver from stack. - assert(false, "receiver always in a register"); + fatal("receiver always in a register"); receiver_reg = rcx; // known to be free at this point __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); } else { @@ -1425,7 +1479,7 @@ } // Figure out which address we are really jumping to: - MethodHandles::generate_method_handle_dispatch(masm, special_dispatch, + MethodHandles::generate_method_handle_dispatch(masm, iid, receiver_reg, member_reg, /*for_compiler_entry:*/ true); } @@ -1461,8 +1515,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, methodHandle method, int compile_id, - int total_in_args, - int comp_args_on_stack, BasicType* in_sig_bt, VMRegPair* in_regs, BasicType ret_type) { @@ -1471,9 +1523,7 @@ intptr_t start = (intptr_t)__ pc(); int vep_offset = ((intptr_t)__ pc()) - start; gen_special_dispatch(masm, - total_in_args, - comp_args_on_stack, - method->intrinsic_id(), + method, in_sig_bt, in_regs); int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period @@ -1506,6 +1556,7 @@ // we convert the java signature to a C signature by inserting // the hidden arguments as arg[0] and possibly arg[1] (static method) + const int total_in_args = method->size_of_parameters(); int total_c_args = total_in_args; if (!is_critical_native) { total_c_args += 1; @@ -2738,7 +2789,6 @@ return 0; } - //------------------------------generate_deopt_blob---------------------------- void SharedRuntime::generate_deopt_blob() { // allocate space for the code @@ -3276,7 +3326,7 @@ // setup oopmap, and calls safepoint code to stop the compiled code for // a safepoint. // -SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) { +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { // Account for thread arg in our frame const int additional_words = 1; @@ -3296,17 +3346,18 @@ const Register java_thread = rdi; // callee-saved for VC++ address start = __ pc(); address call_pc = NULL; - + bool cause_return = (poll_type == POLL_AT_RETURN); + bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); // If cause_return is true we are at a poll_return and there is // the return address on the stack to the caller on the nmethod // that is safepoint. We can leave this return on the stack and // effectively complete the return and safepoint in the caller. // Otherwise we push space for a return address that the safepoint // handler will install later to make the stack walking sensible. - if( !cause_return ) - __ push(rbx); // Make room for return address (or push it again) - - map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false); + if (!cause_return) + __ push(rbx); // Make room for return address (or push it again) + + map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false, save_vectors); // The following is basically a call_VM. However, we need the precise // address of the call in order to generate an oopmap. Hence, we do all the @@ -3318,7 +3369,7 @@ __ set_last_Java_frame(java_thread, noreg, noreg, NULL); // if this was not a poll_return then we need to correct the return address now. - if( !cause_return ) { + if (!cause_return) { __ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset())); __ movptr(Address(rbp, wordSize), rax); } @@ -3346,15 +3397,14 @@ __ jcc(Assembler::equal, noException); // Exception pending - - RegisterSaver::restore_live_registers(masm); + RegisterSaver::restore_live_registers(masm, save_vectors); __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); __ bind(noException); // Normal exit, register restoring and exit - RegisterSaver::restore_live_registers(masm); + RegisterSaver::restore_live_registers(masm, save_vectors); __ ret(0); diff -r 04ed664b7e30 -r c92f43386117 src/cpu/x86/vm/sharedRuntime_x86_64.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -116,8 +116,8 @@ }; public: - static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); - static void restore_live_registers(MacroAssembler* masm); + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); // Offsets into the register save area // Used by deoptimization when it is managing result register @@ -134,7 +134,19 @@ static void restore_result_registers(MacroAssembler* masm); }; -OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { + int vect_words = 0; +#ifdef COMPILER2 + if (save_vectors) { + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); + assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); + // Save upper half of YMM registes + vect_words = 16 * 16 / wordSize; + additional_frame_words += vect_words; + } +#else + assert(!save_vectors, "vectors are generated only by C2"); +#endif // Always make the frame size 16-byte aligned int frame_size_in_bytes = round_to(additional_frame_words*wordSize + @@ -155,6 +167,27 @@ __ enter(); // rsp becomes 16-byte aligned here __ push_CPU_state(); // Push a multiple of 16 bytes + + if (vect_words > 0) { + assert(vect_words*wordSize == 256, ""); + __ subptr(rsp, 256); // Save upper half of YMM registes + __ vextractf128h(Address(rsp, 0),xmm0); + __ vextractf128h(Address(rsp, 16),xmm1); + __ vextractf128h(Address(rsp, 32),xmm2); + __ vextractf128h(Address(rsp, 48),xmm3); + __ vextractf128h(Address(rsp, 64),xmm4); + __ vextractf128h(Address(rsp, 80),xmm5); + __ vextractf128h(Address(rsp, 96),xmm6); + __ vextractf128h(Address(rsp,112),xmm7); + __ vextractf128h(Address(rsp,128),xmm8); + __ vextractf128h(Address(rsp,144),xmm9); + __ vextractf128h(Address(rsp,160),xmm10); + __ vextractf128h(Address(rsp,176),xmm11); + __ vextractf128h(Address(rsp,192),xmm12); + __ vextractf128h(Address(rsp,208),xmm13); + __ vextractf128h(Address(rsp,224),xmm14); + __ vextractf128h(Address(rsp,240),xmm15); + } if (frame::arg_reg_save_area_bytes != 0) { // Allocate argument register save area __ subptr(rsp, frame::arg_reg_save_area_bytes); @@ -167,112 +200,111 @@ OopMapSet *oop_maps = new OopMapSet(); OopMap* map = new OopMap(frame_size_in_slots, 0); - map->set_callee_saved(VMRegImpl::stack2reg( rax_off + additional_frame_slots), rax->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( rcx_off + additional_frame_slots), rcx->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( rdx_off + additional_frame_slots), rdx->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( rbx_off + additional_frame_slots), rbx->as_VMReg()); + +#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) + + map->set_callee_saved(STACK_OFFSET( rax_off ), rax->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( rcx_off ), rcx->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( rdx_off ), rdx->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( rbx_off ), rbx->as_VMReg()); // rbp location is known implicitly by the frame sender code, needs no oopmap // and the location where rbp was saved by is ignored - map->set_callee_saved(VMRegImpl::stack2reg( rsi_off + additional_frame_slots), rsi->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( rdi_off + additional_frame_slots), rdi->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r8_off + additional_frame_slots), r8->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r9_off + additional_frame_slots), r9->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r10_off + additional_frame_slots), r10->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r11_off + additional_frame_slots), r11->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r12_off + additional_frame_slots), r12->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r13_off + additional_frame_slots), r13->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r14_off + additional_frame_slots), r14->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg( r15_off + additional_frame_slots), r15->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm0_off + additional_frame_slots), xmm0->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm1_off + additional_frame_slots), xmm1->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm2_off + additional_frame_slots), xmm2->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm3_off + additional_frame_slots), xmm3->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm4_off + additional_frame_slots), xmm4->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm5_off + additional_frame_slots), xmm5->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm6_off + additional_frame_slots), xmm6->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm7_off + additional_frame_slots), xmm7->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm8_off + additional_frame_slots), xmm8->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm9_off + additional_frame_slots), xmm9->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm10_off + additional_frame_slots), xmm10->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm11_off + additional_frame_slots), xmm11->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm12_off + additional_frame_slots), xmm12->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm13_off + additional_frame_slots), xmm13->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm14_off + additional_frame_slots), xmm14->as_VMReg()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm15_off + additional_frame_slots), xmm15->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( rsi_off ), rsi->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( rdi_off ), rdi->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r8_off ), r8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r9_off ), r9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r10_off ), r10->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r11_off ), r11->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r12_off ), r12->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm0_off ), xmm0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm1_off ), xmm1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm2_off ), xmm2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm3_off ), xmm3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm4_off ), xmm4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm5_off ), xmm5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm6_off ), xmm6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm7_off ), xmm7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm8_off ), xmm8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm9_off ), xmm9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm10_off), xmm10->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm11_off), xmm11->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm12_off), xmm12->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm13_off), xmm13->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg()); + map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg()); // %%% These should all be a waste but we'll keep things as they were for now if (true) { - map->set_callee_saved(VMRegImpl::stack2reg( raxH_off + additional_frame_slots), - rax->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( rcxH_off + additional_frame_slots), - rcx->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( rdxH_off + additional_frame_slots), - rdx->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( rbxH_off + additional_frame_slots), - rbx->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( raxH_off ), rax->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( rcxH_off ), rcx->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( rdxH_off ), rdx->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( rbxH_off ), rbx->as_VMReg()->next()); // rbp location is known implicitly by the frame sender code, needs no oopmap - map->set_callee_saved(VMRegImpl::stack2reg( rsiH_off + additional_frame_slots), - rsi->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( rdiH_off + additional_frame_slots), - rdi->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r8H_off + additional_frame_slots), - r8->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r9H_off + additional_frame_slots), - r9->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r10H_off + additional_frame_slots), - r10->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r11H_off + additional_frame_slots), - r11->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r12H_off + additional_frame_slots), - r12->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r13H_off + additional_frame_slots), - r13->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r14H_off + additional_frame_slots), - r14->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg( r15H_off + additional_frame_slots), - r15->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm0H_off + additional_frame_slots), - xmm0->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm1H_off + additional_frame_slots), - xmm1->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm2H_off + additional_frame_slots), - xmm2->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm3H_off + additional_frame_slots), - xmm3->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm4H_off + additional_frame_slots), - xmm4->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm5H_off + additional_frame_slots), - xmm5->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm6H_off + additional_frame_slots), - xmm6->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm7H_off + additional_frame_slots), - xmm7->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm8H_off + additional_frame_slots), - xmm8->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm9H_off + additional_frame_slots), - xmm9->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm10H_off + additional_frame_slots), - xmm10->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm11H_off + additional_frame_slots), - xmm11->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm12H_off + additional_frame_slots), - xmm12->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm13H_off + additional_frame_slots), - xmm13->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm14H_off + additional_frame_slots), - xmm14->as_VMReg()->next()); - map->set_callee_saved(VMRegImpl::stack2reg(xmm15H_off + additional_frame_slots), - xmm15->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( rsiH_off ), rsi->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( rdiH_off ), rdi->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r8H_off ), r8->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r9H_off ), r9->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r10H_off ), r10->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r11H_off ), r11->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r12H_off ), r12->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm0H_off ), xmm0->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm1H_off ), xmm1->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm2H_off ), xmm2->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm3H_off ), xmm3->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm4H_off ), xmm4->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm5H_off ), xmm5->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm6H_off ), xmm6->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm7H_off ), xmm7->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm8H_off ), xmm8->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm9H_off ), xmm9->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm10H_off), xmm10->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm11H_off), xmm11->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm12H_off), xmm12->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm13H_off), xmm13->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next()); + map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next()); } return map; } -void RegisterSaver::restore_live_registers(MacroAssembler* masm) { +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { if (frame::arg_reg_save_area_bytes != 0) { // Pop arg register save area __ addptr(rsp, frame::arg_reg_save_area_bytes); } +#ifdef COMPILER2 + if (restore_vectors) { + // Restore upper half of YMM registes. + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); + assert(MaxVectorSize == 32, "only 256bit vectors are supported now"); + __ vinsertf128h(xmm0, Address(rsp, 0)); + __ vinsertf128h(xmm1, Address(rsp, 16)); + __ vinsertf128h(xmm2, Address(rsp, 32)); + __ vinsertf128h(xmm3, Address(rsp, 48)); + __ vinsertf128h(xmm4, Address(rsp, 64)); + __ vinsertf128h(xmm5, Address(rsp, 80)); + __ vinsertf128h(xmm6, Address(rsp, 96)); + __ vinsertf128h(xmm7, Address(rsp,112)); + __ vinsertf128h(xmm8, Address(rsp,128)); + __ vinsertf128h(xmm9, Address(rsp,144)); + __ vinsertf128h(xmm10, Address(rsp,160)); + __ vinsertf128h(xmm11, Address(rsp,176)); + __ vinsertf128h(xmm12, Address(rsp,192)); + __ vinsertf128h(xmm13, Address(rsp,208)); + __ vinsertf128h(xmm14, Address(rsp,224)); + __ vinsertf128h(xmm15, Address(rsp,240)); + __ addptr(rsp, 256); + } +#else + assert(!restore_vectors, "vectors are generated only by C2"); +#endif // Recover CPU state __ pop_CPU_state(); // Get the rbp described implicitly by the calling convention (no oopMap) @@ -297,6 +329,12 @@ __ addptr(rsp, return_offset_in_bytes()); } +// Is vector's size (in bytes) bigger than a size saved by default? +// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 16; +} + // The java_calling_convention describes stack locations as ideal slots on // a frame with no abi restrictions. Since we must observe abi restrictions // (like the placement of the register window) the slots must be biased by @@ -1593,12 +1631,12 @@ }; static void verify_oop_args(MacroAssembler* masm, - int total_args_passed, + methodHandle method, const BasicType* sig_bt, const VMRegPair* regs) { Register temp_reg = rbx; // not part of any compiled calling seq if (VerifyOops) { - for (int i = 0; i < total_args_passed; i++) { + for (int i = 0; i < method->size_of_parameters(); i++) { if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { VMReg r = regs[i].first(); @@ -1615,35 +1653,32 @@ } static void gen_special_dispatch(MacroAssembler* masm, - int total_args_passed, - int comp_args_on_stack, - vmIntrinsics::ID special_dispatch, + methodHandle method, const BasicType* sig_bt, const VMRegPair* regs) { - verify_oop_args(masm, total_args_passed, sig_bt, regs); + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); // Now write the args into the outgoing interpreter space bool has_receiver = false; Register receiver_reg = noreg; int member_arg_pos = -1; Register member_reg = noreg; - int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch); + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); if (ref_kind != 0) { - member_arg_pos = total_args_passed - 1; // trailing MemberName argument + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument member_reg = rbx; // known to be free at this point has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); - } else if (special_dispatch == vmIntrinsics::_invokeBasic) { + } else if (iid == vmIntrinsics::_invokeBasic) { has_receiver = true; } else { - guarantee(false, err_msg("special_dispatch=%d", special_dispatch)); + fatal(err_msg_res("unexpected intrinsic id %d", iid)); } if (member_reg != noreg) { // Load the member_arg into register, if necessary. - assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob"); - assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object"); + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); VMReg r = regs[member_arg_pos].first(); - assert(r->is_valid(), "bad member arg"); if (r->is_stack()) { __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); } else { @@ -1654,7 +1689,7 @@ if (has_receiver) { // Make sure the receiver is loaded into a register. - assert(total_args_passed > 0, "oob"); + assert(method->size_of_parameters() > 0, "oob"); assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); VMReg r = regs[0].first(); assert(r->is_valid(), "bad receiver arg"); @@ -1662,7 +1697,7 @@ // Porting note: This assumes that compiled calling conventions always // pass the receiver oop in a register. If this is not true on some // platform, pick a temp and load the receiver from stack. - assert(false, "receiver always in a register"); + fatal("receiver always in a register"); receiver_reg = j_rarg0; // known to be free at this point __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); } else { @@ -1672,7 +1707,7 @@ } // Figure out which address we are really jumping to: - MethodHandles::generate_method_handle_dispatch(masm, special_dispatch, + MethodHandles::generate_method_handle_dispatch(masm, iid, receiver_reg, member_reg, /*for_compiler_entry:*/ true); } @@ -1708,8 +1743,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, methodHandle method, int compile_id, - int total_in_args, - int comp_args_on_stack, BasicType* in_sig_bt, VMRegPair* in_regs, BasicType ret_type) { @@ -1718,9 +1751,7 @@ intptr_t start = (intptr_t)__ pc(); int vep_offset = ((intptr_t)__ pc()) - start; gen_special_dispatch(masm, - total_in_args, - comp_args_on_stack, - method->intrinsic_id(), + method, in_sig_bt, in_regs); int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period @@ -1754,6 +1785,7 @@ // we convert the java signature to a C signature by inserting // the hidden arguments as arg[0] and possibly arg[1] (static method) + const int total_in_args = method->size_of_parameters(); int total_c_args = total_in_args; if (!is_critical_native) { total_c_args += 1; @@ -3241,7 +3273,6 @@ return 0; } - //------------------------------generate_deopt_blob---------------------------- void SharedRuntime::generate_deopt_blob() { // Allocate space for the code @@ -3746,7 +3777,7 @@ // Generate a special Compile2Runtime blob that saves all registers, // and setup oopmap. // -SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) { +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); @@ -3761,6 +3792,8 @@ address start = __ pc(); address call_pc = NULL; int frame_size_in_words; + bool cause_return = (poll_type == POLL_AT_RETURN); + bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); // Make room for return address (or push it again) if (!cause_return) { @@ -3768,7 +3801,7 @@ } // Save registers, fpu state, and flags - map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors); // The following is basically a call_VM. However, we need the precise // address of the call in order to generate an oopmap. Hence, we do all the @@ -3805,7 +3838,7 @@ // Exception pending - RegisterSaver::restore_live_registers(masm); + RegisterSaver::restore_live_registers(masm, save_vectors); __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); @@ -3813,7 +3846,7 @@ __ bind(noException); // Normal exit, restore registers and exit. - RegisterSaver::restore_live_registers(masm); + RegisterSaver::restore_live_registers(masm, save_vectors); __ ret(0); diff -r 04ed664b7e30 -r c92f43386117 src/cpu/x86/vm/vm_version_x86.cpp --- a/src/cpu/x86/vm/vm_version_x86.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -363,6 +363,11 @@ } _supports_cx8 = supports_cmpxchg8(); + // xchg and xadd instructions + _supports_atomic_getset4 = true; + _supports_atomic_getadd4 = true; + LP64_ONLY(_supports_atomic_getset8 = true); + LP64_ONLY(_supports_atomic_getadd8 = true); #ifdef _LP64 // OS should support SSE for x64 and hardware should support at least SSE2. @@ -562,10 +567,10 @@ AllocatePrefetchInstr = 3; } // On family 15h processors use XMM and UnalignedLoadStores for Array Copy - if( supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { + if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { UseXMMForArrayCopy = true; } - if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { + if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { UseUnalignedLoadStores = true; } } @@ -612,16 +617,16 @@ MaxLoopPad = 11; } #endif // COMPILER2 - if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { + if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus } - if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus - if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { + if (supports_sse4_2() && supports_ht()) { // Newest Intel cpus + if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus } } - if( supports_sse4_2() && UseSSE >= 4 ) { - if( FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { + if (supports_sse4_2() && UseSSE >= 4) { + if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { UseSSE42Intrinsics = true; } } @@ -638,6 +643,13 @@ FLAG_SET_DEFAULT(UsePopCountInstruction, false); } +#ifdef COMPILER2 + if (FLAG_IS_DEFAULT(AlignVector)) { + // Modern processors allow misaligned memory operations for vectors. + AlignVector = !UseUnalignedLoadStores; + } +#endif // COMPILER2 + assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); diff -r 04ed664b7e30 -r c92f43386117 src/cpu/x86/vm/x86.ad --- a/src/cpu/x86/vm/x86.ad Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/x86/vm/x86.ad Mon Sep 24 14:46:06 2012 -0700 @@ -498,10 +498,18 @@ case Op_PopCountL: if (!UsePopCountInstruction) return false; + break; case Op_MulVI: if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX return false; break; + case Op_CompareAndSwapL: +#ifdef _LP64 + case Op_CompareAndSwapP: +#endif + if (!VM_Version::supports_cx8()) + return false; + break; } return true; // Per default match rules are supported. diff -r 04ed664b7e30 -r c92f43386117 src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/x86/vm/x86_32.ad Mon Sep 24 14:46:06 2012 -0700 @@ -7762,6 +7762,7 @@ // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ + predicate(VM_Version::supports_cx8()); match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" @@ -7798,6 +7799,47 @@ ins_pipe( pipe_cmpxchg ); %} +instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem add)); + effect(KILL cr); + format %{ "ADDL [$mem],$add" %} + ins_encode %{ + if (os::is_MP()) { __ lock(); } + __ addl($mem$$Address, $add$$constant); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ + match(Set newval (GetAndAddI mem newval)); + effect(KILL cr); + format %{ "XADDL [$mem],$newval" %} + ins_encode %{ + if (os::is_MP()) { __ lock(); } + __ xaddl($mem$$Address, $newval$$Register); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgI( memory mem, rRegI newval) %{ + match(Set newval (GetAndSetI mem newval)); + format %{ "XCHGL $newval,[$mem]" %} + ins_encode %{ + __ xchgl($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgP( memory mem, pRegP newval) %{ + match(Set newval (GetAndSetP mem newval)); + format %{ "XCHGL $newval,[$mem]" %} + ins_encode %{ + __ xchgl($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} + //----------Subtraction Instructions------------------------------------------- // Integer Subtraction Instructions instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ diff -r 04ed664b7e30 -r c92f43386117 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Fri Sep 21 14:39:56 2012 -0700 +++ b/src/cpu/x86/vm/x86_64.ad Mon Sep 24 14:46:06 2012 -0700 @@ -7242,6 +7242,7 @@ rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ + predicate(VM_Version::supports_cx8()); match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); @@ -7265,6 +7266,7 @@ rax_RegL oldval, rRegL newval, rFlagsReg cr) %{ + predicate(VM_Version::supports_cx8()); match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); @@ -7329,6 +7331,88 @@ ins_pipe( pipe_cmpxchg ); %} +instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem add)); + effect(KILL cr); + format %{ "ADDL [$mem],$add" %} + ins_encode %{ + if (os::is_MP()) { __ lock(); } + __ addl($mem$$Address, $add$$constant); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{ + match(Set newval (GetAndAddI mem newval)); + effect(KILL cr); + format %{ "XADDL [$mem],$newval" %} + ins_encode %{ + if (os::is_MP()) { __ lock(); } + __ xaddl($mem$$Address, $newval$$Register); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddL_no_res( memory mem, Universe dummy, immL add, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem add)); + effect(KILL cr); + format %{ "ADDQ [$mem],$add" %} + ins_encode %{ + if (os::is_MP()) { __ lock(); } + __ addq($mem$$Address, $add$$constant); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{ + match(Set newval (GetAndAddL mem newval)); + effect(KILL cr); + format %{ "XADDQ [$mem],$newval" %} + ins_encode %{ + if (os::is_MP()) { __ lock(); } + __ xaddq($mem$$Address, $newval$$Register); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgI( memory mem, rRegI newval) %{ + match(Set newval (GetAndSetI mem newval)); + format %{ "XCHGL $newval,[$mem]" %} + ins_encode %{ + __ xchgl($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgL( memory mem, rRegL newval) %{ + match(Set newval (GetAndSetL mem newval)); + format %{ "XCHGL $newval,[$mem]" %} + ins_encode %{ + __ xchgq($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgP( memory mem, rRegP newval) %{ + match(Set newval (GetAndSetP mem newval)); + format %{ "XCHGQ $newval,[$mem]" %} + ins_encode %{ + __ xchgq($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} + +instruct xchgN( memory mem, rRegN newval) %{ + match(Set newval (GetAndSetN mem newval)); + format %{ "XCHGL $newval,$mem]" %} + ins_encode %{ + __ xchgl($newval$$Register, $mem$$Address); + %} + ins_pipe( pipe_cmpxchg ); +%} + //----------Subtraction Instructions------------------------------------------- // Integer Subtraction Instructions diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/adlc/formssel.cpp --- a/src/share/vm/adlc/formssel.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/adlc/formssel.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -751,6 +751,7 @@ !strcmp(_matrule->_rChild->_opType,"DecodeN") || !strcmp(_matrule->_rChild->_opType,"EncodeP") || !strcmp(_matrule->_rChild->_opType,"LoadN") || + !strcmp(_matrule->_rChild->_opType,"GetAndSetN") || !strcmp(_matrule->_rChild->_opType,"LoadNKlass") || !strcmp(_matrule->_rChild->_opType,"CreateEx") || // type of exception !strcmp(_matrule->_rChild->_opType,"CheckCastPP")) ) return true; @@ -3399,7 +3400,9 @@ "StorePConditional", "StoreIConditional", "StoreLConditional", "CompareAndSwapI", "CompareAndSwapL", "CompareAndSwapP", "CompareAndSwapN", "StoreCM", - "ClearArray" + "ClearArray", + "GetAndAddI", "GetAndSetI", "GetAndSetP", + "GetAndAddL", "GetAndSetL", "GetAndSetN", }; int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*); if( strcmp(_opType,"PrefetchRead")==0 || diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/asm/codeBuffer.cpp --- a/src/share/vm/asm/codeBuffer.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/asm/codeBuffer.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -1026,25 +1026,30 @@ } return a; } + + // Convenience for add_comment. + CodeComment* find_last(intptr_t offset) { + CodeComment* a = find(offset); + if (a != NULL) { + while ((a->_next != NULL) && (a->_next->_offset == offset)) { + a = a->_next; + } + } + return a; + } }; void CodeComments::add_comment(intptr_t offset, const char * comment) { - CodeComment* c = new CodeComment(offset, comment); - CodeComment* insert = NULL; - if (_comments != NULL) { - CodeComment* c = _comments->find(offset); - insert = c; - while (c && c->offset() == offset) { - insert = c; - c = c->next(); - } - } - if (insert) { - // insert after comments with same offset - c->set_next(insert->next()); - insert->set_next(c); + CodeComment* c = new CodeComment(offset, comment); + CodeComment* inspos = (_comments == NULL) ? NULL : _comments->find_last(offset); + + if (inspos) { + // insert after already existing comments with same offset + c->set_next(inspos->next()); + inspos->set_next(c); } else { + // no comments with such offset, yet. Insert before anything else. c->set_next(_comments); _comments = c; } @@ -1052,12 +1057,11 @@ void CodeComments::assign(CodeComments& other) { - assert(_comments == NULL, "don't overwrite old value"); _comments = other._comments; } -void CodeComments::print_block_comment(outputStream* stream, intptr_t offset) { +void CodeComments::print_block_comment(outputStream* stream, intptr_t offset) const { if (_comments != NULL) { CodeComment* c = _comments->find(offset); while (c && c->offset() == offset) { @@ -1085,6 +1089,7 @@ void CodeBuffer::decode() { + ttyLocker ttyl; Disassembler::decode(decode_begin(), insts_end()); _decode_begin = insts_end(); } @@ -1096,6 +1101,7 @@ void CodeBuffer::decode_all() { + ttyLocker ttyl; for (int n = 0; n < (int)SECT_LIMIT; n++) { // dump contents of each section CodeSection* cs = code_section(n); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/asm/codeBuffer.hpp --- a/src/share/vm/asm/codeBuffer.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/asm/codeBuffer.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -253,7 +253,7 @@ } void add_comment(intptr_t offset, const char * comment) PRODUCT_RETURN; - void print_block_comment(outputStream* stream, intptr_t offset) PRODUCT_RETURN; + void print_block_comment(outputStream* stream, intptr_t offset) const PRODUCT_RETURN; void assign(CodeComments& other) PRODUCT_RETURN; void free() PRODUCT_RETURN; }; diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/asm/register.hpp --- a/src/share/vm/asm/register.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/asm/register.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -103,8 +103,8 @@ ) { assert( a != b, - err_msg("registers must be different: a=%d, b=%d", - a, b) + err_msg_res("registers must be different: a=%d, b=%d", + a, b) ); } @@ -117,8 +117,8 @@ assert( a != b && a != c && b != c, - err_msg("registers must be different: a=%d, b=%d, c=%d", - a, b, c) + err_msg_res("registers must be different: a=%d, b=%d, c=%d", + a, b, c) ); } @@ -133,8 +133,8 @@ a != b && a != c && a != d && b != c && b != d && c != d, - err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d", - a, b, c, d) + err_msg_res("registers must be different: a=%d, b=%d, c=%d, d=%d", + a, b, c, d) ); } @@ -151,8 +151,8 @@ && b != c && b != d && b != e && c != d && c != e && d != e, - err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d", - a, b, c, d, e) + err_msg_res("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d", + a, b, c, d, e) ); } @@ -171,8 +171,8 @@ && c != d && c != e && c != f && d != e && d != f && e != f, - err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d", - a, b, c, d, e, f) + err_msg_res("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d", + a, b, c, d, e, f) ); } @@ -193,8 +193,8 @@ && d != e && d != f && d != g && e != f && e != g && f != g, - err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d, g=%d", - a, b, c, d, e, f, g) + err_msg_res("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d, g=%d", + a, b, c, d, e, f, g) ); } @@ -217,8 +217,8 @@ && e != f && e != g && e != h && f != g && f != h && g != h, - err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d, g=%d, h=%d", - a, b, c, d, e, f, g, h) + err_msg_res("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d, g=%d, h=%d", + a, b, c, d, e, f, g, h) ); } @@ -243,8 +243,8 @@ && f != g && f != h && f != i && g != h && g != i && h != i, - err_msg("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d, g=%d, h=%d, i=%d", - a, b, c, d, e, f, g, h, i) + err_msg_res("registers must be different: a=%d, b=%d, c=%d, d=%d, e=%d, f=%d, g=%d, h=%d, i=%d", + a, b, c, d, e, f, g, h, i) ); } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_Canonicalizer.cpp --- a/src/share/vm/c1/c1_Canonicalizer.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_Canonicalizer.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -931,6 +931,7 @@ void Canonicalizer::do_UnsafePutRaw(UnsafePutRaw* x) { if (OptimizeUnsafes) do_UnsafeRawOp(x); } void Canonicalizer::do_UnsafeGetObject(UnsafeGetObject* x) {} void Canonicalizer::do_UnsafePutObject(UnsafePutObject* x) {} +void Canonicalizer::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {} void Canonicalizer::do_UnsafePrefetchRead (UnsafePrefetchRead* x) {} void Canonicalizer::do_UnsafePrefetchWrite(UnsafePrefetchWrite* x) {} void Canonicalizer::do_ProfileCall(ProfileCall* x) {} diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_Canonicalizer.hpp --- a/src/share/vm/c1/c1_Canonicalizer.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_Canonicalizer.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -100,6 +100,7 @@ virtual void do_UnsafePutRaw (UnsafePutRaw* x); virtual void do_UnsafeGetObject(UnsafeGetObject* x); virtual void do_UnsafePutObject(UnsafePutObject* x); + virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x); virtual void do_UnsafePrefetchRead (UnsafePrefetchRead* x); virtual void do_UnsafePrefetchWrite(UnsafePrefetchWrite* x); virtual void do_ProfileCall (ProfileCall* x); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_Compilation.cpp --- a/src/share/vm/c1/c1_Compilation.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_Compilation.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -346,7 +346,8 @@ implicit_exception_table(), compiler(), _env->comp_level(), - has_unsafe_access() + has_unsafe_access(), + SharedRuntime::is_wide_vector(max_vector_size()) ); } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_Compilation.hpp --- a/src/share/vm/c1/c1_Compilation.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_Compilation.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -127,6 +127,7 @@ bool has_exception_handlers() const { return _has_exception_handlers; } bool has_fpu_code() const { return _has_fpu_code; } bool has_unsafe_access() const { return _has_unsafe_access; } + int max_vector_size() const { return 0; } ciMethod* method() const { return _method; } int osr_bci() const { return _osr_bci; } bool is_osr_compile() const { return osr_bci() >= 0; } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_GraphBuilder.cpp --- a/src/share/vm/c1/c1_GraphBuilder.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_GraphBuilder.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -3383,6 +3383,41 @@ append_unsafe_CAS(callee); return true; + case vmIntrinsics::_getAndAddInt: + if (!VM_Version::supports_atomic_getadd4()) { + return false; + } + return append_unsafe_get_and_set_obj(callee, true); + case vmIntrinsics::_getAndAddLong: + if (!VM_Version::supports_atomic_getadd8()) { + return false; + } + return append_unsafe_get_and_set_obj(callee, true); + case vmIntrinsics::_getAndSetInt: + if (!VM_Version::supports_atomic_getset4()) { + return false; + } + return append_unsafe_get_and_set_obj(callee, false); + case vmIntrinsics::_getAndSetLong: + if (!VM_Version::supports_atomic_getset8()) { + return false; + } + return append_unsafe_get_and_set_obj(callee, false); + case vmIntrinsics::_getAndSetObject: +#ifdef _LP64 + if (!UseCompressedOops && !VM_Version::supports_atomic_getset8()) { + return false; + } + if (UseCompressedOops && !VM_Version::supports_atomic_getset4()) { + return false; + } +#else + if (!VM_Version::supports_atomic_getset4()) { + return false; + } +#endif + return append_unsafe_get_and_set_obj(callee, false); + case vmIntrinsics::_Reference_get: // Use the intrinsic version of Reference.get() so that the value in // the referent field can be registered by the G1 pre-barrier code. @@ -4106,6 +4141,22 @@ } } +bool GraphBuilder::append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add) { + if (InlineUnsafeOps) { + Values* args = state()->pop_arguments(callee->arg_size()); + BasicType t = callee->return_type()->basic_type(); + null_check(args->at(0)); + Instruction* offset = args->at(2); +#ifndef _LP64 + offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT))); +#endif + Instruction* op = append(new UnsafeGetAndSetObject(t, args->at(1), offset, args->at(3), is_add)); + compilation()->set_has_unsafe_access(true); + kill_all(); + push(op->type(), op); + } + return InlineUnsafeOps; +} #ifndef PRODUCT void GraphBuilder::print_stats() { diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_GraphBuilder.hpp --- a/src/share/vm/c1/c1_GraphBuilder.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_GraphBuilder.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -367,6 +367,7 @@ bool append_unsafe_put_raw(ciMethod* callee, BasicType t); bool append_unsafe_prefetch(ciMethod* callee, bool is_store, bool is_static); void append_unsafe_CAS(ciMethod* callee); + bool append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add); void print_inlining(ciMethod* callee, const char* msg, bool success = true); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_Instruction.hpp --- a/src/share/vm/c1/c1_Instruction.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_Instruction.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -102,6 +102,7 @@ class UnsafeObjectOp; class UnsafeGetObject; class UnsafePutObject; +class UnsafeGetAndSetObject; class UnsafePrefetch; class UnsafePrefetchRead; class UnsafePrefetchWrite; @@ -202,6 +203,7 @@ virtual void do_UnsafePutRaw (UnsafePutRaw* x) = 0; virtual void do_UnsafeGetObject(UnsafeGetObject* x) = 0; virtual void do_UnsafePutObject(UnsafePutObject* x) = 0; + virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) = 0; virtual void do_UnsafePrefetchRead (UnsafePrefetchRead* x) = 0; virtual void do_UnsafePrefetchWrite(UnsafePrefetchWrite* x) = 0; virtual void do_ProfileCall (ProfileCall* x) = 0; @@ -2273,6 +2275,27 @@ f->visit(&_value); } }; +LEAF(UnsafeGetAndSetObject, UnsafeObjectOp) + private: + Value _value; // Value to be stored + bool _is_add; + public: + UnsafeGetAndSetObject(BasicType basic_type, Value object, Value offset, Value value, bool is_add) + : UnsafeObjectOp(basic_type, object, offset, false, false) + , _value(value) + , _is_add(is_add) + { + ASSERT_VALUES + } + + // accessors + bool is_add() const { return _is_add; } + Value value() { return _value; } + + // generic + virtual void input_values_do(ValueVisitor* f) { UnsafeObjectOp::input_values_do(f); + f->visit(&_value); } +}; BASE(UnsafePrefetch, UnsafeObjectOp) public: diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_InstructionPrinter.cpp --- a/src/share/vm/c1/c1_InstructionPrinter.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_InstructionPrinter.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -831,6 +831,12 @@ output()->put(')'); } +void InstructionPrinter::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { + print_unsafe_object_op(x, x->is_add()?"UnsafeGetAndSetObject (add)":"UnsafeGetAndSetObject"); + output()->print(", value "); + print_value(x->value()); + output()->put(')'); +} void InstructionPrinter::do_UnsafePrefetchRead(UnsafePrefetchRead* x) { print_unsafe_object_op(x, "UnsafePrefetchRead"); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_InstructionPrinter.hpp --- a/src/share/vm/c1/c1_InstructionPrinter.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_InstructionPrinter.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -128,6 +128,7 @@ virtual void do_UnsafePutRaw (UnsafePutRaw* x); virtual void do_UnsafeGetObject(UnsafeGetObject* x); virtual void do_UnsafePutObject(UnsafePutObject* x); + virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x); virtual void do_UnsafePrefetchRead (UnsafePrefetchRead* x); virtual void do_UnsafePrefetchWrite(UnsafePrefetchWrite* x); virtual void do_ProfileCall (ProfileCall* x); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_LIR.cpp --- a/src/share/vm/c1/c1_LIR.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_LIR.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -264,6 +264,7 @@ #ifdef ASSERT switch (code()) { case lir_cmove: + case lir_xchg: break; default: @@ -630,6 +631,8 @@ case lir_shl: case lir_shr: case lir_ushr: + case lir_xadd: + case lir_xchg: { assert(op->as_Op2() != NULL, "must be"); LIR_Op2* op2 = (LIR_Op2*)op; @@ -641,6 +644,13 @@ if (op2->_opr2->is_valid()) do_input(op2->_opr2); if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1); if (op2->_result->is_valid()) do_output(op2->_result); + if (op->code() == lir_xchg || op->code() == lir_xadd) { + // on ARM and PPC, return value is loaded first so could + // destroy inputs. On other platforms that implement those + // (x86, sparc), the extra constrainsts are harmless. + if (op2->_opr1->is_valid()) do_temp(op2->_opr1); + if (op2->_opr2->is_valid()) do_temp(op2->_opr2); + } break; } @@ -1733,6 +1743,8 @@ case lir_shr: s = "shift_right"; break; case lir_ushr: s = "ushift_right"; break; case lir_alloc_array: s = "alloc_array"; break; + case lir_xadd: s = "xadd"; break; + case lir_xchg: s = "xchg"; break; // LIR_Op3 case lir_idiv: s = "idiv"; break; case lir_irem: s = "irem"; break; diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_LIR.hpp --- a/src/share/vm/c1/c1_LIR.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_LIR.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -963,6 +963,8 @@ , lir_alloc_array , lir_throw , lir_compare_to + , lir_xadd + , lir_xchg , end_op2 , begin_op3 , lir_idiv @@ -2191,6 +2193,9 @@ void profile_call(ciMethod* method, int bci, ciMethod* callee, LIR_Opr mdo, LIR_Opr recv, LIR_Opr t1, ciKlass* cha_klass) { append(new LIR_OpProfileCall(lir_profile_call, method, bci, callee, mdo, recv, t1, cha_klass)); } + + void xadd(LIR_Opr src, LIR_Opr add, LIR_Opr res, LIR_Opr tmp) { append(new LIR_Op2(lir_xadd, src, add, res, tmp)); } + void xchg(LIR_Opr src, LIR_Opr set, LIR_Opr res, LIR_Opr tmp) { append(new LIR_Op2(lir_xchg, src, set, res, tmp)); } }; void print_LIR(BlockList* blocks); @@ -2287,16 +2292,21 @@ LIR_Address* address = opr->as_address_ptr(); if (address != NULL) { // special handling for addresses: add base and index register of the address - // both are always input operands! + // both are always input operands or temp if we want to extend + // their liveness! + if (mode == outputMode) { + mode = inputMode; + } + assert (mode == inputMode || mode == tempMode, "input or temp only for addresses"); if (address->_base->is_valid()) { assert(address->_base->is_register(), "must be"); - assert(_oprs_len[inputMode] < maxNumberOfOperands, "array overflow"); - _oprs_new[inputMode][_oprs_len[inputMode]++] = &address->_base; + assert(_oprs_len[mode] < maxNumberOfOperands, "array overflow"); + _oprs_new[mode][_oprs_len[mode]++] = &address->_base; } if (address->_index->is_valid()) { assert(address->_index->is_register(), "must be"); - assert(_oprs_len[inputMode] < maxNumberOfOperands, "array overflow"); - _oprs_new[inputMode][_oprs_len[inputMode]++] = &address->_index; + assert(_oprs_len[mode] < maxNumberOfOperands, "array overflow"); + _oprs_new[mode][_oprs_len[mode]++] = &address->_index; } } else { diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_LIRAssembler.cpp --- a/src/share/vm/c1/c1_LIRAssembler.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_LIRAssembler.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -773,6 +773,11 @@ throw_op(op->in_opr1(), op->in_opr2(), op->info()); break; + case lir_xadd: + case lir_xchg: + atomic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->tmp1_opr()); + break; + default: Unimplemented(); break; diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_LIRAssembler.hpp --- a/src/share/vm/c1/c1_LIRAssembler.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_LIRAssembler.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -252,6 +252,8 @@ void verify_oop_map(CodeEmitInfo* info); + void atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp); + #ifdef TARGET_ARCH_x86 # include "c1_LIRAssembler_x86.hpp" #endif diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_LIRGenerator.hpp --- a/src/share/vm/c1/c1_LIRGenerator.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_LIRGenerator.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -527,6 +527,7 @@ virtual void do_UnsafePutRaw (UnsafePutRaw* x); virtual void do_UnsafeGetObject(UnsafeGetObject* x); virtual void do_UnsafePutObject(UnsafePutObject* x); + virtual void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x); virtual void do_UnsafePrefetchRead (UnsafePrefetchRead* x); virtual void do_UnsafePrefetchWrite(UnsafePrefetchWrite* x); virtual void do_ProfileCall (ProfileCall* x); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_Optimizer.cpp --- a/src/share/vm/c1/c1_Optimizer.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_Optimizer.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -505,6 +505,7 @@ void do_UnsafePutRaw (UnsafePutRaw* x); void do_UnsafeGetObject(UnsafeGetObject* x); void do_UnsafePutObject(UnsafePutObject* x); + void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x); void do_UnsafePrefetchRead (UnsafePrefetchRead* x); void do_UnsafePrefetchWrite(UnsafePrefetchWrite* x); void do_ProfileCall (ProfileCall* x); @@ -676,6 +677,7 @@ void NullCheckVisitor::do_UnsafePutRaw (UnsafePutRaw* x) {} void NullCheckVisitor::do_UnsafeGetObject(UnsafeGetObject* x) {} void NullCheckVisitor::do_UnsafePutObject(UnsafePutObject* x) {} +void NullCheckVisitor::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) {} void NullCheckVisitor::do_UnsafePrefetchRead (UnsafePrefetchRead* x) {} void NullCheckVisitor::do_UnsafePrefetchWrite(UnsafePrefetchWrite* x) {} void NullCheckVisitor::do_ProfileCall (ProfileCall* x) { nce()->clear_last_explicit_null_check(); } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/c1/c1_ValueMap.hpp --- a/src/share/vm/c1/c1_ValueMap.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/c1/c1_ValueMap.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -157,6 +157,7 @@ void do_Invoke (Invoke* x) { kill_memory(); } void do_UnsafePutRaw (UnsafePutRaw* x) { kill_memory(); } void do_UnsafePutObject(UnsafePutObject* x) { kill_memory(); } + void do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { kill_memory(); } void do_Intrinsic (Intrinsic* x) { if (!x->preserves_state()) kill_memory(); } void do_Phi (Phi* x) { /* nothing to do */ } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/ci/ciEnv.cpp --- a/src/share/vm/ci/ciEnv.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/ci/ciEnv.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -921,7 +921,8 @@ ImplicitExceptionTable* inc_table, AbstractCompiler* compiler, int comp_level, - bool has_unsafe_access) { + bool has_unsafe_access, + bool has_wide_vectors) { VM_ENTRY_MARK; nmethod* nm = NULL; { @@ -1016,6 +1017,7 @@ } } else { nm->set_has_unsafe_access(has_unsafe_access); + nm->set_has_wide_vectors(has_wide_vectors); // Record successful registration. // (Put nm into the task handle *before* publishing to the Java heap.) diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/ci/ciEnv.hpp --- a/src/share/vm/ci/ciEnv.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/ci/ciEnv.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -362,7 +362,8 @@ ImplicitExceptionTable* inc_table, AbstractCompiler* compiler, int comp_level, - bool has_unsafe_access); + bool has_unsafe_access, + bool has_wide_vectors); // Access to certain well known ciObjects. diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/classfile/vmSymbols.hpp --- a/src/share/vm/classfile/vmSymbols.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/classfile/vmSymbols.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -873,6 +873,20 @@ do_name( putOrderedInt_name, "putOrderedInt") \ do_alias( putOrderedInt_signature, /*(Ljava/lang/Object;JI)V*/ putInt_signature) \ \ + do_intrinsic(_getAndAddInt, sun_misc_Unsafe, getAndAddInt_name, getAndAddInt_signature, F_R) \ + do_name( getAndAddInt_name, "getAndAddInt") \ + do_signature(getAndAddInt_signature, "(Ljava/lang/Object;JI)I" ) \ + do_intrinsic(_getAndAddLong, sun_misc_Unsafe, getAndAddLong_name, getAndAddLong_signature, F_R) \ + do_name( getAndAddLong_name, "getAndAddLong") \ + do_signature(getAndAddLong_signature, "(Ljava/lang/Object;JJ)J" ) \ + do_intrinsic(_getAndSetInt, sun_misc_Unsafe, getAndSet_name, getAndSetInt_signature, F_R) \ + do_name( getAndSet_name, "getAndSet") \ + do_alias( getAndSetInt_signature, /*"(Ljava/lang/Object;JI)I"*/ getAndAddInt_signature) \ + do_intrinsic(_getAndSetLong, sun_misc_Unsafe, getAndSet_name, getAndSetLong_signature, F_R) \ + do_alias( getAndSetLong_signature, /*"(Ljava/lang/Object;JJ)J"*/ getAndAddLong_signature) \ + do_intrinsic(_getAndSetObject, sun_misc_Unsafe, getAndSet_name, getAndSetObject_signature, F_R) \ + do_signature(getAndSetObject_signature, "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \ + \ /* prefetch_signature is shared by all prefetch variants */ \ do_signature( prefetch_signature, "(Ljava/lang/Object;J)V") \ \ diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/code/codeBlob.cpp --- a/src/share/vm/code/codeBlob.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/code/codeBlob.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -162,8 +162,10 @@ assert(strlen(name1) + strlen(name2) < sizeof(stub_id), ""); jio_snprintf(stub_id, sizeof(stub_id), "%s%s", name1, name2); if (PrintStubCode) { + ttyLocker ttyl; tty->print_cr("Decoding %s " INTPTR_FORMAT, stub_id, (intptr_t) stub); Disassembler::decode(stub->code_begin(), stub->code_end()); + tty->cr(); } Forte::register_stub(stub_id, stub->code_begin(), stub->code_end()); @@ -548,6 +550,7 @@ } void RuntimeStub::print_on(outputStream* st) const { + ttyLocker ttyl; CodeBlob::print_on(st); st->print("Runtime Stub (" INTPTR_FORMAT "): ", this); st->print_cr(name()); @@ -563,6 +566,7 @@ } void SingletonBlob::print_on(outputStream* st) const { + ttyLocker ttyl; CodeBlob::print_on(st); st->print_cr(name()); Disassembler::decode((CodeBlob*)this, st); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/code/codeBlob.hpp --- a/src/share/vm/code/codeBlob.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/code/codeBlob.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -184,7 +184,7 @@ static void trace_new_stub(CodeBlob* blob, const char* name1, const char* name2 = ""); // Print the comment associated with offset on stream, if there is one - virtual void print_block_comment(outputStream* stream, address block_begin) { + virtual void print_block_comment(outputStream* stream, address block_begin) const { intptr_t offset = (intptr_t)(block_begin - code_begin()); _comments.print_block_comment(stream, offset); } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/code/icBuffer.hpp --- a/src/share/vm/code/icBuffer.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/code/icBuffer.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -25,6 +25,7 @@ #ifndef SHARE_VM_CODE_ICBUFFER_HPP #define SHARE_VM_CODE_ICBUFFER_HPP +#include "asm/codeBuffer.hpp" #include "code/stubs.hpp" #include "interpreter/bytecodes.hpp" #include "memory/allocation.hpp" @@ -48,7 +49,8 @@ protected: friend class ICStubInterface; // This will be called only by ICStubInterface - void initialize(int size) { _size = size; _ic_site = NULL; } + void initialize(int size, + CodeComments comments) { _size = size; _ic_site = NULL; } void finalize(); // called when a method is removed // General info diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/code/nmethod.cpp --- a/src/share/vm/code/nmethod.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/code/nmethod.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -463,6 +463,7 @@ _has_unsafe_access = 0; _has_method_handle_invokes = 0; _lazy_critical_native = 0; + _has_wide_vectors = 0; _marked_for_deoptimization = 0; _lock_count = 0; _stack_traversal_mark = 0; @@ -700,7 +701,9 @@ // then print the requested information if (PrintNativeNMethods) { print_code(); - oop_maps->print(); + if (oop_maps != NULL) { + oop_maps->print(); + } } if (PrintRelocations) { print_relocations(); @@ -2669,7 +2672,7 @@ return NULL; } -void nmethod::print_nmethod_labels(outputStream* stream, address block_begin) { +void nmethod::print_nmethod_labels(outputStream* stream, address block_begin) const { if (block_begin == entry_point()) stream->print_cr("[Entry Point]"); if (block_begin == verified_entry_point()) stream->print_cr("[Verified Entry Point]"); if (block_begin == exception_begin()) stream->print_cr("[Exception Handler]"); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/code/nmethod.hpp --- a/src/share/vm/code/nmethod.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/code/nmethod.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -177,6 +177,7 @@ unsigned int _has_unsafe_access:1; // May fault due to unsafe access. unsigned int _has_method_handle_invokes:1; // Has this method MethodHandle invokes? unsigned int _lazy_critical_native:1; // Lazy JNI critical native + unsigned int _has_wide_vectors:1; // Preserve wide vectors at safepoints // Protected by Patching_lock unsigned char _state; // {alive, not_entrant, zombie, unloaded} @@ -442,6 +443,9 @@ bool is_lazy_critical_native() const { return _lazy_critical_native; } void set_lazy_critical_native(bool z) { _lazy_critical_native = z; } + bool has_wide_vectors() const { return _has_wide_vectors; } + void set_has_wide_vectors(bool z) { _has_wide_vectors = z; } + int comp_level() const { return _comp_level; } // Support for oops in scopes and relocs: @@ -649,11 +653,11 @@ void log_state_change() const; // Prints block-level comments, including nmethod specific block labels: - virtual void print_block_comment(outputStream* stream, address block_begin) { + virtual void print_block_comment(outputStream* stream, address block_begin) const { print_nmethod_labels(stream, block_begin); CodeBlob::print_block_comment(stream, block_begin); } - void print_nmethod_labels(outputStream* stream, address block_begin); + void print_nmethod_labels(outputStream* stream, address block_begin) const; // Prints a comment for one native instruction (reloc info, pc desc) void print_code_comment_on(outputStream* st, int column, address begin, address end); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/code/stubs.cpp --- a/src/share/vm/code/stubs.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/code/stubs.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -101,7 +101,8 @@ Stub* StubQueue::request_committed(int code_size) { Stub* s = request(code_size); - if (s != NULL) commit(code_size); + CodeComments comments; + if (s != NULL) commit(code_size, comments); return s; } @@ -118,7 +119,8 @@ assert(_buffer_limit == _buffer_size, "buffer must be fully usable"); if (_queue_end + requested_size <= _buffer_size) { // code fits in at the end => nothing to do - stub_initialize(s, requested_size); + CodeComments comments; + stub_initialize(s, requested_size, comments); return s; } else { // stub doesn't fit in at the queue end @@ -135,7 +137,8 @@ // Queue: |XXX|.......|XXXXXXX|.......| // ^0 ^end ^begin ^limit ^size s = current_stub(); - stub_initialize(s, requested_size); + CodeComments comments; + stub_initialize(s, requested_size, comments); return s; } // Not enough space left @@ -144,12 +147,12 @@ } -void StubQueue::commit(int committed_code_size) { +void StubQueue::commit(int committed_code_size, CodeComments& comments) { assert(committed_code_size > 0, "committed_code_size must be > 0"); int committed_size = round_to(stub_code_size_to_size(committed_code_size), CodeEntryAlignment); Stub* s = current_stub(); assert(committed_size <= stub_size(s), "committed size must not exceed requested size"); - stub_initialize(s, committed_size); + stub_initialize(s, committed_size, comments); _queue_end += committed_size; _number_of_stubs++; if (_mutex != NULL) _mutex->unlock(); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/code/stubs.hpp --- a/src/share/vm/code/stubs.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/code/stubs.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -25,6 +25,7 @@ #ifndef SHARE_VM_CODE_STUBS_HPP #define SHARE_VM_CODE_STUBS_HPP +#include "asm/codeBuffer.hpp" #include "memory/allocation.hpp" #ifdef TARGET_OS_FAMILY_linux # include "os_linux.inline.hpp" @@ -71,7 +72,8 @@ class Stub VALUE_OBJ_CLASS_SPEC { public: // Initialization/finalization - void initialize(int size) { ShouldNotCallThis(); } // called to initialize/specify the stub's size + void initialize(int size, + CodeComments& comments) { ShouldNotCallThis(); } // called to initialize/specify the stub's size void finalize() { ShouldNotCallThis(); } // called before the stub is deallocated // General info/converters @@ -104,7 +106,8 @@ class StubInterface: public CHeapObj { public: // Initialization/finalization - virtual void initialize(Stub* self, int size) = 0; // called after creation (called twice if allocated via (request, commit)) + virtual void initialize(Stub* self, int size, + CodeComments& comments) = 0; // called after creation (called twice if allocated via (request, commit)) virtual void finalize(Stub* self) = 0; // called before deallocation // General info/converters @@ -132,7 +135,8 @@ \ public: \ /* Initialization/finalization */ \ - virtual void initialize(Stub* self, int size) { cast(self)->initialize(size); } \ + virtual void initialize(Stub* self, int size, \ + CodeComments& comments) { cast(self)->initialize(size, comments); } \ virtual void finalize(Stub* self) { cast(self)->finalize(); } \ \ /* General info */ \ @@ -171,7 +175,8 @@ Stub* current_stub() const { return stub_at(_queue_end); } // Stub functionality accessed via interface - void stub_initialize(Stub* s, int size) { assert(size % CodeEntryAlignment == 0, "size not aligned"); _stub_interface->initialize(s, size); } + void stub_initialize(Stub* s, int size, + CodeComments& comments) { assert(size % CodeEntryAlignment == 0, "size not aligned"); _stub_interface->initialize(s, size, comments); } void stub_finalize(Stub* s) { _stub_interface->finalize(s); } int stub_size(Stub* s) const { return _stub_interface->size(s); } bool stub_contains(Stub* s, address pc) const { return _stub_interface->code_begin(s) <= pc && pc < _stub_interface->code_end(s); } @@ -200,7 +205,8 @@ // Stub allocation (atomic transactions) Stub* request_committed(int code_size); // request a stub that provides exactly code_size space for code Stub* request(int requested_code_size); // request a stub with a (maximum) code space - locks the queue - void commit (int committed_code_size); // commit the previously requested stub - unlocks the queue + void commit (int committed_code_size, + CodeComments& comments); // commit the previously requested stub - unlocks the queue // Stub deallocation void remove_first(); // remove the first stub in the queue diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/compiler/disassembler.cpp --- a/src/share/vm/compiler/disassembler.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/compiler/disassembler.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -148,6 +148,7 @@ private: nmethod* _nm; CodeBlob* _code; + CodeComments _comments; outputStream* _output; address _start, _end; @@ -187,7 +188,7 @@ void print_address(address value); public: - decode_env(CodeBlob* code, outputStream* output); + decode_env(CodeBlob* code, outputStream* output, CodeComments c = CodeComments()); address decode_instructions(address start, address end); @@ -229,12 +230,13 @@ const char* options() { return _option_buf; } }; -decode_env::decode_env(CodeBlob* code, outputStream* output) { +decode_env::decode_env(CodeBlob* code, outputStream* output, CodeComments c) { memset(this, 0, sizeof(*this)); _output = output ? output : tty; _code = code; if (code != NULL && code->is_nmethod()) _nm = (nmethod*) code; + _comments.assign(c); // by default, output pc but not bytes: _print_pc = true; @@ -356,6 +358,7 @@ if (cb != NULL) { cb->print_block_comment(st, p); } + _comments.print_block_comment(st, (intptr_t)(p - _start)); if (_print_pc) { st->print(" " PTR_FORMAT ": ", p); } @@ -467,10 +470,9 @@ env.decode_instructions(cb->code_begin(), cb->code_end()); } - -void Disassembler::decode(address start, address end, outputStream* st) { +void Disassembler::decode(address start, address end, outputStream* st, CodeComments c) { if (!load_library()) return; - decode_env env(CodeCache::find_blob_unsafe(start), st); + decode_env env(CodeCache::find_blob_unsafe(start), st, c); env.decode_instructions(start, end); } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/compiler/disassembler.hpp --- a/src/share/vm/compiler/disassembler.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/compiler/disassembler.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -25,6 +25,7 @@ #ifndef SHARE_VM_COMPILER_DISASSEMBLER_HPP #define SHARE_VM_COMPILER_DISASSEMBLER_HPP +#include "asm/codeBuffer.hpp" #include "runtime/globals.hpp" #ifdef TARGET_OS_FAMILY_linux # include "os_linux.inline.hpp" @@ -87,7 +88,7 @@ } static void decode(CodeBlob *cb, outputStream* st = NULL); static void decode(nmethod* nm, outputStream* st = NULL); - static void decode(address begin, address end, outputStream* st = NULL); + static void decode(address begin, address end, outputStream* st = NULL, CodeComments c = CodeComments()); }; #endif // SHARE_VM_COMPILER_DISASSEMBLER_HPP diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/interpreter/interpreter.cpp --- a/src/share/vm/interpreter/interpreter.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/interpreter/interpreter.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -60,6 +60,8 @@ void InterpreterCodelet::print_on(outputStream* st) const { + ttyLocker ttyl; + if (PrintInterpreter) { st->cr(); st->print_cr("----------------------------------------------------------------------"); @@ -72,7 +74,7 @@ if (PrintInterpreter) { st->cr(); - Disassembler::decode(code_begin(), code_end(), st); + Disassembler::decode(code_begin(), code_end(), st, DEBUG_ONLY(_comments) NOT_DEBUG(CodeComments())); } } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/interpreter/interpreter.hpp --- a/src/share/vm/interpreter/interpreter.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/interpreter/interpreter.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -48,10 +48,12 @@ int _size; // the size in bytes const char* _description; // a description of the codelet, for debugging & printing Bytecodes::Code _bytecode; // associated bytecode if any + DEBUG_ONLY(CodeComments _comments;) // Comments for annotating assembler output. public: // Initialization/finalization - void initialize(int size) { _size = size; } + void initialize(int size, + CodeComments& comments) { _size = size; DEBUG_ONLY(_comments.assign(comments);) } void finalize() { ShouldNotCallThis(); } // General info/converters @@ -129,7 +131,7 @@ // commit Codelet - AbstractInterpreter::code()->commit((*_masm)->code()->pure_insts_size()); + AbstractInterpreter::code()->commit((*_masm)->code()->pure_insts_size(), (*_masm)->code()->comments()); // make sure nobody can use _masm outside a CodeletMark lifespan *_masm = NULL; } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/oops/method.cpp --- a/src/share/vm/oops/method.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/oops/method.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -251,8 +251,12 @@ int Method::bci_from(address bcp) const { +#ifdef ASSERT + { ResourceMark rm; assert(is_native() && bcp == code_base() || contains(bcp) || is_error_reported(), err_msg("bcp doesn't belong to this method: bcp: " INTPTR_FORMAT ", method: %s", bcp, name_and_sig_as_C_string())); + } +#endif return bcp - code_base(); } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/c2_globals.hpp --- a/src/share/vm/opto/c2_globals.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/c2_globals.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -85,7 +85,7 @@ "Max vector size in bytes, " \ "actual size could be less depending on elements type") \ \ - product(bool, AlignVector, false, \ + product(bool, AlignVector, true, \ "Perform vector store/load alignment in loop") \ \ product(intx, NumberOfLoopInstrToAlign, 4, \ @@ -535,7 +535,7 @@ notproduct(bool, TraceSpilling, false, \ "Trace spilling") \ \ - notproduct(bool, TraceTypeProfile, false, \ + diagnostic(bool, TraceTypeProfile, false, \ "Trace type profile") \ \ develop(bool, PoisonOSREntry, true, \ diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/classes.hpp --- a/src/share/vm/opto/classes.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/classes.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -83,6 +83,12 @@ macro(CompareAndSwapL) macro(CompareAndSwapP) macro(CompareAndSwapN) +macro(GetAndAddI) +macro(GetAndAddL) +macro(GetAndSetI) +macro(GetAndSetL) +macro(GetAndSetP) +macro(GetAndSetN) macro(Con) macro(ConN) macro(ConD) diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/compile.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -825,7 +825,8 @@ &_handler_table, &_inc_table, compiler, env()->comp_level(), - has_unsafe_access() + has_unsafe_access(), + SharedRuntime::is_wide_vector(max_vector_size()) ); } } @@ -963,6 +964,7 @@ _trap_can_recompile = false; // no traps emitted yet _major_progress = true; // start out assuming good things will happen set_has_unsafe_access(false); + set_max_vector_size(0); Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist)); set_decompile_count(0); @@ -2274,6 +2276,12 @@ case Op_CompareAndSwapL: case Op_CompareAndSwapP: case Op_CompareAndSwapN: + case Op_GetAndAddI: + case Op_GetAndAddL: + case Op_GetAndSetI: + case Op_GetAndSetL: + case Op_GetAndSetP: + case Op_GetAndSetN: case Op_StoreP: case Op_StoreN: case Op_LoadB: diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/compile.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -279,6 +279,7 @@ bool _has_split_ifs; // True if the method _may_ have some split-if bool _has_unsafe_access; // True if the method _may_ produce faults in unsafe loads or stores. bool _has_stringbuilder; // True StringBuffers or StringBuilders are allocated + int _max_vector_size; // Maximum size of generated vectors uint _trap_hist[trapHistLength]; // Cumulative traps bool _trap_can_recompile; // Have we emitted a recompiling trap? uint _decompile_count; // Cumulative decompilation counts. @@ -443,6 +444,8 @@ void set_has_unsafe_access(bool z) { _has_unsafe_access = z; } bool has_stringbuilder() const { return _has_stringbuilder; } void set_has_stringbuilder(bool z) { _has_stringbuilder = z; } + int max_vector_size() const { return _max_vector_size; } + void set_max_vector_size(int s) { _max_vector_size = s; } void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; } uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; } bool trap_can_recompile() const { return _trap_can_recompile; } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/connode.cpp --- a/src/share/vm/opto/connode.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/connode.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -480,7 +480,9 @@ opc == Op_CheckCastPP || opc == Op_StorePConditional || opc == Op_CompareAndSwapP || - opc == Op_CompareAndSwapN; + opc == Op_CompareAndSwapN || + opc == Op_GetAndSetP || + opc == Op_GetAndSetN; } return possible_alias; } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/doCall.cpp --- a/src/share/vm/opto/doCall.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/doCall.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -40,11 +40,10 @@ #include "prims/nativeLookup.hpp" #include "runtime/sharedRuntime.hpp" -#ifndef PRODUCT void trace_type_profile(ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) { - if (TraceTypeProfile || PrintInlining || PrintOptoInlining) { + if (TraceTypeProfile || PrintInlining NOT_PRODUCT(|| PrintOptoInlining)) { if (!PrintInlining) { - if (!PrintOpto && !PrintCompilation) { + if (NOT_PRODUCT(!PrintOpto &&) !PrintCompilation) { method->print_short_name(); tty->cr(); } @@ -56,7 +55,6 @@ tty->cr(); } } -#endif CallGenerator* Compile::call_generator(ciMethod* callee, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, @@ -225,13 +223,13 @@ } if (miss_cg != NULL) { if (next_hit_cg != NULL) { - NOT_PRODUCT(trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1))); + trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1)); // We don't need to record dependency on a receiver here and below. // Whenever we inline, the dependency is added by Parse::Parse(). miss_cg = CallGenerator::for_predicted_call(profile.receiver(1), miss_cg, next_hit_cg, PROB_MAX); } if (miss_cg != NULL) { - NOT_PRODUCT(trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count)); + trace_type_profile(jvms->method(), jvms->depth() - 1, jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count); CallGenerator* cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0)); if (cg != NULL) return cg; } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/escape.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -282,6 +282,26 @@ return has_non_escaping_obj; } +// Utility function for nodes that load an object +void ConnectionGraph::add_objload_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist) { + // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because + // ThreadLocal has RawPtr type. + const Type* t = _igvn->type(n); + if (t->make_ptr() != NULL) { + Node* adr = n->in(MemNode::Address); +#ifdef ASSERT + if (!adr->is_AddP()) { + assert(_igvn->type(adr)->isa_rawptr(), "sanity"); + } else { + assert((ptnode_adr(adr->_idx) == NULL || + ptnode_adr(adr->_idx)->as_Field()->is_oop()), "sanity"); + } +#endif + add_local_var_and_edge(n, PointsToNode::NoEscape, + adr, delayed_worklist); + } +} + // Populate Connection Graph with PointsTo nodes and create simple // connection graph edges. void ConnectionGraph::add_node_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist) { @@ -387,22 +407,7 @@ case Op_LoadP: case Op_LoadN: case Op_LoadPLocked: { - // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because - // ThreadLocal has RawPrt type. - const Type* t = igvn->type(n); - if (t->make_ptr() != NULL) { - Node* adr = n->in(MemNode::Address); -#ifdef ASSERT - if (!adr->is_AddP()) { - assert(igvn->type(adr)->isa_rawptr(), "sanity"); - } else { - assert((ptnode_adr(adr->_idx) == NULL || - ptnode_adr(adr->_idx)->as_Field()->is_oop()), "sanity"); - } -#endif - add_local_var_and_edge(n, PointsToNode::NoEscape, - adr, delayed_worklist); - } + add_objload_to_connection_graph(n, delayed_worklist); break; } case Op_Parm: { @@ -417,7 +422,7 @@ } case Op_Phi: { // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because - // ThreadLocal has RawPrt type. + // ThreadLocal has RawPtr type. const Type* t = n->as_Phi()->type(); if (t->make_ptr() != NULL) { add_local_var(n, PointsToNode::NoEscape); @@ -446,6 +451,11 @@ } break; } + case Op_GetAndSetP: + case Op_GetAndSetN: { + add_objload_to_connection_graph(n, delayed_worklist); + // fallthrough + } case Op_StoreP: case Op_StoreN: case Op_StorePConditional: @@ -585,7 +595,7 @@ case Op_LoadN: case Op_LoadPLocked: { // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because - // ThreadLocal has RawPrt type. + // ThreadLocal has RawPtr type. const Type* t = _igvn->type(n); if (t->make_ptr() != NULL) { Node* adr = n->in(MemNode::Address); @@ -596,7 +606,7 @@ } case Op_Phi: { // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because - // ThreadLocal has RawPrt type. + // ThreadLocal has RawPtr type. const Type* t = n->as_Phi()->type(); if (t->make_ptr() != NULL) { for (uint i = 1; i < n->req(); i++) { @@ -638,8 +648,16 @@ case Op_StoreN: case Op_StorePConditional: case Op_CompareAndSwapP: - case Op_CompareAndSwapN: { + case Op_CompareAndSwapN: + case Op_GetAndSetP: + case Op_GetAndSetN: { Node* adr = n->in(MemNode::Address); + if (opcode == Op_GetAndSetP || opcode == Op_GetAndSetN) { + const Type* t = _igvn->type(n); + if (t->make_ptr() != NULL) { + add_local_var_and_edge(n, PointsToNode::NoEscape, adr, NULL); + } + } const Type *adr_type = _igvn->type(adr); adr_type = adr_type->make_ptr(); if (adr_type->isa_oopptr() || diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/escape.hpp --- a/src/share/vm/opto/escape.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/escape.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -371,6 +371,8 @@ _nodes.at_put(n->_idx, ptn); } + // Utility function for nodes that load an object + void add_objload_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist); // Create PointsToNode node and add it to Connection Graph. void add_node_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/library_call.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -65,6 +65,8 @@ private: LibraryIntrinsic* _intrinsic; // the library intrinsic being called + const TypeOopPtr* sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr = false); + public: LibraryCallKit(JVMState* caller, LibraryIntrinsic* intrinsic) : GraphKit(caller), @@ -241,7 +243,8 @@ Node* src, Node* src_offset, Node* dest, Node* dest_offset, Node* copy_length, bool dest_uninitialized); - bool inline_unsafe_CAS(BasicType type); + typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind; + bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind); bool inline_unsafe_ordered_store(BasicType type); bool inline_fp_conversions(vmIntrinsics::ID id); bool inline_numberOfLeadingZeros(vmIntrinsics::ID id); @@ -290,6 +293,11 @@ case vmIntrinsics::_compareTo: case vmIntrinsics::_equals: case vmIntrinsics::_equalsC: + case vmIntrinsics::_getAndAddInt: + case vmIntrinsics::_getAndAddLong: + case vmIntrinsics::_getAndSetInt: + case vmIntrinsics::_getAndSetLong: + case vmIntrinsics::_getAndSetObject: break; // InlineNatives does not control String.compareTo case vmIntrinsics::_Reference_get: break; // InlineNatives does not control Reference.get @@ -369,6 +377,42 @@ // across safepoint since GC can change it value. break; + case vmIntrinsics::_compareAndSwapObject: +#ifdef _LP64 + if (!UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndSwapP)) return NULL; +#endif + break; + + case vmIntrinsics::_compareAndSwapLong: + if (!Matcher::match_rule_supported(Op_CompareAndSwapL)) return NULL; + break; + + case vmIntrinsics::_getAndAddInt: + if (!Matcher::match_rule_supported(Op_GetAndAddI)) return NULL; + break; + + case vmIntrinsics::_getAndAddLong: + if (!Matcher::match_rule_supported(Op_GetAndAddL)) return NULL; + break; + + case vmIntrinsics::_getAndSetInt: + if (!Matcher::match_rule_supported(Op_GetAndSetI)) return NULL; + break; + + case vmIntrinsics::_getAndSetLong: + if (!Matcher::match_rule_supported(Op_GetAndSetL)) return NULL; + break; + + case vmIntrinsics::_getAndSetObject: +#ifdef _LP64 + if (!UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetP)) return NULL; + if (UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetN)) return NULL; + break; +#else + if (!Matcher::match_rule_supported(Op_GetAndSetP)) return NULL; + break; +#endif + default: assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility"); assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?"); @@ -620,11 +664,11 @@ return inline_unsafe_prefetch(!is_native_ptr, is_store, is_static); case vmIntrinsics::_compareAndSwapObject: - return inline_unsafe_CAS(T_OBJECT); + return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg); case vmIntrinsics::_compareAndSwapInt: - return inline_unsafe_CAS(T_INT); + return inline_unsafe_load_store(T_INT, LS_cmpxchg); case vmIntrinsics::_compareAndSwapLong: - return inline_unsafe_CAS(T_LONG); + return inline_unsafe_load_store(T_LONG, LS_cmpxchg); case vmIntrinsics::_putOrderedObject: return inline_unsafe_ordered_store(T_OBJECT); @@ -633,6 +677,17 @@ case vmIntrinsics::_putOrderedLong: return inline_unsafe_ordered_store(T_LONG); + case vmIntrinsics::_getAndAddInt: + return inline_unsafe_load_store(T_INT, LS_xadd); + case vmIntrinsics::_getAndAddLong: + return inline_unsafe_load_store(T_LONG, LS_xadd); + case vmIntrinsics::_getAndSetInt: + return inline_unsafe_load_store(T_INT, LS_xchg); + case vmIntrinsics::_getAndSetLong: + return inline_unsafe_load_store(T_LONG, LS_xchg); + case vmIntrinsics::_getAndSetObject: + return inline_unsafe_load_store(T_OBJECT, LS_xchg); + case vmIntrinsics::_currentThread: return inline_native_currentThread(); case vmIntrinsics::_isInterrupted: @@ -2301,6 +2356,43 @@ // Interpret Unsafe.fieldOffset cookies correctly: extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset); +const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr) { + // Attempt to infer a sharper value type from the offset and base type. + ciKlass* sharpened_klass = NULL; + + // See if it is an instance field, with an object type. + if (alias_type->field() != NULL) { + assert(!is_native_ptr, "native pointer op cannot use a java address"); + if (alias_type->field()->type()->is_klass()) { + sharpened_klass = alias_type->field()->type()->as_klass(); + } + } + + // See if it is a narrow oop array. + if (adr_type->isa_aryptr()) { + if (adr_type->offset() >= objArrayOopDesc::base_offset_in_bytes()) { + const TypeOopPtr *elem_type = adr_type->is_aryptr()->elem()->isa_oopptr(); + if (elem_type != NULL) { + sharpened_klass = elem_type->klass(); + } + } + } + + if (sharpened_klass != NULL) { + const TypeOopPtr* tjp = TypeOopPtr::make_from_klass(sharpened_klass); + +#ifndef PRODUCT + if (PrintIntrinsics || PrintInlining || PrintOptoInlining) { + tty->print(" from base type: "); adr_type->dump(); + tty->print(" sharpened value: "); tjp->dump(); + } +#endif + // Sharpen the value type. + return tjp; + } + return NULL; +} + bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile) { if (callee()->is_static()) return false; // caller must have the capability! @@ -2430,39 +2522,9 @@ offset != top() && heap_base_oop != top(); if (!is_store && type == T_OBJECT) { - // Attempt to infer a sharper value type from the offset and base type. - ciKlass* sharpened_klass = NULL; - - // See if it is an instance field, with an object type. - if (alias_type->field() != NULL) { - assert(!is_native_ptr, "native pointer op cannot use a java address"); - if (alias_type->field()->type()->is_klass()) { - sharpened_klass = alias_type->field()->type()->as_klass(); - } - } - - // See if it is a narrow oop array. - if (adr_type->isa_aryptr()) { - if (adr_type->offset() >= objArrayOopDesc::base_offset_in_bytes()) { - const TypeOopPtr *elem_type = adr_type->is_aryptr()->elem()->isa_oopptr(); - if (elem_type != NULL) { - sharpened_klass = elem_type->klass(); - } - } - } - - if (sharpened_klass != NULL) { - const TypeOopPtr* tjp = TypeOopPtr::make_from_klass(sharpened_klass); - - // Sharpen the value type. + const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type, is_native_ptr); + if (tjp != NULL) { value_type = tjp; - -#ifndef PRODUCT - if (PrintIntrinsics || PrintInlining || PrintOptoInlining) { - tty->print(" from base type: "); adr_type->dump(); - tty->print(" sharpened value: "); value_type->dump(); - } -#endif } } @@ -2673,9 +2735,9 @@ return true; } -//----------------------------inline_unsafe_CAS---------------------------- - -bool LibraryCallKit::inline_unsafe_CAS(BasicType type) { +//----------------------------inline_unsafe_load_store---------------------------- + +bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind) { // This basic scheme here is the same as inline_unsafe_access, but // differs in enough details that combining them would make the code // overly confusing. (This is a true fact! I originally combined @@ -2686,37 +2748,47 @@ if (callee()->is_static()) return false; // caller must have the capability! #ifndef PRODUCT + BasicType rtype; { ResourceMark rm; - // Check the signatures. ciSignature* sig = signature(); + rtype = sig->return_type()->basic_type(); + if (kind == LS_xadd || kind == LS_xchg) { + // Check the signatures. #ifdef ASSERT - BasicType rtype = sig->return_type()->basic_type(); - assert(rtype == T_BOOLEAN, "CAS must return boolean"); - assert(sig->count() == 4, "CAS has 4 arguments"); - assert(sig->type_at(0)->basic_type() == T_OBJECT, "CAS base is object"); - assert(sig->type_at(1)->basic_type() == T_LONG, "CAS offset is long"); + assert(rtype == type, "get and set must return the expected type"); + assert(sig->count() == 3, "get and set has 3 arguments"); + assert(sig->type_at(0)->basic_type() == T_OBJECT, "get and set base is object"); + assert(sig->type_at(1)->basic_type() == T_LONG, "get and set offset is long"); + assert(sig->type_at(2)->basic_type() == type, "get and set must take expected type as new value/delta"); #endif // ASSERT + } else if (kind == LS_cmpxchg) { + // Check the signatures. +#ifdef ASSERT + assert(rtype == T_BOOLEAN, "CAS must return boolean"); + assert(sig->count() == 4, "CAS has 4 arguments"); + assert(sig->type_at(0)->basic_type() == T_OBJECT, "CAS base is object"); + assert(sig->type_at(1)->basic_type() == T_LONG, "CAS offset is long"); +#endif // ASSERT + } else { + ShouldNotReachHere(); + } } #endif //PRODUCT // number of stack slots per value argument (1 or 2) int type_words = type2size[type]; - // Cannot inline wide CAS on machines that don't support it natively - if (type2aelembytes(type) > BytesPerInt && !VM_Version::supports_cx8()) - return false; - C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". - // Argument words: "this" plus oop plus offset plus oldvalue plus newvalue; - int nargs = 1 + 1 + 2 + type_words + type_words; - - // pop arguments: newval, oldval, offset, base, and receiver + // Argument words: "this" plus oop plus offset (plus oldvalue) plus newvalue/delta; + int nargs = 1 + 1 + 2 + ((kind == LS_cmpxchg) ? type_words : 0) + type_words; + + // pop arguments: newval, offset, base, and receiver debug_only(int saved_sp = _sp); _sp += nargs; Node* newval = (type_words == 1) ? pop() : pop_pair(); - Node* oldval = (type_words == 1) ? pop() : pop_pair(); + Node* oldval = (kind == LS_cmpxchg) ? ((type_words == 1) ? pop() : pop_pair()) : NULL; Node *offset = pop_pair(); Node *base = pop(); Node *receiver = pop(); @@ -2740,16 +2812,24 @@ Node* adr = make_unsafe_address(base, offset); const TypePtr *adr_type = _gvn.type(adr)->isa_ptr(); - // (Unlike inline_unsafe_access, there seems no point in trying - // to refine types. Just use the coarse types here. + // For CAS, unlike inline_unsafe_access, there seems no point in + // trying to refine types. Just use the coarse types here. const Type *value_type = Type::get_const_basic_type(type); Compile::AliasType* alias_type = C->alias_type(adr_type); assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here"); + + if (kind == LS_xchg && type == T_OBJECT) { + const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type); + if (tjp != NULL) { + value_type = tjp; + } + } + int alias_idx = C->get_alias_index(adr_type); - // Memory-model-wise, a CAS acts like a little synchronized block, - // so needs barriers on each side. These don't translate into - // actual barriers on most machines, but we still need rest of + // Memory-model-wise, a LoadStore acts like a little synchronized + // block, so needs barriers on each side. These don't translate + // into actual barriers on most machines, but we still need rest of // compiler to respect ordering. insert_mem_bar(Op_MemBarRelease); @@ -2762,13 +2842,29 @@ // For now, we handle only those cases that actually exist: ints, // longs, and Object. Adding others should be straightforward. - Node* cas; + Node* load_store; switch(type) { case T_INT: - cas = _gvn.transform(new (C, 5) CompareAndSwapINode(control(), mem, adr, newval, oldval)); + if (kind == LS_xadd) { + load_store = _gvn.transform(new (C, 4) GetAndAddINode(control(), mem, adr, newval, adr_type)); + } else if (kind == LS_xchg) { + load_store = _gvn.transform(new (C, 4) GetAndSetINode(control(), mem, adr, newval, adr_type)); + } else if (kind == LS_cmpxchg) { + load_store = _gvn.transform(new (C, 5) CompareAndSwapINode(control(), mem, adr, newval, oldval)); + } else { + ShouldNotReachHere(); + } break; case T_LONG: - cas = _gvn.transform(new (C, 5) CompareAndSwapLNode(control(), mem, adr, newval, oldval)); + if (kind == LS_xadd) { + load_store = _gvn.transform(new (C, 4) GetAndAddLNode(control(), mem, adr, newval, adr_type)); + } else if (kind == LS_xchg) { + load_store = _gvn.transform(new (C, 4) GetAndSetLNode(control(), mem, adr, newval, adr_type)); + } else if (kind == LS_cmpxchg) { + load_store = _gvn.transform(new (C, 5) CompareAndSwapLNode(control(), mem, adr, newval, oldval)); + } else { + ShouldNotReachHere(); + } break; case T_OBJECT: // Transformation of a value which could be NULL pointer (CastPP #NULL) @@ -2778,7 +2874,6 @@ newval = _gvn.makecon(TypePtr::NULL_PTR); // Reference stores need a store barrier. - // (They don't if CAS fails, but it isn't worth checking.) pre_barrier(true /* do_load*/, control(), base, adr, alias_idx, newval, value_type->make_oopptr(), NULL /* pre_val*/, @@ -2786,32 +2881,50 @@ #ifdef _LP64 if (adr->bottom_type()->is_ptr_to_narrowoop()) { Node *newval_enc = _gvn.transform(new (C, 2) EncodePNode(newval, newval->bottom_type()->make_narrowoop())); - Node *oldval_enc = _gvn.transform(new (C, 2) EncodePNode(oldval, oldval->bottom_type()->make_narrowoop())); - cas = _gvn.transform(new (C, 5) CompareAndSwapNNode(control(), mem, adr, - newval_enc, oldval_enc)); + if (kind == LS_xchg) { + load_store = _gvn.transform(new (C, 4) GetAndSetNNode(control(), mem, adr, + newval_enc, adr_type, value_type->make_narrowoop())); + } else { + assert(kind == LS_cmpxchg, "wrong LoadStore operation"); + Node *oldval_enc = _gvn.transform(new (C, 2) EncodePNode(oldval, oldval->bottom_type()->make_narrowoop())); + load_store = _gvn.transform(new (C, 5) CompareAndSwapNNode(control(), mem, adr, + newval_enc, oldval_enc)); + } } else #endif { - cas = _gvn.transform(new (C, 5) CompareAndSwapPNode(control(), mem, adr, newval, oldval)); + if (kind == LS_xchg) { + load_store = _gvn.transform(new (C, 4) GetAndSetPNode(control(), mem, adr, newval, adr_type, value_type->is_oopptr())); + } else { + assert(kind == LS_cmpxchg, "wrong LoadStore operation"); + load_store = _gvn.transform(new (C, 5) CompareAndSwapPNode(control(), mem, adr, newval, oldval)); + } } - post_barrier(control(), cas, base, adr, alias_idx, newval, T_OBJECT, true); + post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true); break; default: ShouldNotReachHere(); break; } - // SCMemProjNodes represent the memory state of CAS. Their main - // role is to prevent CAS nodes from being optimized away when their - // results aren't used. - Node* proj = _gvn.transform( new (C, 1) SCMemProjNode(cas)); + // SCMemProjNodes represent the memory state of a LoadStore. Their + // main role is to prevent LoadStore nodes from being optimized away + // when their results aren't used. + Node* proj = _gvn.transform( new (C, 1) SCMemProjNode(load_store)); set_memory(proj, alias_idx); // Add the trailing membar surrounding the access insert_mem_bar(Op_MemBarCPUOrder); insert_mem_bar(Op_MemBarAcquire); - push(cas); +#ifdef _LP64 + if (type == T_OBJECT && adr->bottom_type()->is_ptr_to_narrowoop() && kind == LS_xchg) { + load_store = _gvn.transform(new (C, 2) DecodeNNode(load_store, load_store->bottom_type()->make_ptr())); + } +#endif + + assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match"); + push_node(load_store->bottom_type()->basic_type(), load_store); return true; } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/matcher.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -2134,10 +2134,10 @@ case Op_CompareAndSwapP: case Op_CompareAndSwapN: { // Convert trinary to binary-tree Node *newval = n->in(MemNode::ValueIn ); - Node *oldval = n->in(LoadStoreNode::ExpectedIn); + Node *oldval = n->in(LoadStoreConditionalNode::ExpectedIn); Node *pair = new (C, 3) BinaryNode( oldval, newval ); n->set_req(MemNode::ValueIn,pair); - n->del_req(LoadStoreNode::ExpectedIn); + n->del_req(LoadStoreConditionalNode::ExpectedIn); break; } case Op_CMoveD: // Convert trinary to binary-tree diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/memnode.cpp --- a/src/share/vm/opto/memnode.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/memnode.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -2552,14 +2552,38 @@ } //============================================================================= -LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex ) : Node(5) { +//----------------------------------LoadStoreNode------------------------------ +LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required ) + : Node(required), + _type(rt), + _adr_type(at) +{ init_req(MemNode::Control, c ); init_req(MemNode::Memory , mem); init_req(MemNode::Address, adr); init_req(MemNode::ValueIn, val); - init_req( ExpectedIn, ex ); init_class_id(Class_LoadStore); - +} + +uint LoadStoreNode::ideal_reg() const { + return _type->ideal_reg(); +} + +bool LoadStoreNode::result_not_used() const { + for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) { + Node *x = fast_out(i); + if (x->Opcode() == Op_SCMemProj) continue; + return false; + } + return true; +} + +uint LoadStoreNode::size_of() const { return sizeof(*this); } + +//============================================================================= +//----------------------------------LoadStoreConditionalNode-------------------- +LoadStoreConditionalNode::LoadStoreConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex ) : LoadStoreNode(c, mem, adr, val, NULL, TypeInt::BOOL, 5) { + init_req(ExpectedIn, ex ); } //============================================================================= diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/memnode.hpp --- a/src/share/vm/opto/memnode.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/memnode.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -657,23 +657,36 @@ //------------------------------LoadStoreNode--------------------------- // Note: is_Mem() method returns 'true' for this class. class LoadStoreNode : public Node { +private: + const Type* const _type; // What kind of value is loaded? + const TypePtr* _adr_type; // What kind of memory is being addressed? + virtual uint size_of() const; // Size is bigger +public: + LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required ); + virtual bool depends_only_on_test() const { return false; } + virtual uint match_edge(uint idx) const { return idx == MemNode::Address || idx == MemNode::ValueIn; } + + virtual const Type *bottom_type() const { return _type; } + virtual uint ideal_reg() const; + virtual const class TypePtr *adr_type() const { return _adr_type; } // returns bottom_type of address + + bool result_not_used() const; +}; + +class LoadStoreConditionalNode : public LoadStoreNode { public: enum { ExpectedIn = MemNode::ValueIn+1 // One more input than MemNode }; - LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex); - virtual bool depends_only_on_test() const { return false; } - virtual const Type *bottom_type() const { return TypeInt::BOOL; } - virtual uint ideal_reg() const { return Op_RegI; } - virtual uint match_edge(uint idx) const { return idx == MemNode::Address || idx == MemNode::ValueIn; } + LoadStoreConditionalNode(Node *c, Node *mem, Node *adr, Node *val, Node *ex); }; //------------------------------StorePConditionalNode--------------------------- // Conditionally store pointer to memory, if no change since prior // load-locked. Sets flags for success or failure of the store. -class StorePConditionalNode : public LoadStoreNode { +class StorePConditionalNode : public LoadStoreConditionalNode { public: - StorePConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreNode(c, mem, adr, val, ll) { } + StorePConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreConditionalNode(c, mem, adr, val, ll) { } virtual int Opcode() const; // Produces flags virtual uint ideal_reg() const { return Op_RegFlags; } @@ -682,9 +695,9 @@ //------------------------------StoreIConditionalNode--------------------------- // Conditionally store int to memory, if no change since prior // load-locked. Sets flags for success or failure of the store. -class StoreIConditionalNode : public LoadStoreNode { +class StoreIConditionalNode : public LoadStoreConditionalNode { public: - StoreIConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ii ) : LoadStoreNode(c, mem, adr, val, ii) { } + StoreIConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ii ) : LoadStoreConditionalNode(c, mem, adr, val, ii) { } virtual int Opcode() const; // Produces flags virtual uint ideal_reg() const { return Op_RegFlags; } @@ -693,9 +706,9 @@ //------------------------------StoreLConditionalNode--------------------------- // Conditionally store long to memory, if no change since prior // load-locked. Sets flags for success or failure of the store. -class StoreLConditionalNode : public LoadStoreNode { +class StoreLConditionalNode : public LoadStoreConditionalNode { public: - StoreLConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreNode(c, mem, adr, val, ll) { } + StoreLConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreConditionalNode(c, mem, adr, val, ll) { } virtual int Opcode() const; // Produces flags virtual uint ideal_reg() const { return Op_RegFlags; } @@ -703,32 +716,75 @@ //------------------------------CompareAndSwapLNode--------------------------- -class CompareAndSwapLNode : public LoadStoreNode { +class CompareAndSwapLNode : public LoadStoreConditionalNode { public: - CompareAndSwapLNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { } + CompareAndSwapLNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { } virtual int Opcode() const; }; //------------------------------CompareAndSwapINode--------------------------- -class CompareAndSwapINode : public LoadStoreNode { +class CompareAndSwapINode : public LoadStoreConditionalNode { public: - CompareAndSwapINode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { } + CompareAndSwapINode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { } virtual int Opcode() const; }; //------------------------------CompareAndSwapPNode--------------------------- -class CompareAndSwapPNode : public LoadStoreNode { +class CompareAndSwapPNode : public LoadStoreConditionalNode { public: - CompareAndSwapPNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { } + CompareAndSwapPNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { } virtual int Opcode() const; }; //------------------------------CompareAndSwapNNode--------------------------- -class CompareAndSwapNNode : public LoadStoreNode { +class CompareAndSwapNNode : public LoadStoreConditionalNode { +public: + CompareAndSwapNNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { } + virtual int Opcode() const; +}; + +//------------------------------GetAndAddINode--------------------------- +class GetAndAddINode : public LoadStoreNode { +public: + GetAndAddINode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at ) : LoadStoreNode(c, mem, adr, val, at, TypeInt::INT, 4) { } + virtual int Opcode() const; +}; + +//------------------------------GetAndAddLNode--------------------------- +class GetAndAddLNode : public LoadStoreNode { public: - CompareAndSwapNNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { } + GetAndAddLNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at ) : LoadStoreNode(c, mem, adr, val, at, TypeLong::LONG, 4) { } + virtual int Opcode() const; +}; + + +//------------------------------GetAndSetINode--------------------------- +class GetAndSetINode : public LoadStoreNode { +public: + GetAndSetINode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at ) : LoadStoreNode(c, mem, adr, val, at, TypeInt::INT, 4) { } + virtual int Opcode() const; +}; + +//------------------------------GetAndSetINode--------------------------- +class GetAndSetLNode : public LoadStoreNode { +public: + GetAndSetLNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at ) : LoadStoreNode(c, mem, adr, val, at, TypeLong::LONG, 4) { } + virtual int Opcode() const; +}; + +//------------------------------GetAndSetPNode--------------------------- +class GetAndSetPNode : public LoadStoreNode { +public: + GetAndSetPNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* t ) : LoadStoreNode(c, mem, adr, val, at, t, 4) { } + virtual int Opcode() const; +}; + +//------------------------------GetAndSetNNode--------------------------- +class GetAndSetNNode : public LoadStoreNode { +public: + GetAndSetNNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* t ) : LoadStoreNode(c, mem, adr, val, at, t, 4) { } virtual int Opcode() const; }; diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/output.cpp --- a/src/share/vm/opto/output.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/output.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -1869,7 +1869,9 @@ if (!do_scheduling()) return; - assert(MaxVectorSize <= 8, "scheduling code works only with pairs"); + // Scheduling code works only with pairs (8 bytes) maximum. + if (max_vector_size() > 8) + return; NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); ) diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/superword.cpp --- a/src/share/vm/opto/superword.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/superword.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -179,6 +179,7 @@ for (int i = 0; i < _block.length(); i++) { Node* n = _block.at(i); if (n->is_Mem() && !n->is_LoadStore() && in_bb(n) && + n->Opcode() != Op_LoadUI2L && is_java_primitive(n->as_Mem()->memory_type())) { int align = memory_alignment(n->as_Mem(), 0); if (align != bottom_align) { @@ -481,12 +482,19 @@ int vw = vector_width_in_bytes(mem_ref); assert(vw > 1, "sanity"); int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1; - int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw; + // At least one iteration is executed in pre-loop by default. As result + // several iterations are needed to align memory operations in main-loop even + // if offset is 0. + int iv_adjustment_in_bytes = (stride_sign * vw - (offset % vw)); + int elt_size = align_to_ref_p.memory_size(); + assert(((ABS(iv_adjustment_in_bytes) % elt_size) == 0), + err_msg_res("(%d) should be divisible by (%d)", iv_adjustment_in_bytes, elt_size)); + int iv_adjustment = iv_adjustment_in_bytes/elt_size; #ifndef PRODUCT if (TraceSuperWord) tty->print_cr("\noffset = %d iv_adjust = %d elt_size = %d scale = %d iv_stride = %d vect_size %d", - offset, iv_adjustment, align_to_ref_p.memory_size(), scale, iv_stride(), vw); + offset, iv_adjustment, elt_size, scale, iv_stride(), vw); #endif return iv_adjustment; } @@ -1350,11 +1358,14 @@ insert_extracts(_packset.at(i)); } + Compile* C = _phase->C; + uint max_vlen_in_bytes = 0; for (int i = 0; i < _block.length(); i++) { Node* n = _block.at(i); Node_List* p = my_pack(n); if (p && n == executed_last(p)) { uint vlen = p->size(); + uint vlen_in_bytes = 0; Node* vn = NULL; Node* low_adr = p->at(0); Node* first = executed_first(p); @@ -1364,7 +1375,8 @@ Node* mem = first->in(MemNode::Memory); Node* adr = low_adr->in(MemNode::Address); const TypePtr* atyp = n->adr_type(); - vn = LoadVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n)); + vn = LoadVectorNode::make(C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n)); + vlen_in_bytes = vn->as_LoadVector()->memory_size(); } else if (n->is_Store()) { // Promote value to be stored to vector Node* val = vector_opd(p, MemNode::ValueIn); @@ -1372,7 +1384,8 @@ Node* mem = first->in(MemNode::Memory); Node* adr = low_adr->in(MemNode::Address); const TypePtr* atyp = n->adr_type(); - vn = StoreVectorNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen); + vn = StoreVectorNode::make(C, opc, ctl, mem, adr, atyp, val, vlen); + vlen_in_bytes = vn->as_StoreVector()->memory_size(); } else if (n->req() == 3) { // Promote operands to vector Node* in1 = vector_opd(p, 1); @@ -1383,7 +1396,8 @@ in1 = in2; in2 = tmp; } - vn = VectorNode::make(_phase->C, opc, in1, in2, vlen, velt_basic_type(n)); + vn = VectorNode::make(C, opc, in1, in2, vlen, velt_basic_type(n)); + vlen_in_bytes = vn->as_Vector()->length_in_bytes(); } else { ShouldNotReachHere(); } @@ -1395,6 +1409,10 @@ _igvn.replace_node(pm, vn); } _igvn._worklist.push(vn); + + if (vlen_in_bytes > max_vlen_in_bytes) { + max_vlen_in_bytes = vlen_in_bytes; + } #ifdef ASSERT if (TraceNewVectors) { tty->print("new Vector node: "); @@ -1403,6 +1421,7 @@ #endif } } + C->set_max_vector_size(max_vlen_in_bytes); } //------------------------------vector_opd--------------------------- @@ -1439,7 +1458,7 @@ } assert(opd->bottom_type()->isa_int(), "int type only"); // Move non constant shift count into XMM register. - cnt = new (_phase->C, 2) MoveI2FNode(cnt); + cnt = new (C, 2) MoveI2FNode(cnt); } if (cnt != opd) { _phase->_igvn.register_new_node_with_optimizer(cnt); @@ -1480,10 +1499,10 @@ _phase->_igvn.register_new_node_with_optimizer(pk); _phase->set_ctrl(pk, _phase->get_ctrl(opd)); #ifdef ASSERT - if (TraceNewVectors) { - tty->print("new Vector node: "); - pk->dump(); - } + if (TraceNewVectors) { + tty->print("new Vector node: "); + pk->dump(); + } #endif return pk; } @@ -1805,7 +1824,7 @@ //------------------------------memory_alignment--------------------------- // Alignment within a vector memory reference -int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) { +int SuperWord::memory_alignment(MemNode* s, int iv_adjust) { SWPointer p(s, this); if (!p.valid()) { return bottom_align; @@ -1815,7 +1834,7 @@ return bottom_align; // No vectors for this type } int offset = p.offset_in_bytes(); - offset += iv_adjust_in_bytes; + offset += iv_adjust*p.memory_size(); int off_rem = offset % vw; int off_mod = off_rem >= 0 ? off_rem : off_rem + vw; return off_mod; @@ -1838,7 +1857,7 @@ bool SuperWord::same_velt_type(Node* n1, Node* n2) { const Type* vt1 = velt_type(n1); - const Type* vt2 = velt_type(n1); + const Type* vt2 = velt_type(n2); if (vt1->basic_type() == T_INT && vt2->basic_type() == T_INT) { // Compare vectors element sizes for integer types. return data_size(n1) == data_size(n2); diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/opto/superword.hpp --- a/src/share/vm/opto/superword.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/opto/superword.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -400,7 +400,7 @@ // Return the node executed last in pack p. Node* executed_last(Node_List* p); // Alignment within a vector memory reference - int memory_alignment(MemNode* s, int iv_adjust_in_bytes); + int memory_alignment(MemNode* s, int iv_adjust); // (Start, end] half-open range defining which operands are vector void vector_opd_range(Node* n, uint* start, uint* end); // Smallest type containing range of values diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/runtime/sharedRuntime.cpp --- a/src/share/vm/runtime/sharedRuntime.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/runtime/sharedRuntime.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -88,6 +88,7 @@ RuntimeStub* SharedRuntime::_resolve_static_call_blob; DeoptimizationBlob* SharedRuntime::_deopt_blob; +SafepointBlob* SharedRuntime::_polling_page_vectors_safepoint_handler_blob; SafepointBlob* SharedRuntime::_polling_page_safepoint_handler_blob; SafepointBlob* SharedRuntime::_polling_page_return_handler_blob; @@ -104,8 +105,14 @@ _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C), "resolve_virtual_call"); _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C), "resolve_static_call"); - _polling_page_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), false); - _polling_page_return_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), true); +#ifdef COMPILER2 + // Vectors are generated only by C2. + if (is_wide_vector(MaxVectorSize)) { + _polling_page_vectors_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), POLL_AT_VECTOR_LOOP); + } +#endif // COMPILER2 + _polling_page_safepoint_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), POLL_AT_LOOP); + _polling_page_return_handler_blob = generate_handler_blob(CAST_FROM_FN_PTR(address, SafepointSynchronize::handle_polling_page_exception), POLL_AT_RETURN); generate_deopt_blob(); @@ -535,10 +542,15 @@ "Only polling locations are used for safepoint"); bool at_poll_return = ((nmethod*)cb)->is_at_poll_return(pc); + bool has_wide_vectors = ((nmethod*)cb)->has_wide_vectors(); if (at_poll_return) { assert(SharedRuntime::polling_page_return_handler_blob() != NULL, "polling page return stub not created yet"); stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + } else if (has_wide_vectors) { + assert(SharedRuntime::polling_page_vectors_safepoint_handler_blob() != NULL, + "polling page vectors safepoint stub not created yet"); + stub = SharedRuntime::polling_page_vectors_safepoint_handler_blob()->entry_point(); } else { assert(SharedRuntime::polling_page_safepoint_handler_blob() != NULL, "polling page safepoint stub not created yet"); @@ -1618,6 +1630,31 @@ return callee_method; } +#ifdef ASSERT +void SharedRuntime::check_member_name_argument_is_last_argument(methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + ResourceMark rm; + const int total_args_passed = method->size_of_parameters(); + const VMRegPair* regs_with_member_name = regs; + VMRegPair* regs_without_member_name = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed - 1); + + const int member_arg_pos = total_args_passed - 1; + assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob"); + assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object"); + + const bool is_outgoing = method->is_method_handle_intrinsic(); + int comp_args_on_stack = java_calling_convention(sig_bt, regs_without_member_name, total_args_passed - 1, is_outgoing); + + for (int i = 0; i < member_arg_pos; i++) { + VMReg a = regs_with_member_name[i].first(); + VMReg b = regs_without_member_name[i].first(); + assert(a->value() == b->value(), err_msg_res("register allocation mismatch: a=%d, b=%d", a->value(), b->value())); + } + assert(regs_with_member_name[member_arg_pos].first()->is_valid(), "bad member arg"); +} +#endif + // --------------------------------------------------------------------------- // We are calling the interpreter via a c2i. Normally this would mean that // we were called by a compiled method. However we could have lost a race @@ -2423,6 +2460,7 @@ #ifndef PRODUCT // debugging suppport if (PrintAdapterHandlers || PrintStubCode) { + ttyLocker ttyl; entry->print_adapter_on(tty); tty->print_cr("i2c argument handler #%d for: %s %s (%d bytes generated)", _adapters->number_of_entries(), (method->is_static() ? "static" : "receiver"), @@ -2430,8 +2468,10 @@ tty->print_cr("c2i argument handler starts at %p",entry->get_c2i_entry()); if (Verbose || PrintStubCode) { address first_pc = entry->base_address(); - if (first_pc != NULL) + if (first_pc != NULL) { Disassembler::decode(first_pc, first_pc + insts_size); + tty->cr(); + } } } #endif @@ -2546,10 +2586,10 @@ MacroAssembler _masm(&buffer); // Fill in the signature array, for the calling-convention call. - int total_args_passed = method->size_of_parameters(); - - BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType,total_args_passed); - VMRegPair* regs = NEW_RESOURCE_ARRAY(VMRegPair,total_args_passed); + const int total_args_passed = method->size_of_parameters(); + + BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); + VMRegPair* regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); int i=0; if( !method->is_static() ) // Pass in receiver first sig_bt[i++] = T_OBJECT; @@ -2559,7 +2599,7 @@ if( ss.type() == T_LONG || ss.type() == T_DOUBLE ) sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots } - assert( i==total_args_passed, "" ); + assert(i == total_args_passed, ""); BasicType ret_type = ss.type(); // Now get the compiled-Java layout as input (or output) arguments. @@ -2572,9 +2612,8 @@ nm = SharedRuntime::generate_native_wrapper(&_masm, method, compile_id, - total_args_passed, - comp_args_on_stack, - sig_bt,regs, + sig_bt, + regs, ret_type); } } diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/runtime/sharedRuntime.hpp --- a/src/share/vm/runtime/sharedRuntime.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/runtime/sharedRuntime.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -62,6 +62,7 @@ static DeoptimizationBlob* _deopt_blob; + static SafepointBlob* _polling_page_vectors_safepoint_handler_blob; static SafepointBlob* _polling_page_safepoint_handler_blob; static SafepointBlob* _polling_page_return_handler_blob; @@ -75,7 +76,8 @@ #endif // !PRODUCT private: - static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return); + enum { POLL_AT_RETURN, POLL_AT_LOOP, POLL_AT_VECTOR_LOOP }; + static SafepointBlob* generate_handler_blob(address call_ptr, int poll_type); static RuntimeStub* generate_resolve_blob(address destination, const char* name); public: @@ -223,6 +225,7 @@ static SafepointBlob* polling_page_return_handler_blob() { return _polling_page_return_handler_blob; } static SafepointBlob* polling_page_safepoint_handler_blob() { return _polling_page_safepoint_handler_blob; } + static SafepointBlob* polling_page_vectors_safepoint_handler_blob() { return _polling_page_vectors_safepoint_handler_blob; } // Counters #ifndef PRODUCT @@ -345,7 +348,11 @@ // the bottom of the frame the first 16 words will be skipped and SharedInfo::stack0 // will be just above it. ( // return value is the maximum number of VMReg stack slots the convention will use. - static int java_calling_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed, int is_outgoing); + static int java_calling_convention(const BasicType* sig_bt, VMRegPair* regs, int total_args_passed, int is_outgoing); + + static void check_member_name_argument_is_last_argument(methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) NOT_DEBUG_RETURN; // Ditto except for calling C static int c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed); @@ -412,6 +419,10 @@ // when an interrupt occurs. static uint out_preserve_stack_slots(); + // Is vector's size (in bytes) bigger than a size saved by default? + // For example, on x86 16 bytes XMM registers are saved by default. + static bool is_wide_vector(int size); + // Save and restore a native result static void save_native_result(MacroAssembler *_masm, BasicType ret_type, int frame_slots ); static void restore_native_result(MacroAssembler *_masm, BasicType ret_type, int frame_slots ); @@ -425,13 +436,11 @@ // The wrapper may contain special-case code if the given method // is a JNI critical method, or a compiled method handle adapter, // such as _invokeBasic, _linkToVirtual, etc. - static nmethod *generate_native_wrapper(MacroAssembler* masm, + static nmethod* generate_native_wrapper(MacroAssembler* masm, methodHandle method, int compile_id, - int total_args_passed, - int max_arg, - BasicType *sig_bt, - VMRegPair *regs, + BasicType* sig_bt, + VMRegPair* regs, BasicType ret_type ); // Block before entering a JNI critical method diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/runtime/vm_version.cpp --- a/src/share/vm/runtime/vm_version.cpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/runtime/vm_version.cpp Mon Sep 24 14:46:06 2012 -0700 @@ -45,6 +45,10 @@ const char* Abstract_VM_Version::_s_vm_release = Abstract_VM_Version::vm_release(); const char* Abstract_VM_Version::_s_internal_vm_info_string = Abstract_VM_Version::internal_vm_info_string(); bool Abstract_VM_Version::_supports_cx8 = false; +bool Abstract_VM_Version::_supports_atomic_getset4 = false; +bool Abstract_VM_Version::_supports_atomic_getset8 = false; +bool Abstract_VM_Version::_supports_atomic_getadd4 = false; +bool Abstract_VM_Version::_supports_atomic_getadd8 = false; unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U; int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0; diff -r 04ed664b7e30 -r c92f43386117 src/share/vm/runtime/vm_version.hpp --- a/src/share/vm/runtime/vm_version.hpp Fri Sep 21 14:39:56 2012 -0700 +++ b/src/share/vm/runtime/vm_version.hpp Mon Sep 24 14:46:06 2012 -0700 @@ -37,6 +37,10 @@ static const char* _s_internal_vm_info_string; // These are set by machine-dependent initializations static bool _supports_cx8; + static bool _supports_atomic_getset4; + static bool _supports_atomic_getset8; + static bool _supports_atomic_getadd4; + static bool _supports_atomic_getadd8; static unsigned int _logical_processors_per_package; static int _vm_major_version; static int _vm_minor_version; @@ -75,6 +79,13 @@ // does HW support an 8-byte compare-exchange operation? static bool supports_cx8() {return _supports_cx8;} + // does HW support atomic get-and-set or atomic get-and-add? Used + // to guide intrinsification decisions for Unsafe atomic ops + static bool supports_atomic_getset4() {return _supports_atomic_getset4;} + static bool supports_atomic_getset8() {return _supports_atomic_getset8;} + static bool supports_atomic_getadd4() {return _supports_atomic_getadd4;} + static bool supports_atomic_getadd8() {return _supports_atomic_getadd8;} + static unsigned int logical_processors_per_package() { return _logical_processors_per_package; } diff -r 04ed664b7e30 -r c92f43386117 test/compiler/7196199/Test7196199.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/7196199/Test7196199.java Mon Sep 24 14:46:06 2012 -0700 @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7196199 + * @summary java/text/Bidi/Bug6665028.java failed: Bidi run count incorrect + * + * @run main/othervm/timeout=400 -Xmx32m -Xbatch -XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation -XX:CompileCommand=exclude,Test7196199.test -XX:+SafepointALot -XX:GuaranteedSafepointInterval=100 Test7196199 + */ + + +public class Test7196199 { + private static final int ARRLEN = 97; + private static final int ITERS = 5000; + private static final int INI_ITERS = 1000; + private static final int SFP_ITERS = 10000; + private static final float SFP_ITERS_F = 10000.f; + private static final float VALUE = 15.f; + public static void main(String args[]) { + int errn = test(); + if (errn > 0) { + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); + } + System.out.println("PASSED"); + } + + static int test() { + float[] a0 = new float[ARRLEN]; + float[] a1 = new float[ARRLEN]; + // Initialize + for (int i=0; i 0) + return errn; + + System.out.println("Time"); + long start, end; + + start = System.currentTimeMillis(); + for (int i=0; i