# HG changeset patch
# User trims
# Date 1261565851 28800
# Node ID 9749fbc4859bd2afa2d435c532ac21d8143d38d3
# Parent 920875ae1277b3cf3703bc7dc61e7c2e15d9bcf5
# Parent 7ac7d558e8958df2f908432ec72b5e9e247e3871
Merge

diff -r 920875ae1277 -r 9749fbc4859b src/cpu/x86/vm/assembler_x86.cpp
--- a/src/cpu/x86/vm/assembler_x86.cpp Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/assembler_x86.cpp Wed Dec 23 02:57:31 2009 -0800
@@ -7666,7 +7666,7 @@
 #ifdef ASSERT
     Label L;
-    testl(tmp, tmp);
+    testptr(tmp, tmp);
     jccb(Assembler::notZero, L);
     hlt();
     bind(L);
diff -r 920875ae1277 -r 9749fbc4859b src/cpu/x86/vm/interp_masm_x86_32.cpp
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp Wed Dec 23 02:57:31 2009 -0800
@@ -196,6 +196,9 @@
   } else {
     assert(EnableInvokeDynamic, "giant index used only for EnableInvokeDynamic");
     movl(reg, Address(rsi, bcp_offset));
+    // Check if the secondary index definition is still ~x, otherwise
+    // we have to change the following assembler code to calculate the
+    // plain index.
     assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line");
     notl(reg);  // convert to plain index
   }
diff -r 920875ae1277 -r 9749fbc4859b src/cpu/x86/vm/interp_masm_x86_64.cpp
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp Tue Dec 22 16:35:08 2009 -0800
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp Wed Dec 23 02:57:31 2009 -0800
@@ -185,12 +185,30 @@
 }
+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
+                                                       int bcp_offset,
+                                                       bool giant_index) {
+  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+  if (!giant_index) {
+    load_unsigned_short(index, Address(r13, bcp_offset));
+  } else {
+    assert(EnableInvokeDynamic, "giant index used only for EnableInvokeDynamic");
+    movl(index, Address(r13, bcp_offset));
+    // Check if the secondary index definition is still ~x, otherwise
+    // we have to change the following assembler code to calculate the
+    // plain index.
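The decode_secondary_index(~123) == 123 guard above pins down the encoding that the notl(reg) trick relies on: an invokedynamic call site stores its constant-pool-cache index in "secondary", bitwise-complemented form, so decoding is a single NOT. A minimal standalone sketch of that invariant, in plain C++ with illustrative stand-ins rather than the real HotSpot declarations:

    #include <cassert>
    #include <cstdint>

    // Illustrative stand-ins for the encode/decode helpers the assert refers to.
    static inline int32_t encode_secondary_index(int32_t plain) { return ~plain; }
    static inline int32_t decode_secondary_index(int32_t coded) { return ~coded; }

    int main() {
      // Mirrors the guard in the patch: decode_secondary_index(~123) == 123,
      // which is what lets the interpreter recover the plain index with notl(reg).
      assert(decode_secondary_index(~123) == 123);
      assert(decode_secondary_index(encode_secondary_index(456)) == 456);
      return 0;
    }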
+ assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line"); + notl(index); // convert to plain index + } +} + + void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index, - int bcp_offset) { - assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + int bcp_offset, + bool giant_index) { assert(cache != index, "must use different registers"); - load_unsigned_short(index, Address(r13, bcp_offset)); + get_cache_index_at_bcp(index, bcp_offset, giant_index); movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize)); assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); // convert from field index to ConstantPoolCacheEntry index @@ -200,10 +218,10 @@ void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register tmp, - int bcp_offset) { - assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + int bcp_offset, + bool giant_index) { assert(cache != tmp, "must use different register"); - load_unsigned_short(tmp, Address(r13, bcp_offset)); + get_cache_index_at_bcp(tmp, bcp_offset, giant_index); assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); // convert from field index to ConstantPoolCacheEntry index // and from word offset to byte offset @@ -1236,7 +1254,8 @@ void InterpreterMacroAssembler::profile_virtual_call(Register receiver, Register mdp, - Register reg2) { + Register reg2, + bool receiver_can_be_null) { if (ProfileInterpreter) { Label profile_continue; @@ -1246,8 +1265,15 @@ // We are making a call. Increment the count. increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + Label skip_receiver_profile; + if (receiver_can_be_null) { + testptr(receiver, receiver); + jcc(Assembler::zero, skip_receiver_profile); + } + // Record the receiver type. record_klass_in_profile(receiver, mdp, reg2); + bind(skip_receiver_profile); // The method data pointer needs to be updated to reflect the new target. update_mdp_by_constant(mdp, diff -r 920875ae1277 -r 9749fbc4859b src/cpu/x86/vm/interp_masm_x86_64.hpp --- a/src/cpu/x86/vm/interp_masm_x86_64.hpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/cpu/x86/vm/interp_masm_x86_64.hpp Wed Dec 23 02:57:31 2009 -0800 @@ -95,9 +95,10 @@ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); void get_cache_and_index_at_bcp(Register cache, Register index, - int bcp_offset); + int bcp_offset, bool giant_index = false); void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, - int bcp_offset); + int bcp_offset, bool giant_index = false); + void get_cache_index_at_bcp(Register index, int bcp_offset, bool giant_index = false); void pop_ptr(Register r = rax); @@ -236,7 +237,8 @@ void profile_call(Register mdp); void profile_final_call(Register mdp); void profile_virtual_call(Register receiver, Register mdp, - Register scratch2); + Register scratch2, + bool receiver_can_be_null = false); void profile_ret(Register return_bci, Register mdp); void profile_null_seen(Register mdp); void profile_typecheck(Register mdp, Register klass, Register scratch); diff -r 920875ae1277 -r 9749fbc4859b src/cpu/x86/vm/interpreter_x86_64.cpp --- a/src/cpu/x86/vm/interpreter_x86_64.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/cpu/x86/vm/interpreter_x86_64.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -277,12 +277,11 @@ address entry_point = __ pc(); // abstract method entry - // remove return address. 
Not really needed, since exception - // handling throws away expression stack - __ pop(rbx); - // adjust stack to what a normal return would do - __ mov(rsp, r13); + // pop return address, reset last_sp to NULL + __ empty_expression_stack(); + __ restore_bcp(); // rsi must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) // throw exception __ call_VM(noreg, CAST_FROM_FN_PTR(address, @@ -300,7 +299,10 @@ if (!EnableMethodHandles) { return generate_abstract_entry(); } - return generate_abstract_entry(); //6815692// + + address entry_point = MethodHandles::generate_method_handle_interpreter_entry(_masm); + + return entry_point; } diff -r 920875ae1277 -r 9749fbc4859b src/cpu/x86/vm/methodHandles_x86.cpp --- a/src/cpu/x86/vm/methodHandles_x86.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/cpu/x86/vm/methodHandles_x86.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -448,7 +448,7 @@ rbx_index, Address::times_ptr, base + vtableEntry::method_offset_in_bytes()); Register rbx_method = rbx_temp; - __ movl(rbx_method, vtable_entry_addr); + __ movptr(rbx_method, vtable_entry_addr); __ verify_oop(rbx_method); __ jmp(rbx_method_fie); diff -r 920875ae1277 -r 9749fbc4859b src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -2935,6 +2935,16 @@ // arraycopy stubs used by compilers generate_arraycopy_stubs(); + + // generic method handle stubs + if (EnableMethodHandles && SystemDictionary::MethodHandle_klass() != NULL) { + for (MethodHandles::EntryKind ek = MethodHandles::_EK_FIRST; + ek < MethodHandles::_EK_LIMIT; + ek = MethodHandles::EntryKind(1 + (int)ek)) { + StubCodeMark mark(this, "MethodHandle", MethodHandles::entry_name(ek)); + MethodHandles::generate_method_handle_stub(_masm, ek); + } + } } public: diff -r 920875ae1277 -r 9749fbc4859b src/cpu/x86/vm/templateInterpreter_x86_64.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -100,21 +100,26 @@ return entry; } -// Arguments are: required type in rarg1, failing object (or NULL) in rarg2 +// Arguments are: required type at TOS+8, failing object (or NULL) at TOS+4. address TemplateInterpreterGenerator::generate_WrongMethodType_handler() { address entry = __ pc(); __ pop(c_rarg2); // failing object is at TOS __ pop(c_rarg1); // required type is at TOS+8 - // expression stack must be empty before entering the VM if an - // exception happened + __ verify_oop(c_rarg1); + __ verify_oop(c_rarg2); + + // Various method handle types use interpreter registers as temps. + __ restore_bcp(); + __ restore_locals(); + + // Expression stack must be empty before entering the VM for an exception. 
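The movl -> movptr correction in methodHandles_x86.cpp above (like the testl -> testptr change at the top of this patch) matters because a vtable slot holds an 8-byte pointer on x86_64, and a 32-bit load silently keeps only the low half. A hedged, self-contained illustration of that failure mode; uint64_t stands in for the 64-bit register and the example assumes a little-endian target:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      // Pretend this is a 64-bit method pointer stored in a vtable slot.
      uint64_t slot = 0x00007f1234567890ULL;

      // movptr-style load: full pointer width.
      uint64_t full;
      std::memcpy(&full, &slot, sizeof(full));

      // movl-style load: only the low 32 bits of the slot (little-endian), zero-extended.
      uint32_t low32;
      std::memcpy(&low32, &slot, sizeof(low32));
      uint64_t truncated = low32;

      assert(full == slot);
      assert(truncated != slot);   // the high bits of the address are lost
      return 0;
    }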
__ empty_expression_stack(); __ call_VM(noreg, CAST_FROM_FN_PTR(address, - InterpreterRuntime:: - throw_WrongMethodTypeException), + InterpreterRuntime::throw_WrongMethodTypeException), // pass required type, failing object (or NULL) c_rarg1, c_rarg2); return entry; @@ -182,15 +187,29 @@ __ restore_bcp(); __ restore_locals(); - __ get_cache_and_index_at_bcp(rbx, rcx, 1); + Label L_got_cache, L_giant_index; + if (EnableInvokeDynamic) { + __ cmpb(Address(r13, 0), Bytecodes::_invokedynamic); + __ jcc(Assembler::equal, L_giant_index); + } + __ get_cache_and_index_at_bcp(rbx, rcx, 1, false); + __ bind(L_got_cache); __ movl(rbx, Address(rbx, rcx, - Address::times_8, + Address::times_ptr, in_bytes(constantPoolCacheOopDesc::base_offset()) + 3 * wordSize)); __ andl(rbx, 0xFF); if (TaggedStackInterpreter) __ shll(rbx, 1); // 2 slots per parameter. __ lea(rsp, Address(rsp, rbx, Address::times_8)); __ dispatch_next(state, step); + + // out of the main line of code... + if (EnableInvokeDynamic) { + __ bind(L_giant_index); + __ get_cache_and_index_at_bcp(rbx, rcx, 1, true); + __ jmp(L_got_cache); + } + return entry; } diff -r 920875ae1277 -r 9749fbc4859b src/cpu/x86/vm/templateTable_x86_32.cpp --- a/src/cpu/x86/vm/templateTable_x86_32.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/cpu/x86/vm/templateTable_x86_32.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -3146,7 +3146,6 @@ __ profile_call(rsi); } - Label handle_unlinked_site; __ movptr(rcx, Address(rax, __ delayed_value(java_dyn_CallSite::target_offset_in_bytes, rcx))); __ null_check(rcx); __ prepare_to_jump_from_interpreted(); diff -r 920875ae1277 -r 9749fbc4859b src/cpu/x86/vm/templateTable_x86_64.cpp --- a/src/cpu/x86/vm/templateTable_x86_64.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/cpu/x86/vm/templateTable_x86_64.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -203,18 +203,15 @@ __ jcc(Assembler::notEqual, fast_patch); __ get_method(scratch); // Let breakpoint table handling rewrite to quicker bytecode - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::set_original_bytecode_at), - scratch, r13, bc); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, r13, bc); #ifndef ASSERT __ jmpb(patch_done); +#else + __ jmp(patch_done); +#endif __ bind(fast_patch); } -#else - __ jmp(patch_done); - __ bind(fast_patch); - } +#ifdef ASSERT Label okay; __ load_unsigned_byte(scratch, at_bcp(0)); __ cmpl(scratch, (int) Bytecodes::java_code(bytecode)); @@ -2054,26 +2051,28 @@ } } -void TemplateTable::resolve_cache_and_index(int byte_no, - Register Rcache, - Register index) { +void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register index) { assert(byte_no == 1 || byte_no == 2, "byte_no out of range"); + bool is_invokedynamic = (bytecode() == Bytecodes::_invokedynamic); const Register temp = rbx; assert_different_registers(Rcache, index, temp); const int shift_count = (1 + byte_no) * BitsPerByte; Label resolved; - __ get_cache_and_index_at_bcp(Rcache, index, 1); - __ movl(temp, Address(Rcache, - index, Address::times_8, - constantPoolCacheOopDesc::base_offset() + - ConstantPoolCacheEntry::indices_offset())); - __ shrl(temp, shift_count); - // have we resolved this bytecode? 
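The "have we resolved this bytecode?" test, which the rewritten resolve_cache_and_index keeps for the non-invokedynamic case, works because a ConstantPoolCacheEntry packs the resolving bytecodes into the same word as the constant pool index, one byte per byte_no above the low 16 bits; hence the shift by (1 + byte_no) * BitsPerByte and the 0xFF mask. A rough standalone sketch of a layout consistent with that shift and mask (the exact field layout here is an assumption for illustration):

    #include <cassert>
    #include <cstdint>

    const int BitsPerByte = 8;

    // Assumed packing: low 16 bits = constant pool index,
    // byte 2 = bytecode for byte_no 1, byte 3 = bytecode for byte_no 2.
    static uint32_t pack(uint16_t cp_index, uint8_t bytecode1, uint8_t bytecode2) {
      return (uint32_t)cp_index | ((uint32_t)bytecode1 << 16) | ((uint32_t)bytecode2 << 24);
    }

    static uint8_t bytecode_for(uint32_t indices, int byte_no) {
      int shift_count = (1 + byte_no) * BitsPerByte;        // same expression as the patch
      return (uint8_t)((indices >> shift_count) & 0xFF);    // shrl + andl(0xFF)
    }

    int main() {
      uint32_t indices = pack(/*cp_index*/ 42, /*bytecode1*/ 0xB6 /* invokevirtual */, /*bytecode2*/ 0);
      assert(bytecode_for(indices, 1) == 0xB6);  // resolved for byte_no 1
      assert(bytecode_for(indices, 2) == 0);     // not yet resolved for byte_no 2
      return 0;
    }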
- __ andl(temp, 0xFF); - __ cmpl(temp, (int) bytecode()); - __ jcc(Assembler::equal, resolved); + __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic); + if (is_invokedynamic) { + // we are resolved if the f1 field contains a non-null CallSite object + __ cmpptr(Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()), (int32_t) NULL_WORD); + __ jcc(Assembler::notEqual, resolved); + } else { + __ movl(temp, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset())); + __ shrl(temp, shift_count); + // have we resolved this bytecode? + __ andl(temp, 0xFF); + __ cmpl(temp, (int) bytecode()); + __ jcc(Assembler::equal, resolved); + } // resolve first time through address entry; @@ -2090,6 +2089,9 @@ case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); break; + case Bytecodes::_invokedynamic: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); + break; default: ShouldNotReachHere(); break; @@ -2098,7 +2100,7 @@ __ call_VM(noreg, entry, temp); // Update registers with resolved info - __ get_cache_and_index_at_bcp(Rcache, index, 1); + __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic); __ bind(resolved); } @@ -2832,15 +2834,14 @@ ShouldNotReachHere(); } -void TemplateTable::prepare_invoke(Register method, - Register index, - int byte_no, - Bytecodes::Code code) { +void TemplateTable::prepare_invoke(Register method, Register index, int byte_no) { // determine flags + Bytecodes::Code code = bytecode(); const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; const bool is_invokevirtual = code == Bytecodes::_invokevirtual; const bool is_invokespecial = code == Bytecodes::_invokespecial; - const bool load_receiver = code != Bytecodes::_invokestatic; + const bool load_receiver = (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic); const bool receiver_null_check = is_invokespecial; const bool save_flags = is_invokeinterface || is_invokevirtual; // setup registers & access constant pool cache @@ -2858,9 +2859,13 @@ __ movl(recv, flags); __ andl(recv, 0xFF); if (TaggedStackInterpreter) __ shll(recv, 1); // index*2 - __ movptr(recv, Address(rsp, recv, Address::times_8, - -Interpreter::expr_offset_in_bytes(1))); - __ verify_oop(recv); + Address recv_addr(rsp, recv, Address::times_8, -Interpreter::expr_offset_in_bytes(1)); + if (is_invokedynamic) { + __ lea(recv, recv_addr); + } else { + __ movptr(recv, recv_addr); + __ verify_oop(recv); + } } // do null check if needed @@ -2878,10 +2883,14 @@ ConstantPoolCacheEntry::verify_tosBits(); // load return address { - ExternalAddress return_5((address)Interpreter::return_5_addrs_by_index_table()); - ExternalAddress return_3((address)Interpreter::return_3_addrs_by_index_table()); - __ lea(rscratch1, (is_invokeinterface ? 
return_5 : return_3)); - __ movptr(flags, Address(rscratch1, flags, Address::times_8)); + address table_addr; + if (is_invokeinterface || is_invokedynamic) + table_addr = (address)Interpreter::return_5_addrs_by_index_table(); + else + table_addr = (address)Interpreter::return_3_addrs_by_index_table(); + ExternalAddress table(table_addr); + __ lea(rscratch1, table); + __ movptr(flags, Address(rscratch1, flags, Address::times_ptr)); } // push return address @@ -2947,7 +2956,7 @@ void TemplateTable::invokevirtual(int byte_no) { transition(vtos, vtos); - prepare_invoke(rbx, noreg, byte_no, bytecode()); + prepare_invoke(rbx, noreg, byte_no); // rbx: index // rcx: receiver @@ -2959,7 +2968,7 @@ void TemplateTable::invokespecial(int byte_no) { transition(vtos, vtos); - prepare_invoke(rbx, noreg, byte_no, bytecode()); + prepare_invoke(rbx, noreg, byte_no); // do the call __ verify_oop(rbx); __ profile_call(rax); @@ -2969,7 +2978,7 @@ void TemplateTable::invokestatic(int byte_no) { transition(vtos, vtos); - prepare_invoke(rbx, noreg, byte_no, bytecode()); + prepare_invoke(rbx, noreg, byte_no); // do the call __ verify_oop(rbx); __ profile_call(rax); @@ -2983,7 +2992,7 @@ void TemplateTable::invokeinterface(int byte_no) { transition(vtos, vtos); - prepare_invoke(rax, rbx, byte_no, bytecode()); + prepare_invoke(rax, rbx, byte_no); // rax: Interface // rbx: index @@ -3072,7 +3081,24 @@ return; } - __ stop("invokedynamic NYI");//6815692// + prepare_invoke(rax, rbx, byte_no); + + // rax: CallSite object (f1) + // rbx: unused (f2) + // rcx: receiver address + // rdx: flags (unused) + + if (ProfileInterpreter) { + Label L; + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(r13); + } + + __ movptr(rcx, Address(rax, __ delayed_value(java_dyn_CallSite::target_offset_in_bytes, rcx))); + __ null_check(rcx); + __ prepare_to_jump_from_interpreted(); + __ jump_to_method_handle_entry(rcx, rdx); } diff -r 920875ae1277 -r 9749fbc4859b src/cpu/x86/vm/templateTable_x86_64.hpp --- a/src/cpu/x86/vm/templateTable_x86_64.hpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/cpu/x86/vm/templateTable_x86_64.hpp Wed Dec 23 02:57:31 2009 -0800 @@ -22,8 +22,7 @@ * */ - static void prepare_invoke(Register method, Register index, int byte_no, - Bytecodes::Code code); + static void prepare_invoke(Register method, Register index, int byte_no); static void invokevirtual_helper(Register index, Register recv, Register flags); static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/classfile/classFileParser.cpp --- a/src/share/vm/classfile/classFileParser.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/classfile/classFileParser.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -2511,23 +2511,12 @@ fac_ptr->nonstatic_byte_count -= 1; (*fields_ptr)->ushort_at_put(i + instanceKlass::signature_index_offset, word_sig_index); - if (wordSize == jintSize) { - fac_ptr->nonstatic_word_count += 1; - } else { - fac_ptr->nonstatic_double_count += 1; - } - - FieldAllocationType atype = (FieldAllocationType) (*fields_ptr)->ushort_at(i+4); + fac_ptr->nonstatic_word_count += 1; + + FieldAllocationType atype = (FieldAllocationType) (*fields_ptr)->ushort_at(i + instanceKlass::low_offset); assert(atype == NONSTATIC_BYTE, ""); FieldAllocationType new_atype = NONSTATIC_WORD; - if (wordSize > jintSize) { - if (Universe::field_type_should_be_aligned(T_LONG)) { - atype = NONSTATIC_ALIGNED_DOUBLE; - } else { - atype = 
NONSTATIC_DOUBLE; - } - } - (*fields_ptr)->ushort_at_put(i+4, new_atype); + (*fields_ptr)->ushort_at_put(i + instanceKlass::low_offset, new_atype); found_vmentry = true; break; @@ -3085,7 +3074,7 @@ int len = fields->length(); for (int i = 0; i < len; i += instanceKlass::next_offset) { int real_offset; - FieldAllocationType atype = (FieldAllocationType) fields->ushort_at(i+4); + FieldAllocationType atype = (FieldAllocationType) fields->ushort_at(i + instanceKlass::low_offset); switch (atype) { case STATIC_OOP: real_offset = next_static_oop_offset; @@ -3173,8 +3162,8 @@ default: ShouldNotReachHere(); } - fields->short_at_put(i+4, extract_low_short_from_int(real_offset) ); - fields->short_at_put(i+5, extract_high_short_from_int(real_offset) ); + fields->short_at_put(i + instanceKlass::low_offset, extract_low_short_from_int(real_offset)); + fields->short_at_put(i + instanceKlass::high_offset, extract_high_short_from_int(real_offset)); } // Size of instances diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/code/nmethod.cpp --- a/src/share/vm/code/nmethod.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/code/nmethod.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -414,9 +414,8 @@ } const char* nmethod::compile_kind() const { - if (method() == NULL) return "unloaded"; - if (is_native_method()) return "c2n"; if (is_osr_method()) return "osr"; + if (method() != NULL && is_native_method()) return "c2n"; return NULL; } @@ -1127,6 +1126,9 @@ } flags.state = unloaded; + // Log the unloading. + log_state_change(); + // The methodOop is gone at this point assert(_method == NULL, "Tautology"); @@ -1137,8 +1139,6 @@ void nmethod::invalidate_osr_method() { assert(_entry_bci != InvocationEntryBci, "wrong kind of nmethod"); - if (_entry_bci != InvalidOSREntryBci) - inc_decompile_count(); // Remove from list of active nmethods if (method() != NULL) instanceKlass::cast(method()->method_holder())->remove_osr_nmethod(this); @@ -1146,59 +1146,63 @@ _entry_bci = InvalidOSREntryBci; } -void nmethod::log_state_change(int state) const { +void nmethod::log_state_change() const { if (LogCompilation) { if (xtty != NULL) { ttyLocker ttyl; // keep the following output all in one block - xtty->begin_elem("make_not_entrant %sthread='" UINTX_FORMAT "'", - (state == zombie ? "zombie='1' " : ""), - os::current_thread_id()); + if (flags.state == unloaded) { + xtty->begin_elem("make_unloaded thread='" UINTX_FORMAT "'", + os::current_thread_id()); + } else { + xtty->begin_elem("make_not_entrant thread='" UINTX_FORMAT "'%s", + os::current_thread_id(), + (flags.state == zombie ? " zombie='1'" : "")); + } log_identity(xtty); xtty->stamp(); xtty->end_elem(); } } - if (PrintCompilation) { - print_on(tty, state == zombie ? "made zombie " : "made not entrant "); + if (PrintCompilation && flags.state != unloaded) { + print_on(tty, flags.state == zombie ? "made zombie " : "made not entrant "); tty->cr(); } } // Common functionality for both make_not_entrant and make_zombie -void nmethod::make_not_entrant_or_zombie(int state) { +bool nmethod::make_not_entrant_or_zombie(int state) { assert(state == zombie || state == not_entrant, "must be zombie or not_entrant"); - // Code for an on-stack-replacement nmethod is removed when a class gets unloaded. - // They never become zombie/non-entrant, so the nmethod sweeper will never remove - // them. Instead the entry_bci is set to InvalidOSREntryBci, so the osr nmethod - // will never be used anymore. 
That the nmethods only gets removed when class unloading - // happens, make life much simpler, since the nmethods are not just going to disappear - // out of the blue. - if (is_osr_method()) { - if (osr_entry_bci() != InvalidOSREntryBci) { - // only log this once - log_state_change(state); - } - invalidate_osr_method(); - return; + // If the method is already zombie there is nothing to do + if (is_zombie()) { + return false; } - // If the method is already zombie or set to the state we want, nothing to do - if (is_zombie() || (state == not_entrant && is_not_entrant())) { - return; - } - - log_state_change(state); - // Make sure the nmethod is not flushed in case of a safepoint in code below. nmethodLocker nml(this); { + // invalidate osr nmethod before acquiring the patching lock since + // they both acquire leaf locks and we don't want a deadlock. + // This logic is equivalent to the logic below for patching the + // verified entry point of regular methods. + if (is_osr_method()) { + // this effectively makes the osr nmethod not entrant + invalidate_osr_method(); + } + // Enter critical section. Does not block for safepoint. MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag); + + if (flags.state == state) { + // another thread already performed this transition so nothing + // to do, but return false to indicate this. + return false; + } + // The caller can be calling the method statically or through an inline // cache call. - if (!is_not_entrant()) { + if (!is_osr_method() && !is_not_entrant()) { NativeJump::patch_verified_entry(entry_point(), verified_entry_point(), SharedRuntime::get_handle_wrong_method_stub()); assert (NativeJump::instruction_size == nmethod::_zombie_instruction_size, ""); @@ -1217,6 +1221,10 @@ // Change state flags.state = state; + + // Log the transition once + log_state_change(); + } // leave critical region under Patching_lock if (state == not_entrant) { @@ -1240,7 +1248,6 @@ // It's a true state change, so mark the method as decompiled. inc_decompile_count(); - // zombie only - if a JVMTI agent has enabled the CompiledMethodUnload event // and it hasn't already been reported for this nmethod then report it now. // (the event may have been reported earilier if the GC marked it for unloading). @@ -1268,7 +1275,7 @@ // Check whether method got unloaded at a safepoint before this, // if so we can skip the flushing steps below - if (method() == NULL) return; + if (method() == NULL) return true; // Remove nmethod from method. // We need to check if both the _code and _from_compiled_code_entry_point @@ -1282,6 +1289,8 @@ HandleMark hm; method()->clear_code(); } + + return true; } diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/code/nmethod.hpp --- a/src/share/vm/code/nmethod.hpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/code/nmethod.hpp Wed Dec 23 02:57:31 2009 -0800 @@ -252,7 +252,9 @@ void* operator new(size_t size, int nmethod_size); const char* reloc_string_for(u_char* begin, u_char* end); - void make_not_entrant_or_zombie(int state); + // Returns true if this thread changed the state of the nmethod or + // false if another thread performed the transition. + bool make_not_entrant_or_zombie(int state); void inc_decompile_count(); // used to check that writes to nmFlags are done consistently. @@ -375,10 +377,12 @@ bool is_zombie() const { return flags.state == zombie; } bool is_unloaded() const { return flags.state == unloaded; } - // Make the nmethod non entrant. The nmethod will continue to be alive. 
- // It is used when an uncommon trap happens. - void make_not_entrant() { make_not_entrant_or_zombie(not_entrant); } - void make_zombie() { make_not_entrant_or_zombie(zombie); } + // Make the nmethod non entrant. The nmethod will continue to be + // alive. It is used when an uncommon trap happens. Returns true + // if this thread changed the state of the nmethod or false if + // another thread performed the transition. + bool make_not_entrant() { return make_not_entrant_or_zombie(not_entrant); } + bool make_zombie() { return make_not_entrant_or_zombie(zombie); } // used by jvmti to track if the unload event has been reported bool unload_reported() { return _unload_reported; } @@ -563,7 +567,7 @@ // Logging void log_identity(xmlStream* log) const; void log_new_nmethod() const; - void log_state_change(int state) const; + void log_state_change() const; // Prints a comment for one native instruction (reloc info, pc desc) void print_code_comment_on(outputStream* st, int column, address begin, address end); diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -42,28 +42,49 @@ _n_periods(0), _threads(NULL), _n_threads(0) { - if (G1ConcRefine) { - _n_threads = (int)thread_num(); - if (_n_threads > 0) { - _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads); - int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); - ConcurrentG1RefineThread *next = NULL; - for (int i = _n_threads - 1; i >= 0; i--) { - ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); - assert(t != NULL, "Conc refine should have been created"); - assert(t->cg1r() == this, "Conc refine thread should refer to this"); - _threads[i] = t; - next = t; - } - } + + // Ergomonically select initial concurrent refinement parameters + if (FLAG_IS_DEFAULT(G1ConcRefineGreenZone)) { + FLAG_SET_DEFAULT(G1ConcRefineGreenZone, MAX2(ParallelGCThreads, 1)); + } + set_green_zone(G1ConcRefineGreenZone); + + if (FLAG_IS_DEFAULT(G1ConcRefineYellowZone)) { + FLAG_SET_DEFAULT(G1ConcRefineYellowZone, green_zone() * 3); + } + set_yellow_zone(MAX2(G1ConcRefineYellowZone, green_zone())); + + if (FLAG_IS_DEFAULT(G1ConcRefineRedZone)) { + FLAG_SET_DEFAULT(G1ConcRefineRedZone, yellow_zone() * 2); + } + set_red_zone(MAX2(G1ConcRefineRedZone, yellow_zone())); + _n_worker_threads = thread_num(); + // We need one extra thread to do the young gen rset size sampling. + _n_threads = _n_worker_threads + 1; + reset_threshold_step(); + + _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads); + int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); + ConcurrentG1RefineThread *next = NULL; + for (int i = _n_threads - 1; i >= 0; i--) { + ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); + assert(t != NULL, "Conc refine should have been created"); + assert(t->cg1r() == this, "Conc refine thread should refer to this"); + _threads[i] = t; + next = t; } } -size_t ConcurrentG1Refine::thread_num() { - if (G1ConcRefine) { - return (G1ParallelRSetThreads > 0) ? 
G1ParallelRSetThreads : ParallelGCThreads; +void ConcurrentG1Refine::reset_threshold_step() { + if (FLAG_IS_DEFAULT(G1ConcRefineThresholdStep)) { + _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1); + } else { + _thread_threshold_step = G1ConcRefineThresholdStep; } - return 0; +} + +int ConcurrentG1Refine::thread_num() { + return MAX2((G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads, 1); } void ConcurrentG1Refine::init() { @@ -123,6 +144,15 @@ } } +void ConcurrentG1Refine::reinitialize_threads() { + reset_threshold_step(); + if (_threads != NULL) { + for (int i = 0; i < _n_threads; i++) { + _threads[i]->initialize(); + } + } +} + ConcurrentG1Refine::~ConcurrentG1Refine() { if (G1ConcRSLogCacheSize > 0) { assert(_card_counts != NULL, "Logic"); @@ -384,4 +414,3 @@ st->cr(); } } - diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Wed Dec 23 02:57:31 2009 -0800 @@ -29,6 +29,31 @@ class ConcurrentG1Refine: public CHeapObj { ConcurrentG1RefineThread** _threads; int _n_threads; + int _n_worker_threads; + /* + * The value of the update buffer queue length falls into one of 3 zones: + * green, yellow, red. If the value is in [0, green) nothing is + * done, the buffers are left unprocessed to enable the caching effect of the + * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement + * threads are gradually activated. In [yellow, red) all threads are + * running. If the length becomes red (max queue length) the mutators start + * processing the buffers. + * + * There are some interesting cases (with G1AdaptiveConcRefine turned off): + * 1) green = yellow = red = 0. In this case the mutator will process all + * buffers. Except for those that are created by the deferred updates + * machinery during a collection. + * 2) green = 0. Means no caching. Can be a good way to minimize the + * amount of time spent updating rsets during a collection. + */ + int _green_zone; + int _yellow_zone; + int _red_zone; + + int _thread_threshold_step; + + // Reset the threshold step value based of the current zone boundaries. + void reset_threshold_step(); // The cache for card refinement. bool _use_cache; @@ -147,6 +172,8 @@ void init(); // Accomplish some initialization that has to wait. 
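Putting the ergonomics above together: by default the green zone tracks the number of GC threads, yellow is three times green, red is twice yellow, and the per-thread activation step divides the yellow-green gap among the worker threads plus one. A standalone recap of those formulas; the helper and struct names are illustrative, and the real code takes the values from the G1ConcRefine* flags when they are set explicitly:

    #include <algorithm>
    #include <cassert>

    struct RefinementZones {
      int green, yellow, red, threshold_step;
    };

    // Mirrors the default selection in the ConcurrentG1Refine constructor
    // and reset_threshold_step(); both arguments are inputs here.
    RefinementZones select_zones(int parallel_gc_threads, int worker_threads) {
      RefinementZones z;
      z.green  = std::max(parallel_gc_threads, 1);   // G1ConcRefineGreenZone default
      z.yellow = std::max(z.green * 3, z.green);     // G1ConcRefineYellowZone default
      z.red    = std::max(z.yellow * 2, z.yellow);   // G1ConcRefineRedZone default
      z.threshold_step = (z.yellow - z.green) / (worker_threads + 1);
      return z;
    }

    int main() {
      RefinementZones z = select_zones(/*ParallelGCThreads*/ 4, /*worker threads*/ 4);
      assert(z.green == 4 && z.yellow == 12 && z.red == 24);
      assert(z.threshold_step == (12 - 4) / 5);   // buffers between successive thread activations
      return 0;
    }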
void stop(); + void reinitialize_threads(); + // Iterate over the conc refine threads void threads_do(ThreadClosure *tc); @@ -178,7 +205,20 @@ void clear_and_record_card_counts(); - static size_t thread_num(); + static int thread_num(); void print_worker_threads_on(outputStream* st) const; + + void set_green_zone(int x) { _green_zone = x; } + void set_yellow_zone(int x) { _yellow_zone = x; } + void set_red_zone(int x) { _red_zone = x; } + + int green_zone() const { return _green_zone; } + int yellow_zone() const { return _yellow_zone; } + int red_zone() const { return _red_zone; } + + int total_thread_num() const { return _n_threads; } + int worker_thread_num() const { return _n_worker_threads; } + + int thread_threshold_step() const { return _thread_threshold_step; } }; diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -25,10 +25,6 @@ #include "incls/_precompiled.incl" #include "incls/_concurrentG1RefineThread.cpp.incl" -// ======= Concurrent Mark Thread ======== - -// The CM thread is created when the G1 garbage collector is used - ConcurrentG1RefineThread:: ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next, int worker_id_offset, int worker_id) : @@ -37,19 +33,42 @@ _worker_id(worker_id), _active(false), _next(next), + _monitor(NULL), _cg1r(cg1r), - _vtime_accum(0.0), - _interval_ms(5.0) + _vtime_accum(0.0) { + + // Each thread has its own monitor. The i-th thread is responsible for signalling + // to thread i+1 if the number of buffers in the queue exceeds a threashold for this + // thread. Monitors are also used to wake up the threads during termination. + // The 0th worker in notified by mutator threads and has a special monitor. + // The last worker is used for young gen rset size sampling. + if (worker_id > 0) { + _monitor = new Monitor(Mutex::nonleaf, "Refinement monitor", true); + } else { + _monitor = DirtyCardQ_CBL_mon; + } + initialize(); create_and_start(); } +void ConcurrentG1RefineThread::initialize() { + if (_worker_id < cg1r()->worker_thread_num()) { + // Current thread activation threshold + _threshold = MIN2(cg1r()->thread_threshold_step() * (_worker_id + 1) + cg1r()->green_zone(), + cg1r()->yellow_zone()); + // A thread deactivates once the number of buffer reached a deactivation threshold + _deactivation_threshold = MAX2(_threshold - cg1r()->thread_threshold_step(), cg1r()->green_zone()); + } else { + set_active(true); + } +} + void ConcurrentG1RefineThread::sample_young_list_rs_lengths() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); G1CollectorPolicy* g1p = g1h->g1_policy(); if (g1p->adaptive_young_list_length()) { int regions_visited = 0; - g1h->young_list_rs_length_sampling_init(); while (g1h->young_list_rs_length_sampling_more()) { g1h->young_list_rs_length_sampling_next(); @@ -70,99 +89,121 @@ } } -void ConcurrentG1RefineThread::run() { - initialize_in_thread(); +void ConcurrentG1RefineThread::run_young_rs_sampling() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); _vtime_start = os::elapsedVTime(); - wait_for_universe_init(); + while(!_should_terminate) { + _sts.join(); + sample_young_list_rs_lengths(); + _sts.leave(); - while (!_should_terminate) { - DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); - // Wait for completed log buffers to exist. 
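From initialize() above, worker i (counting from 0, excluding the young-gen sampling thread) activates once the completed-buffer count exceeds min(green + step * (i + 1), yellow) and deactivates again at max(threshold - step, green). A quick sketch of those two formulas with the zone values passed in explicitly (illustrative helper, not the HotSpot class):

    #include <algorithm>
    #include <cassert>

    // Same arithmetic as ConcurrentG1RefineThread::initialize().
    void worker_thresholds(int worker_id, int green, int yellow, int step,
                           int* threshold, int* deactivation_threshold) {
      *threshold = std::min(step * (worker_id + 1) + green, yellow);
      *deactivation_threshold = std::max(*threshold - step, green);
    }

    int main() {
      int t, d;
      // green = 4, yellow = 12, step = 2: workers activate at 6, 8, 10, 12, ...
      worker_thresholds(/*worker_id*/ 0, 4, 12, 2, &t, &d);
      assert(t == 6 && d == 4);
      worker_thresholds(/*worker_id*/ 3, 4, 12, 2, &t, &d);
      assert(t == 12 && d == 10);   // capped at the yellow zone
      return 0;
    }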
- { - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); - while (((_worker_id == 0 && !dcqs.process_completed_buffers()) || - (_worker_id > 0 && !is_active())) && - !_should_terminate) { - DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag); - } - } - - if (_should_terminate) { - return; + if (os::supports_vtime()) { + _vtime_accum = (os::elapsedVTime() - _vtime_start); + } else { + _vtime_accum = 0.0; } - // Now we take them off (this doesn't hold locks while it applies - // closures.) (If we did a full collection, then we'll do a full - // traversal. - _sts.join(); - int n_logs = 0; - int lower_limit = 0; - double start_vtime_sec; // only used when G1SmoothConcRefine is on - int prev_buffer_num; // only used when G1SmoothConcRefine is on - // This thread activation threshold - int threshold = G1UpdateBufferQueueProcessingThreshold * _worker_id; - // Next thread activation threshold - int next_threshold = threshold + G1UpdateBufferQueueProcessingThreshold; - int deactivation_threshold = MAX2(threshold - G1UpdateBufferQueueProcessingThreshold / 2, 0); + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); + if (_should_terminate) { + break; + } + _monitor->wait(Mutex::_no_safepoint_check_flag, G1ConcRefineServiceInterval); + } +} - if (G1SmoothConcRefine) { - lower_limit = 0; - start_vtime_sec = os::elapsedVTime(); - prev_buffer_num = (int) dcqs.completed_buffers_num(); - } else { - lower_limit = G1UpdateBufferQueueProcessingThreshold / 4; // For now. +void ConcurrentG1RefineThread::wait_for_completed_buffers() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); + while (!_should_terminate && !is_active()) { + _monitor->wait(Mutex::_no_safepoint_check_flag); + } +} + +bool ConcurrentG1RefineThread::is_active() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + return _worker_id > 0 ? 
_active : dcqs.process_completed_buffers(); +} + +void ConcurrentG1RefineThread::activate() { + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); + if (_worker_id > 0) { + if (G1TraceConcurrentRefinement) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + gclog_or_tty->print_cr("G1-Refine-activated worker %d, on threshold %d, current %d", + _worker_id, _threshold, (int)dcqs.completed_buffers_num()); } - while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) { - double end_vtime_sec; - double elapsed_vtime_sec; - int elapsed_vtime_ms; - int curr_buffer_num = (int) dcqs.completed_buffers_num(); + set_active(true); + } else { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.set_process_completed(true); + } + _monitor->notify(); +} - if (G1SmoothConcRefine) { - end_vtime_sec = os::elapsedVTime(); - elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; - elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0); +void ConcurrentG1RefineThread::deactivate() { + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); + if (_worker_id > 0) { + if (G1TraceConcurrentRefinement) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + gclog_or_tty->print_cr("G1-Refine-deactivated worker %d, off threshold %d, current %d", + _worker_id, _deactivation_threshold, (int)dcqs.completed_buffers_num()); + } + set_active(false); + } else { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.set_process_completed(false); + } +} + +void ConcurrentG1RefineThread::run() { + initialize_in_thread(); + wait_for_universe_init(); - if (curr_buffer_num > prev_buffer_num || - curr_buffer_num > next_threshold) { - decreaseInterval(elapsed_vtime_ms); - } else if (curr_buffer_num < prev_buffer_num) { - increaseInterval(elapsed_vtime_ms); - } + if (_worker_id >= cg1r()->worker_thread_num()) { + run_young_rs_sampling(); + terminate(); + } + + _vtime_start = os::elapsedVTime(); + while (!_should_terminate) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + + // Wait for work + wait_for_completed_buffers(); + + if (_should_terminate) { + break; + } + + _sts.join(); + + do { + int curr_buffer_num = (int)dcqs.completed_buffers_num(); + // If the number of the buffers falls down into the yellow zone, + // that means that the transition period after the evacuation pause has ended. + if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cg1r()->yellow_zone()) { + dcqs.set_completed_queue_padding(0); } - if (_worker_id == 0) { - sample_young_list_rs_lengths(); - } else if (curr_buffer_num < deactivation_threshold) { + + if (_worker_id > 0 && curr_buffer_num <= _deactivation_threshold) { // If the number of the buffer has fallen below our threshold // we should deactivate. The predecessor will reactivate this // thread should the number of the buffers cross the threshold again. - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); deactivate(); - if (G1TraceConcurrentRefinement) { - gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id); - } break; } // Check if we need to activate the next thread. 
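The run loop around this point has each worker self-deactivate once the queue length drops to its own deactivation threshold, and (in the lines that follow) wake the next worker in the chain when the length exceeds that worker's activation threshold. A compact single-threaded simulation of that chaining; the real threads block on per-thread Monitors, which are omitted here:

    #include <cassert>
    #include <vector>

    struct Worker { int threshold; int deactivation_threshold; bool active; };

    // One "poll" of the chain for a given completed-buffer count: thread i wakes
    // thread i+1 when the count exceeds i+1's threshold, and parks itself once
    // the count falls to its own deactivation threshold (cf. the do/while loop).
    void poll_chain(std::vector<Worker>& w, int buffers) {
      for (size_t i = 0; i + 1 < w.size(); ++i) {
        if (w[i].active && !w[i + 1].active && buffers > w[i + 1].threshold)
          w[i + 1].active = true;
      }
      for (Worker& wk : w)
        if (wk.active && buffers <= wk.deactivation_threshold)
          wk.active = false;
    }

    int main() {
      std::vector<Worker> w = { {6, 4, false}, {8, 6, false}, {10, 8, false} };
      w[0].active = true;              // worker 0 is driven by the mutators' monitor
      poll_chain(w, 9);                // queue grew past worker 1's threshold
      assert(w[1].active && !w[2].active);
      poll_chain(w, 5);                // queue drained below worker 1's deactivation threshold
      assert(!w[1].active);
      return 0;
    }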
- if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) { - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + if (_next != NULL && !_next->is_active() && curr_buffer_num > _next->_threshold) { _next->activate(); - DirtyCardQ_CBL_mon->notify_all(); - if (G1TraceConcurrentRefinement) { - gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id); - } } + } while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, cg1r()->green_zone())); - if (G1SmoothConcRefine) { - prev_buffer_num = curr_buffer_num; - _sts.leave(); - os::sleep(Thread::current(), (jlong) _interval_ms, false); - _sts.join(); - start_vtime_sec = os::elapsedVTime(); - } - n_logs++; + // We can exit the loop above while being active if there was a yield request. + if (is_active()) { + deactivate(); } + _sts.leave(); if (os::supports_vtime()) { @@ -172,7 +213,6 @@ } } assert(_should_terminate, "just checking"); - terminate(); } @@ -191,8 +231,8 @@ } { - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); - DirtyCardQ_CBL_mon->notify_all(); + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); + _monitor->notify(); } { diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp Wed Dec 23 02:57:31 2009 -0800 @@ -40,42 +40,36 @@ // when the number of the rset update buffer crosses a certain threshold. A successor // would self-deactivate when the number of the buffers falls below the threshold. bool _active; - ConcurrentG1RefineThread * _next; - public: - virtual void run(); - - bool is_active() { return _active; } - void activate() { _active = true; } - void deactivate() { _active = false; } - - private: - ConcurrentG1Refine* _cg1r; - - double _interval_ms; + ConcurrentG1RefineThread* _next; + Monitor* _monitor; + ConcurrentG1Refine* _cg1r; - void decreaseInterval(int processing_time_ms) { - double min_interval_ms = (double) processing_time_ms; - _interval_ms = 0.8 * _interval_ms; - if (_interval_ms < min_interval_ms) - _interval_ms = min_interval_ms; - } - void increaseInterval(int processing_time_ms) { - double max_interval_ms = 9.0 * (double) processing_time_ms; - _interval_ms = 1.1 * _interval_ms; - if (max_interval_ms > 0 && _interval_ms > max_interval_ms) - _interval_ms = max_interval_ms; - } + int _thread_threshold_step; + // This thread activation threshold + int _threshold; + // This thread deactivation threshold + int _deactivation_threshold; - void sleepBeforeNextCycle(); + void sample_young_list_rs_lengths(); + void run_young_rs_sampling(); + void wait_for_completed_buffers(); + + void set_active(bool x) { _active = x; } + bool is_active(); + void activate(); + void deactivate(); // For use by G1CollectedHeap, which is a friend. static SuspendibleThreadSet* sts() { return &_sts; } - public: +public: + virtual void run(); // Constructor ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next, int worker_id_offset, int worker_id); + void initialize(); + // Printing void print() const; void print_on(outputStream* st) const; @@ -83,13 +77,10 @@ // Total virtual time so far. 
double vtime_accum() { return _vtime_accum; } - ConcurrentG1Refine* cg1r() { return _cg1r; } - - void sample_young_list_rs_lengths(); + ConcurrentG1Refine* cg1r() { return _cg1r; } // Yield for GC - void yield(); - + void yield(); // shutdown void stop(); }; diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -760,7 +760,6 @@ rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); - satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold); satb_mq_set.set_active_all_threads(true); // update_g1_committed() will be called at the end of an evac pause diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -61,8 +61,8 @@ #pragma warning( disable:4355 ) // 'this' : used in base member initializer list #endif // _MSC_VER -DirtyCardQueueSet::DirtyCardQueueSet() : - PtrQueueSet(true /*notify_when_complete*/), +DirtyCardQueueSet::DirtyCardQueueSet(bool notify_when_complete) : + PtrQueueSet(notify_when_complete), _closure(NULL), _shared_dirty_card_queue(this, true /*perm*/), _free_ids(NULL), @@ -77,12 +77,12 @@ } void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, + int process_completed_threshold, int max_completed_queue, Mutex* lock, PtrQueueSet* fl_owner) { - PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner); + PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, + max_completed_queue, fl_owner); set_buffer_size(G1UpdateBufferSize); - set_process_completed_threshold(G1UpdateBufferQueueProcessingThreshold); - _shared_dirty_card_queue.set_lock(lock); _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon); } @@ -154,9 +154,10 @@ return b; } -DirtyCardQueueSet::CompletedBufferNode* + +BufferNode* DirtyCardQueueSet::get_completed_buffer(int stop_at) { - CompletedBufferNode* nd = NULL; + BufferNode* nd = NULL; MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); if ((int)_n_completed_buffers <= stop_at) { @@ -166,10 +167,11 @@ if (_completed_buffers_head != NULL) { nd = _completed_buffers_head; - _completed_buffers_head = nd->next; + _completed_buffers_head = nd->next(); if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL; _n_completed_buffers--; + assert(_n_completed_buffers >= 0, "Invariant"); } debug_only(assert_completed_buffer_list_len_correct_locked()); return nd; @@ -177,20 +179,19 @@ bool DirtyCardQueueSet:: apply_closure_to_completed_buffer_helper(int worker_i, - CompletedBufferNode* nd) { + BufferNode* nd) { if (nd != NULL) { + void **buf = BufferNode::make_buffer_from_node(nd); + size_t index = nd->index(); bool b = - DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, - nd->index, _sz, + DirtyCardQueue::apply_closure_to_buffer(_closure, buf, + index, _sz, true, worker_i); - void** buf = nd->buf; - size_t index = nd->index; - delete nd; if (b) { deallocate_buffer(buf); return true; // In normal case, go on to next buffer. 
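get_completed_buffer(stop_at) above is what lets a refinement thread leave a green zone's worth of buffers unprocessed: the head of the completed list is popped only while the count stays above stop_at. A small sketch of that bounded pop with simplified types (std::mutex in place of _cbl_mon):

    #include <cassert>
    #include <mutex>

    struct BufferNode { BufferNode* next; };

    struct CompletedQueue {
      std::mutex mon;                 // stand-in for _cbl_mon
      BufferNode* head = nullptr;
      int n = 0;

      // Pop one completed buffer, but only if more than stop_at remain
      // (cf. DirtyCardQueueSet::get_completed_buffer(stop_at)).
      BufferNode* get_completed_buffer(int stop_at) {
        std::lock_guard<std::mutex> x(mon);
        if (n <= stop_at || head == nullptr) return nullptr;
        BufferNode* nd = head;
        head = nd->next;
        --n;
        return nd;
      }
    };

    int main() {
      BufferNode b2{nullptr}, b1{&b2};
      CompletedQueue q;
      q.head = &b1; q.n = 2;
      assert(q.get_completed_buffer(/*stop_at: green zone*/ 1) == &b1);  // 2 > 1: pop
      assert(q.get_completed_buffer(1) == nullptr);                      // 1 <= 1: leave it cached
      return 0;
    }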
} else { - enqueue_complete_buffer(buf, index, true); + enqueue_complete_buffer(buf, index); return false; } } else { @@ -203,32 +204,33 @@ bool during_pause) { assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause"); - CompletedBufferNode* nd = get_completed_buffer(stop_at); + BufferNode* nd = get_completed_buffer(stop_at); bool res = apply_closure_to_completed_buffer_helper(worker_i, nd); if (res) Atomic::inc(&_processed_buffers_rs_thread); return res; } void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() { - CompletedBufferNode* nd = _completed_buffers_head; + BufferNode* nd = _completed_buffers_head; while (nd != NULL) { bool b = - DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz, - false); + DirtyCardQueue::apply_closure_to_buffer(_closure, + BufferNode::make_buffer_from_node(nd), + 0, _sz, false); guarantee(b, "Should not stop early."); - nd = nd->next; + nd = nd->next(); } } void DirtyCardQueueSet::abandon_logs() { assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); - CompletedBufferNode* buffers_to_delete = NULL; + BufferNode* buffers_to_delete = NULL; { MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); while (_completed_buffers_head != NULL) { - CompletedBufferNode* nd = _completed_buffers_head; - _completed_buffers_head = nd->next; - nd->next = buffers_to_delete; + BufferNode* nd = _completed_buffers_head; + _completed_buffers_head = nd->next(); + nd->set_next(buffers_to_delete); buffers_to_delete = nd; } _n_completed_buffers = 0; @@ -236,10 +238,9 @@ debug_only(assert_completed_buffer_list_len_correct_locked()); } while (buffers_to_delete != NULL) { - CompletedBufferNode* nd = buffers_to_delete; - buffers_to_delete = nd->next; - deallocate_buffer(nd->buf); - delete nd; + BufferNode* nd = buffers_to_delete; + buffers_to_delete = nd->next(); + deallocate_buffer(BufferNode::make_buffer_from_node(nd)); } // Since abandon is done only at safepoints, we can safely manipulate // these queues. diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Wed Dec 23 02:57:31 2009 -0800 @@ -84,11 +84,12 @@ jint _processed_buffers_rs_thread; public: - DirtyCardQueueSet(); + DirtyCardQueueSet(bool notify_when_complete = true); void initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue = 0, - Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL); + int process_completed_threshold, + int max_completed_queue, + Mutex* lock, PtrQueueSet* fl_owner = NULL); // The number of parallel ids that can be claimed to allow collector or // mutator threads to do card-processing work. @@ -123,9 +124,9 @@ bool during_pause = false); bool apply_closure_to_completed_buffer_helper(int worker_i, - CompletedBufferNode* nd); + BufferNode* nd); - CompletedBufferNode* get_completed_buffer(int stop_at); + BufferNode* get_completed_buffer(int stop_at); // Applies the current closure to all completed buffers, // non-consumptively. 
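abandon_logs above detaches the entire completed-buffer list while holding _cbl_mon and only afterwards walks and frees it, so no deallocation happens inside the critical section. The same detach-then-free pattern in a self-contained form (std::mutex and a plain node struct stand in for the HotSpot monitor and BufferNode):

    #include <cassert>
    #include <mutex>

    struct Node { Node* next; };

    struct Queue {
      std::mutex mon;          // stand-in for _cbl_mon
      Node* head = nullptr;
      int   n = 0;

      int abandon() {
        Node* to_delete = nullptr;
        {
          std::lock_guard<std::mutex> x(mon);   // unlink everything under the lock
          while (head != nullptr) {
            Node* nd = head;
            head = nd->next;
            nd->next = to_delete;
            to_delete = nd;
          }
          n = 0;
        }
        int freed = 0;                          // free outside the critical section
        while (to_delete != nullptr) {
          Node* nd = to_delete;
          to_delete = nd->next;
          delete nd;
          ++freed;
        }
        return freed;
      }
    };

    int main() {
      Queue q;
      for (int i = 0; i < 3; ++i) { q.head = new Node{q.head}; ++q.n; }
      assert(q.abandon() == 3 && q.head == nullptr && q.n == 0);
      return 0;
    }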
diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -1375,6 +1375,7 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : SharedHeap(policy_), _g1_policy(policy_), + _dirty_card_queue_set(false), _ref_processor(NULL), _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)), _bot_shared(NULL), @@ -1460,8 +1461,6 @@ Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap"); Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap"); - // We allocate this in any case, but only do no work if the command line - // param is off. _cg1r = new ConcurrentG1Refine(); // Reserve the maximum. @@ -1594,18 +1593,20 @@ JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon, SATB_Q_FL_lock, - 0, + G1SATBProcessCompletedThreshold, Shared_SATB_Q_lock); JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, DirtyCardQ_FL_lock, - G1UpdateBufferQueueMaxLength, + concurrent_g1_refine()->yellow_zone(), + concurrent_g1_refine()->red_zone(), Shared_DirtyCardQ_lock); if (G1DeferredRSUpdate) { dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, DirtyCardQ_FL_lock, - 0, + -1, // never trigger processing + -1, // no limit on length Shared_DirtyCardQ_lock, &JavaThread::dirty_card_queue_set()); } @@ -4239,10 +4240,11 @@ RedirtyLoggedCardTableEntryFastClosure redirty; dirty_card_queue_set().set_closure(&redirty); dirty_card_queue_set().apply_closure_to_all_completed_buffers(); - JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set()); + + DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set(); + dcq.merge_bufferlists(&dirty_card_queue_set()); assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); } - COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); } diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -1914,6 +1914,10 @@ calculate_young_list_min_length(); calculate_young_list_target_config(); + // Note that _mmu_tracker->max_gc_time() returns the time in seconds. + double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSUpdatePauseFractionPercent / 100.0; + adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms); + // _target_pause_time_ms = -1.0; @@ -1921,6 +1925,47 @@ // +void G1CollectorPolicy::adjust_concurrent_refinement(double update_rs_time, + double update_rs_processed_buffers, + double goal_ms) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + ConcurrentG1Refine *cg1r = G1CollectedHeap::heap()->concurrent_g1_refine(); + + if (G1AdaptiveConcRefine) { + const int k_gy = 3, k_gr = 6; + const double inc_k = 1.1, dec_k = 0.9; + + int g = cg1r->green_zone(); + if (update_rs_time > goal_ms) { + g = (int)(g * dec_k); // Can become 0, that's OK. That would mean a mutator-only processing. 
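adjust_concurrent_refinement, introduced here and continuing just below, nudges the green zone by roughly ten percent each pause: shrink it when updating remembered sets overshot its time goal, grow it when there was slack and more than a green zone's worth of buffers was processed, then rederive yellow = 3 * green and red = 6 * green. The goal itself is G1RSUpdatePauseFractionPercent of the MMU pause time. A standalone sketch of one such control step; the constants are copied from the hunk, the surrounding names are illustrative:

    #include <algorithm>
    #include <cassert>

    struct Zones { int green, yellow, red; };

    // One adaptive step, mirroring G1CollectorPolicy::adjust_concurrent_refinement.
    Zones adjust(Zones z, double update_rs_time_ms, double processed_buffers, double goal_ms) {
      const int k_gy = 3, k_gr = 6;
      const double inc_k = 1.1, dec_k = 0.9;

      int g = z.green;
      if (update_rs_time_ms > goal_ms) {
        g = (int)(g * dec_k);                       // too slow: cache fewer buffers (may reach 0)
      } else if (update_rs_time_ms < goal_ms && processed_buffers > g) {
        g = (int)std::max(g * inc_k, g + 1.0);      // headroom: let more buffers accumulate
      }
      return Zones{ g, g * k_gy, g * k_gr };
    }

    int main() {
      // Goal: 10% of a 200 ms pause target, i.e. 20 ms for RS update buffers.
      double goal_ms = 200.0 * 10 / 100.0;
      Zones z = adjust(Zones{10, 30, 60}, /*update_rs_time*/ 35.0, /*buffers*/ 40, goal_ms);
      assert(z.green == 9 && z.yellow == 27 && z.red == 54);   // overshot the goal: shrink
      z = adjust(Zones{10, 30, 60}, /*update_rs_time*/ 5.0, /*buffers*/ 40, goal_ms);
      assert(z.green == 11);                                   // under the goal with a backlog: grow
      return 0;
    }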
+ } else { + if (update_rs_time < goal_ms && update_rs_processed_buffers > g) { + g = (int)MAX2(g * inc_k, g + 1.0); + } + } + // Change the refinement threads params + cg1r->set_green_zone(g); + cg1r->set_yellow_zone(g * k_gy); + cg1r->set_red_zone(g * k_gr); + cg1r->reinitialize_threads(); + + int processing_threshold_delta = MAX2((int)(cg1r->green_zone() * sigma()), 1); + int processing_threshold = MIN2(cg1r->green_zone() + processing_threshold_delta, + cg1r->yellow_zone()); + // Change the barrier params + dcqs.set_process_completed_threshold(processing_threshold); + dcqs.set_max_completed_queue(cg1r->red_zone()); + } + + int curr_queue_size = dcqs.completed_buffers_num(); + if (curr_queue_size >= cg1r->yellow_zone()) { + dcqs.set_completed_queue_padding(curr_queue_size); + } else { + dcqs.set_completed_queue_padding(0); + } + dcqs.notify_if_necessary(); +} + double G1CollectorPolicy:: predict_young_collection_elapsed_time_ms(size_t adjustment) { diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Wed Dec 23 02:57:31 2009 -0800 @@ -316,6 +316,10 @@ bool verify_young_ages(HeapRegion* head, SurvRateGroup *surv_rate_group); #endif // PRODUCT + void adjust_concurrent_refinement(double update_rs_time, + double update_rs_processed_buffers, + double goal_ms); + protected: double _pause_time_target_ms; double _recorded_young_cset_choice_time_ms; diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Wed Dec 23 02:57:31 2009 -0800 @@ -85,7 +85,7 @@ diagnostic(bool, G1SummarizeZFStats, false, \ "Summarize zero-filling info") \ \ - develop(bool, G1TraceConcurrentRefinement, false, \ + diagnostic(bool, G1TraceConcurrentRefinement, false, \ "Trace G1 concurrent refinement") \ \ product(intx, G1MarkStackSize, 2 * 1024 * 1024, \ @@ -94,19 +94,6 @@ product(intx, G1MarkRegionStackSize, 1024 * 1024, \ "Size of the region stack for concurrent marking.") \ \ - develop(bool, G1ConcRefine, true, \ - "If true, run concurrent rem set refinement for G1") \ - \ - develop(intx, G1ConcRefineTargTraversals, 4, \ - "Number of concurrent refinement we try to achieve") \ - \ - develop(intx, G1ConcRefineInitialDelta, 4, \ - "Number of heap regions of alloc ahead of starting collection " \ - "pause to start concurrent refinement (initially)") \ - \ - develop(bool, G1SmoothConcRefine, true, \ - "Attempts to smooth out the overhead of concurrent refinement") \ - \ develop(bool, G1ConcZeroFill, true, \ "If true, run concurrent zero-filling thread") \ \ @@ -178,13 +165,38 @@ product(intx, G1UpdateBufferSize, 256, \ "Size of an update buffer") \ \ - product(intx, G1UpdateBufferQueueProcessingThreshold, 5, \ + product(intx, G1ConcRefineYellowZone, 0, \ "Number of enqueued update buffers that will " \ - "trigger concurrent processing") \ + "trigger concurrent processing. Will be selected ergonomically " \ + "by default.") \ + \ + product(intx, G1ConcRefineRedZone, 0, \ + "Maximum number of enqueued update buffers before mutator " \ + "threads start processing new ones instead of enqueueing them. " \ + "Will be selected ergonomically by default. 
Zero will disable " \ + "concurrent processing.") \ + \ + product(intx, G1ConcRefineGreenZone, 0, \ + "The number of update buffers that are left in the queue by the " \ + "concurrent processing threads. Will be selected ergonomically " \ + "by default.") \ \ - product(intx, G1UpdateBufferQueueMaxLength, 30, \ - "Maximum number of enqueued update buffers before mutator " \ - "threads start processing new ones instead of enqueueing them") \ + product(intx, G1ConcRefineServiceInterval, 300, \ + "The last concurrent refinement thread wakes up every " \ + "specified number of milliseconds to do miscellaneous work.") \ + \ + product(intx, G1ConcRefineThresholdStep, 0, \ + "Each time the rset update queue increases by this amount " \ + "activate the next refinement thread if available. " \ + "Will be selected ergonomically by default.") \ + \ + product(intx, G1RSUpdatePauseFractionPercent, 10, \ + "A target percentage of time that is allowed to be spend on " \ + "process RS update buffers during the collection pause.") \ + \ + product(bool, G1AdaptiveConcRefine, true, \ + "Select green, yellow and red zones adaptively to meet the " \ + "the pause requirements.") \ \ develop(intx, G1ConcRSLogCacheSize, 10, \ "Log base 2 of the length of conc RS hot-card cache.") \ diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/ptrQueue.cpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -64,8 +64,8 @@ while (_index == 0) { handle_zero_index(); } + assert(_index > 0, "postcondition"); - _index -= oopSize; _buf[byte_index_to_index((int)_index)] = ptr; assert(0 <= _index && _index <= _sz, "Invariant."); @@ -99,95 +99,110 @@ assert(_sz > 0, "Didn't set a buffer size."); MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); if (_fl_owner->_buf_free_list != NULL) { - void** res = _fl_owner->_buf_free_list; - _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0]; + void** res = BufferNode::make_buffer_from_node(_fl_owner->_buf_free_list); + _fl_owner->_buf_free_list = _fl_owner->_buf_free_list->next(); _fl_owner->_buf_free_list_sz--; - // Just override the next pointer with NULL, just in case we scan this part - // of the buffer. - res[0] = NULL; return res; } else { - return (void**) NEW_C_HEAP_ARRAY(char, _sz); + // Allocate space for the BufferNode in front of the buffer. + char *b = NEW_C_HEAP_ARRAY(char, _sz + BufferNode::aligned_size()); + return BufferNode::make_buffer_from_block(b); } } void PtrQueueSet::deallocate_buffer(void** buf) { assert(_sz > 0, "Didn't set a buffer size."); MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); - buf[0] = (void*)_fl_owner->_buf_free_list; - _fl_owner->_buf_free_list = buf; + BufferNode *node = BufferNode::make_node_from_buffer(buf); + node->set_next(_fl_owner->_buf_free_list); + _fl_owner->_buf_free_list = node; _fl_owner->_buf_free_list_sz++; } void PtrQueueSet::reduce_free_list() { + assert(_fl_owner == this, "Free list reduction is allowed only for the owner"); // For now we'll adopt the strategy of deleting half. 
MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); size_t n = _buf_free_list_sz / 2; while (n > 0) { assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong."); - void** head = _buf_free_list; - _buf_free_list = (void**)_buf_free_list[0]; - FREE_C_HEAP_ARRAY(char, head); + void* b = BufferNode::make_block_from_node(_buf_free_list); + _buf_free_list = _buf_free_list->next(); + FREE_C_HEAP_ARRAY(char, b); _buf_free_list_sz --; n--; } } -void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) { - // I use explicit locking here because there's a bailout in the middle. - _cbl_mon->lock_without_safepoint_check(); - - Thread* thread = Thread::current(); - assert( ignore_max_completed || - thread->is_Java_thread() || - SafepointSynchronize::is_at_safepoint(), - "invariant" ); - ignore_max_completed = ignore_max_completed || !thread->is_Java_thread(); +void PtrQueue::handle_zero_index() { + assert(0 == _index, "Precondition."); + // This thread records the full buffer and allocates a new one (while + // holding the lock if there is one). + if (_buf != NULL) { + if (_lock) { + locking_enqueue_completed_buffer(_buf); + } else { + if (qset()->process_or_enqueue_complete_buffer(_buf)) { + // Recycle the buffer. No allocation. + _sz = qset()->buffer_size(); + _index = _sz; + return; + } + } + } + // Reallocate the buffer + _buf = qset()->allocate_buffer(); + _sz = qset()->buffer_size(); + _index = _sz; + assert(0 <= _index && _index <= _sz, "Invariant."); +} - if (!ignore_max_completed && _max_completed_queue > 0 && - _n_completed_buffers >= (size_t) _max_completed_queue) { - _cbl_mon->unlock(); - bool b = mut_process_buffer(buf); - if (b) { - deallocate_buffer(buf); - return; +bool PtrQueueSet::process_or_enqueue_complete_buffer(void** buf) { + if (Thread::current()->is_Java_thread()) { + // We don't lock. It is fine to be epsilon-precise here. + if (_max_completed_queue == 0 || _max_completed_queue > 0 && + _n_completed_buffers >= _max_completed_queue + _completed_queue_padding) { + bool b = mut_process_buffer(buf); + if (b) { + // True here means that the buffer hasn't been deallocated and the caller may reuse it. + return true; + } } + } + // The buffer will be enqueued. The caller will have to get a new one. + enqueue_complete_buffer(buf); + return false; +} - // Otherwise, go ahead and enqueue the buffer. Must reaquire the lock. - _cbl_mon->lock_without_safepoint_check(); - } - - // Here we still hold the _cbl_mon. 
- CompletedBufferNode* cbn = new CompletedBufferNode; - cbn->buf = buf; - cbn->next = NULL; - cbn->index = index; +void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index) { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + BufferNode* cbn = BufferNode::new_from_buffer(buf); + cbn->set_index(index); if (_completed_buffers_tail == NULL) { assert(_completed_buffers_head == NULL, "Well-formedness"); _completed_buffers_head = cbn; _completed_buffers_tail = cbn; } else { - _completed_buffers_tail->next = cbn; + _completed_buffers_tail->set_next(cbn); _completed_buffers_tail = cbn; } _n_completed_buffers++; - if (!_process_completed && + if (!_process_completed && _process_completed_threshold >= 0 && _n_completed_buffers >= _process_completed_threshold) { _process_completed = true; if (_notify_when_complete) - _cbl_mon->notify_all(); + _cbl_mon->notify(); } debug_only(assert_completed_buffer_list_len_correct_locked()); - _cbl_mon->unlock(); } int PtrQueueSet::completed_buffers_list_length() { int n = 0; - CompletedBufferNode* cbn = _completed_buffers_head; + BufferNode* cbn = _completed_buffers_head; while (cbn != NULL) { n++; - cbn = cbn->next; + cbn = cbn->next(); } return n; } @@ -198,7 +213,7 @@ } void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() { - guarantee((size_t)completed_buffers_list_length() == _n_completed_buffers, + guarantee(completed_buffers_list_length() == _n_completed_buffers, "Completed buffer length is wrong."); } @@ -207,12 +222,8 @@ _sz = sz * oopSize; } -void PtrQueueSet::set_process_completed_threshold(size_t sz) { - _process_completed_threshold = sz; -} - -// Merge lists of buffers. Notify waiting threads if the length of the list -// exceeds threshold. The source queue is emptied as a result. The queues +// Merge lists of buffers. Notify the processing threads. +// The source queue is emptied as a result. The queues // must share the monitor. void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { assert(_cbl_mon == src->_cbl_mon, "Should share the same lock"); @@ -224,7 +235,7 @@ } else { assert(_completed_buffers_head != NULL, "Well formedness"); if (src->_completed_buffers_head != NULL) { - _completed_buffers_tail->next = src->_completed_buffers_head; + _completed_buffers_tail->set_next(src->_completed_buffers_head); _completed_buffers_tail = src->_completed_buffers_tail; } } @@ -237,31 +248,13 @@ assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL || _completed_buffers_head != NULL && _completed_buffers_tail != NULL, "Sanity"); - - if (!_process_completed && - _n_completed_buffers >= _process_completed_threshold) { - _process_completed = true; - if (_notify_when_complete) - _cbl_mon->notify_all(); - } } -// Merge free lists of the two queues. The free list of the source -// queue is emptied as a result. The queues must share the same -// mutex that guards free lists. 
-void PtrQueueSet::merge_freelists(PtrQueueSet* src) { - assert(_fl_lock == src->_fl_lock, "Should share the same lock"); - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); - if (_buf_free_list != NULL) { - void **p = _buf_free_list; - while (*p != NULL) { - p = (void**)*p; - } - *p = src->_buf_free_list; - } else { - _buf_free_list = src->_buf_free_list; +void PtrQueueSet::notify_if_necessary() { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_n_completed_buffers >= _process_completed_threshold || _max_completed_queue == 0) { + _process_completed = true; + if (_notify_when_complete) + _cbl_mon->notify(); } - _buf_free_list_sz += src->_buf_free_list_sz; - src->_buf_free_list = NULL; - src->_buf_free_list_sz = 0; } diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/ptrQueue.hpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp Wed Dec 23 02:57:31 2009 -0800 @@ -27,8 +27,10 @@ // the addresses of modified old-generation objects. This type supports // this operation. +// The definition of placement operator new(size_t, void*) in the <new> header. +#include <new> + class PtrQueueSet; - class PtrQueue VALUE_OBJ_CLASS_SPEC { protected: @@ -77,7 +79,7 @@ else enqueue_known_active(ptr); } - inline void handle_zero_index(); + void handle_zero_index(); void locking_enqueue_completed_buffer(void** buf); void enqueue_known_active(void* ptr); @@ -126,34 +128,65 @@ }; +class BufferNode { + size_t _index; + BufferNode* _next; +public: + BufferNode() : _index(0), _next(NULL) { } + BufferNode* next() const { return _next; } + void set_next(BufferNode* n) { _next = n; } + size_t index() const { return _index; } + void set_index(size_t i) { _index = i; } + + // Align the size of the structure to the size of the pointer + static size_t aligned_size() { + static const size_t alignment = round_to(sizeof(BufferNode), sizeof(void*)); + return alignment; + } + + // BufferNode is allocated before the buffer. + // The chunk of memory that holds both of them is a block. + + // Produce a new BufferNode given a buffer. + static BufferNode* new_from_buffer(void** buf) { + return new (make_block_from_buffer(buf)) BufferNode; + } + + // The following are the required conversion routines: + static BufferNode* make_node_from_buffer(void** buf) { + return (BufferNode*)make_block_from_buffer(buf); + } + static void** make_buffer_from_node(BufferNode *node) { + return make_buffer_from_block(node); + } + static void* make_block_from_node(BufferNode *node) { + return (void*)node; + } + static void** make_buffer_from_block(void* p) { + return (void**)((char*)p + aligned_size()); + } + static void* make_block_from_buffer(void** p) { + return (void*)((char*)p - aligned_size()); + } +}; + // A PtrQueueSet represents resources common to a set of pointer queues. // In particular, the individual queues allocate buffers from this shared // set, and return completed buffers to the set. // All these variables are are protected by the TLOQ_CBL_mon. XXX ??? class PtrQueueSet VALUE_OBJ_CLASS_SPEC { - protected: - - class CompletedBufferNode: public CHeapObj { - public: - void** buf; - size_t index; - CompletedBufferNode* next; - CompletedBufferNode() : buf(NULL), - index(0), next(NULL){ } - }; - Monitor* _cbl_mon; // Protects the fields below. 
- CompletedBufferNode* _completed_buffers_head; - CompletedBufferNode* _completed_buffers_tail; - size_t _n_completed_buffers; - size_t _process_completed_threshold; + BufferNode* _completed_buffers_head; + BufferNode* _completed_buffers_tail; + int _n_completed_buffers; + int _process_completed_threshold; volatile bool _process_completed; // This (and the interpretation of the first element as a "next" // pointer) are protected by the TLOQ_FL_lock. Mutex* _fl_lock; - void** _buf_free_list; + BufferNode* _buf_free_list; size_t _buf_free_list_sz; // Queue set can share a freelist. The _fl_owner variable // specifies the owner. It is set to "this" by default. @@ -170,6 +203,7 @@ // Maximum number of elements allowed on completed queue: after that, // enqueuer does the work itself. Zero indicates no maximum. int _max_completed_queue; + int _completed_queue_padding; int completed_buffers_list_length(); void assert_completed_buffer_list_len_correct_locked(); @@ -191,9 +225,12 @@ // Because of init-order concerns, we can't pass these as constructor // arguments. void initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue = 0, + int process_completed_threshold, + int max_completed_queue, PtrQueueSet *fl_owner = NULL) { _max_completed_queue = max_completed_queue; + _process_completed_threshold = process_completed_threshold; + _completed_queue_padding = 0; assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?"); _cbl_mon = cbl_mon; _fl_lock = fl_lock; @@ -208,14 +245,17 @@ void deallocate_buffer(void** buf); // Declares that "buf" is a complete buffer. - void enqueue_complete_buffer(void** buf, size_t index = 0, - bool ignore_max_completed = false); + void enqueue_complete_buffer(void** buf, size_t index = 0); + + // To be invoked by the mutator. + bool process_or_enqueue_complete_buffer(void** buf); bool completed_buffers_exist_dirty() { return _n_completed_buffers > 0; } bool process_completed_buffers() { return _process_completed; } + void set_process_completed(bool x) { _process_completed = x; } bool active() { return _all_active; } @@ -226,15 +266,24 @@ // Get the buffer size. size_t buffer_size() { return _sz; } - // Set the number of completed buffers that triggers log processing. - void set_process_completed_threshold(size_t sz); + // Get/Set the number of completed buffers that triggers log processing. + void set_process_completed_threshold(int sz) { _process_completed_threshold = sz; } + int process_completed_threshold() const { return _process_completed_threshold; } // Must only be called at a safe point. Indicates that the buffer free // list size may be reduced, if that is deemed desirable. 
void reduce_free_list(); - size_t completed_buffers_num() { return _n_completed_buffers; } + int completed_buffers_num() { return _n_completed_buffers; } void merge_bufferlists(PtrQueueSet* src); - void merge_freelists(PtrQueueSet* src); + + void set_max_completed_queue(int m) { _max_completed_queue = m; } + int max_completed_queue() { return _max_completed_queue; } + + void set_completed_queue_padding(int padding) { _completed_queue_padding = padding; } + int completed_queue_padding() { return _completed_queue_padding; } + + // Notify the consumer if the number of buffers crossed the threshold + void notify_if_necessary(); }; diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/satbQueue.cpp --- a/src/share/vm/gc_implementation/g1/satbQueue.cpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp Wed Dec 23 02:57:31 2009 -0800 @@ -67,9 +67,9 @@ {} void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue, + int process_completed_threshold, Mutex* lock) { - PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue); + PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, -1); _shared_satb_queue.set_lock(lock); if (ParallelGCThreads > 0) { _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads); @@ -122,12 +122,12 @@ bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par, int worker) { - CompletedBufferNode* nd = NULL; + BufferNode* nd = NULL; { MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); if (_completed_buffers_head != NULL) { nd = _completed_buffers_head; - _completed_buffers_head = nd->next; + _completed_buffers_head = nd->next(); if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL; _n_completed_buffers--; if (_n_completed_buffers == 0) _process_completed = false; @@ -135,9 +135,9 @@ } ObjectClosure* cl = (par ? _par_closures[worker] : _closure); if (nd != NULL) { - ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz); - deallocate_buffer(nd->buf); - delete nd; + void **buf = BufferNode::make_buffer_from_node(nd); + ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz); + deallocate_buffer(buf); return true; } else { return false; @@ -145,13 +145,13 @@ } void SATBMarkQueueSet::abandon_partial_marking() { - CompletedBufferNode* buffers_to_delete = NULL; + BufferNode* buffers_to_delete = NULL; { MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); while (_completed_buffers_head != NULL) { - CompletedBufferNode* nd = _completed_buffers_head; - _completed_buffers_head = nd->next; - nd->next = buffers_to_delete; + BufferNode* nd = _completed_buffers_head; + _completed_buffers_head = nd->next(); + nd->set_next(buffers_to_delete); buffers_to_delete = nd; } _completed_buffers_tail = NULL; @@ -159,10 +159,9 @@ DEBUG_ONLY(assert_completed_buffer_list_len_correct_locked()); } while (buffers_to_delete != NULL) { - CompletedBufferNode* nd = buffers_to_delete; - buffers_to_delete = nd->next; - deallocate_buffer(nd->buf); - delete nd; + BufferNode* nd = buffers_to_delete; + buffers_to_delete = nd->next(); + deallocate_buffer(BufferNode::make_buffer_from_node(nd)); } assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); // So we can safely manipulate these queues. 
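The SATB changes above, like the dirty-card ones, now reach the raw buffer through BufferNode::make_buffer_from_node: the patched ptrQueue.hpp places a small header immediately in front of every buffer, so one heap block carries both the list link and the payload, and the node/buffer conversions are pure pointer arithmetic across that header. The standalone sketch below illustrates that layout; it is not HotSpot source, and DemoBufferNode, the malloc-based allocation, and the hand-rolled alignment round-up are illustrative stand-ins for the patch's BufferNode, NEW_C_HEAP_ARRAY, and round_to.

// Standalone illustration (not HotSpot source) of the node-in-front-of-buffer
// layout introduced by the BufferNode class in ptrQueue.hpp above.
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <new>      // placement operator new, as in the patched ptrQueue.hpp

class DemoBufferNode {               // stand-in for the patch's BufferNode
  size_t _index;
  DemoBufferNode* _next;
public:
  DemoBufferNode() : _index(0), _next(NULL) { }
  DemoBufferNode* next() const { return _next; }
  void set_next(DemoBufferNode* n) { _next = n; }

  // Header size rounded up to pointer alignment so the buffer that follows
  // the header starts on a pointer boundary (the patch uses round_to()).
  static size_t aligned_size() {
    const size_t a = sizeof(void*);
    return (sizeof(DemoBufferNode) + a - 1) / a * a;
  }

  // A "block" is [header][buffer of void* slots]; conversions are arithmetic.
  static DemoBufferNode* make_node_from_buffer(void** buf) {
    return reinterpret_cast<DemoBufferNode*>(reinterpret_cast<char*>(buf) - aligned_size());
  }
  static void** make_buffer_from_node(DemoBufferNode* node) {
    return reinterpret_cast<void**>(reinterpret_cast<char*>(node) + aligned_size());
  }
};

int main() {
  // Mirror allocate_buffer(): one allocation for the header plus a 256-slot buffer.
  const size_t buffer_bytes = 256 * sizeof(void*);
  char* block = static_cast<char*>(malloc(DemoBufferNode::aligned_size() + buffer_bytes));
  DemoBufferNode* node = new (block) DemoBufferNode;            // placement new
  void** buf = DemoBufferNode::make_buffer_from_node(node);

  // Round-tripping the buffer pointer recovers the same header, which is what
  // lets deallocate_buffer() and the SATB code find the node from a bare buffer.
  printf("round trip ok: %s\n",
         DemoBufferNode::make_node_from_buffer(buf) == node ? "yes" : "no");
  free(block);
  return 0;
}

The same arithmetic is what lets deallocate_buffer() rebuild a free-list node from a bare void** without a separate heap allocation, which is the point of retiring the old CompletedBufferNode objects.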
diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/g1/satbQueue.hpp --- a/src/share/vm/gc_implementation/g1/satbQueue.hpp Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp Wed Dec 23 02:57:31 2009 -0800 @@ -60,8 +60,8 @@ SATBMarkQueueSet(); void initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue = 0, - Mutex* lock = NULL); + int process_completed_threshold, + Mutex* lock); static void handle_zero_index_for_thread(JavaThread* t); diff -r 920875ae1277 -r 9749fbc4859b src/share/vm/gc_implementation/includeDB_gc_g1 --- a/src/share/vm/gc_implementation/includeDB_gc_g1 Tue Dec 22 16:35:08 2009 -0800 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 Wed Dec 23 02:57:31 2009 -0800 @@ -109,7 +109,6 @@ dirtyCardQueue.cpp dirtyCardQueue.hpp dirtyCardQueue.cpp heapRegionRemSet.hpp dirtyCardQueue.cpp mutexLocker.hpp -dirtyCardQueue.cpp ptrQueue.inline.hpp dirtyCardQueue.cpp safepoint.hpp dirtyCardQueue.cpp thread.hpp dirtyCardQueue.cpp thread_<os_family>.inline.hpp @@ -319,7 +318,6 @@ ptrQueue.cpp mutex.hpp ptrQueue.cpp mutexLocker.hpp ptrQueue.cpp ptrQueue.hpp -ptrQueue.cpp ptrQueue.inline.hpp ptrQueue.cpp thread_<os_family>.inline.hpp ptrQueue.hpp allocation.hpp @@ -329,7 +327,6 @@ satbQueue.cpp allocation.inline.hpp satbQueue.cpp mutexLocker.hpp -satbQueue.cpp ptrQueue.inline.hpp satbQueue.cpp satbQueue.hpp satbQueue.cpp sharedHeap.hpp satbQueue.cpp thread.hpp
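A closing note on the policy side of this changeset: adjust_concurrent_refinement(), at the top of this hunk, sizes the whole refinement setup from a single green value. Yellow and red are fixed multiples of green, the dirty-card queue's processing threshold is placed a little above green but capped at yellow, and green itself grows when the previous pause stayed under its update-RS goal while draining more buffers than green. The toy model below shows how those quantities relate; it is not the G1 code, and the constants K_GY, K_GR, INC_K, DEC_K and SIGMA, along with the shrink branch for the over-budget case (which lies outside this excerpt), are assumptions made for illustration.

// Toy model (not HotSpot source) of the zone sizing done by
// G1CollectorPolicy::adjust_concurrent_refinement() in this changeset.
// All constants below are assumed, illustrative values.
#include <algorithm>
#include <cstdio>

struct Zones {
  int green;      // buffers the refinement threads deliberately leave queued
  int yellow;     // queue length at which all refinement threads are running
  int red;        // queue length at which mutators process buffers themselves
  int threshold;  // completed-buffer count that wakes the refinement threads
};

static Zones adjust(int green, double update_rs_ms,
                    double processed_buffers, double goal_ms) {
  const double K_GY = 2.0, K_GR = 10.0;   // assumed yellow/red multipliers
  const double INC_K = 1.1, DEC_K = 0.6;  // assumed grow/shrink factors
  const double SIGMA = 0.5;               // assumed threshold slack fraction

  if (update_rs_ms > goal_ms) {
    // Over budget: shrink the allowed backlog (this branch is assumed; it
    // falls outside the excerpt shown above).
    green = (int)(green * DEC_K);
  } else if (processed_buffers > green) {
    // Under budget and busier than the target: allow a larger backlog,
    // growing by at least one buffer (mirrors MAX2(g * inc_k, g + 1.0)).
    green = std::max((int)(green * INC_K), green + 1);
  }

  Zones z;
  z.green  = green;
  z.yellow = (int)(green * K_GY);
  z.red    = (int)(green * K_GR);
  // Processing threshold sits a little above green but never beyond yellow,
  // as in the MIN2/MAX2 computation visible in the hunk above.
  int delta = std::max((int)(green * SIGMA), 1);
  z.threshold = std::min(green + delta, z.yellow);
  return z;
}

int main() {
  // Example: the last pause spent 8 ms of a 10 ms update-RS budget and drained
  // 20 buffers while green was 16, so every zone grows a little.
  Zones z = adjust(16, 8.0, 20.0, 10.0);
  printf("green=%d yellow=%d red=%d threshold=%d\n",
         z.green, z.yellow, z.red, z.threshold);
  return 0;
}

This matches the flag descriptions earlier in the hunk: the three G1ConcRefine*Zone flags default to 0 and are "selected ergonomically", i.e. recomputed after each pause while G1AdaptiveConcRefine is true.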