# HG changeset patch # User roland # Date 1389707507 -3600 # Node ID add2caa66e7e25305d6a7d8d2f782d3a319a594c # Parent d7773b29c65ad0ca04fb8aba654a0d7a8ebdce25 8026253: New type profiling points: sparc support Summary: c1 and interpreter support for new type profiling on sparc Reviewed-by: kvn, twisti diff -r d7773b29c65a -r add2caa66e7e src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Tue Jan 14 12:44:12 2014 +0100 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Tue Jan 14 14:51:47 2014 +0100 @@ -1315,7 +1315,7 @@ } Address LIR_Assembler::as_Address(LIR_Address* addr) { - Register reg = addr->base()->as_register(); + Register reg = addr->base()->as_pointer_register(); LIR_Opr index = addr->index(); if (index->is_illegal()) { return Address(reg, addr->disp()); @@ -3101,7 +3101,145 @@ } void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { - fatal("Type profiling not implemented on this platform"); + Register obj = op->obj()->as_register(); + Register tmp1 = op->tmp()->as_pointer_register(); + Register tmp2 = G1; + Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); + ciKlass* exact_klass = op->exact_klass(); + intptr_t current_klass = op->current_klass(); + bool not_null = op->not_null(); + bool no_conflict = op->no_conflict(); + + Label update, next, none; + + bool do_null = !not_null; + bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; + bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; + + assert(do_null || do_update, "why are we here?"); + assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + + __ verify_oop(obj); + + if (tmp1 != obj) { + __ mov(obj, tmp1); + } + if (do_null) { + __ br_notnull_short(tmp1, Assembler::pt, update); + if (!TypeEntries::was_null_seen(current_klass)) { + __ ld_ptr(mdo_addr, tmp1); + __ or3(tmp1, TypeEntries::null_seen, tmp1); + __ st_ptr(tmp1, mdo_addr); + } + if (do_update) { + __ ba(next); + __ delayed()->nop(); + } +#ifdef ASSERT + } else { + __ br_notnull_short(tmp1, Assembler::pt, update); + __ stop("unexpect null obj"); +#endif + } + + __ bind(update); + + if (do_update) { +#ifdef ASSERT + if (exact_klass != NULL) { + Label ok; + __ load_klass(tmp1, tmp1); + metadata2reg(exact_klass->constant_encoding(), tmp2); + __ cmp_and_br_short(tmp1, tmp2, Assembler::equal, Assembler::pt, ok); + __ stop("exact klass and actual klass differ"); + __ bind(ok); + } +#endif + + Label do_update; + __ ld_ptr(mdo_addr, tmp2); + + if (!no_conflict) { + if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { + if (exact_klass != NULL) { + metadata2reg(exact_klass->constant_encoding(), tmp1); + } else { + __ load_klass(tmp1, tmp1); + } + + __ xor3(tmp1, tmp2, tmp1); + __ btst(TypeEntries::type_klass_mask, tmp1); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + __ brx(Assembler::zero, false, Assembler::pt, next); + __ delayed()-> + + btst(TypeEntries::type_unknown, tmp1); + // already unknown. Nothing to do anymore. + __ brx(Assembler::notZero, false, Assembler::pt, next); + + if (TypeEntries::is_type_none(current_klass)) { + __ delayed()->btst(TypeEntries::type_mask, tmp2); + __ brx(Assembler::zero, true, Assembler::pt, do_update); + // first time here. Set profile type. + __ delayed()->or3(tmp2, tmp1, tmp2); + } else { + __ delayed()->nop(); + } + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); + + __ btst(TypeEntries::type_unknown, tmp2); + // already unknown. Nothing to do anymore. + __ brx(Assembler::notZero, false, Assembler::pt, next); + __ delayed()->nop(); + } + + // different than before. Cannot keep accurate profile. + __ or3(tmp2, TypeEntries::type_unknown, tmp2); + } else { + // There's a single possible klass at this profile point + assert(exact_klass != NULL, "should be"); + if (TypeEntries::is_type_none(current_klass)) { + metadata2reg(exact_klass->constant_encoding(), tmp1); + __ xor3(tmp1, tmp2, tmp1); + __ btst(TypeEntries::type_klass_mask, tmp1); + __ brx(Assembler::zero, false, Assembler::pt, next); +#ifdef ASSERT + + { + Label ok; + __ delayed()->btst(TypeEntries::type_mask, tmp2); + __ brx(Assembler::zero, true, Assembler::pt, ok); + __ delayed()->nop(); + + __ stop("unexpected profiling mismatch"); + __ bind(ok); + } + // first time here. Set profile type. + __ or3(tmp2, tmp1, tmp2); +#else + // first time here. Set profile type. + __ delayed()->or3(tmp2, tmp1, tmp2); +#endif + + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); + + // already unknown. Nothing to do anymore. + __ btst(TypeEntries::type_unknown, tmp2); + __ brx(Assembler::notZero, false, Assembler::pt, next); + __ delayed()->or3(tmp2, TypeEntries::type_unknown, tmp2); + } + } + + __ bind(do_update); + __ st_ptr(tmp2, mdo_addr); + + __ bind(next); + } } void LIR_Assembler::align_backward_branch_target() { @@ -3321,9 +3459,14 @@ void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) { LIR_Address* addr = addr_opr->as_address_ptr(); - assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1 && Assembler::is_simm13(addr->disp()), "can't handle complex addresses yet"); - - __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register()); + assert(addr->index()->is_illegal() && addr->scale() == LIR_Address::times_1, "can't handle complex addresses yet"); + + if (Assembler::is_simm13(addr->disp())) { + __ add(addr->base()->as_pointer_register(), addr->disp(), dest->as_pointer_register()); + } else { + __ set(addr->disp(), G3_scratch); + __ add(addr->base()->as_pointer_register(), G3_scratch, dest->as_pointer_register()); + } } diff -r d7773b29c65a -r add2caa66e7e src/cpu/sparc/vm/interp_masm_sparc.cpp --- a/src/cpu/sparc/vm/interp_masm_sparc.cpp Tue Jan 14 12:44:12 2014 +0100 +++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp Tue Jan 14 14:51:47 2014 +0100 @@ -1892,6 +1892,220 @@ } } +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { + Label not_null, do_nothing, do_update; + + assert_different_registers(obj, mdo_addr.base(), tmp); + + verify_oop(obj); + + ld_ptr(mdo_addr, tmp); + + br_notnull_short(obj, pt, not_null); + or3(tmp, TypeEntries::null_seen, tmp); + ba_short(do_update); + + bind(not_null); + load_klass(obj, obj); + + xor3(obj, tmp, obj); + btst(TypeEntries::type_klass_mask, obj); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + brx(zero, false, pt, do_nothing); + delayed()-> + + btst(TypeEntries::type_unknown, obj); + // already unknown. Nothing to do anymore. + brx(notZero, false, pt, do_nothing); + delayed()-> + + btst(TypeEntries::type_mask, tmp); + brx(zero, true, pt, do_update); + // first time here. Set profile type. + delayed()->or3(tmp, obj, tmp); + + // different than before. Cannot keep accurate profile. + or3(tmp, TypeEntries::type_unknown, tmp); + + bind(do_update); + // update profile + st_ptr(tmp, mdo_addr); + + bind(do_nothing); +} + +void InterpreterMacroAssembler::profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + assert_different_registers(callee, tmp1, tmp2, ImethodDataPtr); + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + ldub(ImethodDataPtr, in_bytes(DataLayout::tag_offset()) - off_to_start, tmp1); + cmp_and_br_short(tmp1, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag, notEqual, pn, profile_continue); + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + add(ImethodDataPtr, off_to_args, ImethodDataPtr); + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1); + sub(tmp1, i*TypeStackSlotEntries::per_arg_count(), tmp1); + cmp_and_br_short(tmp1, TypeStackSlotEntries::per_arg_count(), less, pn, done); + } + ld_ptr(Address(callee, Method::const_offset()), tmp1); + lduh(Address(tmp1, ConstMethod::size_of_parameters_offset()), tmp1); + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list. But there's an extra slot at + // the stop of the stack. So the offset is n - o from Lesp. + ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args, tmp2); + sub(tmp1, tmp2, tmp1); + + // Can't use MacroAssembler::argument_address() which needs Gargs to be set up + sll(tmp1, Interpreter::logStackElementSize, tmp1); + ld_ptr(Lesp, tmp1, tmp1); + + Address mdo_arg_addr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp1, mdo_arg_addr, tmp2); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + add(ImethodDataPtr, to_add, ImethodDataPtr); + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ld_ptr(ImethodDataPtr, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, tmp1); + sub(tmp1, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(), tmp1); + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp1 is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. + assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + sll(tmp1, exact_log2(DataLayout::cell_size), tmp1); + add(ImethodDataPtr, tmp1, ImethodDataPtr); + } + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(in_bytes(ReturnTypeEntry::size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one. + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register ret, Register tmp1, Register tmp2) { + assert_different_registers(ret, tmp1, tmp2); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue, done; + + test_method_data_pointer(profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. We can't go back to the + // begining of the ProfileData we intend to update to check its + // type because we're right after it and we don't known its + // length. + Label do_profile; + ldub(Lbcp, 0, tmp1); + cmp_and_br_short(tmp1, Bytecodes::_invokedynamic, equal, pn, do_profile); + cmp(tmp1, Bytecodes::_invokehandle); + br(equal, false, pn, do_profile); + delayed()->ldub(Lmethod, Method::intrinsic_id_offset_in_bytes(), tmp1); + cmp_and_br_short(tmp1, vmIntrinsics::_compiledLambdaForm, notEqual, pt, profile_continue); + + bind(do_profile); + } + + Address mdo_ret_addr(ImethodDataPtr, -in_bytes(ReturnTypeEntry::size())); + mov(ret, tmp1); + profile_obj_type(tmp1, mdo_ret_addr, tmp2); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4) { + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters. + lduw(ImethodDataPtr, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()), tmp1); + cmp_and_br_short(tmp1, 0, less, pn, profile_continue); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down. + // mdo start + parameters offset + array length - 1 + + // Pointer to the parameter area in the MDO + Register mdp = tmp1; + add(ImethodDataPtr, tmp1, mdp); + + // offset of the current profile entry to update + Register entry_offset = tmp2; + // entry_offset = array len in number of cells + ld_ptr(mdp, ArrayData::array_len_offset(), entry_offset); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + assert(off_base % DataLayout::cell_size == 0, "should be a number of cells"); + + // entry_offset (number of cells) = array len - size of 1 entry + offset of the stack slot field + sub(entry_offset, TypeStackSlotEntries::per_arg_count() - (off_base / DataLayout::cell_size), entry_offset); + // entry_offset in bytes + sll(entry_offset, exact_log2(DataLayout::cell_size), entry_offset); + + Label loop; + bind(loop); + + // load offset on the stack from the slot for this parameter + ld_ptr(mdp, entry_offset, tmp3); + sll(tmp3,Interpreter::logStackElementSize, tmp3); + neg(tmp3); + // read the parameter from the local area + ld_ptr(Llocals, tmp3, tmp3); + + // make entry_offset now point to the type field for this parameter + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + assert(type_base > off_base, "unexpected"); + add(entry_offset, type_base - off_base, entry_offset); + + // profile the parameter + Address arg_type(mdp, entry_offset); + profile_obj_type(tmp3, arg_type, tmp4); + + // go to next parameter + sub(entry_offset, TypeStackSlotEntries::per_arg_count() * DataLayout::cell_size + (type_base - off_base), entry_offset); + cmp_and_br_short(entry_offset, off_base, greaterEqual, pt, loop); + + bind(profile_continue); + } +} + // add a InterpMonitorElem to stack (see frame_sparc.hpp) void InterpreterMacroAssembler::add_monitor_to_stack( bool stack_is_empty, diff -r d7773b29c65a -r add2caa66e7e src/cpu/sparc/vm/interp_masm_sparc.hpp --- a/src/cpu/sparc/vm/interp_masm_sparc.hpp Tue Jan 14 12:44:12 2014 +0100 +++ b/src/cpu/sparc/vm/interp_masm_sparc.hpp Tue Jan 14 14:51:47 2014 +0100 @@ -323,6 +323,11 @@ Register scratch2, Register scratch3); + void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); + void profile_arguments_type(Register callee, Register tmp1, Register tmp2, bool is_virtual); + void profile_return_type(Register ret, Register tmp1, Register tmp2); + void profile_parameters_type(Register tmp1, Register tmp2, Register tmp3, Register tmp4); + // Debugging void interp_verify_oop(Register reg, TosState state, const char * file, int line); // only if +VerifyOops && state == atos void verify_oop_or_return_address(Register reg, Register rtmp); // for astore diff -r d7773b29c65a -r add2caa66e7e src/cpu/sparc/vm/templateInterpreter_sparc.cpp --- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Tue Jan 14 12:44:12 2014 +0100 +++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp Tue Jan 14 14:51:47 2014 +0100 @@ -156,6 +156,10 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { address entry = __ pc(); + if (state == atos) { + __ profile_return_type(O0, G3_scratch, G1_scratch); + } + #if !defined(_LP64) && defined(COMPILER2) // All return values are where we want them, except for Longs. C2 returns // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1. @@ -1333,6 +1337,7 @@ __ movbool(true, G3_scratch); __ stbool(G3_scratch, do_not_unlock_if_synchronized); + __ profile_parameters_type(G1_scratch, G3_scratch, G4_scratch, Lscratch); // increment invocation counter and check for overflow // // Note: checking for negative value instead of overflow diff -r d7773b29c65a -r add2caa66e7e src/cpu/sparc/vm/templateTable_sparc.cpp --- a/src/cpu/sparc/vm/templateTable_sparc.cpp Tue Jan 14 12:44:12 2014 +0100 +++ b/src/cpu/sparc/vm/templateTable_sparc.cpp Tue Jan 14 14:51:47 2014 +0100 @@ -2942,12 +2942,12 @@ void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) { - Register Rtemp = G4_scratch; Register Rcall = Rindex; assert_different_registers(Rcall, G5_method, Gargs, Rret); // get target Method* & entry point __ lookup_virtual_method(Rrecv, Rindex, G5_method); + __ profile_arguments_type(G5_method, Rcall, Gargs, true); __ call_from_interpreter(Rcall, Gargs, Rret); } @@ -3022,6 +3022,7 @@ __ null_check(O0); __ profile_final_call(O4); + __ profile_arguments_type(G5_method, Rscratch, Gargs, true); // get return address AddressLiteral table(Interpreter::invoke_return_entry_table()); @@ -3051,6 +3052,7 @@ // do the call __ profile_call(O4); + __ profile_arguments_type(G5_method, Rscratch, Gargs, false); __ call_from_interpreter(Rscratch, Gargs, Rret); } @@ -3066,6 +3068,7 @@ // do the call __ profile_call(O4); + __ profile_arguments_type(G5_method, Rscratch, Gargs, false); __ call_from_interpreter(Rscratch, Gargs, Rret); } @@ -3091,6 +3094,7 @@ // do the call - the index (f2) contains the Method* assert_different_registers(G5_method, Gargs, Rcall); __ mov(Rindex, G5_method); + __ profile_arguments_type(G5_method, Rcall, Gargs, true); __ call_from_interpreter(Rcall, Gargs, Rret); __ bind(notFinal); @@ -3197,6 +3201,7 @@ Register Rcall = Rinterface; assert_different_registers(Rcall, G5_method, Gargs, Rret); + __ profile_arguments_type(G5_method, Rcall, Gargs, true); __ call_from_interpreter(Rcall, Gargs, Rret); } @@ -3226,6 +3231,7 @@ // do the call __ verify_oop(G4_mtype); __ profile_final_call(O4); // FIXME: profile the LambdaForm also + __ profile_arguments_type(G5_method, Rscratch, Gargs, true); __ call_from_interpreter(Rscratch, Gargs, Rret); } @@ -3262,6 +3268,7 @@ // do the call __ verify_oop(G4_callsite); + __ profile_arguments_type(G5_method, Rscratch, Gargs, false); __ call_from_interpreter(Rscratch, Gargs, Rret); } diff -r d7773b29c65a -r add2caa66e7e src/cpu/x86/vm/interp_masm_x86.cpp --- a/src/cpu/x86/vm/interp_masm_x86.cpp Tue Jan 14 12:44:12 2014 +0100 +++ b/src/cpu/x86/vm/interp_masm_x86.cpp Tue Jan 14 14:51:47 2014 +0100 @@ -127,7 +127,7 @@ if (MethodData::profile_return()) { // We're right after the type profile for the last - // argument. tmp is the number of cell left in the + // argument. tmp is the number of cells left in the // CallTypeData/VirtualCallTypeData to reach its end. Non null // if there's a return to profile. assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); @@ -198,7 +198,7 @@ // parameters. Collect profiling from last parameter down. // mdo start + parameters offset + array length - 1 addptr(mdp, tmp1); - movptr(tmp1, Address(mdp, in_bytes(ArrayData::array_len_offset()))); + movptr(tmp1, Address(mdp, ArrayData::array_len_offset())); decrement(tmp1, TypeStackSlotEntries::per_arg_count()); Label loop; diff -r d7773b29c65a -r add2caa66e7e src/share/vm/c1/c1_LIRGenerator.cpp --- a/src/share/vm/c1/c1_LIRGenerator.cpp Tue Jan 14 12:44:12 2014 +0100 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Tue Jan 14 14:51:47 2014 +0100 @@ -3288,7 +3288,10 @@ ciSignature* signature_at_call = NULL; x->method()->get_method_at_bci(bci, ignored_will_link, &signature_at_call); - ciKlass* exact = profile_type(md, 0, md->byte_offset_of_slot(data, ret->type_offset()), + // The offset within the MDO of the entry to update may be too large + // to be used in load/store instructions on some platforms. So have + // profile_type() compute the address of the profile in a register. + ciKlass* exact = profile_type(md, md->byte_offset_of_slot(data, ret->type_offset()), 0, ret->type(), x->ret(), mdp, !x->needs_null_check(), signature_at_call->return_type()->as_klass(), diff -r d7773b29c65a -r add2caa66e7e src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Tue Jan 14 12:44:12 2014 +0100 +++ b/src/share/vm/runtime/arguments.cpp Tue Jan 14 14:51:47 2014 +0100 @@ -3727,10 +3727,6 @@ // Doing the replace in parent maps helps speculation FLAG_SET_DEFAULT(ReplaceInParentMaps, true); } -#ifndef X86 - // Only on x86 for now - FLAG_SET_DEFAULT(TypeProfileLevel, 0); -#endif #endif if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {