# HG changeset patch # User jrose # Date 1236994762 25200 # Node ID c517646eef23c4974f5fd8a50e70210a04472736 # Parent c771b7f43bbf7b3c131a3a90f15a758ab6b8bf03 6813212: factor duplicated assembly code for general subclass check (for 6655638) Summary: Code in interp_masm, stubGenerator, c1_LIRAssembler, and AD files moved into MacroAssembler. Reviewed-by: kvn diff -r c771b7f43bbf -r c517646eef23 src/cpu/sparc/vm/assembler_sparc.cpp --- a/src/cpu/sparc/vm/assembler_sparc.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -2767,6 +2767,268 @@ } +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label& L_success) { + Label L_failure, L_pop_to_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, + temp_reg, temp2_reg, + &L_success, &L_failure, NULL); + Register sub_2 = sub_klass; + Register sup_2 = super_klass; + if (!sub_2->is_global()) sub_2 = L0; + if (!sup_2->is_global()) sup_2 = L1; + + save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2); + check_klass_subtype_slow_path(sub_2, sup_2, + L2, L3, L4, L5, + NULL, &L_pop_to_failure); + + // on success: + restore(); + ba(false, L_success); + delayed()->nop(); + + // on failure: + bind(L_pop_to_failure); + restore(); + bind(L_failure); +} + + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterConstant super_check_offset, + Register instanceof_hack) { + int sc_offset = (klassOopDesc::header_size() * HeapWordSize + + Klass::secondary_super_cache_offset_in_bytes()); + int sco_offset = (klassOopDesc::header_size() * HeapWordSize + + Klass::super_check_offset_offset_in_bytes()); + + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + bool need_slow_path = (must_load_sco || + super_check_offset.constant_or_zero() == sco_offset); + + assert_different_registers(sub_klass, super_klass, temp_reg); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp2_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1 || instanceof_hack != noreg || + (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), + "at most one NULL in the batch, usually"); + + // Support for the instanceof hack, which uses delay slots to + // set a destination register to zero or one. + bool do_bool_sets = (instanceof_hack != noreg); +#define BOOL_SET(bool_value) \ + if (do_bool_sets && bool_value >= 0) \ + set(bool_value, instanceof_hack) +#define DELAYED_BOOL_SET(bool_value) \ + if (do_bool_sets && bool_value >= 0) \ + delayed()->set(bool_value, instanceof_hack); \ + else delayed()->nop() + // Hacked ba(), which may only be used just before L_fallthrough. +#define FINAL_JUMP(label, bool_value) \ + if (&(label) == &L_fallthrough) { \ + BOOL_SET(bool_value); \ + } else { \ + ba((do_bool_sets && bool_value >= 0), label); \ + DELAYED_BOOL_SET(bool_value); \ + } + + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + cmp(super_klass, sub_klass); + brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success); + DELAYED_BOOL_SET(1); + + // Check the supertype display: + if (must_load_sco) { + // The super check offset is always positive... + lduw(super_klass, sco_offset, temp2_reg); + super_check_offset = RegisterConstant(temp2_reg); + } + ld_ptr(sub_klass, super_check_offset, temp_reg); + cmp(super_klass, temp_reg); + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) + // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + if (super_check_offset.is_register()) { + brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success); + delayed(); if (do_bool_sets) BOOL_SET(1); + // if !do_bool_sets, sneak the next cmp into the delay slot: + cmp(super_check_offset.as_register(), sc_offset); + + if (L_failure == &L_fallthrough) { + brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path); + delayed()->nop(); + BOOL_SET(0); // fallthrough on failure + } else { + brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure); + DELAYED_BOOL_SET(0); + FINAL_JUMP(*L_slow_path, -1); // -1 => vanilla delay slot + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success); + DELAYED_BOOL_SET(1); + } else { + brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path); + delayed()->nop(); + FINAL_JUMP(*L_success, 1); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success); + DELAYED_BOOL_SET(1); + BOOL_SET(0); + } else { + brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure); + DELAYED_BOOL_SET(0); + FINAL_JUMP(*L_success, 1); + } + } + + bind(L_fallthrough); + +#undef final_jump +#undef bool_set +#undef DELAYED_BOOL_SET +#undef final_jump +} + + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register count_temp, + Register scan_temp, + Register scratch_reg, + Register coop_reg, + Label* L_success, + Label* L_failure) { + assert_different_registers(sub_klass, super_klass, + count_temp, scan_temp, scratch_reg, coop_reg); + + Label L_fallthrough, L_loop; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = (klassOopDesc::header_size() * HeapWordSize + + Klass::secondary_supers_offset_in_bytes()); + int sc_offset = (klassOopDesc::header_size() * HeapWordSize + + Klass::secondary_super_cache_offset_in_bytes()); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + +#ifndef PRODUCT + int* pst_counter = &SharedRuntime::_partial_subtype_ctr; + inc_counter((address) pst_counter, count_temp, scan_temp); +#endif + + // We will consult the secondary-super array. + ld_ptr(sub_klass, ss_offset, scan_temp); + + // Compress superclass if necessary. + Register search_key = super_klass; + bool decode_super_klass = false; + if (UseCompressedOops) { + if (coop_reg != noreg) { + encode_heap_oop_not_null(super_klass, coop_reg); + search_key = coop_reg; + } else { + encode_heap_oop_not_null(super_klass); + decode_super_klass = true; // scarce temps! + } + // The superclass is never null; it would be a basic system error if a null + // pointer were to sneak in here. Note that we have already loaded the + // Klass::super_check_offset from the super_klass in the fast path, + // so if there is a null in that register, we are already in the afterlife. + } + + // Load the array length. (Positive movl does right thing on LP64.) + lduw(scan_temp, arrayOopDesc::length_offset_in_bytes(), count_temp); + + // Check for empty secondary super list + tst(count_temp); + + // Top of search loop + bind(L_loop); + br(Assembler::equal, false, Assembler::pn, *L_failure); + delayed()->add(scan_temp, heapOopSize, scan_temp); + assert(heapOopSize != 0, "heapOopSize should be initialized"); + + // Skip the array header in all array accesses. + int elem_offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT); + elem_offset -= heapOopSize; // the scan pointer was pre-incremented also + + // Load next super to check + if (UseCompressedOops) { + // Don't use load_heap_oop; we don't want to decode the element. + lduw( scan_temp, elem_offset, scratch_reg ); + } else { + ld_ptr( scan_temp, elem_offset, scratch_reg ); + } + + // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list + cmp(scratch_reg, search_key); + + // A miss means we are NOT a subtype and need to keep looping + brx(Assembler::notEqual, false, Assembler::pn, L_loop); + delayed()->deccc(count_temp); // decrement trip counter in delay slot + + // Falling out the bottom means we found a hit; we ARE a subtype + if (decode_super_klass) decode_heap_oop(super_klass); + + // Success. Cache the super we found and proceed in triumph. + st_ptr(super_klass, sub_klass, sc_offset); + + if (L_success != &L_fallthrough) { + ba(false, *L_success); + delayed()->nop(); + } + + bind(L_fallthrough); +} + + + + void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg, Label& done, Label* slow_case, diff -r c771b7f43bbf -r c517646eef23 src/cpu/sparc/vm/assembler_sparc.hpp --- a/src/cpu/sparc/vm/assembler_sparc.hpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Fri Mar 13 18:39:22 2009 -0700 @@ -2327,6 +2327,46 @@ Register temp_reg, Register temp2_reg, Label& no_such_interface); + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg and temp2_reg. + // If super_check_offset is not -1, temp2_reg is not used and can be noreg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterConstant super_check_offset = RegisterConstant(-1), + Register instanceof_hack = noreg); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg can be noreg, if no temps are available. + // It can also be sub_klass or super_klass, meaning it's OK to kill that one. + // Updates the sub's secondary super cache as necessary. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Register temp3_reg, + Register temp4_reg, + Label* L_success, + Label* L_failure); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label& L_success); + + // Stack overflow checking // Note: this clobbers G3_scratch diff -r c771b7f43bbf -r c517646eef23 src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -2393,23 +2393,11 @@ // get instance klass load(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc), k_RInfo, T_OBJECT, NULL); - // get super_check_offset - load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL); - // See if we get an immediate positive hit - __ ld_ptr(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr->as_register()); - __ cmp(k_RInfo, O7); - __ br(Assembler::equal, false, Assembler::pn, done); - __ delayed()->nop(); - // check for immediate negative hit - __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()); - __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry()); - __ delayed()->nop(); - // check for self - __ cmp(klass_RInfo, k_RInfo); - __ br(Assembler::equal, false, Assembler::pn, done); - __ delayed()->nop(); - - // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup"); + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, &done, stub->entry(), NULL); + + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup"); __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); __ delayed()->nop(); __ cmp(G3, 0); @@ -2493,58 +2481,30 @@ __ delayed()->nop(); __ bind(done); } else { + bool need_slow_path = true; if (k->is_loaded()) { - load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL); - - if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) { - // See if we get an immediate positive hit - __ cmp(Rtmp1, k_RInfo ); - __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry()); - __ delayed()->nop(); - } else { - // See if we get an immediate positive hit - assert_different_registers(Rtmp1, k_RInfo, klass_RInfo); - __ cmp(Rtmp1, k_RInfo ); - __ br(Assembler::equal, false, Assembler::pn, done); - // check for self - __ delayed()->cmp(klass_RInfo, k_RInfo); - __ br(Assembler::equal, false, Assembler::pn, done); - __ delayed()->nop(); - - // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup"); - __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); - __ delayed()->nop(); - __ cmp(G3, 0); - __ br(Assembler::equal, false, Assembler::pn, *stub->entry()); - __ delayed()->nop(); - } - __ bind(done); + if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()) + need_slow_path = false; + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg, + (need_slow_path ? &done : NULL), + stub->entry(), NULL, + RegisterConstant(k->super_check_offset())); } else { - assert_different_registers(Rtmp1, klass_RInfo, k_RInfo); - - load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL); - // See if we get an immediate positive hit - load(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr, T_OBJECT); - __ cmp(k_RInfo, O7); - __ br(Assembler::equal, false, Assembler::pn, done); - __ delayed()->nop(); - // check for immediate negative hit - __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()); - __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry()); - // check for self - __ delayed()->cmp(klass_RInfo, k_RInfo); - __ br(Assembler::equal, false, Assembler::pn, done); - __ delayed()->nop(); - - // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup"); + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, + &done, stub->entry(), NULL); + } + if (need_slow_path) { + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup"); __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); __ delayed()->nop(); __ cmp(G3, 0); __ br(Assembler::equal, false, Assembler::pn, *stub->entry()); __ delayed()->nop(); - __ bind(done); } - + __ bind(done); } __ mov(obj, dst); } else if (code == lir_instanceof) { @@ -2582,58 +2542,32 @@ __ set(0, dst); __ bind(done); } else { + bool need_slow_path = true; if (k->is_loaded()) { - assert_different_registers(Rtmp1, klass_RInfo, k_RInfo); - load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL); - - if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) { - // See if we get an immediate positive hit - __ cmp(Rtmp1, k_RInfo ); - __ br(Assembler::equal, true, Assembler::pt, done); - __ delayed()->set(1, dst); - __ set(0, dst); - __ bind(done); - } else { - // See if we get an immediate positive hit - assert_different_registers(Rtmp1, k_RInfo, klass_RInfo); - __ cmp(Rtmp1, k_RInfo ); - __ br(Assembler::equal, true, Assembler::pt, done); - __ delayed()->set(1, dst); - // check for self - __ cmp(klass_RInfo, k_RInfo); - __ br(Assembler::equal, true, Assembler::pt, done); - __ delayed()->set(1, dst); - - // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup"); - __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); - __ delayed()->nop(); - __ mov(G3, dst); - __ bind(done); - } + if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()) + need_slow_path = false; + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, noreg, + (need_slow_path ? &done : NULL), + (need_slow_path ? &done : NULL), NULL, + RegisterConstant(k->super_check_offset()), + dst); } else { assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers"); - - load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), dst, T_INT, NULL); - // See if we get an immediate positive hit - load(klass_RInfo, dst, FrameMap::O7_oop_opr, T_OBJECT); - __ cmp(k_RInfo, O7); - __ br(Assembler::equal, true, Assembler::pt, done); - __ delayed()->set(1, dst); - // check for immediate negative hit - __ cmp(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()); - __ br(Assembler::notEqual, true, Assembler::pt, done); - __ delayed()->set(0, dst); - // check for self - __ cmp(klass_RInfo, k_RInfo); - __ br(Assembler::equal, true, Assembler::pt, done); - __ delayed()->set(1, dst); - - // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup"); + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, dst, + &done, &done, NULL, + RegisterConstant(-1), + dst); + } + if (need_slow_path) { + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup"); __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); __ delayed()->nop(); __ mov(G3, dst); - __ bind(done); } + __ bind(done); } } else { ShouldNotReachHere(); diff -r c771b7f43bbf -r c517646eef23 src/cpu/sparc/vm/c1_Runtime1_sparc.cpp --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -714,38 +714,19 @@ // sub : G3, argument, destroyed // super: G1, argument, not changed // raddr: O7, blown by call - Label loop, miss; + Label miss; __ save_frame(0); // Blow no registers! - __ ld_ptr( G3, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 ); - __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0); // length in l0 - __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1); // ptr into array - __ clr(L4); // Index - // Load a little early; will load 1 off the end of the array. - // Ok for now; revisit if we have other uses of this routine. - __ ld_ptr(L1,0,L2); // Will load a little early - - // The scan loop - __ bind(loop); - __ add(L1,wordSize,L1); // Bump by OOP size - __ cmp(L4,L0); - __ br(Assembler::equal,false,Assembler::pn,miss); - __ delayed()->inc(L4); // Bump index - __ subcc(L2,G1,L3); // Check for match; zero in L3 for a hit - __ brx( Assembler::notEqual, false, Assembler::pt, loop ); - __ delayed()->ld_ptr(L1,0,L2); // Will load a little early - - // Got a hit; report success; set cache - __ st_ptr( G1, G3, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() ); + __ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss); __ mov(1, G3); - __ ret(); // Result in G5 is ok; flags set + __ ret(); // Result in G5 is 'true' __ delayed()->restore(); // free copy or add can go here __ bind(miss); __ mov(0, G3); - __ ret(); // Result in G5 is ok; flags set + __ ret(); // Result in G5 is 'false' __ delayed()->restore(); // free copy or add can go here } diff -r c771b7f43bbf -r c517646eef23 src/cpu/sparc/vm/interp_masm_sparc.cpp --- a/src/cpu/sparc/vm/interp_masm_sparc.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -866,65 +866,18 @@ Register Rtmp2, Register Rtmp3, Label &ok_is_subtype ) { - Label not_subtype, loop; + Label not_subtype; // Profile the not-null value's klass. profile_typecheck(Rsub_klass, Rtmp1); - // Load the super-klass's check offset into Rtmp1 - ld( Rsuper_klass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1 ); - // Load from the sub-klass's super-class display list, or a 1-word cache of - // the secondary superclass list, or a failing value with a sentinel offset - // if the super-klass is an interface or exceptionally deep in the Java - // hierarchy and we have to scan the secondary superclass list the hard way. - ld_ptr( Rsub_klass, Rtmp1, Rtmp2 ); - // See if we get an immediate positive hit - cmp( Rtmp2, Rsuper_klass ); - brx( Assembler::equal, false, Assembler::pt, ok_is_subtype ); - // In the delay slot, check for immediate negative hit - delayed()->cmp( Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() ); - br( Assembler::notEqual, false, Assembler::pt, not_subtype ); - // In the delay slot, check for self - delayed()->cmp( Rsub_klass, Rsuper_klass ); - brx( Assembler::equal, false, Assembler::pt, ok_is_subtype ); - - // Now do a linear scan of the secondary super-klass chain. - delayed()->ld_ptr( Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), Rtmp2 ); - - // compress superclass - if (UseCompressedOops) encode_heap_oop(Rsuper_klass); - - // Rtmp2 holds the objArrayOop of secondary supers. - ld( Rtmp2, arrayOopDesc::length_offset_in_bytes(), Rtmp1 );// Load the array length - // Check for empty secondary super list - tst(Rtmp1); - - // Top of search loop - bind( loop ); - br( Assembler::equal, false, Assembler::pn, not_subtype ); - delayed()->nop(); - - // load next super to check - if (UseCompressedOops) { - lduw( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3); - // Bump array pointer forward one oop - add( Rtmp2, 4, Rtmp2 ); - } else { - ld_ptr( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3); - // Bump array pointer forward one oop - add( Rtmp2, wordSize, Rtmp2); - } - // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list - cmp( Rtmp3, Rsuper_klass ); - // A miss means we are NOT a subtype and need to keep looping - brx( Assembler::notEqual, false, Assembler::pt, loop ); - delayed()->deccc( Rtmp1 ); // dec trip counter in delay slot - // Falling out the bottom means we found a hit; we ARE a subtype - if (UseCompressedOops) decode_heap_oop(Rsuper_klass); - br( Assembler::always, false, Assembler::pt, ok_is_subtype ); - // Update the cache - delayed()->st_ptr( Rsuper_klass, Rsub_klass, - sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() ); + check_klass_subtype_fast_path(Rsub_klass, Rsuper_klass, + Rtmp1, Rtmp2, + &ok_is_subtype, ¬_subtype, NULL); + + check_klass_subtype_slow_path(Rsub_klass, Rsuper_klass, + Rtmp1, Rtmp2, Rtmp3, /*hack:*/ noreg, + &ok_is_subtype, NULL); bind(not_subtype); profile_typecheck_failed(Rtmp1); diff -r c771b7f43bbf -r c517646eef23 src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -900,19 +900,7 @@ __ align(CodeEntryAlignment); StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); address start = __ pc(); - Label loop, miss; - - // Compare super with sub directly, since super is not in its own SSA. - // The compiler used to emit this test, but we fold it in here, - // to increase overall code density, with no real loss of speed. - { Label L; - __ cmp(O1, O2); - __ brx(Assembler::notEqual, false, Assembler::pt, L); - __ delayed()->nop(); - __ retl(); - __ delayed()->addcc(G0,0,O0); // set Z flags, zero result - __ bind(L); - } + Label miss; #if defined(COMPILER2) && !defined(_LP64) // Do not use a 'save' because it blows the 64-bit O registers. @@ -936,56 +924,12 @@ Register L2_super = L2; Register L3_index = L3; -#ifdef _LP64 - Register L4_ooptmp = L4; - - if (UseCompressedOops) { - // this must be under UseCompressedOops check, as we rely upon fact - // that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save - // on stack, see several lines above - __ encode_heap_oop(Rsuper, L4_ooptmp); - } -#endif - - inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1); - - __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 ); - __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len); - __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr); - __ clr(L3_index); // zero index - // Load a little early; will load 1 off the end of the array. - // Ok for now; revisit if we have other uses of this routine. - if (UseCompressedOops) { - __ lduw(L1_ary_ptr,0,L2_super);// Will load a little early - } else { - __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early - } - - assert(heapOopSize != 0, "heapOopSize should be initialized"); - // The scan loop - __ BIND(loop); - __ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size - __ cmp(L3_index,L0_ary_len); - __ br(Assembler::equal,false,Assembler::pn,miss); - __ delayed()->inc(L3_index); // Bump index - - if (UseCompressedOops) { -#ifdef _LP64 - __ subcc(L2_super,L4_ooptmp,Rret); // Check for match; zero in Rret for a hit - __ br( Assembler::notEqual, false, Assembler::pt, loop ); - __ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early -#else - ShouldNotReachHere(); -#endif - } else { - __ subcc(L2_super,Rsuper,Rret); // Check for match; zero in Rret for a hit - __ brx( Assembler::notEqual, false, Assembler::pt, loop ); - __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early - } - - // Got a hit; report success; set cache. Cache load doesn't - // happen here; for speed it is directly emitted by the compiler. - __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() ); + __ check_klass_subtype_slow_path(Rsub, Rsuper, + L0, L1, L2, L3, + NULL, &miss); + + // Match falls through here. + __ addcc(G0,0,Rret); // set Z flags, Z result #if defined(COMPILER2) && !defined(_LP64) __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0); @@ -999,7 +943,6 @@ __ delayed()->restore(); #endif - // Hit or miss falls through here __ BIND(miss); __ addcc(G0,1,Rret); // set NZ flags, NZ result @@ -2330,51 +2273,31 @@ Register super_check_offset, Register super_klass, Register temp, - Label& L_success, - Register deccc_hack = noreg) { + Label& L_success) { assert_different_registers(sub_klass, super_check_offset, super_klass, temp); BLOCK_COMMENT("type_check:"); - Label L_miss; + Label L_miss, L_pop_to_miss; assert_clean_int(super_check_offset, temp); - // maybe decrement caller's trip count: -#define DELAY_SLOT delayed(); \ - { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); } - - // if the pointers are equal, we are done (e.g., String[] elements) - __ cmp(sub_klass, super_klass); - __ brx(Assembler::equal, true, Assembler::pt, L_success); - __ DELAY_SLOT; - - // check the supertype display: - __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type - __ cmp(super_klass, temp); // test the super type - __ brx(Assembler::equal, true, Assembler::pt, L_success); - __ DELAY_SLOT; - - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); - __ cmp(super_klass, sc_offset); - __ brx(Assembler::notEqual, true, Assembler::pt, L_miss); - __ delayed()->nop(); - + __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg, + &L_success, &L_miss, NULL, + super_check_offset); + + BLOCK_COMMENT("type_check_slow_path:"); __ save_frame(0); - __ mov(sub_klass->after_save(), O1); - // mov(super_klass->after_save(), O2); //fill delay slot - assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation"); - __ call(StubRoutines::Sparc::_partial_subtype_check); - __ delayed()->mov(super_klass->after_save(), O2); + __ check_klass_subtype_slow_path(sub_klass->after_save(), + super_klass->after_save(), + L0, L1, L2, L4, + NULL, &L_pop_to_miss); + __ ba(false, L_success); + __ delayed()->restore(); + + __ bind(L_pop_to_miss); __ restore(); - // Upon return, the condition codes are already set. - __ brx(Assembler::equal, true, Assembler::pt, L_success); - __ DELAY_SLOT; - -#undef DELAY_SLOT - // Fall through on failure! __ BIND(L_miss); } @@ -2411,7 +2334,7 @@ gen_write_ref_array_pre_barrier(O1, O2); #ifdef ASSERT - // We sometimes save a frame (see partial_subtype_check below). + // We sometimes save a frame (see generate_type_check below). // If this will cause trouble, let's fail now instead of later. __ save_frame(0); __ restore(); @@ -2455,41 +2378,39 @@ // G3, G4, G5 --- current oop, oop.klass, oop.klass.super __ align(16); - __ bind(store_element); - // deccc(G1_remain); // decrement the count (hoisted) + __ BIND(store_element); + __ deccc(G1_remain); // decrement the count __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop __ inc(O5_offset, heapOopSize); // step to next offset __ brx(Assembler::zero, true, Assembler::pt, do_card_marks); __ delayed()->set(0, O0); // return -1 on success // ======== loop entry is here ======== - __ bind(load_element); + __ BIND(load_element); __ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop __ br_null(G3_oop, true, Assembler::pt, store_element); - __ delayed()->deccc(G1_remain); // decrement the count + __ delayed()->nop(); __ load_klass(G3_oop, G4_klass); // query the object klass generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super, // branch to this on success: - store_element, - // decrement this on success: - G1_remain); + store_element); // ======== end loop ======== // It was a real error; we must depend on the caller to finish the job. // Register G1 has number of *remaining* oops, O2 number of *total* oops. // Emit GC store barriers for the oops we have copied (O2 minus G1), // and report their number to the caller. - __ bind(fail); + __ BIND(fail); __ subcc(O2_count, G1_remain, O2_count); __ brx(Assembler::zero, false, Assembler::pt, done); __ delayed()->not1(O2_count, O0); // report (-1^K) to caller - __ bind(do_card_marks); + __ BIND(do_card_marks); gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2] - __ bind(done); + __ BIND(done); inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4); __ retl(); __ delayed()->nop(); // return value in 00 diff -r c771b7f43bbf -r c517646eef23 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/x86/vm/assembler_x86.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -7286,6 +7286,225 @@ } +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success) { + Label L_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + bind(L_failure); +} + + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterConstant super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg); + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + int sc_offset = (klassOopDesc::header_size() * HeapWordSize + + Klass::secondary_super_cache_offset_in_bytes()); + int sco_offset = (klassOopDesc::header_size() * HeapWordSize + + Klass::super_check_offset_offset_in_bytes()); + Address super_check_offset_addr(super_klass, sco_offset); + + // Hacked jcc, which "knows" that L_fallthrough, at least, is in + // range of a jccb. If this routine grows larger, reconsider at + // least some of these. +#define local_jcc(assembler_cond, label) \ + if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ + else jcc( assembler_cond, label) /*omit semi*/ + + // Hacked jmp, which may only be used just before L_fallthrough. +#define final_jmp(label) \ + if (&(label) == &L_fallthrough) { /*do nothing*/ } \ + else jmp(label) /*omit semi*/ + + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + cmpptr(sub_klass, super_klass); + local_jcc(Assembler::equal, *L_success); + + // Check the supertype display: + if (must_load_sco) { + // Positive movl does right thing on LP64. + movl(temp_reg, super_check_offset_addr); + super_check_offset = RegisterConstant(temp_reg); + } + Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); + cmpptr(super_klass, super_check_addr); // load displayed supertype + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) + // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + if (super_check_offset.is_register()) { + local_jcc(Assembler::equal, *L_success); + cmpl(super_check_offset.as_register(), sc_offset); + if (L_failure == &L_fallthrough) { + local_jcc(Assembler::equal, *L_slow_path); + } else { + local_jcc(Assembler::notEqual, *L_failure); + final_jmp(*L_slow_path); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + local_jcc(Assembler::equal, *L_success); + } else { + local_jcc(Assembler::notEqual, *L_slow_path); + final_jmp(*L_success); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + local_jcc(Assembler::equal, *L_success); + } else { + local_jcc(Assembler::notEqual, *L_failure); + final_jmp(*L_success); + } + } + + bind(L_fallthrough); + +#undef local_jcc +#undef final_jmp +} + + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + assert_different_registers(sub_klass, super_klass, temp_reg); + if (temp2_reg != noreg) + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = (klassOopDesc::header_size() * HeapWordSize + + Klass::secondary_supers_offset_in_bytes()); + int sc_offset = (klassOopDesc::header_size() * HeapWordSize + + Klass::secondary_super_cache_offset_in_bytes()); + Address secondary_supers_addr(sub_klass, ss_offset); + Address super_cache_addr( sub_klass, sc_offset); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + // The repne_scan instruction uses fixed registers, which we must spill. + // Don't worry too much about pre-existing connections with the input regs. + + assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) + assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) + + // Get super_klass value into rax (even if it was in rdi or rcx). + bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; + if (super_klass != rax || UseCompressedOops) { + if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } + mov(rax, super_klass); + } + if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } + if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } + +#ifndef PRODUCT + int* pst_counter = &SharedRuntime::_partial_subtype_ctr; + ExternalAddress pst_counter_addr((address) pst_counter); + NOT_LP64( incrementl(pst_counter_addr) ); + LP64_ONLY( lea(rcx, pst_counter_addr) ); + LP64_ONLY( incrementl(Address(rcx, 0)) ); +#endif //PRODUCT + + // We will consult the secondary-super array. + movptr(rdi, secondary_supers_addr); + // Load the array length. (Positive movl does right thing on LP64.) + movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); + // Skip to start of data. + addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + + // Scan RCX words at [RDI] for an occurrence of RAX. + // Set NZ/Z based on last compare. +#ifdef _LP64 + // This part is tricky, as values in supers array could be 32 or 64 bit wide + // and we store values in objArrays always encoded, thus we need to encode + // the value of rax before repne. Note that rax is dead after the repne. + if (UseCompressedOops) { + encode_heap_oop_not_null(rax); + // The superclass is never null; it would be a basic system error if a null + // pointer were to sneak in here. Note that we have already loaded the + // Klass::super_check_offset from the super_klass in the fast path, + // so if there is a null in that register, we are already in the afterlife. + repne_scanl(); + } else +#endif // _LP64 + repne_scan(); + + // Unspill the temp. registers: + if (pushed_rdi) pop(rdi); + if (pushed_rcx) pop(rcx); + if (pushed_rax) pop(rax); + + if (set_cond_codes) { + // Special hack for the AD files: rdi is guaranteed non-zero. + assert(!pushed_rdi, "rdi must be left non-NULL"); + // Also, the condition codes are properly set Z/NZ on succeed/failure. + } + + if (L_failure == &L_fallthrough) + jccb(Assembler::notEqual, *L_failure); + else jcc(Assembler::notEqual, *L_failure); + + // Success. Cache the super we found and proceed in triumph. + movptr(super_cache_addr, super_klass); + + if (L_success != &L_fallthrough) { + jmp(*L_success); + } + +#undef IS_A_TEMP + + bind(L_fallthrough); +} + + void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { ucomisd(dst, as_Address(src)); } diff -r c771b7f43bbf -r c517646eef23 src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/x86/vm/assembler_x86.hpp Fri Mar 13 18:39:22 2009 -0700 @@ -1807,6 +1807,40 @@ Register scan_temp, Label& no_such_interface); + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterConstant super_check_offset = RegisterConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg and temp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes, condition codes will be Z on success, NZ on failure. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success); + //---- void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0 diff -r c771b7f43bbf -r c517646eef23 src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -1598,18 +1598,9 @@ // get instance klass __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); - // get super_check_offset - __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes())); - // See if we get an immediate positive hit - __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1)); - __ jcc(Assembler::equal, done); - // check for immediate negative hit - __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()); - __ jcc(Assembler::notEqual, *stub->entry()); - // check for self - __ cmpptr(klass_RInfo, k_RInfo); - __ jcc(Assembler::equal, done); - + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): __ push(klass_RInfo); __ push(k_RInfo); __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); @@ -1735,17 +1726,9 @@ } __ bind(done); } else { - __ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes())); - // See if we get an immediate positive hit - __ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1)); - __ jcc(Assembler::equal, done); - // check for immediate negative hit - __ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()); - __ jcc(Assembler::notEqual, *stub->entry()); - // check for self - __ cmpptr(klass_RInfo, k_RInfo); - __ jcc(Assembler::equal, done); - + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): __ push(klass_RInfo); __ push(k_RInfo); __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); @@ -1821,23 +1804,15 @@ __ pop(dst); __ jmp(done); } - } else { -#else - { // YUCK + } + else // next block is unconditional if LP64: #endif // LP64 + { assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers"); - __ movl(dst, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes())); - // See if we get an immediate positive hit - __ cmpptr(k_RInfo, Address(klass_RInfo, dst, Address::times_1)); - __ jcc(Assembler::equal, one); - // check for immediate negative hit - __ cmpl(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()); - __ jcc(Assembler::notEqual, zero); - // check for self - __ cmpptr(klass_RInfo, k_RInfo); - __ jcc(Assembler::equal, one); - + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, dst, &one, &zero, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): __ push(klass_RInfo); __ push(k_RInfo); __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); diff -r c771b7f43bbf -r c517646eef23 src/cpu/x86/vm/c1_Runtime1_x86.cpp --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -1354,6 +1354,13 @@ case slow_subtype_check_id: { + // Typical calling sequence: + // __ push(klass_RInfo); // object klass or other subclass + // __ push(sup_k_RInfo); // array element klass or other superclass + // __ call(slow_subtype_check); + // Note that the subclass is pushed first, and is therefore deepest. + // Previous versions of this code reversed the names 'sub' and 'super'. + // This was operationally harmless but made the code unreadable. enum layout { rax_off, SLOT2(raxH_off) rcx_off, SLOT2(rcxH_off) @@ -1361,9 +1368,10 @@ rdi_off, SLOT2(rdiH_off) // saved_rbp_off, SLOT2(saved_rbpH_off) return_off, SLOT2(returnH_off) - sub_off, SLOT2(subH_off) - super_off, SLOT2(superH_off) - framesize + sup_k_off, SLOT2(sup_kH_off) + klass_off, SLOT2(superH_off) + framesize, + result_off = klass_off // deepest argument is also the return value }; __ set_info("slow_subtype_check", dont_gc_arguments); @@ -1373,19 +1381,14 @@ __ push(rax); // This is called by pushing args and not with C abi - __ movptr(rsi, Address(rsp, (super_off) * VMRegImpl::stack_slot_size)); // super - __ movptr(rax, Address(rsp, (sub_off ) * VMRegImpl::stack_slot_size)); // sub - - __ movptr(rdi,Address(rsi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())); - // since size is postive movl does right thing on 64bit - __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); - __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + __ movptr(rsi, Address(rsp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass + __ movptr(rax, Address(rsp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass Label miss; - __ repne_scan(); - __ jcc(Assembler::notEqual, miss); - __ movptr(Address(rsi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax); - __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), 1); // result + __ check_klass_subtype_slow_path(rsi, rax, rcx, rdi, NULL, &miss); + + // fallthrough on success: + __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), 1); // result __ pop(rax); __ pop(rcx); __ pop(rsi); @@ -1393,7 +1396,7 @@ __ ret(0); __ bind(miss); - __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result + __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result __ pop(rax); __ pop(rcx); __ pop(rsi); diff -r c771b7f43bbf -r c517646eef23 src/cpu/x86/vm/interp_masm_x86_32.cpp --- a/src/cpu/x86/vm/interp_masm_x86_32.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -219,47 +219,16 @@ // Resets EDI to locals. Register sub_klass cannot be any of the above. void InterpreterMacroAssembler::gen_subtype_check( Register Rsub_klass, Label &ok_is_subtype ) { assert( Rsub_klass != rax, "rax, holds superklass" ); - assert( Rsub_klass != rcx, "rcx holds 2ndary super array length" ); - assert( Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr" ); - Label not_subtype, loop; + assert( Rsub_klass != rcx, "used as a temp" ); + assert( Rsub_klass != rdi, "used as a temp, restored from locals" ); // Profile the not-null value's klass. - profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi - - // Load the super-klass's check offset into ECX - movl( rcx, Address(rax, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() ) ); - // Load from the sub-klass's super-class display list, or a 1-word cache of - // the secondary superclass list, or a failing value with a sentinel offset - // if the super-klass is an interface or exceptionally deep in the Java - // hierarchy and we have to scan the secondary superclass list the hard way. - // See if we get an immediate positive hit - cmpptr( rax, Address(Rsub_klass,rcx,Address::times_1) ); - jcc( Assembler::equal,ok_is_subtype ); + profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi - // Check for immediate negative hit - cmpl( rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() ); - jcc( Assembler::notEqual, not_subtype ); - // Check for self - cmpptr( Rsub_klass, rax ); - jcc( Assembler::equal, ok_is_subtype ); + // Do the check. + check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx - // Now do a linear scan of the secondary super-klass chain. - movptr( rdi, Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()) ); - // EDI holds the objArrayOop of secondary supers. - movl( rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));// Load the array length - // Skip to start of data; also clear Z flag incase ECX is zero - addptr( rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT) ); - // Scan ECX words at [EDI] for occurance of EAX - // Set NZ/Z based on last compare - repne_scan(); - restore_locals(); // Restore EDI; Must not blow flags - // Not equal? - jcc( Assembler::notEqual, not_subtype ); - // Must be equal but missed in cache. Update cache. - movptr( Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax ); - jmp( ok_is_subtype ); - - bind(not_subtype); + // Profile the failure of the check. profile_typecheck_failed(rcx); // blows rcx } diff -r c771b7f43bbf -r c517646eef23 src/cpu/x86/vm/interp_masm_x86_64.cpp --- a/src/cpu/x86/vm/interp_masm_x86_64.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -232,65 +232,13 @@ assert(Rsub_klass != rcx, "rcx holds 2ndary super array length"); assert(Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr"); - Label not_subtype, not_subtype_pop, loop; - // Profile the not-null value's klass. - profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi - - // Load the super-klass's check offset into rcx - movl(rcx, Address(rax, sizeof(oopDesc) + - Klass::super_check_offset_offset_in_bytes())); - // Load from the sub-klass's super-class display list, or a 1-word - // cache of the secondary superclass list, or a failing value with a - // sentinel offset if the super-klass is an interface or - // exceptionally deep in the Java hierarchy and we have to scan the - // secondary superclass list the hard way. See if we get an - // immediate positive hit - cmpptr(rax, Address(Rsub_klass, rcx, Address::times_1)); - jcc(Assembler::equal,ok_is_subtype); - - // Check for immediate negative hit - cmpl(rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()); - jcc( Assembler::notEqual, not_subtype ); - // Check for self - cmpptr(Rsub_klass, rax); - jcc(Assembler::equal, ok_is_subtype); + profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi - // Now do a linear scan of the secondary super-klass chain. - movptr(rdi, Address(Rsub_klass, sizeof(oopDesc) + - Klass::secondary_supers_offset_in_bytes())); - // rdi holds the objArrayOop of secondary supers. - // Load the array length - movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); - // Skip to start of data; also clear Z flag incase rcx is zero - addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - // Scan rcx words at [rdi] for occurance of rax - // Set NZ/Z based on last compare + // Do the check. + check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx - // this part is kind tricky, as values in supers array could be 32 or 64 bit wide - // and we store values in objArrays always encoded, thus we need to encode value - // before repne - if (UseCompressedOops) { - push(rax); - encode_heap_oop(rax); - repne_scanl(); - // Not equal? - jcc(Assembler::notEqual, not_subtype_pop); - // restore heap oop here for movq - pop(rax); - } else { - repne_scan(); - jcc(Assembler::notEqual, not_subtype); - } - // Must be equal but missed in cache. Update cache. - movptr(Address(Rsub_klass, sizeof(oopDesc) + - Klass::secondary_super_cache_offset_in_bytes()), rax); - jmp(ok_is_subtype); - - bind(not_subtype_pop); - // restore heap oop here for miss - if (UseCompressedOops) pop(rax); - bind(not_subtype); + // Profile the failure of the check. profile_typecheck_failed(rcx); // blows rcx } diff -r c771b7f43bbf -r c517646eef23 src/cpu/x86/vm/stubGenerator_x86_32.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -1310,81 +1310,51 @@ Address& super_check_offset_addr, Address& super_klass_addr, Register temp, - Label* L_success_ptr, Label* L_failure_ptr) { + Label* L_success, Label* L_failure) { BLOCK_COMMENT("type_check:"); Label L_fallthrough; - bool fall_through_on_success = (L_success_ptr == NULL); - if (fall_through_on_success) { - L_success_ptr = &L_fallthrough; - } else { - L_failure_ptr = &L_fallthrough; - } - Label& L_success = *L_success_ptr; - Label& L_failure = *L_failure_ptr; +#define LOCAL_JCC(assembler_con, label_ptr) \ + if (label_ptr != NULL) __ jcc(assembler_con, *(label_ptr)); \ + else __ jcc(assembler_con, L_fallthrough) /*omit semi*/ + // The following is a strange variation of the fast path which requires + // one less register, because needed values are on the argument stack. + // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp, + // L_success, L_failure, NULL); assert_different_registers(sub_klass, temp); - // a couple of useful fields in sub_klass: - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_supers_offset_in_bytes()); int sc_offset = (klassOopDesc::header_size() * HeapWordSize + Klass::secondary_super_cache_offset_in_bytes()); - Address secondary_supers_addr(sub_klass, ss_offset); - Address super_cache_addr( sub_klass, sc_offset); // if the pointers are equal, we are done (e.g., String[] elements) __ cmpptr(sub_klass, super_klass_addr); - __ jcc(Assembler::equal, L_success); + LOCAL_JCC(Assembler::equal, L_success); // check the supertype display: __ movl2ptr(temp, super_check_offset_addr); Address super_check_addr(sub_klass, temp, Address::times_1, 0); __ movptr(temp, super_check_addr); // load displayed supertype __ cmpptr(temp, super_klass_addr); // test the super type - __ jcc(Assembler::equal, L_success); + LOCAL_JCC(Assembler::equal, L_success); // if it was a primary super, we can just fail immediately __ cmpl(super_check_offset_addr, sc_offset); - __ jcc(Assembler::notEqual, L_failure); - - // Now do a linear scan of the secondary super-klass chain. - // This code is rarely used, so simplicity is a virtue here. - inc_counter_np(SharedRuntime::_partial_subtype_ctr); - { - // The repne_scan instruction uses fixed registers, which we must spill. - // (We need a couple more temps in any case.) - __ push(rax); - __ push(rcx); - __ push(rdi); - assert_different_registers(sub_klass, rax, rcx, rdi); + LOCAL_JCC(Assembler::notEqual, L_failure); - __ movptr(rdi, secondary_supers_addr); - // Load the array length. - __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); - // Skip to start of data. - __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - // Scan rcx words at [edi] for occurance of rax, - // Set NZ/Z based on last compare - __ movptr(rax, super_klass_addr); - __ repne_scan(); + // The repne_scan instruction uses fixed registers, which will get spilled. + // We happen to know this works best when super_klass is in rax. + Register super_klass = temp; + __ movptr(super_klass, super_klass_addr); + __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, + L_success, L_failure); - // Unspill the temp. registers: - __ pop(rdi); - __ pop(rcx); - __ pop(rax); - } - __ jcc(Assembler::notEqual, L_failure); + __ bind(L_fallthrough); - // Success. Cache the super we found and proceed in triumph. - __ movptr(temp, super_klass_addr); // note: rax, is dead - __ movptr(super_cache_addr, temp); + if (L_success == NULL) { BLOCK_COMMENT("L_success:"); } + if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); } - if (!fall_through_on_success) - __ jmp(L_success); - - // Fall through on failure! - __ bind(L_fallthrough); +#undef LOCAL_JCC } // diff -r c771b7f43bbf -r c517646eef23 src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -2091,66 +2091,9 @@ Label L_miss; - // a couple of useful fields in sub_klass: - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_supers_offset_in_bytes()); - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); - Address secondary_supers_addr(sub_klass, ss_offset); - Address super_cache_addr( sub_klass, sc_offset); - - // if the pointers are equal, we are done (e.g., String[] elements) - __ cmpptr(super_klass, sub_klass); - __ jcc(Assembler::equal, L_success); - - // check the supertype display: - Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); - __ cmpptr(super_klass, super_check_addr); // test the super type - __ jcc(Assembler::equal, L_success); - - // if it was a primary super, we can just fail immediately - __ cmpl(super_check_offset, sc_offset); - __ jcc(Assembler::notEqual, L_miss); - - // Now do a linear scan of the secondary super-klass chain. - // The repne_scan instruction uses fixed registers, which we must spill. - // (We need a couple more temps in any case.) - // This code is rarely used, so simplicity is a virtue here. - inc_counter_np(SharedRuntime::_partial_subtype_ctr); - { - __ push(rax); - __ push(rcx); - __ push(rdi); - assert_different_registers(sub_klass, super_klass, rax, rcx, rdi); - - __ movptr(rdi, secondary_supers_addr); - // Load the array length. - __ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes())); - // Skip to start of data. - __ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - // Scan rcx words at [rdi] for occurance of rax - // Set NZ/Z based on last compare - __ movptr(rax, super_klass); - if (UseCompressedOops) { - // Compare against compressed form. Don't need to uncompress because - // looks like orig rax is restored in popq below. - __ encode_heap_oop(rax); - __ repne_scanl(); - } else { - __ repne_scan(); - } - - // Unspill the temp. registers: - __ pop(rdi); - __ pop(rcx); - __ pop(rax); - - __ jcc(Assembler::notEqual, L_miss); - } - - // Success. Cache the super we found and proceed in triumph. - __ movptr(super_cache_addr, super_klass); // note: rax is dead - __ jmp(L_success); + __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, + super_check_offset); + __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); // Fall through on failure! __ BIND(L_miss); diff -r c771b7f43bbf -r c517646eef23 src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/x86/vm/x86_32.ad Fri Mar 13 18:39:22 2009 -0700 @@ -1692,26 +1692,15 @@ Register Reax = as_Register(EAX_enc); // super class Register Recx = as_Register(ECX_enc); // killed Register Resi = as_Register(ESI_enc); // sub class - Label hit, miss; + Label miss; MacroAssembler _masm(&cbuf); - // Compare super with sub directly, since super is not in its own SSA. - // The compiler used to emit this test, but we fold it in here, - // to allow platform-specific tweaking on sparc. - __ cmpptr(Reax, Resi); - __ jcc(Assembler::equal, hit); -#ifndef PRODUCT - __ incrementl(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr)); -#endif //PRODUCT - __ movptr(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())); - __ movl(Recx,Address(Redi,arrayOopDesc::length_offset_in_bytes())); - __ addptr(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - __ repne_scan(); - __ jcc(Assembler::notEqual, miss); - __ movptr(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax); - __ bind(hit); - if( $primary ) - __ xorptr(Redi,Redi); + __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, + NULL, &miss, + /*set_cond_codes:*/ true); + if ($primary) { + __ xorptr(Redi, Redi); + } __ bind(miss); %} @@ -12566,15 +12555,12 @@ effect( KILL rcx, KILL cr ); ins_cost(1100); // slightly larger than the next version - format %{ "CMPL EAX,ESI\n\t" - "JEQ,s hit\n\t" - "MOV EDI,[$sub+Klass::secondary_supers]\n\t" + format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t" "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" "JNE,s miss\t\t# Missed: EDI not-zero\n\t" "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" - "hit:\n\t" "XOR $result,$result\t\t Hit: EDI zero\n\t" "miss:\t" %} @@ -12588,9 +12574,7 @@ effect( KILL rcx, KILL result ); ins_cost(1000); - format %{ "CMPL EAX,ESI\n\t" - "JEQ,s miss\t# Actually a hit; we are done.\n\t" - "MOV EDI,[$sub+Klass::secondary_supers]\n\t" + format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t" "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" diff -r c771b7f43bbf -r c517646eef23 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Fri Mar 13 11:35:17 2009 -0700 +++ b/src/cpu/x86/vm/x86_64.ad Fri Mar 13 18:39:22 2009 -0700 @@ -2575,43 +2575,13 @@ Register Rrax = as_Register(RAX_enc); // super class Register Rrcx = as_Register(RCX_enc); // killed Register Rrsi = as_Register(RSI_enc); // sub class - Label hit, miss, cmiss; + Label miss; + const bool set_cond_codes = true; MacroAssembler _masm(&cbuf); - // Compare super with sub directly, since super is not in its own SSA. - // The compiler used to emit this test, but we fold it in here, - // to allow platform-specific tweaking on sparc. - __ cmpptr(Rrax, Rrsi); - __ jcc(Assembler::equal, hit); -#ifndef PRODUCT - __ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr)); - __ incrementl(Address(Rrcx, 0)); -#endif //PRODUCT - __ movptr(Rrdi, Address(Rrsi, - sizeof(oopDesc) + - Klass::secondary_supers_offset_in_bytes())); - __ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes())); - __ addptr(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); - if (UseCompressedOops) { - __ push(Rrax); - __ encode_heap_oop(Rrax); - __ repne_scanl(); - __ pop(Rrax); - __ jccb(Assembler::notEqual, miss); - __ movptr(Address(Rrsi, - sizeof(oopDesc) + - Klass::secondary_super_cache_offset_in_bytes()), - Rrax); - __ jmp(hit); - } else { - __ repne_scan(); - __ jccb(Assembler::notEqual, miss); - __ movptr(Address(Rrsi, - sizeof(oopDesc) + - Klass::secondary_super_cache_offset_in_bytes()), - Rrax); - } - __ bind(hit); + __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi, + NULL, &miss, + /*set_cond_codes:*/ true); if ($primary) { __ xorptr(Rrdi, Rrdi); } @@ -12179,15 +12149,12 @@ effect(KILL rcx, KILL cr); ins_cost(1100); // slightly larger than the next version - format %{ "cmpq rax, rsi\n\t" - "jeq,s hit\n\t" - "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" + format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t" "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t" "jne,s miss\t\t# Missed: rdi not-zero\n\t" "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t" - "hit:\n\t" "xorq $result, $result\t\t Hit: rdi zero\n\t" "miss:\t" %} @@ -12205,9 +12172,7 @@ effect(KILL rcx, KILL result); ins_cost(1000); - format %{ "cmpq rax, rsi\n\t" - "jeq,s miss\t# Actually a hit; we are done.\n\t" - "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" + format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t" "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t" "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" "repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t" diff -r c771b7f43bbf -r c517646eef23 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Fri Mar 13 11:35:17 2009 -0700 +++ b/src/share/vm/opto/graphKit.cpp Fri Mar 13 18:39:22 2009 -0700 @@ -2277,7 +2277,7 @@ r_not_subtype->init_req(1, _gvn.transform( new (C, 1) IfTrueNode (iff2) ) ); set_control( _gvn.transform( new (C, 1) IfFalseNode(iff2) ) ); - // Check for self. Very rare to get here, but its taken 1/3 the time. + // Check for self. Very rare to get here, but it is taken 1/3 the time. // No performance impact (too rare) but allows sharing of secondary arrays // which has some footprint reduction. Node *cmp3 = _gvn.transform( new (C, 3) CmpPNode( subklass, superklass ) ); @@ -2286,11 +2286,27 @@ r_ok_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode ( iff3 ) ) ); set_control( _gvn.transform( new (C, 1) IfFalseNode( iff3 ) ) ); + // -- Roads not taken here: -- + // We could also have chosen to perform the self-check at the beginning + // of this code sequence, as the assembler does. This would not pay off + // the same way, since the optimizer, unlike the assembler, can perform + // static type analysis to fold away many successful self-checks. + // Non-foldable self checks work better here in second position, because + // the initial primary superclass check subsumes a self-check for most + // types. An exception would be a secondary type like array-of-interface, + // which does not appear in its own primary supertype display. + // Finally, we could have chosen to move the self-check into the + // PartialSubtypeCheckNode, and from there out-of-line in a platform + // dependent manner. But it is worthwhile to have the check here, + // where it can be perhaps be optimized. The cost in code space is + // small (register compare, branch). + // Now do a linear scan of the secondary super-klass array. Again, no real // performance impact (too rare) but it's gotta be done. - // (The stub also contains the self-check of subklass == superklass. // Since the code is rarely used, there is no penalty for moving it - // out of line, and it can only improve I-cache density.) + // out of line, and it can only improve I-cache density. + // The decision to inline or out-of-line this final check is platform + // dependent, and is found in the AD file definition of PartialSubtypeCheck. Node* psc = _gvn.transform( new (C, 3) PartialSubtypeCheckNode(control(), subklass, superklass) );