# HG changeset patch # User kvn # Date 1247778642 25200 # Node ID fc4be448891fd9cb97db8b97547f4b5d6efcb399 # Parent fd50a67f97d1d87857536b52bd62d6801659dd25 6851742: (EA) allocation elimination doesn't work with UseG1GC Summary: Fix eliminate_card_mark() to eliminate G1 pre/post barriers. Reviewed-by: never diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/escape.cpp Thu Jul 16 14:10:42 2009 -0700 @@ -578,11 +578,24 @@ if (phi_alias_idx == alias_idx) { return orig_phi; } - // have we already created a Phi for this alias index? + // Have we recently created a Phi for this alias index? PhiNode *result = get_map_phi(orig_phi->_idx); if (result != NULL && C->get_alias_index(result->adr_type()) == alias_idx) { return result; } + // Previous check may fail when the same wide memory Phi was split into Phis + // for different memory slices. Search all Phis for this region. + if (result != NULL) { + Node* region = orig_phi->in(0); + for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { + Node* phi = region->fast_out(i); + if (phi->is_Phi() && + C->get_alias_index(phi->as_Phi()->adr_type()) == alias_idx) { + assert(phi->_idx >= nodes_size(), "only new Phi per instance memory slice"); + return phi->as_Phi(); + } + } + } if ((int)C->unique() + 2*NodeLimitFudgeFactor > MaxNodeLimit) { if (C->do_escape_analysis() == true && !C->failing()) { // Retry compilation without escape analysis. 
@@ -595,6 +608,7 @@ orig_phi_worklist.append_if_missing(orig_phi); const TypePtr *atype = C->get_adr_type(alias_idx); result = PhiNode::make(orig_phi->in(0), NULL, Type::MEMORY, atype); + C->copy_node_notes_to(result, orig_phi); set_map_phi(orig_phi->_idx, result); igvn->set_type(result, result->bottom_type()); record_for_optimizer(result); diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/graphKit.cpp Thu Jul 16 14:10:42 2009 -0700 @@ -1373,11 +1373,12 @@ return st; } + void GraphKit::pre_barrier(Node* ctl, Node* obj, Node* adr, - uint adr_idx, - Node *val, + uint adr_idx, + Node* val, const TypeOopPtr* val_type, BasicType bt) { BarrierSet* bs = Universe::heap()->barrier_set(); @@ -1385,7 +1386,7 @@ switch (bs->kind()) { case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: - g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt); + g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt); break; case BarrierSet::CardTableModRef: @@ -1404,8 +1405,8 @@ Node* store, Node* obj, Node* adr, - uint adr_idx, - Node *val, + uint adr_idx, + Node* val, BasicType bt, bool use_precise) { BarrierSet* bs = Universe::heap()->barrier_set(); @@ -1413,7 +1414,7 @@ switch (bs->kind()) { case BarrierSet::G1SATBCT: case BarrierSet::G1SATBCTLogging: - g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise); + g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise); break; case BarrierSet::CardTableModRef: @@ -1431,42 +1432,36 @@ } } -Node* GraphKit::store_oop_to_object(Node* ctl, - Node* obj, - Node* adr, - const TypePtr* adr_type, - Node *val, - const TypeOopPtr* val_type, - BasicType bt) { +Node* GraphKit::store_oop(Node* ctl, + Node* obj, + Node* adr, + const TypePtr* adr_type, + Node* val, + const TypeOopPtr* val_type, + BasicType bt, + bool use_precise) { + + set_control(ctl); + if (stopped()) return top(); // Dead path ? 
+ + assert(bt == T_OBJECT, "sanity"); + assert(val != NULL, "not dead path"); uint adr_idx = C->get_alias_index(adr_type); - Node* store; - pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt); - store = store_to_memory(control(), adr, val, bt, adr_idx); - post_barrier(control(), store, obj, adr, adr_idx, val, bt, false); + assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" ); + + pre_barrier(control(), obj, adr, adr_idx, val, val_type, bt); + Node* store = store_to_memory(control(), adr, val, bt, adr_idx); + post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise); return store; } -Node* GraphKit::store_oop_to_array(Node* ctl, - Node* obj, - Node* adr, - const TypePtr* adr_type, - Node *val, - const TypeOopPtr* val_type, - BasicType bt) { - uint adr_idx = C->get_alias_index(adr_type); - Node* store; - pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt); - store = store_to_memory(control(), adr, val, bt, adr_idx); - post_barrier(control(), store, obj, adr, adr_idx, val, bt, true); - return store; -} - +// Could be an array or object we don't know at compile time (unsafe ref.) 
Node* GraphKit::store_oop_to_unknown(Node* ctl, - Node* obj, - Node* adr, - const TypePtr* adr_type, - Node *val, - BasicType bt) { + Node* obj, // containing obj + Node* adr, // actual address to store val at + const TypePtr* adr_type, + Node* val, + BasicType bt) { Compile::AliasType* at = C->alias_type(adr_type); const TypeOopPtr* val_type = NULL; if (adr_type->isa_instptr()) { @@ -1485,12 +1480,7 @@ if (val_type == NULL) { val_type = TypeInstPtr::BOTTOM; } - - uint adr_idx = at->index(); - pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt); - Node* store = store_to_memory(control(), adr, val, bt, adr_idx); - post_barrier(control(), store, obj, adr, adr_idx, val, bt, true); - return store; + return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true); } @@ -1804,93 +1794,6 @@ } -//------------------------------store_barrier---------------------------------- -// Insert a write-barrier store. This is to let generational GC work; we have -// to flag all oop-stores before the next GC point. -void GraphKit::write_barrier_post(Node* oop_store, Node* obj, Node* adr, - Node* val, bool use_precise) { - // No store check needed if we're storing a NULL or an old object - // (latter case is probably a string constant). The concurrent - // mark sweep garbage collector, however, needs to have all nonNull - // oop updates flagged via card-marks. - if (val != NULL && val->is_Con()) { - // must be either an oop or NULL - const Type* t = val->bottom_type(); - if (t == TypePtr::NULL_PTR || t == Type::TOP) - // stores of null never (?) need barriers - return; - ciObject* con = t->is_oopptr()->const_oop(); - if (con != NULL - && con->is_perm() - && Universe::heap()->can_elide_permanent_oop_store_barriers()) - // no store barrier needed, because no old-to-new ref created - return; - } - - if (use_ReduceInitialCardMarks() - && obj == just_allocated_object(control())) { - // We can skip marks on a freshly-allocated object.
- // Keep this code in sync with do_eager_card_mark in runtime.cpp. - // That routine eagerly marks the occasional object which is produced - // by the slow path, so that we don't have to do it here. - return; - } - - if (!use_precise) { - // All card marks for a (non-array) instance are in one place: - adr = obj; - } - // (Else it's an array (or unknown), and we want more precise card marks.) - assert(adr != NULL, ""); - - // Get the alias_index for raw card-mark memory - int adr_type = Compile::AliasIdxRaw; - // Convert the pointer to an int prior to doing math on it - Node* cast = _gvn.transform(new (C, 2) CastP2XNode(control(), adr)); - // Divide by card size - assert(Universe::heap()->barrier_set()->kind() == BarrierSet::CardTableModRef, - "Only one we handle so far."); - CardTableModRefBS* ct = - (CardTableModRefBS*)(Universe::heap()->barrier_set()); - Node *b = _gvn.transform(new (C, 3) URShiftXNode( cast, _gvn.intcon(CardTableModRefBS::card_shift) )); - // We store into a byte array, so do not bother to left-shift by zero - Node *c = byte_map_base_node(); - // Combine - Node *sb_ctl = control(); - Node *sb_adr = _gvn.transform(new (C, 4) AddPNode( top()/*no base ptr*/, c, b )); - Node *sb_val = _gvn.intcon(0); - // Smash zero into card - if( !UseConcMarkSweepGC ) { - BasicType bt = T_BYTE; - store_to_memory(sb_ctl, sb_adr, sb_val, bt, adr_type); - } else { - // Specialized path for CM store barrier - cms_card_mark( sb_ctl, sb_adr, sb_val, oop_store); - } -} - -// Specialized path for CMS store barrier -void GraphKit::cms_card_mark(Node* ctl, Node* adr, Node* val, Node *oop_store) { - BasicType bt = T_BYTE; - int adr_idx = Compile::AliasIdxRaw; - Node* mem = memory(adr_idx); - - // The type input is NULL in PRODUCT builds - const TypePtr* type = NULL; - debug_only(type = C->get_adr_type(adr_idx)); - - // Add required edge to oop_store, optimizer does not support precedence edges. - // Convert required edge to precedence edge before allocation. 
- Node *store = _gvn.transform( new (C, 5) StoreCMNode(ctl, mem, adr, type, val, oop_store) ); - set_memory(store, adr_idx); - - // For CMS, back-to-back card-marks can only remove the first one - // and this requires DU info. Push on worklist for optimizer. - if (mem->req() > MemNode::Address && adr == mem->in(MemNode::Address)) - record_for_igvn(store); -} - - void GraphKit::round_double_arguments(ciMethod* dest_method) { // (Note: TypeFunc::make has a cache that makes this fast.) const TypeFunc* tf = TypeFunc::make(dest_method); @@ -3215,6 +3118,79 @@ return NULL; } +//----------------------------- store barriers ---------------------------- +#define __ ideal. + +void GraphKit::sync_kit(IdealKit& ideal) { + // Final sync IdealKit and graphKit. + __ drain_delay_transform(); + set_all_memory(__ merged_memory()); + set_control(__ ctrl()); +} + +// vanilla/CMS post barrier +// Insert a write-barrier store. This is to let generational GC work; we have +// to flag all oop-stores before the next GC point. +void GraphKit::write_barrier_post(Node* oop_store, + Node* obj, + Node* adr, + Node* val, + bool use_precise) { + // No store check needed if we're storing a NULL or an old object + // (latter case is probably a string constant). The concurrent + // mark sweep garbage collector, however, needs to have all nonNull + // oop updates flagged via card-marks. + if (val != NULL && val->is_Con()) { + // must be either an oop or NULL + const Type* t = val->bottom_type(); + if (t == TypePtr::NULL_PTR || t == Type::TOP) + // stores of null never (?) need barriers + return; + ciObject* con = t->is_oopptr()->const_oop(); + if (con != NULL + && con->is_perm() + && Universe::heap()->can_elide_permanent_oop_store_barriers()) + // no store barrier needed, because no old-to-new ref created + return; + } + + if (!use_precise) { + // All card marks for a (non-array) instance are in one place: + adr = obj; + } + // (Else it's an array (or unknown), and we want more precise card marks.) 
+ assert(adr != NULL, ""); + + IdealKit ideal(gvn(), control(), merged_memory(), true); + + // Convert the pointer to an int prior to doing math on it + Node* cast = __ CastPX(__ ctrl(), adr); + + // Divide by card size + assert(Universe::heap()->barrier_set()->kind() == BarrierSet::CardTableModRef, + "Only one we handle so far."); + Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) ); + + // Combine card table base and card offset + Node* card_adr = __ AddP(__ top(), byte_map_base_node(), card_offset ); + + // Get the alias_index for raw card-mark memory + int adr_type = Compile::AliasIdxRaw; + // Smash zero into card + Node* zero = __ ConI(0); + BasicType bt = T_BYTE; + if( !UseConcMarkSweepGC ) { + __ store(__ ctrl(), card_adr, zero, bt, adr_type); + } else { + // Specialized path for CM store barrier + __ storeCM(__ ctrl(), card_adr, zero, oop_store, bt, adr_type); + } + + // Final sync IdealKit and GraphKit. + sync_kit(ideal); +} + +// G1 pre/post barriers void GraphKit::g1_write_barrier_pre(Node* obj, Node* adr, uint alias_idx, @@ -3222,10 +3198,8 @@ const TypeOopPtr* val_type, BasicType bt) { IdealKit ideal(gvn(), control(), merged_memory(), true); -#define __ ideal. 
- __ declares_done(); - - Node* thread = __ thread(); + + Node* tls = __ thread(); // ThreadLocalStorage Node* no_ctrl = NULL; Node* no_base = __ top(); @@ -3248,9 +3222,9 @@ // set_control( ctl); - Node* marking_adr = __ AddP(no_base, thread, __ ConX(marking_offset)); - Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset)); - Node* index_adr = __ AddP(no_base, thread, __ ConX(index_offset)); + Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset)); + Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); + Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); // Now some of the values @@ -3278,55 +3252,52 @@ Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t))); Node* next_indexX = next_index; #ifdef _LP64 - // We could refine the type for what it's worth - // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue); - next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) ); -#endif // _LP64 + // We could refine the type for what it's worth + // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue); + next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) ); +#endif // Now get the buffer location we will log the original value into and store it - Node *log_addr = __ AddP(no_base, buffer, next_indexX); - // __ store(__ ctrl(), log_addr, orig, T_OBJECT, C->get_alias_index(TypeOopPtr::BOTTOM)); __ store(__ ctrl(), log_addr, orig, T_OBJECT, Compile::AliasIdxRaw); - // update the index - // __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw); - // This is a hack to force this store to occur before the oop store that is coming up - __ store(__ ctrl(), index_adr, next_index, T_INT, C->get_alias_index(TypeOopPtr::BOTTOM)); + __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw); } __ else_(); { // logging buffer is full, call the 
runtime const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type(); - // __ make_leaf_call(tf, OptoRuntime::g1_wb_pre_Java(), "g1_wb_pre", orig, thread); - __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, thread); - } __ end_if(); - } __ end_if(); - } __ end_if(); - - __ drain_delay_transform(); - set_control( __ ctrl()); - set_all_memory( __ merged_memory()); - -#undef __ + __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, tls); + } __ end_if(); // (!index) + } __ end_if(); // (orig != NULL) + } __ end_if(); // (!marking) + + // Final sync IdealKit and GraphKit. + sync_kit(ideal); } // // Update the card table and add card address to the queue // -void GraphKit::g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store, Node* index, Node* index_adr, Node* buffer, const TypeFunc* tf) { -#define __ ideal-> +void GraphKit::g1_mark_card(IdealKit& ideal, + Node* card_adr, + Node* oop_store, + Node* index, + Node* index_adr, + Node* buffer, + const TypeFunc* tf) { + Node* zero = __ ConI(0); Node* no_base = __ top(); BasicType card_bt = T_BYTE; // Smash zero into card. 
MUST BE ORDERED WRT TO STORE - __ storeCM(__ ctrl(), card_adr, zero, store, card_bt, Compile::AliasIdxRaw); + __ storeCM(__ ctrl(), card_adr, zero, oop_store, card_bt, Compile::AliasIdxRaw); // Now do the queue work __ if_then(index, BoolTest::ne, zero); { - Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t))); + Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t))); Node* next_indexX = next_index; #ifdef _LP64 // We could refine the type for what it's worth @@ -3341,10 +3312,10 @@ } __ else_(); { __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread()); } __ end_if(); -#undef __ + } -void GraphKit::g1_write_barrier_post(Node* store, +void GraphKit::g1_write_barrier_post(Node* oop_store, Node* obj, Node* adr, uint alias_idx, @@ -3369,10 +3340,8 @@ assert(adr != NULL, ""); IdealKit ideal(gvn(), control(), merged_memory(), true); -#define __ ideal. - __ declares_done(); - - Node* thread = __ thread(); + + Node* tls = __ thread(); // ThreadLocalStorage Node* no_ctrl = NULL; Node* no_base = __ top(); @@ -3394,8 +3363,8 @@ // Pointers into the thread - Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset)); - Node* index_adr = __ AddP(no_base, thread, __ ConX(index_offset)); + Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); + Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); // Now some values @@ -3404,18 +3373,14 @@ // Convert the store obj pointer to an int prior to doing math on it - // Use addr not obj gets accurate card marks - - // Node* cast = __ CastPX(no_ctrl, adr /* obj */); - // Must use ctrl to prevent "integerized oop" existing across safepoint - Node* cast = __ CastPX(__ ctrl(), ( use_precise ? 
adr : obj )); + Node* cast = __ CastPX(__ ctrl(), adr); // Divide pointer by card size Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) ); // Combine card table base and card offset - Node *card_adr = __ AddP(no_base, byte_map_base_node(), card_offset ); + Node* card_adr = __ AddP(no_base, byte_map_base_node(), card_offset ); // If we know the value being stored does it cross regions? @@ -3439,18 +3404,17 @@ Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); __ if_then(card_val, BoolTest::ne, zero); { - g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf); + g1_mark_card(ideal, card_adr, oop_store, index, index_adr, buffer, tf); } __ end_if(); } __ end_if(); } __ end_if(); } else { - g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf); + // Object.clone() intrinsic uses this path. + g1_mark_card(ideal, card_adr, oop_store, index, index_adr, buffer, tf); } - - __ drain_delay_transform(); - set_control( __ ctrl()); - set_all_memory( __ merged_memory()); + // Final sync IdealKit and GraphKit. + sync_kit(ideal); +} #undef __ -} diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/graphKit.hpp --- a/src/share/vm/opto/graphKit.hpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/graphKit.hpp Thu Jul 16 14:10:42 2009 -0700 @@ -449,13 +449,24 @@ // // If val==NULL, it is taken to be a completely unknown value.
QQQ + Node* store_oop(Node* ctl, + Node* obj, // containing obj + Node* adr, // actual address to store val at + const TypePtr* adr_type, + Node* val, + const TypeOopPtr* val_type, + BasicType bt, + bool use_precise); + Node* store_oop_to_object(Node* ctl, Node* obj, // containing obj Node* adr, // actual adress to store val at const TypePtr* adr_type, Node* val, const TypeOopPtr* val_type, - BasicType bt); + BasicType bt) { + return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, false); + } Node* store_oop_to_array(Node* ctl, Node* obj, // containing obj @@ -463,7 +474,9 @@ const TypePtr* adr_type, Node* val, const TypeOopPtr* val_type, - BasicType bt); + BasicType bt) { + return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true); + } // Could be an array or object we don't know at compile time (unsafe ref.) Node* store_oop_to_unknown(Node* ctl, @@ -488,9 +501,6 @@ // Return a load of array element at idx. Node* load_array_element(Node* ctl, Node* ary, Node* idx, const TypeAryPtr* arytype); - // CMS card-marks have an input from the corresponding oop_store - void cms_card_mark(Node* ctl, Node* adr, Node* val, Node* oop_store); - //---------------- Dtrace support -------------------- void make_dtrace_method_entry_exit(ciMethod* method, bool is_entry); void make_dtrace_method_entry(ciMethod* method) { @@ -582,9 +592,6 @@ return C->too_many_recompiles(method(), bci(), reason); } - // vanilla/CMS post barrier - void write_barrier_post(Node *store, Node* obj, Node* adr, Node* val, bool use_precise); - // Returns the object (if any) which was created the moment before.
Node* just_allocated_object(Node* current_control); @@ -593,6 +600,11 @@ && Universe::heap()->can_elide_tlab_store_barriers()); } + void sync_kit(IdealKit& ideal); + + // vanilla/CMS post barrier + void write_barrier_post(Node *store, Node* obj, Node* adr, Node* val, bool use_precise); + // G1 pre/post barriers void g1_write_barrier_pre(Node* obj, Node* adr, @@ -610,7 +622,7 @@ bool use_precise); // Helper function for g1 private: - void g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store, Node* index, Node* index_adr, + void g1_mark_card(IdealKit& ideal, Node* card_adr, Node* store, Node* index, Node* index_adr, Node* buffer, const TypeFunc* tf); public: diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/idealKit.cpp --- a/src/share/vm/opto/idealKit.cpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/idealKit.cpp Thu Jul 16 14:10:42 2009 -0700 @@ -34,7 +34,7 @@ const uint IdealKit::first_var = TypeFunc::Parms + 1; //----------------------------IdealKit----------------------------------------- -IdealKit::IdealKit(PhaseGVN &gvn, Node* control, Node* mem, bool delay_all_transforms) : +IdealKit::IdealKit(PhaseGVN &gvn, Node* control, Node* mem, bool delay_all_transforms, bool has_declarations) : _gvn(gvn), C(gvn.C) { _initial_ctrl = control; _initial_memory = mem; @@ -47,6 +47,9 @@ _pending_cvstates = new (C->node_arena()) GrowableArray(C->node_arena(), init_size, 0, 0); _delay_transform = new (C->node_arena()) GrowableArray(C->node_arena(), init_size, 0, 0); DEBUG_ONLY(_state = new (C->node_arena()) GrowableArray(C->node_arena(), init_size, 0, 0)); + if (!has_declarations) { + declarations_done(); + } } //-------------------------------if_then------------------------------------- @@ -97,7 +100,7 @@ //-------------------------------end_if------------------------------------- // Merge the "then" and "else" cvstates. 
// -// The if_then() pushed the current state for later use +// The if_then() pushed a copy of the current state for later use // as the initial state for a future "else" clause. The // current state then became the initial state for the // then clause. If an "else" clause was encountered, it will @@ -258,8 +261,8 @@ return delay_transform(PhiNode::make(reg, n, ct)); } -//-----------------------------declares_done----------------------------------- -void IdealKit::declares_done() { +//-----------------------------declarations_done------------------------------- +void IdealKit::declarations_done() { _cvstate = new_cvstate(); // initialize current cvstate set_ctrl(_initial_ctrl); // initialize control in current cvstate set_all_memory(_initial_memory);// initialize memory in current cvstate @@ -277,7 +280,9 @@ //-----------------------------delay_transform----------------------------------- Node* IdealKit::delay_transform(Node* n) { - gvn().set_type(n, n->bottom_type()); + if (!gvn().is_IterGVN() || !gvn().is_IterGVN()->delay_transform()) { + gvn().set_type(n, n->bottom_type()); + } _delay_transform->push(n); return n; } @@ -321,7 +326,9 @@ Node* IdealKit::memory(uint alias_idx) { MergeMemNode* mem = merged_memory(); Node* p = mem->memory_at(alias_idx); - _gvn.set_type(p, Type::MEMORY); // must be mapped + if (!gvn().is_IterGVN() || !gvn().is_IterGVN()->delay_transform()) { + _gvn.set_type(p, Type::MEMORY); // must be mapped + } return p; } @@ -462,9 +469,6 @@ const TypePtr* adr_type = TypeRawPtr::BOTTOM; uint adr_idx = C->get_alias_index(adr_type); - // Clone initial memory - MergeMemNode* cloned_mem = MergeMemNode::make(C, merged_memory()); - // Slow-path leaf call int size = slow_call_type->domain()->cnt(); CallNode *call = (CallNode*)new (C, size) CallLeafNode( slow_call_type, slow_call, leaf_name, adr_type); @@ -489,9 +493,6 @@ set_ctrl(transform( new (C, 1) ProjNode(call,TypeFunc::Control) )); - // Set the incoming clone of memory as current memory - 
set_all_memory(cloned_mem); - // Make memory for the call Node* mem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) ); diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/idealKit.hpp --- a/src/share/vm/opto/idealKit.hpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/idealKit.hpp Thu Jul 16 14:10:42 2009 -0700 @@ -49,7 +49,7 @@ // Example: // Node* limit = ?? // IdealVariable i(kit), j(kit); -// declares_done(); +// declarations_done(); // Node* exit = make_label(1); // 1 goto // set(j, ConI(0)); // loop(i, ConI(0), BoolTest::lt, limit); { @@ -101,10 +101,7 @@ Node* new_cvstate(); // Create a new cvstate Node* cvstate() { return _cvstate; } // current cvstate Node* copy_cvstate(); // copy current cvstate - void set_ctrl(Node* ctrl) { _cvstate->set_req(TypeFunc::Control, ctrl); } - // Should this assert this is a MergeMem??? - void set_all_memory(Node* mem){ _cvstate->set_req(TypeFunc::Memory, mem); } void set_memory(Node* mem, uint alias_idx ); void do_memory_merge(Node* merging, Node* join); void clear(Node* m); // clear a cvstate @@ -132,15 +129,17 @@ Node* memory(uint alias_idx); public: - IdealKit(PhaseGVN &gvn, Node* control, Node* memory, bool delay_all_transforms = false); + IdealKit(PhaseGVN &gvn, Node* control, Node* memory, bool delay_all_transforms = false, bool has_declarations = false); ~IdealKit() { stop(); drain_delay_transform(); } // Control Node* ctrl() { return _cvstate->in(TypeFunc::Control); } + void set_ctrl(Node* ctrl) { _cvstate->set_req(TypeFunc::Control, ctrl); } Node* top() { return C->top(); } MergeMemNode* merged_memory() { return _cvstate->in(TypeFunc::Memory)->as_MergeMem(); } + void set_all_memory(Node* mem) { _cvstate->set_req(TypeFunc::Memory, mem); } void set(IdealVariable& v, Node* rhs) { _cvstate->set_req(first_var + v.id(), rhs); } Node* value(IdealVariable& v) { return _cvstate->in(first_var + v.id()); } void dead(IdealVariable& v) { set(v, (Node*)NULL); } @@ -155,7 +154,7 @@ Node* make_label(int 
goto_ct); void bind(Node* lab); void goto_(Node* lab, bool bind = false); - void declares_done(); + void declarations_done(); void drain_delay_transform(); Node* IfTrue(IfNode* iff) { return transform(new (C,1) IfTrueNode(iff)); } diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/ifnode.cpp --- a/src/share/vm/opto/ifnode.cpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/ifnode.cpp Thu Jul 16 14:10:42 2009 -0700 @@ -378,7 +378,18 @@ // Force the original merge dead igvn->hash_delete(r); - r->set_req_X(0,NULL,igvn); + // First, remove region's dead users. + for (DUIterator_Last lmin, l = r->last_outs(lmin); l >= lmin;) { + Node* u = r->last_out(l); + if( u == r ) { + r->set_req(0, NULL); + } else { + assert(u->outcnt() == 0, "only dead users"); + igvn->remove_dead_node(u); + } + l -= 1; + } + igvn->remove_dead_node(r); // Now remove the bogus extra edges used to keep things alive igvn->remove_dead_node( hook ); diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/library_call.cpp Thu Jul 16 14:10:42 2009 -0700 @@ -1030,7 +1030,7 @@ const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin)); const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot); - IdealKit kit(gvn(), control(), merged_memory()); + IdealKit kit(gvn(), control(), merged_memory(), false, true); #define __ kit. 
Node* zero = __ ConI(0); Node* one = __ ConI(1); @@ -1042,7 +1042,7 @@ Node* targetOffset = __ ConI(targetOffset_i); Node* sourceEnd = __ SubI(__ AddI(sourceOffset, sourceCount), targetCountLess1); - IdealVariable rtn(kit), i(kit), j(kit); __ declares_done(); + IdealVariable rtn(kit), i(kit), j(kit); __ declarations_done(); Node* outer_loop = __ make_label(2 /* goto */); Node* return_ = __ make_label(1); @@ -1079,9 +1079,9 @@ __ bind(outer_loop); }__ end_loop(); __ dead(i); __ bind(return_); - __ drain_delay_transform(); - - set_control(__ ctrl()); + + // Final sync IdealKit and GraphKit. + sync_kit(kit); Node* result = __ value(rtn); #undef __ C->set_has_loops(true); @@ -2183,14 +2183,23 @@ // of it. So we need to emit code to conditionally do the proper type of // store. - IdealKit kit(gvn(), control(), merged_memory()); - kit.declares_done(); + IdealKit ideal(gvn(), control(), merged_memory()); +#define __ ideal. // QQQ who knows what probability is here?? - kit.if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); { - (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type); - } kit.else_(); { - (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile); - } kit.end_if(); + __ if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); { + // Sync IdealKit and graphKit. + set_all_memory( __ merged_memory()); + set_control(__ ctrl()); + Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type); + // Update IdealKit memory. + __ set_all_memory(merged_memory()); + __ set_ctrl(control()); + } __ else_(); { + __ store(__ ctrl(), adr, val, type, alias_type->index(), is_volatile); + } __ end_if(); + // Final sync IdealKit and GraphKit. 
+ sync_kit(ideal); +#undef __ } } } diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/machnode.cpp --- a/src/share/vm/opto/machnode.cpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/machnode.cpp Thu Jul 16 14:10:42 2009 -0700 @@ -300,6 +300,12 @@ } } adr_type = t_disp->add_offset(offset); + } else if( base == NULL && offset != 0 && offset != Type::OffsetBot ) { + // Use ideal type if it is oop ptr. + const TypePtr *tp = oper->type()->isa_ptr(); + if( tp != NULL) { + adr_type = tp; + } } } diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/macro.cpp Thu Jul 16 14:10:42 2009 -0700 @@ -198,14 +198,79 @@ } // Eliminate a card mark sequence. p2x is a ConvP2XNode -void PhaseMacroExpand::eliminate_card_mark(Node *p2x) { +void PhaseMacroExpand::eliminate_card_mark(Node* p2x) { assert(p2x->Opcode() == Op_CastP2X, "ConvP2XNode required"); - Node *shift = p2x->unique_out(); - Node *addp = shift->unique_out(); - for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) { - Node *st = addp->last_out(j); - assert(st->is_Store(), "store required"); - _igvn.replace_node(st, st->in(MemNode::Memory)); + if (!UseG1GC) { + // vanilla/CMS post barrier + Node *shift = p2x->unique_out(); + Node *addp = shift->unique_out(); + for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) { + Node *st = addp->last_out(j); + assert(st->is_Store(), "store required"); + _igvn.replace_node(st, st->in(MemNode::Memory)); + } + } else { + // G1 pre/post barriers + assert(p2x->outcnt() == 2, "expects 2 users: Xor and URShift nodes"); + // It could be only one user, URShift node, in Object.clone() intrinsic + // but the new allocation is passed to arraycopy stub and it could not + // be scalar replaced. So we don't check the case. + + // Remove G1 post barrier.
+ + // Search for CastP2X->Xor->URShift->Cmp path which + // checks if the store is done to a region different from the value's region. + // And replace Cmp with #0 (false) to collapse G1 post barrier. + Node* xorx = NULL; + for (DUIterator_Fast imax, i = p2x->fast_outs(imax); i < imax; i++) { + Node* u = p2x->fast_out(i); + if (u->Opcode() == Op_XorX) { + xorx = u; + break; + } + } + assert(xorx != NULL, "missing G1 post barrier"); + Node* shift = xorx->unique_out(); + Node* cmpx = shift->unique_out(); + assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() && + cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne, + "missing region check in G1 post barrier"); + _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ)); + + // Remove G1 pre barrier. + + // Search "if (marking != 0)" check and set it to "false". + Node* this_region = p2x->in(0); + assert(this_region != NULL, ""); + // There is no G1 pre barrier if previous stored value is NULL + // (for example, after initialization). + if (this_region->is_Region() && this_region->req() == 3) { + int ind = 1; + if (!this_region->in(ind)->is_IfFalse()) { + ind = 2; + } + if (this_region->in(ind)->is_IfFalse()) { + Node* bol = this_region->in(ind)->in(0)->in(1); + assert(bol->is_Bool(), ""); + cmpx = bol->in(1); + if (bol->as_Bool()->_test._test == BoolTest::ne && + cmpx->is_Cmp() && cmpx->in(2) == intcon(0) && + cmpx->in(1)->is_Load()) { + Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address); + const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()); + if (adr->is_AddP() && adr->in(AddPNode::Base) == top() && + adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal && + adr->in(AddPNode::Offset) == MakeConX(marking_offset)) { + _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ)); + } + } + } + } + // Now CastP2X can be removed since it is used only on dead path + // which is currently still alive until igvn optimizes it.
+ assert(p2x->unique_out()->Opcode() == Op_URShiftX, ""); + _igvn.replace_node(p2x, top()); } } @@ -760,14 +825,11 @@ if (n->is_Store()) { _igvn.replace_node(n, n->in(MemNode::Memory)); } else { - assert( n->Opcode() == Op_CastP2X, "CastP2X required"); eliminate_card_mark(n); } k -= (oc2 - use->outcnt()); } } else { - assert( !use->is_SafePoint(), "safepoint uses must have been already elimiated"); - assert( use->Opcode() == Op_CastP2X, "CastP2X required"); eliminate_card_mark(use); } j -= (oc1 - res->outcnt()); diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/matcher.cpp Thu Jul 16 14:10:42 2009 -0700 @@ -1489,8 +1489,7 @@ #ifdef ASSERT // Verify adr type after matching memory operation const MachOper* oper = mach->memory_operand(); - if (oper != NULL && oper != (MachOper*)-1 && - mach->adr_type() != TypeRawPtr::BOTTOM) { // non-direct addressing mode + if (oper != NULL && oper != (MachOper*)-1) { // It has a unique memory operand. Find corresponding ideal mem node. 
Node* m = NULL; if (leaf->is_Mem()) { diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/phaseX.hpp --- a/src/share/vm/opto/phaseX.hpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/phaseX.hpp Thu Jul 16 14:10:42 2009 -0700 @@ -450,6 +450,8 @@ subsume_node(old, nn); } + bool delay_transform() const { return _delay_transform; } + void set_delay_transform(bool delay) { _delay_transform = delay; } diff -r fd50a67f97d1 -r fc4be448891f src/share/vm/opto/type.hpp --- a/src/share/vm/opto/type.hpp Wed Jul 15 13:37:35 2009 -0700 +++ b/src/share/vm/opto/type.hpp Thu Jul 16 14:10:42 2009 -0700 @@ -1216,6 +1216,8 @@ #define Op_AndX Op_AndL #define Op_AddX Op_AddL #define Op_SubX Op_SubL +#define Op_XorX Op_XorL +#define Op_URShiftX Op_URShiftL // conversions #define ConvI2X(x) ConvI2L(x) #define ConvL2X(x) (x) @@ -1258,6 +1260,8 @@ #define Op_AndX Op_AndI #define Op_AddX Op_AddI #define Op_SubX Op_SubI +#define Op_XorX Op_XorI +#define Op_URShiftX Op_URShiftI // conversions #define ConvI2X(x) (x) #define ConvL2X(x) ConvL2I(x)