# HG changeset patch # User kvn # Date 1206051104 25200 # Node ID 2a9af0b9cb1c0c23b636b0594800871f8343ca8f # Parent a8880a78d35535d03c90fcdbc40ee2e40b537e8f 6674600: (Escape Analysis) Optimize memory graph for instance's fields Summary: EA gives opportunite to do more aggressive memory optimizations. Reviewed-by: never, jrose diff -r a8880a78d355 -r 2a9af0b9cb1c src/share/vm/opto/callnode.cpp --- a/src/share/vm/opto/callnode.cpp Thu Mar 20 13:51:55 2008 -0700 +++ b/src/share/vm/opto/callnode.cpp Thu Mar 20 15:11:44 2008 -0700 @@ -625,8 +625,8 @@ } // -// Determine whether the call could modify a memory value of the -// specified address type +// Determine whether the call could modify the field of the specified +// instance at the specified offset. // bool CallNode::may_modify(const TypePtr *addr_t, PhaseTransform *phase) { const TypeOopPtr *adrInst_t = addr_t->isa_oopptr(); @@ -638,9 +638,24 @@ Compile *C = phase->C; int offset = adrInst_t->offset(); assert(offset >= 0, "should be valid offset"); - assert(addr_t->isa_instptr() || addr_t->isa_aryptr(), "only instances or arrays are expected"); + ciKlass* adr_k = adrInst_t->klass(); + assert(adr_k->is_loaded() && + adr_k->is_java_klass() && + !adr_k->is_interface(), + "only non-abstract classes are expected"); int base_idx = C->get_alias_index(adrInst_t); + int size = BytesPerLong; // If we don't know the size, assume largest. + if (adrInst_t->isa_instptr()) { + ciField* field = C->alias_type(base_idx)->field(); + if (field != NULL) { + size = field->size_in_bytes(); + } + } else { + assert(adrInst_t->isa_aryptr(), "only arrays are expected"); + size = type2aelembytes(adr_k->as_array_klass()->element_type()->basic_type()); + } + ciMethod * meth = is_CallStaticJava() ? as_CallStaticJava()->method() : NULL; BCEscapeAnalyzer *bcea = (meth != NULL) ? meth->get_bcea() : NULL; @@ -656,14 +671,19 @@ if (!arg->is_top() && (t->isa_oopptr() != NULL || t->isa_ptr() && at_ptr != NULL)) { assert(at_ptr != NULL, "expecting an OopPtr"); - // If we have found an argument matching adr_base_t, check if the field - // at the specified offset is modified. Since we don't know the size, - // assume 8. - int at_idx = C->get_alias_index(at_ptr->add_offset(offset)->isa_oopptr()); - if (base_idx == at_idx && - (bcea == NULL || - bcea->is_arg_modified(i - TypeFunc::Parms, offset, 8))) { - return true; + ciKlass* at_k = at_ptr->klass(); + if ((adrInst_t->base() == at_ptr->base()) && + at_k->is_loaded() && + at_k->is_java_klass() && + !at_k->is_interface()) { + // If we have found an argument matching addr_t, check if the field + // at the specified offset is modified. + int at_idx = C->get_alias_index(at_ptr->add_offset(offset)->isa_oopptr()); + if (base_idx == at_idx && + (bcea == NULL || + bcea->is_arg_modified(i - TypeFunc::Parms, offset, size))) { + return true; + } } } } diff -r a8880a78d355 -r 2a9af0b9cb1c src/share/vm/opto/callnode.hpp --- a/src/share/vm/opto/callnode.hpp Thu Mar 20 13:51:55 2008 -0700 +++ b/src/share/vm/opto/callnode.hpp Thu Mar 20 15:11:44 2008 -0700 @@ -419,7 +419,7 @@ uint _first_index; // First input edge index of a SafePoint node where // states of the scalarized object fields are collected. uint _n_fields; // Number of non-static fields of the scalarized object. - DEBUG_ONLY(AllocateNode* _alloc); + DEBUG_ONLY(AllocateNode* _alloc;) public: SafePointScalarObjectNode(const TypeOopPtr* tp, #ifdef ASSERT diff -r a8880a78d355 -r 2a9af0b9cb1c src/share/vm/opto/cfgnode.cpp --- a/src/share/vm/opto/cfgnode.cpp Thu Mar 20 13:51:55 2008 -0700 +++ b/src/share/vm/opto/cfgnode.cpp Thu Mar 20 15:11:44 2008 -0700 @@ -704,6 +704,61 @@ return mem; } +//------------------------split_out_instance----------------------------------- +// Split out an instance type from a bottom phi. +PhiNode* PhiNode::split_out_instance(const TypePtr* at, PhaseIterGVN *igvn) const { + assert(type() == Type::MEMORY && (adr_type() == TypePtr::BOTTOM || + adr_type() == TypeRawPtr::BOTTOM) , "bottom or raw memory required"); + + // Check if an appropriate node already exists. + Node *region = in(0); + for (DUIterator_Fast kmax, k = region->fast_outs(kmax); k < kmax; k++) { + Node* use = region->fast_out(k); + if( use->is_Phi()) { + PhiNode *phi2 = use->as_Phi(); + if (phi2->type() == Type::MEMORY && phi2->adr_type() == at) { + return phi2; + } + } + } + Compile *C = igvn->C; + Arena *a = Thread::current()->resource_area(); + Node_Array node_map = new Node_Array(a); + Node_Stack stack(a, C->unique() >> 4); + PhiNode *nphi = slice_memory(at); + igvn->register_new_node_with_optimizer( nphi ); + node_map.map(_idx, nphi); + stack.push((Node *)this, 1); + while(!stack.is_empty()) { + PhiNode *ophi = stack.node()->as_Phi(); + uint i = stack.index(); + assert(i >= 1, "not control edge"); + stack.pop(); + nphi = node_map[ophi->_idx]->as_Phi(); + for (; i < ophi->req(); i++) { + Node *in = ophi->in(i); + if (in == NULL || igvn->type(in) == Type::TOP) + continue; + Node *opt = MemNode::optimize_simple_memory_chain(in, at, igvn); + PhiNode *optphi = opt->is_Phi() ? opt->as_Phi() : NULL; + if (optphi != NULL && optphi->adr_type() == TypePtr::BOTTOM) { + opt = node_map[optphi->_idx]; + if (opt == NULL) { + stack.push(ophi, i); + nphi = optphi->slice_memory(at); + igvn->register_new_node_with_optimizer( nphi ); + node_map.map(optphi->_idx, nphi); + ophi = optphi; + i = 0; // will get incremented at top of loop + continue; + } + } + nphi->set_req(i, opt); + } + } + return nphi; +} + //------------------------verify_adr_type-------------------------------------- #ifdef ASSERT void PhiNode::verify_adr_type(VectorSet& visited, const TypePtr* at) const { @@ -1736,6 +1791,18 @@ return result; } } + // + // Other optimizations on the memory chain + // + const TypePtr* at = adr_type(); + for( uint i=1; ias_instance_type(); // Now generate allocation code + + // With escape analysis, the entire memory state is needed to be able to + // eliminate the allocation. If the allocations cannot be eliminated, this + // will be optimized to the raw slice when the allocation is expanded. + Node *mem; + if (C->do_escape_analysis()) { + mem = reset_memory(); + set_all_memory(mem); + } else { + mem = memory(Compile::AliasIdxRaw); + } + AllocateNode* alloc = new (C, AllocateNode::ParmLimit) AllocateNode(C, AllocateNode::alloc_type(), - control(), memory(Compile::AliasIdxRaw), i_o(), + control(), mem, i_o(), size, klass_node, initial_slow_test); @@ -3056,11 +3068,23 @@ } // Now generate allocation code + + // With escape analysis, the entire memory state is needed to be able to + // eliminate the allocation. If the allocations cannot be eliminated, this + // will be optimized to the raw slice when the allocation is expanded. + Node *mem; + if (C->do_escape_analysis()) { + mem = reset_memory(); + set_all_memory(mem); + } else { + mem = memory(Compile::AliasIdxRaw); + } + // Create the AllocateArrayNode and its result projections AllocateArrayNode* alloc = new (C, AllocateArrayNode::ParmLimit) AllocateArrayNode(C, AllocateArrayNode::alloc_type(), - control(), memory(Compile::AliasIdxRaw), i_o(), + control(), mem, i_o(), size, klass_node, initial_slow_test, length); diff -r a8880a78d355 -r 2a9af0b9cb1c src/share/vm/opto/memnode.cpp --- a/src/share/vm/opto/memnode.cpp Thu Mar 20 13:51:55 2008 -0700 +++ b/src/share/vm/opto/memnode.cpp Thu Mar 20 15:11:44 2008 -0700 @@ -29,6 +29,8 @@ #include "incls/_precompiled.incl" #include "incls/_memnode.cpp.incl" +static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const TypePtr *tp, const TypePtr *adr_check, outputStream *st); + //============================================================================= uint MemNode::size_of() const { return sizeof(*this); } @@ -87,6 +89,60 @@ #endif +Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase) { + const TypeOopPtr *tinst = t_adr->isa_oopptr(); + if (tinst == NULL || !tinst->is_instance_field()) + return mchain; // don't try to optimize non-instance types + uint instance_id = tinst->instance_id(); + Node *prev = NULL; + Node *result = mchain; + while (prev != result) { + prev = result; + // skip over a call which does not affect this memory slice + if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) { + Node *proj_in = result->in(0); + if (proj_in->is_Call()) { + CallNode *call = proj_in->as_Call(); + if (!call->may_modify(t_adr, phase)) { + result = call->in(TypeFunc::Memory); + } + } else if (proj_in->is_Initialize()) { + AllocateNode* alloc = proj_in->as_Initialize()->allocation(); + // Stop if this is the initialization for the object instance which + // which contains this memory slice, otherwise skip over it. + if (alloc != NULL && alloc->_idx != instance_id) { + result = proj_in->in(TypeFunc::Memory); + } + } else if (proj_in->is_MemBar()) { + result = proj_in->in(TypeFunc::Memory); + } + } else if (result->is_MergeMem()) { + result = step_through_mergemem(phase, result->as_MergeMem(), t_adr, NULL, tty); + } + } + return result; +} + +Node *MemNode::optimize_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase) { + const TypeOopPtr *t_oop = t_adr->isa_oopptr(); + bool is_instance = (t_oop != NULL) && t_oop->is_instance_field(); + PhaseIterGVN *igvn = phase->is_IterGVN(); + Node *result = mchain; + result = optimize_simple_memory_chain(result, t_adr, phase); + if (is_instance && igvn != NULL && result->is_Phi()) { + PhiNode *mphi = result->as_Phi(); + assert(mphi->bottom_type() == Type::MEMORY, "memory phi required"); + const TypePtr *t = mphi->adr_type(); + if (t == TypePtr::BOTTOM || t == TypeRawPtr::BOTTOM) { + // clone the Phi with our address type + result = mphi->split_out_instance(t_adr, igvn); + } else { + assert(phase->C->get_alias_index(t) == phase->C->get_alias_index(t_adr), "correct memory chain"); + } + } + return result; +} + static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const TypePtr *tp, const TypePtr *adr_check, outputStream *st) { uint alias_idx = phase->C->get_alias_index(tp); Node *mem = mmem; @@ -266,6 +322,8 @@ if (offset == Type::OffsetBot) return NULL; // cannot unalias unless there are precise offsets + const TypeOopPtr *addr_t = adr->bottom_type()->isa_oopptr(); + intptr_t size_in_bytes = memory_size(); Node* mem = in(MemNode::Memory); // start searching here... @@ -345,6 +403,22 @@ return mem; // let caller handle steps (c), (d) } + } else if (addr_t != NULL && addr_t->is_instance_field()) { + // Can't use optimize_simple_memory_chain() since it needs PhaseGVN. + if (mem->is_Proj() && mem->in(0)->is_Call()) { + CallNode *call = mem->in(0)->as_Call(); + if (!call->may_modify(addr_t, phase)) { + mem = call->in(TypeFunc::Memory); + continue; // (a) advance through independent call memory + } + } else if (mem->is_Proj() && mem->in(0)->is_MemBar()) { + mem = mem->in(0)->in(TypeFunc::Memory); + continue; // (a) advance through independent MemBar memory + } else if (mem->is_MergeMem()) { + int alias_idx = phase->C->get_alias_index(adr_type()); + mem = mem->as_MergeMem()->memory_at(alias_idx); + continue; // (a) advance through independent MergeMem memory + } } // Unless there is an explicit 'continue', we must bail out here, @@ -1011,6 +1085,122 @@ } } + Node* mem = in(MemNode::Memory); + const TypePtr *addr_t = phase->type(address)->isa_ptr(); + + if (addr_t != NULL) { + // try to optimize our memory input + Node* opt_mem = MemNode::optimize_memory_chain(mem, addr_t, phase); + if (opt_mem != mem) { + set_req(MemNode::Memory, opt_mem); + return this; + } + const TypeOopPtr *t_oop = addr_t->isa_oopptr(); + if (can_reshape && opt_mem->is_Phi() && + (t_oop != NULL) && t_oop->is_instance_field()) { + assert(t_oop->offset() != Type::OffsetBot && t_oop->offset() != Type::OffsetTop, ""); + Node *region = opt_mem->in(0); + uint cnt = opt_mem->req(); + for( uint i = 1; i < cnt; i++ ) { + Node *in = opt_mem->in(i); + if( in == NULL ) { + region = NULL; // Wait stable graph + break; + } + } + if (region != NULL) { + // Check for loop invariant. + if (cnt == 3) { + for( uint i = 1; i < cnt; i++ ) { + Node *in = opt_mem->in(i); + Node* m = MemNode::optimize_memory_chain(in, addr_t, phase); + if (m == opt_mem) { + set_req(MemNode::Memory, opt_mem->in(cnt - i)); // Skip this phi. + return this; + } + } + } + // Split through Phi (see original code in loopopts.cpp). + assert(phase->C->have_alias_type(addr_t), "instance should have alias type"); + const Type* this_type = this->bottom_type(); + int this_index = phase->C->get_alias_index(addr_t); + int this_offset = addr_t->offset(); + int this_iid = addr_t->is_oopptr()->instance_id(); + int wins = 0; + PhaseIterGVN *igvn = phase->is_IterGVN(); + Node *phi = new (igvn->C, region->req()) PhiNode(region, this_type, NULL, this_iid, this_index, this_offset); + for( uint i = 1; i < region->req(); i++ ) { + Node *x; + Node* the_clone = NULL; + if( region->in(i) == phase->C->top() ) { + x = phase->C->top(); // Dead path? Use a dead data op + } else { + x = this->clone(); // Else clone up the data op + the_clone = x; // Remember for possible deletion. + // Alter data node to use pre-phi inputs + if( this->in(0) == region ) { + x->set_req( 0, region->in(i) ); + } else { + x->set_req( 0, NULL ); + } + for( uint j = 1; j < this->req(); j++ ) { + Node *in = this->in(j); + if( in->is_Phi() && in->in(0) == region ) + x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone + } + } + // Check for a 'win' on some paths + const Type *t = x->Value(igvn); + + bool singleton = t->singleton(); + + // See comments in PhaseIdealLoop::split_thru_phi(). + if( singleton && t == Type::TOP ) { + singleton &= region->is_Loop() && (i != LoopNode::EntryControl); + } + + if( singleton ) { + wins++; + x = igvn->makecon(t); + } else { + // We now call Identity to try to simplify the cloned node. + // Note that some Identity methods call phase->type(this). + // Make sure that the type array is big enough for + // our new node, even though we may throw the node away. + // (This tweaking with igvn only works because x is a new node.) + igvn->set_type(x, t); + Node *y = x->Identity(igvn); + if( y != x ) { + wins++; + x = y; + } else { + y = igvn->hash_find(x); + if( y ) { + wins++; + x = y; + } else { + // Else x is a new node we are keeping + // We do not need register_new_node_with_optimizer + // because set_type has already been called. + igvn->_worklist.push(x); + } + } + } + if (x != the_clone && the_clone != NULL) + igvn->remove_dead_node(the_clone); + phi->set_req(i, x); + } + if( wins > 0 ) { + // Record Phi + igvn->register_new_node_with_optimizer(phi); + return phi; + } else { + igvn->remove_dead_node(phi); + } + } + } + } + // Check for prior store with a different base or offset; make Load // independent. Skip through any number of them. Bail out if the stores // are in an endless dead cycle and report no progress. This is a key diff -r a8880a78d355 -r 2a9af0b9cb1c src/share/vm/opto/memnode.hpp --- a/src/share/vm/opto/memnode.hpp Thu Mar 20 13:51:55 2008 -0700 +++ b/src/share/vm/opto/memnode.hpp Thu Mar 20 15:11:44 2008 -0700 @@ -67,6 +67,8 @@ PhaseTransform* phase); static bool adr_phi_is_loop_invariant(Node* adr_phi, Node* cast); + static Node *optimize_simple_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase); + static Node *optimize_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase); // This one should probably be a phase-specific function: static bool detect_dominating_control(Node* dom, Node* sub);