# HG changeset patch # User coleenp # Date 1264201613 28800 # Node ID 2718ec34c69914ad7649938707e73f5aaec18f60 # Parent 3908ad1248385c1db42bb10e95197769fbf76352 # Parent 3d6016e040d634fea74ef24b51d9503bc8a2368f Merge diff -r 3908ad124838 -r 2718ec34c699 .hgtags --- a/.hgtags Wed Jan 20 11:32:41 2010 -0700 +++ b/.hgtags Fri Jan 22 15:06:53 2010 -0800 @@ -53,3 +53,4 @@ 9174bb32e934965288121f75394874eeb1fcb649 jdk7-b76 455105fc81d941482f8f8056afaa7aa0949c9300 jdk7-b77 e703499b4b51e3af756ae77c3d5e8b3058a14e4e jdk7-b78 +a5a6adfca6ecefb5894a848debabfe442ff50e25 jdk7-b79 diff -r 3908ad124838 -r 2718ec34c699 make/hotspot_version --- a/make/hotspot_version Wed Jan 20 11:32:41 2010 -0700 +++ b/make/hotspot_version Fri Jan 22 15:06:53 2010 -0800 @@ -35,7 +35,7 @@ HS_MAJOR_VER=17 HS_MINOR_VER=0 -HS_BUILD_NUMBER=06 +HS_BUILD_NUMBER=07 JDK_MAJOR_VER=1 JDK_MINOR_VER=7 diff -r 3908ad124838 -r 2718ec34c699 src/cpu/sparc/vm/interpreter_sparc.cpp --- a/src/cpu/sparc/vm/interpreter_sparc.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/sparc/vm/interpreter_sparc.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -394,6 +394,11 @@ } +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + // No special entry points that preclude compilation + return true; +} + // This method tells the deoptimizer how big an interpreted frame must be: int AbstractInterpreter::size_activation(methodOop method, int tempcount, diff -r 3908ad124838 -r 2718ec34c699 src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2862,6 +2862,9 @@ // arraycopy stubs used by compilers generate_arraycopy_stubs(); + + // Don't initialize the platform math functions since sparc + // doesn't have intrinsics for these operations. } diff -r 3908ad124838 -r 2718ec34c699 src/cpu/x86/vm/stubGenerator_x86_32.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1999-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
 * * This code is free software; you can redistribute it and/or modify it @@ -2030,6 +2030,54 @@ entry_checkcast_arraycopy); } + void generate_math_stubs() { + { + StubCodeMark mark(this, "StubRoutines", "log"); + StubRoutines::_intrinsic_log = (double (*)(double)) __ pc(); + + __ fld_d(Address(rsp, 4)); + __ flog(); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "log10"); + StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc(); + + __ fld_d(Address(rsp, 4)); + __ flog10(); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "sin"); + StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc(); + + __ fld_d(Address(rsp, 4)); + __ trigfunc('s'); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "cos"); + StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc(); + + __ fld_d(Address(rsp, 4)); + __ trigfunc('c'); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "tan"); + StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); + + __ fld_d(Address(rsp, 4)); + __ trigfunc('t'); + __ ret(0); + } + + // The intrinsic version of these seems to return the same value as + // the strict version. + StubRoutines::_intrinsic_exp = SharedRuntime::dexp; + StubRoutines::_intrinsic_pow = SharedRuntime::dpow; + } + public: // Information about frame layout at time of blocking runtime call. // Note that we only have to preserve callee-saved registers since @@ -2228,6 +2276,8 @@ MethodHandles::generate_method_handle_stub(_masm, ek); } } + + generate_math_stubs(); } diff -r 3908ad124838 -r 2718ec34c699 src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 2003-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2003-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
 * * This code is free software; you can redistribute it and/or modify it @@ -2731,6 +2731,79 @@ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; } + void generate_math_stubs() { + { + StubCodeMark mark(this, "StubRoutines", "log"); + StubRoutines::_intrinsic_log = (double (*)(double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ flog(); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "log10"); + StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ flog10(); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "sin"); + StubRoutines::_intrinsic_sin = (double (*)(double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ trigfunc('s'); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "cos"); + StubRoutines::_intrinsic_cos = (double (*)(double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ trigfunc('c'); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } + { + StubCodeMark mark(this, "StubRoutines", "tan"); + StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc(); + + __ subq(rsp, 8); + __ movdbl(Address(rsp, 0), xmm0); + __ fld_d(Address(rsp, 0)); + __ trigfunc('t'); + __ fstp_d(Address(rsp, 0)); + __ movdbl(xmm0, Address(rsp, 0)); + __ addq(rsp, 8); + __ ret(0); + } + + // The intrinsic version of these seems to return the same value as + // the strict version. + StubRoutines::_intrinsic_exp = SharedRuntime::dexp; + StubRoutines::_intrinsic_pow = SharedRuntime::dpow; + } + #undef __ #define __ masm-> @@ -2945,6 +3018,8 @@ MethodHandles::generate_method_handle_stub(_masm, ek); } } + + generate_math_stubs(); } public: diff -r 3908ad124838 -r 2718ec34c699 src/cpu/x86/vm/templateInterpreter_x86_32.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1431,6 +1431,23 @@ } +// These should never be compiled since the interpreter will prefer +// the compiled version to the intrinsic version. +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + switch (method_kind(m)) { + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_sqrt : + return false; + default: + return true; + } +} + // How much stack a method activation needs in words. 
int AbstractInterpreter::size_top_interpreter_activation(methodOop method) { diff -r 3908ad124838 -r 2718ec34c699 src/cpu/x86/vm/templateInterpreter_x86_64.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 2003-2009 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2003-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1456,6 +1456,23 @@ generate_normal_entry(synchronized); } +// These should never be compiled since the interpreter will prefer +// the compiled version to the intrinsic version. +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + switch (method_kind(m)) { + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_sqrt : + return false; + default: + return true; + } +} + // How much stack a method activation needs in words. int AbstractInterpreter::size_top_interpreter_activation(methodOop method) { const int entry_size = frame::interpreter_frame_monitor_size(); diff -r 3908ad124838 -r 2718ec34c699 src/os_cpu/linux_zero/vm/os_linux_zero.cpp --- a/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/os_cpu/linux_zero/vm/os_linux_zero.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,6 +1,6 @@ /* * Copyright 2003-2007 Sun Microsystems, Inc. All Rights Reserved. - * Copyright 2007, 2008 Red Hat, Inc. + * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -239,7 +239,21 @@ } bool os::is_allocatable(size_t bytes) { - ShouldNotCallThis(); +#ifdef _LP64 + return true; +#else + if (bytes < 2 * G) { + return true; + } + + char* addr = reserve_memory(bytes, NULL); + + if (addr != NULL) { + release_memory(addr, bytes); + } + + return addr != NULL; +#endif // _LP64 } /////////////////////////////////////////////////////////////////////////////// diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/c1/c1_LIR.hpp --- a/src/share/vm/c1/c1_LIR.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/c1/c1_LIR.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 2000-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2000-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
 * * This code is free software; you can redistribute it and/or modify it @@ -2000,7 +2000,7 @@ typedef enum { inputMode, firstMode = inputMode, tempMode, outputMode, numModes, invalidMode = -1 } OprMode; enum { - maxNumberOfOperands = 14, + maxNumberOfOperands = 16, maxNumberOfInfos = 4 }; diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/ci/ciField.cpp --- a/src/share/vm/ci/ciField.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/ci/ciField.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -161,6 +161,18 @@ "bootstrap classes must not create & cache unshared fields"); } +static bool trust_final_non_static_fields(ciInstanceKlass* holder) { + if (holder == NULL) + return false; + if (holder->name() == ciSymbol::java_lang_System()) + // Never trust strangely unstable finals: System.out, etc. + return false; + // Even if general trusting is disabled, trust system-built closures in these packages. + if (holder->is_in_package("java/dyn") || holder->is_in_package("sun/dyn")) + return true; + return TrustFinalNonStaticFields; +} + void ciField::initialize_from(fieldDescriptor* fd) { // Get the flags, offset, and canonical holder of the field. _flags = ciFlags(fd->access_flags()); @@ -172,7 +184,7 @@ if (!this->is_static()) { // A field can be constant if it's a final static field or if it's // a final non-static field of a trusted class ({java,sun}.dyn). - if (_holder->is_in_package("java/dyn") || _holder->is_in_package("sun/dyn")) { + if (trust_final_non_static_fields(_holder)) { _is_constant = true; return; } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -1441,6 +1441,7 @@ } jint G1CollectedHeap::initialize() { + CollectedHeap::pre_initialize(); os::enable_vtime(); // Necessary to satisfy locking discipline assertions. diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -1007,6 +1007,10 @@ return true; } + virtual bool card_mark_must_follow_store() const { + return true; + } + bool is_in_young(oop obj) { HeapRegion* hr = heap_region_containing(obj); return hr != NULL && hr->is_young(); diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/g1/ptrQueue.cpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -73,7 +73,12 @@ void PtrQueue::locking_enqueue_completed_buffer(void** buf) { assert(_lock->owned_by_self(), "Required."); + + // We have to unlock _lock (which may be Shared_DirtyCardQ_lock) before + // we acquire DirtyCardQ_CBL_mon inside enqueue_complete_buffer as they + // have the same rank and we may get the "possible deadlock" message _lock->unlock(); + qset()->enqueue_complete_buffer(buf); // We must relock only because the caller will unlock, for the normal // case. @@ -140,7 +145,36 @@ // holding the lock if there is one). if (_buf != NULL) { if (_lock) { - locking_enqueue_completed_buffer(_buf); + assert(_lock->owned_by_self(), "Required."); + + // The current PtrQ may be the shared dirty card queue and + // may be being manipulated by more than one worker thread + // during a pause. 
Since the enqueuing of the completed + // buffer unlocks the Shared_DirtyCardQ_lock, more than one + // worker thread can 'race' on reading the shared queue attributes + // (_buf and _index) and multiple threads can call into this + // routine for the same buffer. This will cause the completed + // buffer to be added to the CBL multiple times. + + // We "claim" the current buffer by caching the value of _buf in + // a local and clearing the field while holding _lock. When + // _lock is released (while enqueueing the completed buffer) + // the thread that acquires _lock will skip this code, + // preventing a subsequent multiple enqueue, and + // install a newly allocated buffer below. + + void** buf = _buf; // local pointer to completed buffer + _buf = NULL; // clear shared _buf field + + locking_enqueue_completed_buffer(buf); // enqueue completed buffer + + // While the current thread was enqueuing the buffer, another thread + // may have allocated a new buffer and inserted it into this pointer + // queue. If that happens then we just return so that the current + // thread doesn't overwrite the buffer allocated by the other thread + // and potentially lose some dirtied cards. + + if (_buf != NULL) return; } else { if (qset()->process_or_enqueue_complete_buffer(_buf)) { // Recycle the buffer. No allocation. diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.inline.hpp Wed Jan 20 11:32:41 2010 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -/* - * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - */ - -void PtrQueue::handle_zero_index() { - assert(0 == _index, "Precondition."); - // This thread records the full buffer and allocates a new one (while - // holding the lock if there is one). 
- void** buf = _buf; - _buf = qset()->allocate_buffer(); - _sz = qset()->buffer_size(); - _index = _sz; - assert(0 <= _index && _index <= _sz, "Invariant."); - if (buf != NULL) { - if (_lock) { - locking_enqueue_completed_buffer(buf); - } else { - qset()->enqueue_complete_buffer(buf); - } - } -} diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -51,6 +51,8 @@ } jint ParallelScavengeHeap::initialize() { + CollectedHeap::pre_initialize(); + // Cannot be initialized until after the flags are parsed GenerationSizer flag_parser; @@ -717,10 +719,6 @@ return young_gen()->allocate(size, true); } -void ParallelScavengeHeap::fill_all_tlabs(bool retire) { - CollectedHeap::fill_all_tlabs(retire); -} - void ParallelScavengeHeap::accumulate_statistics_all_tlabs() { CollectedHeap::accumulate_statistics_all_tlabs(); } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -54,7 +54,6 @@ protected: static inline size_t total_invocations(); HeapWord* allocate_new_tlab(size_t size); - void fill_all_tlabs(bool retire); public: ParallelScavengeHeap() : CollectedHeap() { @@ -191,6 +190,10 @@ return true; } + virtual bool card_mark_must_follow_store() const { + return false; + } + // Return true if we don't we need a store barrier for // initializing stores to an object at this address. virtual bool can_elide_initializing_store_barrier(oop new_obj); diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_interface/collectedHeap.cpp --- a/src/share/vm/gc_interface/collectedHeap.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -59,8 +59,18 @@ PerfDataManager::create_string_variable(SUN_GC, "lastCause", 80, GCCause::to_string(_gc_lastcause), CHECK); } + _defer_initial_card_mark = false; // strengthened by subclass in pre_initialize() below. } +void CollectedHeap::pre_initialize() { + // Used for ReduceInitialCardMarks (when COMPILER2 is used); + // otherwise remains unused. 
+#ifdef COMPILER2 + _defer_initial_card_mark = ReduceInitialCardMarks && (DeferInitialCardMark || card_mark_must_follow_store()); +#else + assert(_defer_initial_card_mark == false, "Who would set it?"); +#endif +} #ifndef PRODUCT void CollectedHeap::check_for_bad_heap_word_value(HeapWord* addr, size_t size) { @@ -140,12 +150,13 @@ void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) { MemRegion deferred = thread->deferred_card_mark(); if (!deferred.is_empty()) { + assert(_defer_initial_card_mark, "Otherwise should be empty"); { // Verify that the storage points to a parsable object in heap DEBUG_ONLY(oop old_obj = oop(deferred.start());) assert(is_in(old_obj), "Not in allocated heap"); assert(!can_elide_initializing_store_barrier(old_obj), - "Else should have been filtered in defer_store_barrier()"); + "Else should have been filtered in new_store_pre_barrier()"); assert(!is_in_permanent(old_obj), "Sanity: not expected"); assert(old_obj->is_oop(true), "Not an oop"); assert(old_obj->is_parsable(), "Will not be concurrently parsable"); @@ -174,9 +185,7 @@ // so long as the card-mark is completed before the next // scavenge. For all these cases, we can do a card mark // at the point at which we do a slow path allocation -// in the old gen. For uniformity, however, we end -// up using the same scheme (see below) for all three -// cases (deferring the card-mark appropriately). +// in the old gen, i.e. in this call. // (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires // in addition that the card-mark for an old gen allocated // object strictly follow any associated initializing stores. @@ -199,12 +208,13 @@ // but, like in CMS, because of the presence of concurrent refinement // (much like CMS' precleaning), must strictly follow the oop-store. // Thus, using the same protocol for maintaining the intended -// invariants turns out, serendepitously, to be the same for all -// three collectors/heap types above. +// invariants turns out, serendipitously, to be the same for both +// G1 and CMS. // -// For each future collector, this should be reexamined with -// that specific collector in mind. -oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) { +// For any future collector, this code should be reexamined with +// that specific collector in mind, and the documentation above suitably +// extended and updated. +oop CollectedHeap::new_store_pre_barrier(JavaThread* thread, oop new_obj) { // If a previous card-mark was deferred, flush it now. flush_deferred_store_barrier(thread); if (can_elide_initializing_store_barrier(new_obj)) { // The deferred_card_mark region should be empty // following the flush above. 
assert(thread->deferred_card_mark().is_empty(), "Error"); } else { - // Remember info for the newly deferred store barrier - MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size()); - assert(!deferred.is_empty(), "Error"); - thread->set_deferred_card_mark(deferred); + MemRegion mr((HeapWord*)new_obj, new_obj->size()); + assert(!mr.is_empty(), "Error"); + if (_defer_initial_card_mark) { + // Defer the card mark + thread->set_deferred_card_mark(mr); + } else { + // Do the card mark + BarrierSet* bs = barrier_set(); + assert(bs->has_write_region_opt(), "No write_region() on BarrierSet"); + bs->write_region(mr); + } } return new_obj; } @@ -241,9 +258,9 @@ assert(Universe::heap()->is_in_reserved(start + words - 1), "not in heap"); } -void CollectedHeap::zap_filler_array(HeapWord* start, size_t words) +void CollectedHeap::zap_filler_array(HeapWord* start, size_t words, bool zap) { - if (ZapFillerObjects) { + if (ZapFillerObjects && zap) { Copy::fill_to_words(start + filler_array_hdr_size(), words - filler_array_hdr_size(), 0XDEAFBABE); } @@ -251,7 +268,7 @@ #endif // ASSERT void -CollectedHeap::fill_with_array(HeapWord* start, size_t words) +CollectedHeap::fill_with_array(HeapWord* start, size_t words, bool zap) { assert(words >= filler_array_min_size(), "too small for an array"); assert(words <= filler_array_max_size(), "too big for a single object"); @@ -262,16 +279,16 @@ // Set the length first for concurrent GC. ((arrayOop)start)->set_length((int)len); post_allocation_setup_common(Universe::intArrayKlassObj(), start, words); - DEBUG_ONLY(zap_filler_array(start, words);) + DEBUG_ONLY(zap_filler_array(start, words, zap);) } void -CollectedHeap::fill_with_object_impl(HeapWord* start, size_t words) +CollectedHeap::fill_with_object_impl(HeapWord* start, size_t words, bool zap) { assert(words <= filler_array_max_size(), "too big for a single object"); if (words >= filler_array_min_size()) { - fill_with_array(start, words); + fill_with_array(start, words, zap); } else if (words > 0) { assert(words == min_fill_size(), "unaligned size"); post_allocation_setup_common(SystemDictionary::Object_klass(), start, @@ -279,14 +296,14 @@ } } -void CollectedHeap::fill_with_object(HeapWord* start, size_t words) +void CollectedHeap::fill_with_object(HeapWord* start, size_t words, bool zap) { DEBUG_ONLY(fill_args_check(start, words);) HandleMark hm; // Free handles before leaving. - fill_with_object_impl(start, words); + fill_with_object_impl(start, words, zap); } -void CollectedHeap::fill_with_objects(HeapWord* start, size_t words) +void CollectedHeap::fill_with_objects(HeapWord* start, size_t words, bool zap) { DEBUG_ONLY(fill_args_check(start, words);) HandleMark hm; // Free handles before leaving. @@ -299,13 +316,13 @@ const size_t max = filler_array_max_size(); while (words > max) { const size_t cur = words - max >= min ? max : max - min; - fill_with_array(start, cur); + fill_with_array(start, cur, zap); start += cur; words -= cur; } #endif - fill_with_object_impl(start, words); + fill_with_object_impl(start, words, zap); } HeapWord* CollectedHeap::allocate_new_tlab(size_t size) { @@ -313,22 +330,6 @@ return NULL; } -void CollectedHeap::fill_all_tlabs(bool retire) { - assert(UseTLAB, "should not reach here"); - // See note in ensure_parsability() below. - assert(SafepointSynchronize::is_at_safepoint() || - !is_init_completed(), - "should only fill tlabs at safepoint"); - // The main thread starts allocating via a TLAB even before it - // has added itself to the threads list at vm boot-up. 
- assert(Threads::first() != NULL, - "Attempt to fill tlabs before main thread has been added" - " to threads list is doomed to failure!"); - for(JavaThread *thread = Threads::first(); thread; thread = thread->next()) { - thread->tlab().make_parsable(retire); - } -} - void CollectedHeap::ensure_parsability(bool retire_tlabs) { // The second disjunct in the assertion below makes a concession // for the start-up verification done while the VM is being @@ -343,8 +344,24 @@ "Should only be called at a safepoint or at start-up" " otherwise concurrent mutator activity may make heap " " unparsable again"); - if (UseTLAB) { - fill_all_tlabs(retire_tlabs); + const bool use_tlab = UseTLAB; + const bool deferred = _defer_initial_card_mark; + // The main thread starts allocating via a TLAB even before it + // has added itself to the threads list at vm boot-up. + assert(!use_tlab || Threads::first() != NULL, + "Attempt to fill tlabs before main thread has been added" + " to threads list is doomed to failure!"); + for (JavaThread *thread = Threads::first(); thread; thread = thread->next()) { + if (use_tlab) thread->tlab().make_parsable(retire_tlabs); +#ifdef COMPILER2 + // The deferred store barriers must all have been flushed to the + // card-table (or other remembered set structure) before GC starts + // processing the card-table (or other remembered set). + if (deferred) flush_deferred_store_barrier(thread); +#else + assert(!deferred, "Should be false"); + assert(thread->deferred_card_mark().is_empty(), "Should be empty"); +#endif } } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/gc_interface/collectedHeap.hpp --- a/src/share/vm/gc_interface/collectedHeap.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -51,6 +51,9 @@ // Used for filler objects (static, but initialized in ctor). static size_t _filler_array_max_size; + // Used in support of ReduceInitialCardMarks; only consulted if COMPILER2 is being used + bool _defer_initial_card_mark; + protected: MemRegion _reserved; BarrierSet* _barrier_set; @@ -70,13 +73,16 @@ // Constructor CollectedHeap(); + // Do common initializations that must follow instance construction, + // for example, those needing virtual calls. + // This code could perhaps be moved into initialize() but would + // be slightly more awkward because we want the latter to be a + // pure virtual. + void pre_initialize(); + // Create a new tlab virtual HeapWord* allocate_new_tlab(size_t size); - // Fix up tlabs to make the heap well-formed again, - // optionally retiring the tlabs. - virtual void fill_all_tlabs(bool retire); - // Accumulate statistics on all tlabs. virtual void accumulate_statistics_all_tlabs(); @@ -127,14 +133,14 @@ static inline size_t filler_array_max_size(); DEBUG_ONLY(static void fill_args_check(HeapWord* start, size_t words);) - DEBUG_ONLY(static void zap_filler_array(HeapWord* start, size_t words);) + DEBUG_ONLY(static void zap_filler_array(HeapWord* start, size_t words, bool zap = true);) // Fill with a single array; caller must ensure filler_array_min_size() <= // words <= filler_array_max_size(). - static inline void fill_with_array(HeapWord* start, size_t words); + static inline void fill_with_array(HeapWord* start, size_t words, bool zap = true); // Fill with a single object (either an int array or a java.lang.Object). 
- static inline void fill_with_object_impl(HeapWord* start, size_t words); + static inline void fill_with_object_impl(HeapWord* start, size_t words, bool zap = true); // Verification functions virtual void check_for_bad_heap_word_value(HeapWord* addr, size_t size) @@ -338,14 +344,14 @@ return size_t(align_object_size(oopDesc::header_size())); } - static void fill_with_objects(HeapWord* start, size_t words); + static void fill_with_objects(HeapWord* start, size_t words, bool zap = true); - static void fill_with_object(HeapWord* start, size_t words); - static void fill_with_object(MemRegion region) { - fill_with_object(region.start(), region.word_size()); + static void fill_with_object(HeapWord* start, size_t words, bool zap = true); + static void fill_with_object(MemRegion region, bool zap = true) { + fill_with_object(region.start(), region.word_size(), zap); } - static void fill_with_object(HeapWord* start, HeapWord* end) { - fill_with_object(start, pointer_delta(end, start)); + static void fill_with_object(HeapWord* start, HeapWord* end, bool zap = true) { + fill_with_object(start, pointer_delta(end, start), zap); } // Some heaps may offer a contiguous region for shared non-blocking @@ -431,14 +437,25 @@ // promises to call this function on such a slow-path-allocated // object before performing initializations that have elided // store barriers. Returns new_obj, or maybe a safer copy thereof. - virtual oop defer_store_barrier(JavaThread* thread, oop new_obj); + virtual oop new_store_pre_barrier(JavaThread* thread, oop new_obj); // Answers whether an initializing store to a new object currently - // allocated at the given address doesn't need a (deferred) store + // allocated at the given address doesn't need a store // barrier. Returns "true" if it doesn't need an initializing // store barrier; answers "false" if it does. virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0; + // If a compiler is eliding store barriers for TLAB-allocated objects, + // we will be informed of a slow-path allocation by a call + // to new_store_pre_barrier() above. Such a call precedes the + // initialization of the object itself, and no post-store-barriers will + // be issued. Some heap types require that the barrier strictly follows + // the initializing stores. (This is currently implemented by deferring the + // barrier until the next slow-path allocation or gc-related safepoint.) + // This interface answers whether a particular heap type needs the card + // mark to be thus strictly sequenced after the stores. + virtual bool card_mark_must_follow_store() const = 0; + // If the CollectedHeap was asked to defer a store barrier above, // this informs it to flush such a deferred store barrier to the // remembered set. diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/includeDB_compiler2 --- a/src/share/vm/includeDB_compiler2 Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/includeDB_compiler2 Fri Jan 22 15:06:53 2010 -0800 @@ -601,6 +601,7 @@ loopTransform.cpp addnode.hpp loopTransform.cpp allocation.inline.hpp +loopTransform.cpp callnode.hpp loopTransform.cpp connode.hpp loopTransform.cpp compileLog.hpp loopTransform.cpp divnode.hpp diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/interpreter/abstractInterpreter.hpp --- a/src/share/vm/interpreter/abstractInterpreter.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/interpreter/abstractInterpreter.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. 
+ * Copyright 1997-2010 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -109,6 +109,8 @@ static void print_method_kind(MethodKind kind) PRODUCT_RETURN; + static bool can_be_compiled(methodHandle m); + // Runtime support // length = invoke bytecode length (to advance to next bytecode) diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/memory/genCollectedHeap.cpp --- a/src/share/vm/memory/genCollectedHeap.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/memory/genCollectedHeap.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -51,6 +51,8 @@ } jint GenCollectedHeap::initialize() { + CollectedHeap::pre_initialize(); + int i; _n_gens = gen_policy()->number_of_generations(); @@ -129,6 +131,7 @@ _rem_set = collector_policy()->create_rem_set(_reserved, n_covered_regions); set_barrier_set(rem_set()->bs()); + _gch = this; for (i = 0; i < _n_gens; i++) { diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/memory/genCollectedHeap.hpp --- a/src/share/vm/memory/genCollectedHeap.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/memory/genCollectedHeap.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -260,6 +260,10 @@ return true; } + virtual bool card_mark_must_follow_store() const { + return UseConcMarkSweepGC; + } + // We don't need barriers for stores to objects in the // young gen and, a fortiori, for initializing stores to // objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS} diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/memory/threadLocalAllocBuffer.cpp --- a/src/share/vm/memory/threadLocalAllocBuffer.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/memory/threadLocalAllocBuffer.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -100,7 +100,7 @@ void ThreadLocalAllocBuffer::make_parsable(bool retire) { if (end() != NULL) { invariants(); - CollectedHeap::fill_with_object(top(), hard_end()); + CollectedHeap::fill_with_object(top(), hard_end(), retire); if (retire || ZeroTLAB) { // "Reset" the TLAB set_start(NULL); diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/memory/threadLocalAllocBuffer.inline.hpp --- a/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/memory/threadLocalAllocBuffer.inline.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -1,5 +1,5 @@ /* - * Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,8 +27,13 @@ HeapWord* obj = top(); if (pointer_delta(end(), obj) >= size) { // successful thread-local allocation - - DEBUG_ONLY(Copy::fill_to_words(obj, size, badHeapWordVal)); +#ifdef ASSERT + // Skip mangling the space corresponding to the object header to + // ensure that the returned space is not considered parsable by + // any concurrent GC thread. + size_t hdr_size = CollectedHeap::min_fill_size(); + Copy::fill_to_words(obj + hdr_size, size - hdr_size, badHeapWordVal); +#endif // ASSERT // This addition is safe because we know that top is // at least size below end, so the add can't wrap. 
set_top(obj + size); diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/c2_globals.hpp --- a/src/share/vm/opto/c2_globals.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/opto/c2_globals.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -154,6 +154,12 @@ notproduct(bool, TraceProfileTripCount, false, \ "Trace profile loop trip count information") \ \ + product(bool, UseLoopPredicate, true, \ + "Generate a predicate to select fast/slow loop versions") \ + \ + develop(bool, TraceLoopPredicate, false, \ + "Trace generation of loop predicates") \ + \ develop(bool, OptoCoalesce, true, \ "Use Conservative Copy Coalescing in the Register Allocator") \ \ diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/opto/compile.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -932,6 +932,7 @@ _intrinsics = NULL; _macro_nodes = new GrowableArray<Node*>(comp_arena(), 8, 0, NULL); + _predicate_opaqs = new GrowableArray<Node*>(comp_arena(), 8, 0, NULL); register_library_intrinsics(); } @@ -1553,6 +1554,19 @@ } } +//---------------------cleanup_loop_predicates----------------------- +// Remove the opaque nodes that protect the predicates so that all unused +// checks and uncommon_traps will be eliminated from the ideal graph +void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) { + if (predicate_count()==0) return; + for (int i = predicate_count(); i > 0; i--) { + Node * n = predicate_opaque1_node(i-1); + assert(n->Opcode() == Op_Opaque1, "must be"); + igvn.replace_node(n, n->in(1)); + } + assert(predicate_count()==0, "should be clean!"); + igvn.optimize(); +} //------------------------------Optimize--------------------------------------- // Given a graph, optimize it. @@ -1594,7 +1608,7 @@ if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) { { TracePhase t2("idealLoop", &_t_idealLoop, true); - PhaseIdealLoop ideal_loop( igvn, true ); + PhaseIdealLoop ideal_loop( igvn, true, UseLoopPredicate); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop 1", 2); if (failing()) return; } // Loop opts pass if partial peeling occurred in previous pass if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) { TracePhase t3("idealLoop", &_t_idealLoop, true); - PhaseIdealLoop ideal_loop( igvn, false ); + PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop 2", 2); if (failing()) return; } // Loop opts pass for loop-unrolling before CCP if(major_progress() && (loop_opts_cnt > 0)) { TracePhase t4("idealLoop", &_t_idealLoop, true); - PhaseIdealLoop ideal_loop( igvn, false ); + PhaseIdealLoop ideal_loop( igvn, false, UseLoopPredicate); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop 3", 2); } @@ -1648,13 +1662,21 @@ // peeling, unrolling, etc. if(loop_opts_cnt > 0) { debug_only( int cnt = 0; ); + bool loop_predication = UseLoopPredicate; while(major_progress() && (loop_opts_cnt > 0)) { TracePhase t2("idealLoop", &_t_idealLoop, true); assert( cnt++ < 40, "infinite cycle in loop optimization" ); - PhaseIdealLoop ideal_loop( igvn, true ); + PhaseIdealLoop ideal_loop( igvn, true, loop_predication); loop_opts_cnt--; if (major_progress()) print_method("PhaseIdealLoop iterations", 2); if (failing()) return; + // Perform loop predication optimization during first iteration after CCP. + // After that switch it off and clean up unused loop predicates. 
+ if (loop_predication) { + loop_predication = false; + cleanup_loop_predicates(igvn); + if (failing()) return; + } } } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/opto/compile.hpp Fri Jan 22 15:06:53 2010 -0800 @@ -38,6 +38,7 @@ class OptoReg; class PhaseCFG; class PhaseGVN; +class PhaseIterGVN; class PhaseRegAlloc; class PhaseCCP; class PhaseCCP_DCE; @@ -172,6 +173,7 @@ const char* _failure_reason; // for record_failure/failing pattern GrowableArray<CallGenerator*>* _intrinsics; // List of intrinsics. GrowableArray<Node*>* _macro_nodes; // List of nodes which need to be expanded before matching. + GrowableArray<Node*>* _predicate_opaqs; // List of Opaque1 nodes for the loop predicates. ConnectionGraph* _congraph; #ifndef PRODUCT IdealGraphPrinter* _printer; #endif @@ -351,7 +353,9 @@ } int macro_count() { return _macro_nodes->length(); } + int predicate_count() { return _predicate_opaqs->length();} Node* macro_node(int idx) { return _macro_nodes->at(idx); } + Node* predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);} ConnectionGraph* congraph() { return _congraph;} void add_macro_node(Node * n) { //assert(n->is_macro(), "must be a macro node"); @@ -363,7 +367,19 @@ // that the node is in the array before attempting to remove it if (_macro_nodes->contains(n)) _macro_nodes->remove(n); + // remove from _predicate_opaqs list also if it is there + if (predicate_count() > 0 && _predicate_opaqs->contains(n)){ + _predicate_opaqs->remove(n); + } } + void add_predicate_opaq(Node * n) { + assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1"); + assert(_macro_nodes->contains(n), "should have already been in macro list"); + _predicate_opaqs->append(n); + } + // remove the opaque nodes that protect the predicates so that the unused checks and + // uncommon traps will be eliminated from the graph. + void cleanup_loop_predicates(PhaseIterGVN &igvn); // Compilation environment. Arena* comp_arena() { return &_comp_arena; } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/opto/graphKit.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -3259,9 +3259,10 @@ if (use_ReduceInitialCardMarks() && obj == just_allocated_object(control())) { // We can skip marks on a freshly-allocated object in Eden. - // Keep this code in sync with maybe_defer_card_mark() in runtime.cpp. - // That routine informs GC to take appropriate compensating steps - // so as to make this card-mark elision safe. + // Keep this code in sync with new_store_pre_barrier() in runtime.cpp. + // That routine informs GC to take appropriate compensating steps, + // upon a slow-path allocation, so as to make this card-mark + // elision safe. 
return; } diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/loopTransform.cpp --- a/src/share/vm/opto/loopTransform.cpp Wed Jan 20 11:32:41 2010 -0700 +++ b/src/share/vm/opto/loopTransform.cpp Fri Jan 22 15:06:53 2010 -0800 @@ -549,6 +549,10 @@ // Comparing trip+off vs limit Node *bol = iff->in(1); if( bol->req() != 2 ) continue; // dead constant test + if (!bol->is_Bool()) { + assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only"); + continue; + } Node *cmp = bol->in(1); Node *rc_exp = cmp->in(1); @@ -875,7 +879,7 @@ //------------------------------is_invariant----------------------------- // Return true if n is invariant bool IdealLoopTree::is_invariant(Node* n) const { - Node *n_c = _phase->get_ctrl(n); + Node *n_c = _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; if (n_c->is_top()) return false; return !is_member(_phase->get_loop(n_c)); } @@ -1594,7 +1598,7 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) { // Check and remove empty loops (spam micro-benchmarks) if( policy_do_remove_empty_loop(phase) ) - return true; // Here we removed an empty loop + return true; // Here we removed an empty loop bool should_peel = policy_peeling(phase); // Should we peel? @@ -1688,8 +1692,8 @@ // an even number of trips). If we are peeling, we might enable some RCE // and we'd rather unroll the post-RCE'd loop SO... do not unroll if // peeling. - if( should_unroll && !should_peel ) - phase->do_unroll(this,old_new, true); + if( should_unroll && !should_peel ) + phase->do_unroll(this,old_new, true); // Adjust the pre-loop limits to align the main body // iterations. @@ -1731,9 +1735,9 @@ _allow_optimizations && !tail()->is_top() ) { // Also ignore the occasional dead backedge if (!_has_call) { - if (!iteration_split_impl( phase, old_new )) { - return false; - } + if (!iteration_split_impl( phase, old_new )) { + return false; + } } else if (policy_unswitching(phase)) { phase->do_unswitching(this, old_new); } @@ -1746,3 +1750,576 @@ return false; return true; } + +//-------------------------------is_uncommon_trap_proj---------------------------- +// Return true if proj is of the form "proj->[region->..]call_uct" +bool PhaseIdealLoop::is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate) { + int path_limit = 10; + assert(proj, "invalid argument"); + Node* out = proj; + for (int ct = 0; ct < path_limit; ct++) { + out = out->unique_ctrl_out(); + if (out == NULL || out->is_Root() || out->is_Start()) + return false; + if (out->is_CallStaticJava()) { + int req = out->as_CallStaticJava()->uncommon_trap_request(); + if (req != 0) { + Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(req); + if (!must_reason_predicate || reason == Deoptimization::Reason_predicate){ + return true; + } + } + return false; // don't look further after a call + } + } + return false; +} + +//-------------------------------is_uncommon_trap_if_pattern------------------------- +// Return true for "if(test)-> proj -> ... 
+// | +// V +// other_proj->[region->..]call_uct" +// +// "must_reason_predicate" means the uct reason must be Reason_predicate +bool PhaseIdealLoop::is_uncommon_trap_if_pattern(ProjNode *proj, bool must_reason_predicate) { + Node *in0 = proj->in(0); + if (!in0->is_If()) return false; + IfNode* iff = in0->as_If(); + + // we need "If(Conv2B(Opaque1(...)))" pattern for must_reason_predicate + if (must_reason_predicate) { + if (iff->in(1)->Opcode() != Op_Conv2B || + iff->in(1)->in(1)->Opcode() != Op_Opaque1) { + return false; + } + } + + ProjNode* other_proj = iff->proj_out(1-proj->_con)->as_Proj(); + return is_uncommon_trap_proj(other_proj, must_reason_predicate); +} + +//------------------------------create_new_if_for_predicate------------------------ +// create a new if above the uct_if_pattern for the predicate to be promoted. +// +// before after +// ---------- ---------- +// ctrl ctrl +// | | +// | | +// v v +// iff new_iff +// / \ / \ +// / \ / \ +// v v v v +// uncommon_proj cont_proj if_uct if_cont +// \ | | | | +// \ | | | | +// v v v | v +// rgn loop | iff +// | | / \ +// | | / \ +// v | v v +// uncommon_trap | uncommon_proj cont_proj +// \ \ | | +// \ \ | | +// v v v v +// rgn loop +// | +// | +// v +// uncommon_trap +// +// +// We will create a region to guard the uct call if there is none there. +// The true projection (if_cont) of the new_iff is returned. +ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj) { + assert(is_uncommon_trap_if_pattern(cont_proj, true), "must be a uct if pattern!"); + IfNode* iff = cont_proj->in(0)->as_If(); + + ProjNode *uncommon_proj = iff->proj_out(1 - cont_proj->_con); + Node *rgn = uncommon_proj->unique_ctrl_out(); + assert(rgn->is_Region() || rgn->is_Call(), "must be a region or call uct"); + + if (!rgn->is_Region()) { // create a region to guard the call + assert(rgn->is_Call(), "must be call uct"); + CallNode* call = rgn->as_Call(); + rgn = new (C, 1) RegionNode(1); + _igvn.set_type(rgn, rgn->bottom_type()); + rgn->add_req(uncommon_proj); + set_idom(rgn, idom(uncommon_proj), dom_depth(uncommon_proj)+1); + _igvn.hash_delete(call); + call->set_req(0, rgn); + } + + // Create new_iff + uint iffdd = dom_depth(iff); + IdealLoopTree* lp = get_loop(iff); + IfNode *new_iff = new (C, 2) IfNode(iff->in(0), NULL, iff->_prob, iff->_fcnt); + register_node(new_iff, lp, idom(iff), iffdd); + Node *if_cont = new (C, 1) IfTrueNode(new_iff); + Node *if_uct = new (C, 1) IfFalseNode(new_iff); + if (cont_proj->is_IfFalse()) { + // Swap + Node* tmp = if_uct; if_uct = if_cont; if_cont = tmp; + } + register_node(if_cont, lp, new_iff, iffdd); + register_node(if_uct, get_loop(rgn), new_iff, iffdd); + + // if_cont to iff + _igvn.hash_delete(iff); + iff->set_req(0, if_cont); + set_idom(iff, if_cont, dom_depth(iff)); + + // if_uct to rgn + _igvn.hash_delete(rgn); + rgn->add_req(if_uct); + Node* ridom = idom(rgn); + Node* nrdom = dom_lca(ridom, new_iff); + set_idom(rgn, nrdom, dom_depth(rgn)); + + // rgn must have no phis + assert(!rgn->as_Region()->has_phi(), "region must have no phis"); + + return if_cont->as_Proj(); +} + +//------------------------------find_predicate_insertion_point-------------------------- +// Find a good location to insert a predicate +ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c) { + if (start_c == C->root() || !start_c->is_Proj()) + return NULL; + if (is_uncommon_trap_if_pattern(start_c->as_Proj(), true/*Reason_Predicate*/)) { + return start_c->as_Proj(); + } + return NULL; +} + 
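For orientation, a minimal source-level sketch (illustrative only, not part of this changeset) of the transformation this machinery enables; deoptimize() below is a hypothetical stand-in for reaching the uncommon trap:

    void deoptimize(); // hypothetical stand-in for the uncommon trap path

    // Before predication: the check runs on every iteration.
    int sum_before(const int* a, int a_len, int n) {
      int sum = 0;
      for (int i = 0; i < n; i++) {
        if (!((unsigned)i < (unsigned)a_len)) deoptimize(); // per-iteration range check
        sum += a[i];
      }
      return sum;
    }

    // After predication: one hoisted predicate (a Reason_predicate trap)
    // guards the loop and the body runs check-free. Assumes the counted-loop
    // guard has already established that the loop is entered (n >= 1).
    int sum_after(const int* a, int a_len, int n) {
      if (!((unsigned)(n - 1) < (unsigned)a_len)) deoptimize(); // hoisted predicate
      int sum = 0;
      for (int i = 0; i < n; i++) {
        sum += a[i];
      }
      return sum;
    }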
+//------------------------------Invariance----------------------------------- +// Helper class for loop_predication_impl to compute invariance on the fly and +// clone invariants. +class Invariance : public StackObj { + VectorSet _visited, _invariant; + Node_Stack _stack; + VectorSet _clone_visited; + Node_List _old_new; // map of old to new (clone) + IdealLoopTree* _lpt; + PhaseIdealLoop* _phase; + + // Helper function to set up the invariance for invariance computation + // If n is a known invariant, set up directly. Otherwise, look up the + // possibility to push n onto the stack for further processing. + void visit(Node* use, Node* n) { + if (_lpt->is_invariant(n)) { // known invariant + _invariant.set(n->_idx); + } else if (!n->is_CFG()) { + Node *n_ctrl = _phase->ctrl_or_self(n); + Node *u_ctrl = _phase->ctrl_or_self(use); // self if use is a CFG + if (_phase->is_dominator(n_ctrl, u_ctrl)) { + _stack.push(n, n->in(0) == NULL ? 1 : 0); + } + } + } + + // Compute invariance for "n" and (possibly) all its inputs recursively + // on the fly + void compute_invariance(Node* n) { + assert(_visited.test(n->_idx), "must be"); + visit(n, n); + while (_stack.is_nonempty()) { + Node* n = _stack.node(); + uint idx = _stack.index(); + if (idx == n->req()) { // all inputs are processed + _stack.pop(); + // n is invariant if its inputs are all invariant + bool all_inputs_invariant = true; + for (uint i = 0; i < n->req(); i++) { + Node* in = n->in(i); + if (in == NULL) continue; + assert(_visited.test(in->_idx), "must have visited input"); + if (!_invariant.test(in->_idx)) { // bad guy + all_inputs_invariant = false; + break; + } + } + if (all_inputs_invariant) { + _invariant.set(n->_idx); // I am an invariant too + } + } else { // process next input + _stack.set_index(idx + 1); + Node* m = n->in(idx); + if (m != NULL && !_visited.test_set(m->_idx)) { + visit(n, m); + } + } + } + } + + // Helper function to set up _old_new map for clone_nodes. + // If n is a known invariant, set up directly ("clone" of n == n). + // Otherwise, push n onto the stack for real cloning. + void clone_visit(Node* n) { + assert(_invariant.test(n->_idx), "must be invariant"); + if (_lpt->is_invariant(n)) { // known invariant + _old_new.map(n->_idx, n); + } else { // to be cloned + assert (!n->is_CFG(), "should not see CFG here"); + _stack.push(n, n->in(0) == NULL ? 1 : 0); + } + } + + // Clone "n" and (possibly) all its inputs recursively + void clone_nodes(Node* n, Node* ctrl) { + clone_visit(n); + while (_stack.is_nonempty()) { + Node* n = _stack.node(); + uint idx = _stack.index(); + if (idx == n->req()) { // all inputs processed, clone n! 
+ _stack.pop(); + // clone invariant node + Node* n_cl = n->clone(); + _old_new.map(n->_idx, n_cl); + _phase->register_new_node(n_cl, ctrl); + for (uint i = 0; i < n->req(); i++) { + Node* in = n_cl->in(i); + if (in == NULL) continue; + n_cl->set_req(i, _old_new[in->_idx]); + } + } else { // process next input + _stack.set_index(idx + 1); + Node* m = n->in(idx); + if (m != NULL && !_clone_visited.test_set(m->_idx)) { + clone_visit(m); // visit the input + } + } + } + } + + public: + Invariance(Arena* area, IdealLoopTree* lpt) : + _lpt(lpt), _phase(lpt->_phase), + _visited(area), _invariant(area), _stack(area, 10 /* guess */), + _clone_visited(area), _old_new(area) + {} + + // Map old to n for invariance computation and cloning + void map_ctrl(Node* old, Node* n) { + assert(old->is_CFG() && n->is_CFG(), "must be"); + _old_new.map(old->_idx, n); // "clone" of old is n + _invariant.set(old->_idx); // old is invariant + _clone_visited.set(old->_idx); + } + + // Driver function to compute invariance + bool is_invariant(Node* n) { + if (!_visited.test_set(n->_idx)) + compute_invariance(n); + return (_invariant.test(n->_idx) != 0); + } + + // Driver function to clone an invariant + Node* clone(Node* n, Node* ctrl) { + assert(ctrl->is_CFG(), "must be"); + assert(_invariant.test(n->_idx), "must be an invariant"); + if (!_clone_visited.test(n->_idx)) + clone_nodes(n, ctrl); + return _old_new[n->_idx]; + } +}; + +//------------------------------is_range_check_if ----------------------------------- +// Returns true if the predicate of iff is in "scale*iv + offset u< load_range(ptr)" format +// Note: this function is particularly designed for loop predication. We require load_range +// and offset to be loop invariant, computed on the fly by "invar". +bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const { + if (!is_loop_exit(iff)) { + return false; + } + if (!iff->in(1)->is_Bool()) { + return false; + } + const BoolNode *bol = iff->in(1)->as_Bool(); + if (bol->_test._test != BoolTest::lt) { + return false; + } + if (!bol->in(1)->is_Cmp()) { + return false; + } + const CmpNode *cmp = bol->in(1)->as_Cmp(); + if (cmp->Opcode() != Op_CmpU ) { + return false; + } + if (cmp->in(2)->Opcode() != Op_LoadRange) { + return false; + } + LoadRangeNode* lr = (LoadRangeNode*)cmp->in(2); + if (!invar.is_invariant(lr)) { // loadRange must be invariant + return false; + } + Node *iv = _head->as_CountedLoop()->phi(); + int scale = 0; + Node *offset = NULL; + if (!phase->is_scaled_iv_plus_offset(cmp->in(1), iv, &scale, &offset)) { + return false; + } + if(offset && !invar.is_invariant(offset)) { // offset must be invariant + return false; + } + return true; +} + +//------------------------------rc_predicate----------------------------------- +// Create a range check predicate +// +// for (i = init; i < limit; i += stride) { +// a[scale*i+offset] +// } +// +// Compute max(scale*i + offset) for init <= i < limit and build the predicate +// as "max(scale*i + offset) u< a.length". 
+// +// There are two cases for max(scale*i + offset): +// (1) stride*scale > 0 +// max(scale*i + offset) = scale*(limit-stride) + offset +// (2) stride*scale < 0 +// max(scale*i + offset) = scale*init + offset +BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl, + int scale, Node* offset, + Node* init, Node* limit, Node* stride, + Node* range) { + Node* max_idx_expr = init; + int stride_con = stride->get_int(); + if ((stride_con > 0) == (scale > 0)) { + max_idx_expr = new (C, 3) SubINode(limit, stride); + register_new_node(max_idx_expr, ctrl); + } + + if (scale != 1) { + ConNode* con_scale = _igvn.intcon(scale); + max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale); + register_new_node(max_idx_expr, ctrl); + } + + if (offset && (!offset->is_Con() || offset->get_int() != 0)){ + max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset); + register_new_node(max_idx_expr, ctrl); + } + + CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range); + register_new_node(cmp, ctrl); + BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt); + register_new_node(bol, ctrl); + return bol; +} + +//------------------------------ loop_predication_impl-------------------------- +// Insert loop predicates for null checks and range checks +bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) { + if (!UseLoopPredicate) return false; + + // Too many traps seen? + bool tmt = C->too_many_traps(C->method(), 0, Deoptimization::Reason_predicate); + int tc = C->trap_count(Deoptimization::Reason_predicate); + if (tmt || tc > 0) { + if (TraceLoopPredicate) { + tty->print_cr("too many predicate traps: %d", tc); + C->method()->print(); // which method has too many predicate traps + tty->print_cr(""); + } + return false; + } + + CountedLoopNode *cl = NULL; + if (loop->_head->is_CountedLoop()) { + cl = loop->_head->as_CountedLoop(); + // do nothing for iteration-split loops + if(!cl->is_normal_loop()) return false; + } + + LoopNode *lpn = loop->_head->as_Loop(); + Node* entry = lpn->in(LoopNode::EntryControl); + + ProjNode *predicate_proj = find_predicate_insertion_point(entry); + if (!predicate_proj){ +#ifndef PRODUCT + if (TraceLoopPredicate) { + tty->print("missing predicate:"); + loop->dump_head(); + } +#endif + return false; + } + + ConNode* zero = _igvn.intcon(0); + set_ctrl(zero, C->root()); + Node *cond_false = new (C, 2) Conv2BNode(zero); + register_new_node(cond_false, C->root()); + ConNode* one = _igvn.intcon(1); + set_ctrl(one, C->root()); + Node *cond_true = new (C, 2) Conv2BNode(one); + register_new_node(cond_true, C->root()); + + ResourceArea *area = Thread::current()->resource_area(); + Invariance invar(area, loop); + + // Create list of if-projs such that a newer proj dominates all older + // projs in the list, and they all dominate loop->tail() + Node_List if_proj_list(area); + LoopNode *head = loop->_head->as_Loop(); + Node *current_proj = loop->tail(); // start from tail + while ( current_proj != head ) { + if (loop == get_loop(current_proj) && // still in the loop ? + current_proj->is_Proj() && // is a projection ? + current_proj->in(0)->Opcode() == Op_If) { // is an if projection ? 
+      if_proj_list.push(current_proj);
+    }
+    current_proj = idom(current_proj);
+  }
+
+  bool hoisted = false; // true if at least one proj is promoted
+  while (if_proj_list.size() > 0) {
+    // The following are changed to non-null when a predicate can be hoisted
+    ProjNode* new_predicate_proj = NULL;
+    BoolNode* new_predicate_bol  = NULL;
+
+    ProjNode* proj = if_proj_list.pop()->as_Proj();
+    IfNode*   iff  = proj->in(0)->as_If();
+
+    if (!is_uncommon_trap_if_pattern(proj)) {
+      if (loop->is_loop_exit(iff)) {
+        // stop processing the remaining projs in the list because whether they
+        // execute depends on the condition of "iff" (iff->in(1)).
+        break;
+      } else {
+        // Both arms are inside the loop. There are two cases:
+        // (1) there is one backward branch. In this case, any remaining proj
+        //     in the if_proj list post-dominates "iff". So the condition of "iff"
+        //     does not directly determine whether the remaining projs execute,
+        //     and we can safely continue.
+        // (2) both arms are forward branches, i.e. a diamond shape. In this case,
+        //     "proj" does not dominate loop->tail(), so it cannot be in the
+        //     if_proj list.
+        continue;
+      }
+    }
+
+    Node* test = iff->in(1);
+    if (!test->is_Bool()) { // Conv2B, ...
+      continue;
+    }
+    BoolNode* bol = test->as_Bool();
+    if (invar.is_invariant(bol)) {
+      // Invariant test
+      new_predicate_proj = create_new_if_for_predicate(predicate_proj);
+      Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
+      new_predicate_bol = invar.clone(bol, ctrl)->as_Bool();
+      if (TraceLoopPredicate) tty->print("invariant");
+    } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) {
+      // Range check (only for counted loops)
+      new_predicate_proj = create_new_if_for_predicate(predicate_proj);
+      Node *ctrl = new_predicate_proj->in(0)->as_If()->in(0);
+      const Node* cmp = bol->in(1)->as_Cmp();
+      Node* idx = cmp->in(1);
+      assert(!invar.is_invariant(idx), "index is variant");
+      assert(cmp->in(2)->Opcode() == Op_LoadRange, "must be");
+      LoadRangeNode* ld_rng = (LoadRangeNode*)cmp->in(2); // LoadRangeNode
+      assert(invar.is_invariant(ld_rng), "load range must be invariant");
+      ld_rng = (LoadRangeNode*)invar.clone(ld_rng, ctrl);
+      int scale = 1;
+      Node* offset = zero;
+      bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
+      assert(ok, "must be index expression");
+      if (offset && offset != zero) {
+        assert(invar.is_invariant(offset), "offset must be loop invariant");
+        offset = invar.clone(offset, ctrl);
+      }
+      Node* init   = cl->init_trip();
+      Node* limit  = cl->limit();
+      Node* stride = cl->stride();
+      new_predicate_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng);
+      if (TraceLoopPredicate) tty->print("range check");
+    }
+
+    if (new_predicate_proj == NULL) {
+      // The other proj of the "iff" is an uncommon trap projection, and we can
+      // assume that proj will not be executed ("executed" meaning the uncommon
+      // trap is raised).
+      continue;
+    } else {
+      // Success - attach the condition (new_predicate_bol) to the predicate if
+      invar.map_ctrl(proj, new_predicate_proj); // so that the invariance test stays accurate
+      IfNode* new_iff = new_predicate_proj->in(0)->as_If();
+
+      // Negate test if necessary
+      if (proj->_con != predicate_proj->_con) {
+        new_predicate_bol = new (C, 2) BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate());
+        register_new_node(new_predicate_bol, new_iff->in(0));
+        if (TraceLoopPredicate) tty->print_cr(" if negated: %d", iff->_idx);
+      } else {
+        if (TraceLoopPredicate) tty->print_cr(" if: %d", iff->_idx);
+      }
+
+      _igvn.hash_delete(new_iff);
+      new_iff->set_req(1, new_predicate_bol);
+
+      _igvn.hash_delete(iff);
+      iff->set_req(1, proj->is_IfFalse() ? cond_false : cond_true);
+
+      Node* ctrl = new_predicate_proj; // new control
+      ProjNode* dp = proj;             // old control
+      assert(get_loop(dp) == loop, "guaranteed at the time of collecting proj");
+      // Find nodes (that depend only on the test) off the surviving projection;
+      // move them outside the loop with the control of proj_clone
+      for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
+        Node* cd = dp->fast_out(i); // Control-dependent node
+        if (cd->depends_only_on_test()) {
+          assert(cd->in(0) == dp, "");
+          _igvn.hash_delete(cd);
+          cd->set_req(0, ctrl); // ctrl, not NULL
+          set_early_ctrl(cd);
+          _igvn._worklist.push(cd);
+          IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
+          if (new_loop != loop) {
+            if (!loop->_child)     loop->_body.yank(cd);
+            if (!new_loop->_child) new_loop->_body.push(cd);
+          }
+          --i;
+          --imax;
+        }
+      }
+
+      hoisted = true;
+      C->set_major_progress();
+    }
+  } // end while
+
+#ifndef PRODUCT
+  // Report that loop predication was actually performed for this loop
+  if (TraceLoopPredicate && hoisted) {
+    tty->print("Loop Predication Performed:");
+    loop->dump_head();
+  }
+#endif
+
+  return hoisted;
+}
+
+//------------------------------loop_predication--------------------------------
+// Driver routine for the loop predication optimization
+bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) {
+  bool hoisted = false;
+  // Recursively promote predicates
+  if (_child) {
+    hoisted = _child->loop_predication( phase);
+  }
+
+  // self
+  if (!_irreducible && !tail()->is_top()) {
+    hoisted |= phase->loop_predication_impl(this);
+  }
+
+  if (_next) { // sibling
+    hoisted |= _next->loop_predication( phase);
+  }
+
+  return hoisted;
+}
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/loopnode.cpp
--- a/src/share/vm/opto/loopnode.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/loopnode.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1420,11 +1420,57 @@
   }
 }
 
+//---------------------collect_potentially_useful_predicates-----------------------
+// Helper function to collect potentially useful predicates, to prevent them from
+// being eliminated by PhaseIdealLoop::eliminate_useless_predicates
+void PhaseIdealLoop::collect_potentially_useful_predicates(
+    IdealLoopTree * loop, Unique_Node_List &useful_predicates) {
+  if (loop->_child) { // child
+    collect_potentially_useful_predicates(loop->_child, useful_predicates);
+  }
+
+  // self (only loops to which loop predication can be applied may use their predicates)
+  if (loop->_head->is_Loop() &&
+      !loop->_irreducible &&
+      !loop->tail()->is_top()) {
+    LoopNode *lpn = loop->_head->as_Loop();
+    Node* entry = lpn->in(LoopNode::EntryControl);
+    ProjNode *predicate_proj = find_predicate_insertion_point(entry);
+    if (predicate_proj != NULL) { // right pattern that can be used by loop predication
+      assert(entry->in(0)->in(1)->in(1)->Opcode() == Op_Opaque1, "must be");
+      useful_predicates.push(entry->in(0)->in(1)->in(1)); // good one
+    }
+  }
+
+  if (loop->_next) { // sibling
+    collect_potentially_useful_predicates(loop->_next, useful_predicates);
+  }
+}
+
+//------------------------eliminate_useless_predicates-----------------------------
+// Eliminate all inserted predicates that loop predication cannot use.
+void PhaseIdealLoop::eliminate_useless_predicates() {
+  if (C->predicate_count() == 0) return; // no predicates left
+
+  Unique_Node_List useful_predicates; // to store the useful predicates
+  if (C->has_loops()) {
+    collect_potentially_useful_predicates(_ltree_root->_child, useful_predicates);
+  }
+
+  for (int i = C->predicate_count(); i > 0; i--) {
+    Node * n = C->predicate_opaque1_node(i - 1);
+    assert(n->Opcode() == Op_Opaque1, "must be");
+    if (!useful_predicates.member(n)) { // not in the useful list
+      _igvn.replace_node(n, n->in(1));
+    }
+  }
+}
+
 //=============================================================================
 //----------------------------build_and_optimize-------------------------------
 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
 // its corresponding LoopNode.  If 'optimize' is true, do some loop cleanups.
-void PhaseIdealLoop::build_and_optimize(bool do_split_ifs) {
+void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) {
   int old_progress = C->major_progress();
 
   // Reset major-progress flag for the driver's heuristics
@@ -1577,6 +1623,12 @@
     return;
   }
 
+  // Some parser-inserted loop predicates could never be used by loop
+  // predication. Eliminate them before loop optimization.
+  if (UseLoopPredicate) {
+    eliminate_useless_predicates();
+  }
+
   // clear out the dead code
   while(_deadlist.size()) {
     _igvn.remove_globally_dead_node(_deadlist.pop());
@@ -1603,7 +1655,7 @@
   // Because RCE opportunities can be masked by split_thru_phi,
   // look for RCE candidates and inhibit split_thru_phi
   // on just their loop-phi's for this pass of loop opts
-  if( SplitIfBlocks && do_split_ifs ) {
+  if (SplitIfBlocks && do_split_ifs) {
     if (lpt->policy_range_check(this)) {
       lpt->_rce_candidate = 1; // = true
     }
  }
@@ -1619,12 +1671,17 @@
     NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
   }
 
+  // Perform loop predication before iteration splitting
+  if (do_loop_pred && C->has_loops() && !C->major_progress()) {
+    _ltree_root->_child->loop_predication(this);
+  }
+
   // Perform iteration-splitting on inner loops.  Split iterations to avoid
   // range checks or one-shot null checks.
 
   // If split-if's didn't hack the graph too bad (no CFG changes)
   // then do loop opts.
-  if( C->has_loops() && !C->major_progress() ) {
+  if (C->has_loops() && !C->major_progress()) {
     memset( worklist.adr(), 0, worklist.Size()*sizeof(Node*) );
     _ltree_root->_child->iteration_split( this, worklist );
     // No verify after peeling!  GCM has hoisted code out of the loop.
@@ -1636,7 +1693,7 @@
   // Do verify graph edges in any case
   NOT_PRODUCT( C->verify_graph_edges(); );
 
-  if( !do_split_ifs ) {
+  if (!do_split_ifs) {
     // We saw major progress in Split-If to get here.  We forced a
     // pass with unrolling and not split-if, however more split-if's
     // might make progress.  If the unrolling didn't make progress
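Why replacing a useless Opaque1 with its input is enough (a sketch of the follow-on IGVN cleanup, not code from the patch): the parser-inserted predicate has the shape If(Conv2B(Opaque1(ConI(1)))), so after replace_node the chain folds away on its own,

    Opaque1(ConI(1))  -->  ConI(1)        // replace_node above
    Conv2B(ConI(1))   -->  ConI(1)        // constant-folds
    If(constant 1)    -->  always taken   // IfTrue kept; the IfFalse path
                                          // into the uncommon trap dies

leaving no trace of the unused predicate in the generated code.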
@@ -2763,6 +2820,22 @@
   Node *legal = LCA;            // Walk 'legal' up the IDOM chain
   Node *least = legal;          // Best legal position so far
   while( early != legal ) {     // While not at earliest legal
+#ifdef ASSERT
+    if (legal->is_Start() && !early->is_Root()) {
+      // Bad graph. Print idom path and fail.
+      tty->print_cr( "Bad graph detected in build_loop_late");
+      tty->print("n: "); n->dump(); tty->cr();
+      tty->print("early: "); early->dump(); tty->cr();
+      int ct = 0;
+      Node *dbg_legal = LCA;
+      while(!dbg_legal->is_Start() && ct < 100) {
+        tty->print("idom[%d] ", ct); dbg_legal->dump(); tty->cr();
+        ct++;
+        dbg_legal = idom(dbg_legal);
+      }
+      assert(false, "Bad graph detected in build_loop_late");
+    }
+#endif
     // Find least loop nesting depth
     legal = idom(legal);        // Bump up the IDOM tree
     // Check for lower nesting depth
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/loopnode.hpp
--- a/src/share/vm/opto/loopnode.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/loopnode.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -30,6 +30,7 @@
 class Node;
 class PhaseIdealLoop;
 class VectorSet;
+class Invariance;
 struct small_cache;
 
 //
@@ -325,6 +326,10 @@
   // Returns TRUE if loop tree is structurally changed.
   bool beautify_loops( PhaseIdealLoop *phase );
 
+  // Perform optimization to use the loop predicates for null checks and range checks.
+  // Applies to any loop level (not just the innermost one)
+  bool loop_predication( PhaseIdealLoop *phase);
+
   // Perform iteration-splitting on inner loops.  Split iterations to
   // avoid range checks or one-shot null checks.  Returns false if the
   // current round of loop opts should stop.
@@ -395,6 +400,9 @@
   // into longer memory ops, we may want to increase alignment.
   bool policy_align( PhaseIdealLoop *phase ) const;
 
+  // Return TRUE if "iff" is a range check.
+  bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const;
+
   // Compute loop trip count from profile data
   void compute_profile_trip_cnt( PhaseIdealLoop *phase );
 
@@ -521,9 +529,6 @@
   }
   Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag );
 
-  // true if CFG node d dominates CFG node n
-  bool is_dominator(Node *d, Node *n);
-
   // Helper function for directing control inputs away from CFG split
   // points.
   Node *find_non_split_ctrl( Node *ctrl ) const {
@@ -572,6 +577,17 @@
     assert(n == find_non_split_ctrl(n), "must return legal ctrl" );
     return n;
   }
+  // true if CFG node d dominates CFG node n
+  bool is_dominator(Node *d, Node *n);
+  // return get_ctrl for a data node and self(n) for a CFG node
+  Node* ctrl_or_self(Node* n) {
+    if (has_ctrl(n))
+      return get_ctrl(n);
+    else {
+      assert (n->is_CFG(), "must be a CFG node");
+      return n;
+    }
+  }
 
 private:
   Node *get_ctrl_no_update( Node *i ) const {
@@ -600,7 +616,7 @@
   // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms.  Replace
   // the 'old_node' with 'new_node'.  Kill old-node.  Add a reference
   // from old_node to new_node to support the lazy update.  Reference
-  // replaces loop reference, since that is not neede for dead node.
+  // replaces loop reference, since that is not needed for dead node.
 public:
   void lazy_update( Node *old_node, Node *new_node ) {
     assert( old_node != new_node, "no cycles please" );
@@ -679,11 +695,11 @@
     _dom_lca_tags(C->comp_arena()),
     _verify_me(NULL),
     _verify_only(true) {
-    build_and_optimize(false);
+    build_and_optimize(false, false);
   }
 
   // build the loop tree and perform any requested optimizations
-  void build_and_optimize(bool do_split_if);
+  void build_and_optimize(bool do_split_if, bool do_loop_pred);
 
 public:
   // Dominators for the sea of nodes
@@ -694,13 +710,13 @@
   Node *dom_lca_internal( Node *n1, Node *n2 ) const;
 
   // Compute the Ideal Node to Loop mapping
-  PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) :
+  PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool do_loop_pred) :
     PhaseTransform(Ideal_Loop),
     _igvn(igvn),
     _dom_lca_tags(C->comp_arena()),
     _verify_me(NULL),
     _verify_only(false) {
-    build_and_optimize(do_split_ifs);
+    build_and_optimize(do_split_ifs, do_loop_pred);
   }
 
   // Verify that verify_me made the same decisions as a fresh run.
@@ -710,7 +726,7 @@
     _dom_lca_tags(C->comp_arena()),
     _verify_me(verify_me),
     _verify_only(false) {
-    build_and_optimize(false);
+    build_and_optimize(false, false);
   }
 
   // Build and verify the loop tree without modifying the graph.  This
@@ -790,6 +806,30 @@
   // Return true if exp is a scaled induction var plus (or minus) constant
   bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0);
 
+  // Return true if proj is for "proj->[region->..]call_uct"
+  bool is_uncommon_trap_proj(ProjNode* proj, bool must_reason_predicate = false);
+  // Return true for "if(test)-> proj -> ...
+  //                                    |
+  //                                    V
+  //                               other_proj->[region->..]call_uct"
+  bool is_uncommon_trap_if_pattern(ProjNode* proj, bool must_reason_predicate = false);
+  // Create a new if above the uncommon_trap_if_pattern for the predicate to be promoted
+  ProjNode* create_new_if_for_predicate(ProjNode* cont_proj);
+  // Find a good location to insert a predicate
+  ProjNode* find_predicate_insertion_point(Node* start_c);
+  // Construct a range check for a predicate if
+  BoolNode* rc_predicate(Node* ctrl,
+                         int scale, Node* offset,
+                         Node* init, Node* limit, Node* stride,
+                         Node* range);
+
+  // Implementation of the loop predication to promote checks outside the loop
+  bool loop_predication_impl(IdealLoopTree *loop);
+
+  // Helper function to collect predicates, used to eliminate the useless ones
+  void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
+  void eliminate_useless_predicates();
+
   // Eliminate range-checks and other trip-counter vs loop-invariant tests.
   void do_range_check( IdealLoopTree *loop, Node_List &old_new );
@@ -906,7 +946,6 @@
   const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
 
   // Helper functions
-  void register_new_node( Node *n, Node *blk );
   Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
   Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
   void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
@@ -918,6 +957,7 @@
 public:
   void set_created_loop_node() { _created_loop_node = true; }
   bool created_loop_node()     { return _created_loop_node; }
+  void register_new_node( Node *n, Node *blk );
 
 #ifndef PRODUCT
   void dump( ) const;
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/parse.hpp
--- a/src/share/vm/opto/parse.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/parse.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -430,6 +430,11 @@
     }
   }
 
+  // Return true if the parser should add a loop predicate
+  bool should_add_predicate(int target_bci);
+  // Insert a loop predicate into the graph
+  void add_predicate();
+
   // Note:  Intrinsic generation routines may be found in library_call.cpp.
 
   // Helper function to setup Ideal Call nodes
@@ -491,7 +496,7 @@
   void do_ifnull(BoolTest::mask btest, Node* c);
   void do_if(BoolTest::mask btest, Node* c);
-  void repush_if_args();
+  int repush_if_args();
   void adjust_map_after_if(BoolTest::mask btest, Node* c, float prob, Block* path, Block* other_path);
   IfNode* jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask);
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/parse1.cpp
--- a/src/share/vm/opto/parse1.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/parse1.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1383,6 +1383,10 @@
     set_parse_bci(iter().cur_bci());
 
     if (bci() == block()->limit()) {
+      // insert a predicate if this block falls through to a loop head block
+      if (should_add_predicate(bci())) {
+        add_predicate();
+      }
       // Do not walk into the next block until directed by do_all_blocks.
       merge(bci());
       break;
@@ -2083,6 +2087,37 @@
   }
 }
 
+//------------------------------should_add_predicate--------------------------
+bool Parse::should_add_predicate(int target_bci) {
+  if (!UseLoopPredicate) return false;
+  Block* target = successor_for_bci(target_bci);
+  if (target != NULL &&
+      target->is_loop_head() &&
+      block()->rpo() < target->rpo()) {
+    return true;
+  }
+  return false;
+}
+
+//------------------------------add_predicate---------------------------------
+void Parse::add_predicate() {
+  assert(UseLoopPredicate, "use only for loop predicate");
+  Node *cont    = _gvn.intcon(1);
+  Node* opq     = _gvn.transform(new (C, 2) Opaque1Node(C, cont));
+  Node *bol     = _gvn.transform(new (C, 2) Conv2BNode(opq));
+  IfNode* iff   = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);
+  Node* iffalse = _gvn.transform(new (C, 1) IfFalseNode(iff));
+  C->add_predicate_opaq(opq);
+  {
+    PreserveJVMState pjvms(this);
+    set_control(iffalse);
+    uncommon_trap(Deoptimization::Reason_predicate,
+                  Deoptimization::Action_maybe_recompile);
+  }
+  Node* iftrue = _gvn.transform(new (C, 1) IfTrueNode(iff));
+  set_control(iftrue);
+}
+
 #ifndef PRODUCT
 //------------------------show_parse_info--------------------------------------
 void Parse::show_parse_info() {
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/parse2.cpp
--- a/src/share/vm/opto/parse2.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/parse2.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -278,6 +278,11 @@
   if (len < 1) {
     // If this is a backward branch, add safepoint
     maybe_add_safepoint(default_dest);
+    if (should_add_predicate(default_dest)) {
+      _sp += 1; // set original stack for use by uncommon_trap
+      add_predicate();
+      _sp -= 1;
+    }
     merge(default_dest);
     return;
   }
@@ -324,6 +329,11 @@
   if (len < 1) {
     // If this is a backward branch, add safepoint
     maybe_add_safepoint(default_dest);
+    if (should_add_predicate(default_dest)) {
+      _sp += 1; // set original stack for use by uncommon_trap
+      add_predicate();
+      _sp -= 1;
+    }
     merge(default_dest);
     return;
   }
@@ -731,6 +741,9 @@
   push(_gvn.makecon(ret_addr));
 
   // Flow to the jsr.
+  if (should_add_predicate(jsr_bci)) {
+    add_predicate();
+  }
   merge(jsr_bci);
 }
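For orientation, here is the shape add_predicate leaves at each loop entry (a sketch reconstructed from the code above, not text from the patch); find_predicate_insertion_point and the Op_Opaque1 assert in loopnode.cpp match it as entry->in(0)->in(1)->in(1):

    ConI(1) --> Opaque1 --> Conv2B --> If --> IfTrue  --> (falls through toward the loop head)
                                         \--> IfFalse --> uncommon_trap(Reason_predicate,
                                                                        Action_maybe_recompile)

Until loop predication installs a real test, the If is effectively always taken on the IfTrue side; the Opaque1 exists only to keep IGVN from folding the constant condition away before the loop optimizer has had a chance to use it.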
@@ -881,7 +894,7 @@
 
 //-------------------------------repush_if_args--------------------------------
 // Push arguments of an "if" bytecode back onto the stack by adjusting _sp.
-inline void Parse::repush_if_args() {
+inline int Parse::repush_if_args() {
 #ifndef PRODUCT
   if (PrintOpto && WizardMode) {
     tty->print("defending against excessive implicit null exceptions on %s @%d in ",
@@ -895,6 +908,7 @@
   assert(argument(0) != NULL, "must exist");
   assert(bc_depth == 1 || argument(1) != NULL, "two must exist");
   _sp += bc_depth;
+  return bc_depth;
 }
 
 //----------------------------------do_ifnull----------------------------------
@@ -954,8 +968,14 @@
     // Update method data
     profile_taken_branch(target_bci);
     adjust_map_after_if(btest, c, prob, branch_block, next_block);
-    if (!stopped())
+    if (!stopped()) {
+      if (should_add_predicate(target_bci)) { // add a predicate if it branches to a loop
+        int nargs = repush_if_args(); // set original stack for uncommon_trap
+        add_predicate();
+        _sp -= nargs;
+      }
       merge(target_bci);
+    }
   }
 }
 
@@ -1076,8 +1096,14 @@
     // Update method data
     profile_taken_branch(target_bci);
     adjust_map_after_if(taken_btest, c, prob, branch_block, next_block);
-    if (!stopped())
+    if (!stopped()) {
+      if (should_add_predicate(target_bci)) { // add a predicate if it branches to a loop
+        int nargs = repush_if_args(); // set original stack for the uncommon_trap
+        add_predicate();
+        _sp -= nargs;
+      }
       merge(target_bci);
+    }
   }
 }
 
@@ -2080,6 +2106,10 @@
 
   // Update method data
   profile_taken_branch(target_bci);
+  // Add loop predicate if it goes to a loop
+  if (should_add_predicate(target_bci)) {
+    add_predicate();
+  }
   // Merge the current control into the target basic block
   merge(target_bci);
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/runtime.cpp
--- a/src/share/vm/opto/runtime.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/runtime.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -143,7 +143,7 @@
 
 // We failed the fast-path allocation.  Now we need to do a scavenge or GC
 // and try allocation again.
-void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) {
+void OptoRuntime::new_store_pre_barrier(JavaThread* thread) {
   // After any safepoint, just before going back to compiled code,
   // we inform the GC that we will be doing initializing writes to
   // this object in the future without emitting card-marks, so
@@ -156,7 +156,7 @@
   assert(Universe::heap()->can_elide_tlab_store_barriers(),
          "compiler must check this first");
   // GC may decide to give back a safer copy of new_obj.
-  new_obj = Universe::heap()->defer_store_barrier(thread, new_obj);
+  new_obj = Universe::heap()->new_store_pre_barrier(thread, new_obj);
   thread->set_vm_result(new_obj);
 }
 
@@ -200,7 +200,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
 
@@ -239,7 +239,7 @@
 
   if (GraphKit::use_ReduceInitialCardMarks()) {
     // inform GC that we won't do card marks for initializing writes.
-    maybe_defer_card_mark(thread);
+    new_store_pre_barrier(thread);
   }
 JRT_END
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/runtime.hpp
--- a/src/share/vm/opto/runtime.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/runtime.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -133,8 +133,9 @@
   // Allocate storage for a objArray or typeArray
   static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
 
-  // Post-slow-path-allocation step for implementing ReduceInitialCardMarks:
-  static void maybe_defer_card_mark(JavaThread* thread);
+  // Post-slow-path-allocation, pre-initializing-stores step for
+  // implementing ReduceInitialCardMarks
+  static void new_store_pre_barrier(JavaThread* thread);
 
   // Allocate storage for a multi-dimensional arrays
   // Note: needs to be fixed for arbitrary number of dimensions
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/split_if.cpp
--- a/src/share/vm/opto/split_if.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/split_if.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -219,6 +219,7 @@
 
 //------------------------------register_new_node------------------------------
 void PhaseIdealLoop::register_new_node( Node *n, Node *blk ) {
+  assert(!n->is_CFG(), "must be data node");
   _igvn.register_new_node_with_optimizer(n);
   set_ctrl(n, blk);
   IdealLoopTree *loop = get_loop(blk);
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/opto/subnode.cpp
--- a/src/share/vm/opto/subnode.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/opto/subnode.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2010 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1244,8 +1244,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dcos( d ) );
+  return TypeD::make( StubRoutines::intrinsic_cos( d ) );
 }
 
 //=============================================================================
@@ -1256,8 +1255,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dsin( d ) );
+  return TypeD::make( StubRoutines::intrinsic_sin( d ) );
 }
 
 //=============================================================================
@@ -1268,8 +1266,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dtan( d ) );
+  return TypeD::make( StubRoutines::intrinsic_tan( d ) );
 }
 
 //=============================================================================
@@ -1280,8 +1277,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dlog( d ) );
+  return TypeD::make( StubRoutines::intrinsic_log( d ) );
 }
 
 //=============================================================================
@@ -1292,8 +1288,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dlog10( d ) );
+  return TypeD::make( StubRoutines::intrinsic_log10( d ) );
 }
 
 //=============================================================================
@@ -1304,8 +1299,7 @@
   if( t1 == Type::TOP ) return Type::TOP;
   if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
   double d = t1->getd();
-  if( d < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dexp( d ) );
+  return TypeD::make( StubRoutines::intrinsic_exp( d ) );
 }
 
@@ -1323,5 +1317,5 @@
   double d2 = t2->getd();
   if( d1 < 0.0 ) return Type::DOUBLE;
   if( d2 < 0.0 ) return Type::DOUBLE;
-  return TypeD::make( SharedRuntime::dpow( d1, d2 ) );
+  return TypeD::make( StubRoutines::intrinsic_pow( d1, d2 ) );
 }
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/compilationPolicy.cpp
--- a/src/share/vm/runtime/compilationPolicy.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/compilationPolicy.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 2000-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2000-2010 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,6 +74,16 @@
   if (m->is_abstract()) return false;
   if (DontCompileHugeMethods && m->code_size() > HugeMethodLimit) return false;
 
+  // Math intrinsics should never be compiled, as this can lead to
+  // monotonicity problems because the interpreter will prefer the
+  // compiled code to the intrinsic version.  This can't happen in
+  // production because the invocation counter can't be incremented,
+  // but we shouldn't expose the system to this problem in testing
+  // modes.
+  if (!AbstractInterpreter::can_be_compiled(m)) {
+    return false;
+  }
+
   return !m->is_not_compilable();
 }
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/deoptimization.cpp
--- a/src/share/vm/runtime/deoptimization.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/deoptimization.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1672,7 +1672,8 @@
   "unhandled",
   "constraint",
   "div0_check",
-  "age"
+  "age",
+  "predicate"
 };
 const char* Deoptimization::_trap_action_name[Action_LIMIT] = {
   // Note:  Keep this in sync. with enum DeoptAction.
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/deoptimization.hpp
--- a/src/share/vm/runtime/deoptimization.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/deoptimization.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -46,6 +46,7 @@
     Reason_constraint,            // arbitrary runtime constraint violated
     Reason_div0_check,            // a null_check due to division by zero
     Reason_age,                   // nmethod too old; tier threshold reached
+    Reason_predicate,             // compiler generated predicate failed
 
     Reason_LIMIT,
     // Note:  Keep this enum in sync. with _trap_reason_name.
    Reason_RECORDED_LIMIT = Reason_unloaded   // some are not recorded per bc
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/globals.hpp
--- a/src/share/vm/runtime/globals.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/globals.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -2012,6 +2012,10 @@
   diagnostic(bool, GCParallelVerificationEnabled, true,                     \
           "Enable parallel memory system verification")                    \
                                                                             \
+  diagnostic(bool, DeferInitialCardMark, false,                             \
+          "When +ReduceInitialCardMarks, explicitly defer any that "       \
+          "may arise from new_store_pre_barrier")                          \
+                                                                            \
   diagnostic(bool, VerifyRememberedSets, false,                             \
           "Verify GC remembered sets")                                     \
                                                                             \
@@ -3456,6 +3460,9 @@
   diagnostic(bool, OptimizeMethodHandles, true,                             \
           "when constructing method handles, try to improve them")         \
                                                                             \
+  experimental(bool, TrustFinalNonStaticFields, false,                      \
+          "trust final non-static declarations for constant folding")      \
+                                                                            \
   experimental(bool, EnableInvokeDynamic, false,                            \
           "recognize the invokedynamic instruction")                       \
                                                                             \
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/stubRoutines.cpp
--- a/src/share/vm/runtime/stubRoutines.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/stubRoutines.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2010 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -97,6 +97,14 @@
 address StubRoutines::_unsafe_arraycopy               = NULL;
 address StubRoutines::_generic_arraycopy              = NULL;
 
+double (* StubRoutines::_intrinsic_log   )(double) = NULL;
+double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
+double (* StubRoutines::_intrinsic_exp   )(double) = NULL;
+double (* StubRoutines::_intrinsic_pow   )(double, double) = NULL;
+double (* StubRoutines::_intrinsic_sin   )(double) = NULL;
+double (* StubRoutines::_intrinsic_cos   )(double) = NULL;
+double (* StubRoutines::_intrinsic_tan   )(double) = NULL;
+
 // Initialization
 //
 // Note: to break cycle with universe initialization, stubs are generated in two phases.
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/stubRoutines.hpp
--- a/src/share/vm/runtime/stubRoutines.hpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/stubRoutines.hpp	Fri Jan 22 15:06:53 2010 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2010 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -148,6 +148,20 @@
   static address _unsafe_arraycopy;
   static address _generic_arraycopy;
 
+  // These are versions of the java.lang.Math methods which perform
+  // the same operations as the intrinsic version.  They are used for
+  // constant folding in the compiler to ensure equivalence.  If the
+  // intrinsic version returns the same result as the strict version
+  // then they can be set to the appropriate function from
+  // SharedRuntime.
+  static double (*_intrinsic_log)(double);
+  static double (*_intrinsic_log10)(double);
+  static double (*_intrinsic_exp)(double);
+  static double (*_intrinsic_pow)(double, double);
+  static double (*_intrinsic_sin)(double);
+  static double (*_intrinsic_cos)(double);
+  static double (*_intrinsic_tan)(double);
+
  public:
   // Initialization/Testing
   static void    initialize1();                            // must happen before universe::genesis
@@ -245,6 +259,35 @@
   static address unsafe_arraycopy()        { return _unsafe_arraycopy; }
   static address generic_arraycopy()       { return _generic_arraycopy; }
 
+  static double  intrinsic_log(double d) {
+    assert(_intrinsic_log != NULL, "must be defined");
+    return _intrinsic_log(d);
+  }
+  static double  intrinsic_log10(double d) {
+    assert(_intrinsic_log10 != NULL, "must be defined");
+    return _intrinsic_log10(d);
+  }
+  static double  intrinsic_exp(double d) {
+    assert(_intrinsic_exp != NULL, "must be defined");
+    return _intrinsic_exp(d);
+  }
+  static double  intrinsic_pow(double d, double d2) {
+    assert(_intrinsic_pow != NULL, "must be defined");
+    return _intrinsic_pow(d, d2);
+  }
+  static double  intrinsic_sin(double d) {
+    assert(_intrinsic_sin != NULL, "must be defined");
+    return _intrinsic_sin(d);
+  }
+  static double  intrinsic_cos(double d) {
+    assert(_intrinsic_cos != NULL, "must be defined");
+    return _intrinsic_cos(d);
+  }
+  static double  intrinsic_tan(double d) {
+    assert(_intrinsic_tan != NULL, "must be defined");
+    return _intrinsic_tan(d);
+  }
+
   //
   // Default versions of the above arraycopy functions for platforms which do
   // not have specialized versions
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/thread.cpp
--- a/src/share/vm/runtime/thread.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/thread.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -2357,9 +2357,8 @@
 };
 
 void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
-  // Flush deferred store-barriers, if any, associated with
-  // initializing stores done by this JavaThread in the current epoch.
-  Universe::heap()->flush_deferred_store_barrier(this);
+  // Verify that the deferred card marks have been flushed.
+  assert(deferred_card_mark().is_empty(), "Should be empty during GC");
 
   // The ThreadProfiler oops_do is done from FlatProfiler::oops_do
   // since there may be more than one thread using each ThreadProfiler.
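To make the consistency requirement behind the StubRoutines math entries above concrete, here is a hypothetical Java illustration (not a test from this patch; the class name is invented): if C2 folded a constant Math.sin with one routine while the generated code called a different intrinsic, the two lines below could print different values for the same mathematical input.

    // Hypothetical sketch: both prints must agree, whether the call is
    // constant-folded by SinDNode::Value or executed as an intrinsic.
    public class FoldVsIntrinsic {
        static final double C = 0.5;                        // constant, may be folded by C2
        static double f(double x) { return Math.sin(x); }   // computed by the intrinsic
        public static void main(String[] args) {
            System.out.println(Math.sin(C));
            System.out.println(f(C));
        }
    }

Routing constant folding through the same StubRoutines entries the generated code uses (and refusing to compile the interpreter's special math entries, per the compilationPolicy.cpp change above) keeps the two paths in agreement.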
diff -r 3908ad124838 -r 2718ec34c699 src/share/vm/runtime/vmStructs.cpp
--- a/src/share/vm/runtime/vmStructs.cpp	Wed Jan 20 11:32:41 2010 -0700
+++ b/src/share/vm/runtime/vmStructs.cpp	Fri Jan 22 15:06:53 2010 -0800
@@ -309,6 +309,7 @@
   nonstatic_field(CollectedHeap,               _reserved,                   MemRegion)    \
   nonstatic_field(SharedHeap,                  _perm_gen,                   PermGen*)     \
   nonstatic_field(CollectedHeap,               _barrier_set,                BarrierSet*)  \
+  nonstatic_field(CollectedHeap,               _defer_initial_card_mark,    bool)         \
   nonstatic_field(CollectedHeap,               _is_gc_active,               bool)         \
   nonstatic_field(CompactibleSpace,            _compaction_top,             HeapWord*)    \
   nonstatic_field(CompactibleSpace,            _first_dead,                 HeapWord*)    \
diff -r 3908ad124838 -r 2718ec34c699 test/compiler/6877254/Test.java
--- a/test/compiler/6877254/Test.java	Wed Jan 20 11:32:41 2010 -0700
+++ b/test/compiler/6877254/Test.java	Fri Jan 22 15:06:53 2010 -0800
@@ -26,7 +26,7 @@
  * @bug 6877254
  * @summary Implement StoreCMNode::Ideal to promote its OopStore above the MergeMem
  *
- * @run main/othervm -server -Xcomp -XX:+UseConcMarkSweepGC Test
+ * @run main/othervm -Xcomp Test
 */

public class Test {
diff -r 3908ad124838 -r 2718ec34c699 test/compiler/6895383/Test.java
--- a/test/compiler/6895383/Test.java	Wed Jan 20 11:32:41 2010 -0700
+++ b/test/compiler/6895383/Test.java	Fri Jan 22 15:06:53 2010 -0800
@@ -30,6 +30,9 @@
 * @run main/othervm -Xcomp Test
 */

+import java.util.*;
+import java.util.concurrent.*;
+
public class Test {
    public static void main(String argv[]) {
        Test test = new Test();
diff -r 3908ad124838 -r 2718ec34c699 test/compiler/6896727/Test.java
--- a/test/compiler/6896727/Test.java	Wed Jan 20 11:32:41 2010 -0700
+++ b/test/compiler/6896727/Test.java	Fri Jan 22 15:06:53 2010 -0800
@@ -26,7 +26,7 @@
 * @test
 * @bug 6896727
 * @summary nsk/logging/LoggingPermission/LoggingPermission/logperm002 fails with G1, EscapeAnalisys w/o COOPs
- * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:+DoEscapeAnalysis -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC Test
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:+DoEscapeAnalysis Test
 */

public class Test {
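Taken together, the parser and loop-optimizer changes implement the hoisting sketched below (a hypothetical Java example, not a test from this patch):

    // Before predication: the implicit null check and range check for a[i]
    // are evaluated on every iteration.
    public class PredicationExample {
        static int sum(int[] a, int n) {
            int s = 0;
            for (int i = 0; i < n; i++) {
                s += a[i];  // checks become predicates above the loop
            }
            return s;
        }
        public static void main(String[] args) {
            System.out.println(sum(new int[100], 100));
        }
    }

With scale = 1, offset = 0 and stride = 1, rc_predicate produces roughly "n-1 u< a.length", which together with the hoisted null check guards loop entry; if either predicate fails at run time, execution deoptimizes through the new Reason_predicate uncommon trap and the method can be recompiled without the offending predicate.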