# HG changeset patch # User amurillo # Date 1400235923 25200 # Node ID 87bdb86f0aedbd9b9ef8e9999b273114c8be4748 # Parent 382a82b0a3e773e57156618d91d15987a25d55c5# Parent 49961f279e2404fdb079dd00cdde7e8c74a75e8b Merge diff -r 382a82b0a3e7 -r 87bdb86f0aed make/hotspot_version --- a/make/hotspot_version Tue May 13 23:17:52 2014 -0700 +++ b/make/hotspot_version Fri May 16 03:25:23 2014 -0700 @@ -35,7 +35,7 @@ HS_MAJOR_VER=25 HS_MINOR_VER=20 -HS_BUILD_NUMBER=14 +HS_BUILD_NUMBER=15 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 diff -r 382a82b0a3e7 -r 87bdb86f0aed src/cpu/x86/vm/macroAssembler_x86.cpp --- a/src/cpu/x86/vm/macroAssembler_x86.cpp Tue May 13 23:17:52 2014 -0700 +++ b/src/cpu/x86/vm/macroAssembler_x86.cpp Fri May 16 03:25:23 2014 -0700 @@ -3152,10 +3152,12 @@ // if fast computation is not possible, result is NaN. Requires // fallback from user of this macro. // increase precision for intermediate steps of the computation + BLOCK_COMMENT("fast_pow {"); increase_precision(); fyl2x(); // Stack: (Y*log2(X)) ... pow_exp_core_encoding(); // Stack: exp(X) ... restore_precision(); + BLOCK_COMMENT("} fast_pow"); } void MacroAssembler::fast_exp() { diff -r 382a82b0a3e7 -r 87bdb86f0aed src/os/bsd/vm/os_bsd.cpp --- a/src/os/bsd/vm/os_bsd.cpp Tue May 13 23:17:52 2014 -0700 +++ b/src/os/bsd/vm/os_bsd.cpp Fri May 16 03:25:23 2014 -0700 @@ -127,8 +127,12 @@ // global variables julong os::Bsd::_physical_memory = 0; - +#ifdef __APPLE__ +mach_timebase_info_data_t os::Bsd::_timebase_info = {0, 0}; +volatile uint64_t os::Bsd::_max_abstime = 0; +#else int (*os::Bsd::_clock_gettime)(clockid_t, struct timespec *) = NULL; +#endif pthread_t os::Bsd::_main_thread; int os::Bsd::_page_size = -1; @@ -986,13 +990,15 @@ return jlong(time.tv_sec) * 1000 + jlong(time.tv_usec / 1000); } +#ifndef __APPLE__ #ifndef CLOCK_MONOTONIC #define CLOCK_MONOTONIC (1) #endif +#endif #ifdef __APPLE__ void os::Bsd::clock_init() { - // XXXDARWIN: Investigate replacement monotonic clock + mach_timebase_info(&_timebase_info); } #else void os::Bsd::clock_init() { @@ -1007,10 +1013,38 @@ #endif +#ifdef __APPLE__ + +jlong os::javaTimeNanos() { + const uint64_t tm = mach_absolute_time(); + const uint64_t now = (tm * Bsd::_timebase_info.numer) / Bsd::_timebase_info.denom; + const uint64_t prev = Bsd::_max_abstime; + if (now <= prev) { + return prev; // same or retrograde time; + } + const uint64_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&Bsd::_max_abstime, prev); + assert(obsv >= prev, "invariant"); // Monotonicity + // If the CAS succeeded then we're done and return "now". + // If the CAS failed and the observed value "obsv" is >= now then + // we should return "obsv". If the CAS failed and now > obsv > prv then + // some other thread raced this thread and installed a new value, in which case + // we could either (a) retry the entire operation, (b) retry trying to install now + // or (c) just return obsv. We use (c). No loop is required although in some cases + // we might discard a higher "now" value in deference to a slightly lower but freshly + // installed obsv value. That's entirely benign -- it admits no new orderings compared + // to (a) or (b) -- and greatly reduces coherence traffic. + // We might also condition (c) on the magnitude of the delta between obsv and now. + // Avoiding excessive CAS operations to hot RW locations is critical. + // See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate + return (prev == obsv) ? now : obsv; +} + +#else // __APPLE__ + jlong os::javaTimeNanos() { if (Bsd::supports_monotonic_clock()) { struct timespec tp; - int status = Bsd::clock_gettime(CLOCK_MONOTONIC, &tp); + int status = Bsd::_clock_gettime(CLOCK_MONOTONIC, &tp); assert(status == 0, "gettime error"); jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec); return result; @@ -1023,6 +1057,8 @@ } } +#endif // __APPLE__ + void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) { if (Bsd::supports_monotonic_clock()) { info_ptr->max_value = ALL_64_BITS; diff -r 382a82b0a3e7 -r 87bdb86f0aed src/os/bsd/vm/os_bsd.hpp --- a/src/os/bsd/vm/os_bsd.hpp Tue May 13 23:17:52 2014 -0700 +++ b/src/os/bsd/vm/os_bsd.hpp Fri May 16 03:25:23 2014 -0700 @@ -58,7 +58,13 @@ // For signal flags diagnostics static int sigflags[MAXSIGNUM]; +#ifdef __APPLE__ + // mach_absolute_time + static mach_timebase_info_data_t _timebase_info; + static volatile uint64_t _max_abstime; +#else static int (*_clock_gettime)(clockid_t, struct timespec *); +#endif static GrowableArray* _cpu_to_node; @@ -135,11 +141,11 @@ static void clock_init(void); static inline bool supports_monotonic_clock() { +#ifdef __APPLE__ + return true; +#else return _clock_gettime != NULL; - } - - static int clock_gettime(clockid_t clock_id, struct timespec *tp) { - return _clock_gettime ? _clock_gettime(clock_id, tp) : -1; +#endif } // Stack repair handling diff -r 382a82b0a3e7 -r 87bdb86f0aed src/os/solaris/vm/os_solaris.cpp --- a/src/os/solaris/vm/os_solaris.cpp Tue May 13 23:17:52 2014 -0700 +++ b/src/os/solaris/vm/os_solaris.cpp Fri May 16 03:25:23 2014 -0700 @@ -415,11 +415,7 @@ static hrtime_t first_hrtime = 0; static const hrtime_t hrtime_hz = 1000*1000*1000; -const int LOCK_BUSY = 1; -const int LOCK_FREE = 0; -const int LOCK_INVALID = -1; static volatile hrtime_t max_hrtime = 0; -static volatile int max_hrtime_lock = LOCK_FREE; // Update counter with LSB as lock-in-progress void os::Solaris::initialize_system_info() { @@ -1534,58 +1530,31 @@ } -// gethrtime can move backwards if read from one cpu and then a different cpu -// getTimeNanos is guaranteed to not move backward on Solaris -// local spinloop created as faster for a CAS on an int than -// a CAS on a 64bit jlong. Also Atomic::cmpxchg for jlong is not -// supported on sparc v8 or pre supports_cx8 intel boxes. -// oldgetTimeNanos for systems which do not support CAS on 64bit jlong -// i.e. sparc v8 and pre supports_cx8 (i486) intel boxes -inline hrtime_t oldgetTimeNanos() { - int gotlock = LOCK_INVALID; - hrtime_t newtime = gethrtime(); - - for (;;) { -// grab lock for max_hrtime - int curlock = max_hrtime_lock; - if (curlock & LOCK_BUSY) continue; - if (gotlock = Atomic::cmpxchg(LOCK_BUSY, &max_hrtime_lock, LOCK_FREE) != LOCK_FREE) continue; - if (newtime > max_hrtime) { - max_hrtime = newtime; - } else { - newtime = max_hrtime; - } - // release lock - max_hrtime_lock = LOCK_FREE; - return newtime; - } -} -// gethrtime can move backwards if read from one cpu and then a different cpu -// getTimeNanos is guaranteed to not move backward on Solaris +// gethrtime() should be monotonic according to the documentation, +// but some virtualized platforms are known to break this guarantee. +// getTimeNanos() must be guaranteed not to move backwards, so we +// are forced to add a check here. inline hrtime_t getTimeNanos() { - if (VM_Version::supports_cx8()) { - const hrtime_t now = gethrtime(); - // Use atomic long load since 32-bit x86 uses 2 registers to keep long. - const hrtime_t prev = Atomic::load((volatile jlong*)&max_hrtime); - if (now <= prev) return prev; // same or retrograde time; - const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev); - assert(obsv >= prev, "invariant"); // Monotonicity - // If the CAS succeeded then we're done and return "now". - // If the CAS failed and the observed value "obs" is >= now then - // we should return "obs". If the CAS failed and now > obs > prv then - // some other thread raced this thread and installed a new value, in which case - // we could either (a) retry the entire operation, (b) retry trying to install now - // or (c) just return obs. We use (c). No loop is required although in some cases - // we might discard a higher "now" value in deference to a slightly lower but freshly - // installed obs value. That's entirely benign -- it admits no new orderings compared - // to (a) or (b) -- and greatly reduces coherence traffic. - // We might also condition (c) on the magnitude of the delta between obs and now. - // Avoiding excessive CAS operations to hot RW locations is critical. - // See http://blogs.sun.com/dave/entry/cas_and_cache_trivia_invalidate - return (prev == obsv) ? now : obsv ; - } else { - return oldgetTimeNanos(); - } + const hrtime_t now = gethrtime(); + const hrtime_t prev = max_hrtime; + if (now <= prev) { + return prev; // same or retrograde time; + } + const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev); + assert(obsv >= prev, "invariant"); // Monotonicity + // If the CAS succeeded then we're done and return "now". + // If the CAS failed and the observed value "obsv" is >= now then + // we should return "obsv". If the CAS failed and now > obsv > prv then + // some other thread raced this thread and installed a new value, in which case + // we could either (a) retry the entire operation, (b) retry trying to install now + // or (c) just return obsv. We use (c). No loop is required although in some cases + // we might discard a higher "now" value in deference to a slightly lower but freshly + // installed obsv value. That's entirely benign -- it admits no new orderings compared + // to (a) or (b) -- and greatly reduces coherence traffic. + // We might also condition (c) on the magnitude of the delta between obsv and now. + // Avoiding excessive CAS operations to hot RW locations is critical. + // See https://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate + return (prev == obsv) ? now : obsv; } // Time since start-up in seconds to a fine granularity. diff -r 382a82b0a3e7 -r 87bdb86f0aed src/share/vm/c1/c1_GraphBuilder.cpp --- a/src/share/vm/c1/c1_GraphBuilder.cpp Tue May 13 23:17:52 2014 -0700 +++ b/src/share/vm/c1/c1_GraphBuilder.cpp Fri May 16 03:25:23 2014 -0700 @@ -1697,6 +1697,15 @@ return NULL; } +void GraphBuilder::check_args_for_profiling(Values* obj_args, int expected) { +#ifdef ASSERT + bool ignored_will_link; + ciSignature* declared_signature = NULL; + ciMethod* real_target = method()->get_method_at_bci(bci(), ignored_will_link, &declared_signature); + assert(expected == obj_args->length() || real_target->is_method_handle_intrinsic(), "missed on arg?"); +#endif +} + // Collect arguments that we want to profile in a list Values* GraphBuilder::collect_args_for_profiling(Values* args, ciMethod* target, bool may_have_receiver) { int start = 0; @@ -1705,13 +1714,14 @@ return NULL; } int s = obj_args->size(); - for (int i = start, j = 0; j < s; i++) { + // if called through method handle invoke, some arguments may have been popped + for (int i = start, j = 0; j < s && i < args->length(); i++) { if (args->at(i)->type()->is_object_kind()) { obj_args->push(args->at(i)); j++; } } - assert(s == obj_args->length(), "missed on arg?"); + check_args_for_profiling(obj_args, s); return obj_args; } @@ -3843,14 +3853,7 @@ j++; } } -#ifdef ASSERT - { - bool ignored_will_link; - ciSignature* declared_signature = NULL; - ciMethod* real_target = method()->get_method_at_bci(bci(), ignored_will_link, &declared_signature); - assert(s == obj_args->length() || real_target->is_method_handle_intrinsic(), "missed on arg?"); - } -#endif + check_args_for_profiling(obj_args, s); } profile_call(callee, recv, holder_known ? callee->holder() : NULL, obj_args, true); } diff -r 382a82b0a3e7 -r 87bdb86f0aed src/share/vm/c1/c1_GraphBuilder.hpp --- a/src/share/vm/c1/c1_GraphBuilder.hpp Tue May 13 23:17:52 2014 -0700 +++ b/src/share/vm/c1/c1_GraphBuilder.hpp Fri May 16 03:25:23 2014 -0700 @@ -392,6 +392,7 @@ Values* args_list_for_profiling(ciMethod* target, int& start, bool may_have_receiver); Values* collect_args_for_profiling(Values* args, ciMethod* target, bool may_have_receiver); + void check_args_for_profiling(Values* obj_args, int expected); public: NOT_PRODUCT(void print_stats();) diff -r 382a82b0a3e7 -r 87bdb86f0aed src/share/vm/c1/c1_LIRGenerator.cpp --- a/src/share/vm/c1/c1_LIRGenerator.cpp Tue May 13 23:17:52 2014 -0700 +++ b/src/share/vm/c1/c1_LIRGenerator.cpp Fri May 16 03:25:23 2014 -0700 @@ -2634,8 +2634,10 @@ // LIR_Assembler::emit_profile_type() from emitting useless code profiled_k = ciTypeEntries::with_status(result, profiled_k); } - if (exact_signature_k != NULL && exact_klass != exact_signature_k) { - assert(exact_klass == NULL, "obj and signature disagree?"); + // exact_klass and exact_signature_k can be both non NULL but + // different if exact_klass is loaded after the ciObject for + // exact_signature_k is created. + if (exact_klass == NULL && exact_signature_k != NULL && exact_klass != exact_signature_k) { // sometimes the type of the signature is better than the best type // the compiler has exact_klass = exact_signature_k; @@ -2646,8 +2648,7 @@ if (improved_klass == NULL) { improved_klass = comp->cha_exact_type(callee_signature_k); } - if (improved_klass != NULL && exact_klass != improved_klass) { - assert(exact_klass == NULL, "obj and signature disagree?"); + if (exact_klass == NULL && improved_klass != NULL && exact_klass != improved_klass) { exact_klass = exact_signature_k; } } diff -r 382a82b0a3e7 -r 87bdb86f0aed src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Tue May 13 23:17:52 2014 -0700 +++ b/src/share/vm/opto/library_call.cpp Fri May 16 03:25:23 2014 -0700 @@ -216,7 +216,7 @@ bool inline_math_subtractExactL(bool is_decrement); bool inline_exp(); bool inline_pow(); - void finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName); + Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName); bool inline_min_max(vmIntrinsics::ID id); Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y); // This returns Type::AnyPtr, RawPtr, or OopPtr. @@ -1678,7 +1678,7 @@ return true; } -void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) { +Node* LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName) { //------------------- //result=(result.isNaN())? funcAddr():result; // Check: If isNaN() by checking result!=result? then either trap @@ -1694,7 +1694,7 @@ uncommon_trap(Deoptimization::Reason_intrinsic, Deoptimization::Action_make_not_entrant); } - set_result(result); + return result; } else { // If this inlining ever returned NaN in the past, we compile a call // to the runtime to properly handle corner cases @@ -1724,9 +1724,10 @@ result_region->init_req(2, control()); result_val->init_req(2, value); - set_result(result_region, result_val); + set_control(_gvn.transform(result_region)); + return _gvn.transform(result_val); } else { - set_result(result); + return result; } } } @@ -1738,7 +1739,8 @@ Node* arg = round_double_node(argument(0)); Node* n = _gvn.transform(new (C) ExpDNode(C, control(), arg)); - finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); + n = finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); + set_result(n); C->set_has_split_ifs(true); // Has chance for split-if optimization return true; @@ -1748,27 +1750,48 @@ // Inline power instructions, if possible. bool LibraryCallKit::inline_pow() { // Pseudocode for pow - // if (x <= 0.0) { - // long longy = (long)y; - // if ((double)longy == y) { // if y is long - // if (y + 1 == y) longy = 0; // huge number: even - // result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y); + // if (y == 2) { + // return x * x; + // } else { + // if (x <= 0.0) { + // long longy = (long)y; + // if ((double)longy == y) { // if y is long + // if (y + 1 == y) longy = 0; // huge number: even + // result = ((1&longy) == 0)?-DPow(abs(x), y):DPow(abs(x), y); + // } else { + // result = NaN; + // } // } else { - // result = NaN; + // result = DPow(x,y); // } - // } else { - // result = DPow(x,y); + // if (result != result)? { + // result = uncommon_trap() or runtime_call(); + // } + // return result; // } - // if (result != result)? { - // result = uncommon_trap() or runtime_call(); - // } - // return result; Node* x = round_double_node(argument(0)); Node* y = round_double_node(argument(2)); Node* result = NULL; + Node* const_two_node = makecon(TypeD::make(2.0)); + Node* cmp_node = _gvn.transform(new (C) CmpDNode(y, const_two_node)); + Node* bool_node = _gvn.transform(new (C) BoolNode(cmp_node, BoolTest::eq)); + IfNode* if_node = create_and_xform_if(control(), bool_node, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN); + Node* if_true = _gvn.transform(new (C) IfTrueNode(if_node)); + Node* if_false = _gvn.transform(new (C) IfFalseNode(if_node)); + + RegionNode* region_node = new (C) RegionNode(3); + region_node->init_req(1, if_true); + + Node* phi_node = new (C) PhiNode(region_node, Type::DOUBLE); + // special case for x^y where y == 2, we can convert it to x * x + phi_node->init_req(1, _gvn.transform(new (C) MulDNode(x, x))); + + // set control to if_false since we will now process the false branch + set_control(if_false); + if (!too_many_traps(Deoptimization::Reason_intrinsic)) { // Short form: skip the fancy tests and just check for NaN result. result = _gvn.transform(new (C) PowDNode(C, control(), x, y)); @@ -1892,7 +1915,15 @@ result = _gvn.transform(phi); } - finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW"); + result = finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW"); + + // control from finish_pow_exp is now input to the region node + region_node->set_req(2, control()); + // the result from finish_pow_exp is now input to the phi node + phi_node->init_req(2, result); + set_control(_gvn.transform(region_node)); + record_for_igvn(region_node); + set_result(_gvn.transform(phi_node)); C->set_has_split_ifs(true); // Has chance for split-if optimization return true; diff -r 382a82b0a3e7 -r 87bdb86f0aed src/share/vm/runtime/os.hpp --- a/src/share/vm/runtime/os.hpp Tue May 13 23:17:52 2014 -0700 +++ b/src/share/vm/runtime/os.hpp Fri May 16 03:25:23 2014 -0700 @@ -48,6 +48,9 @@ #ifdef TARGET_OS_FAMILY_bsd # include "jvm_bsd.h" # include +# ifdef __APPLE__ +# include +# endif #endif class AgentLibrary; diff -r 382a82b0a3e7 -r 87bdb86f0aed test/compiler/profiling/TestMethodHandleInvokesIntrinsic.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/profiling/TestMethodHandleInvokesIntrinsic.java Fri May 16 03:25:23 2014 -0700 @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8041458 + * @summary profiling of arguments in C1 at MethodHandle invoke of intrinsic tries to profile popped argument. + * @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -XX:TieredStopAtLevel=3 TestMethodHandleInvokesIntrinsic + * + */ + +import java.lang.invoke.*; + +public class TestMethodHandleInvokesIntrinsic { + + static final MethodHandle mh_nanoTime; + static final MethodHandle mh_getClass; + static { + MethodHandles.Lookup lookup = MethodHandles.lookup(); + MethodType mt = MethodType.methodType(long.class); + MethodHandle MH = null; + try { + MH = lookup.findStatic(System.class, "nanoTime", mt); + } catch(NoSuchMethodException nsme) { + nsme.printStackTrace(); + throw new RuntimeException("TEST FAILED", nsme); + } catch(IllegalAccessException iae) { + iae.printStackTrace(); + throw new RuntimeException("TEST FAILED", iae); + } + mh_nanoTime = MH; + + mt = MethodType.methodType(Class.class); + MH = null; + try { + MH = lookup.findVirtual(Object.class, "getClass", mt); + } catch(NoSuchMethodException nsme) { + nsme.printStackTrace(); + throw new RuntimeException("TEST FAILED", nsme); + } catch(IllegalAccessException iae) { + iae.printStackTrace(); + throw new RuntimeException("TEST FAILED", iae); + } + mh_getClass = MH; + } + + static long m1() throws Throwable { + return (long)mh_nanoTime.invokeExact(); + } + + static Class m2(Object o) throws Throwable { + return (Class)mh_getClass.invokeExact(o); + } + + static public void main(String[] args) { + try { + for (int i = 0; i < 20000; i++) { + m1(); + } + TestMethodHandleInvokesIntrinsic o = new TestMethodHandleInvokesIntrinsic(); + for (int i = 0; i < 20000; i++) { + m2(o); + } + } catch(Throwable t) { + System.out.println("Unexpected exception"); + t.printStackTrace(); + throw new RuntimeException("TEST FAILED", t); + } + + System.out.println("TEST PASSED"); + } +}