# HG changeset patch # User coleenp # Date 1321378855 18000 # Node ID f9a80a035a4ad728bf22187fa6c3aace0b9f4420 # Parent 78bef05801ca8b77e2bbf4f2d8d2905b500616d7# Parent 3c7d67df8d0757d1d6c495dd72c7a1781322e109 Merge diff -r 3c7d67df8d07 -r f9a80a035a4a .hgtags --- a/.hgtags Thu Nov 10 06:23:48 2011 -0500 +++ b/.hgtags Tue Nov 15 12:40:55 2011 -0500 @@ -195,3 +195,5 @@ 4d3850d9d326ac3a9bee2d867727e954322d014e hs23-b03 4538caeef7b6cbd4302bebced805d65e68ccf301 jdk8-b11 6534482ff68ad79066dfe15dfb6d8905f09681bd hs23-b04 +1d3900713a67a0a39faf4e12c9c158d55aebef87 jdk8-b12 +3e609627e780736f372eb14d29bb9b5e53b21fbf hs23-b05 diff -r 3c7d67df8d07 -r f9a80a035a4a agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java --- a/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java Thu Nov 10 06:23:48 2011 -0500 +++ b/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java Tue Nov 15 12:40:55 2011 -0500 @@ -30,6 +30,7 @@ import sun.jvm.hotspot.gc_implementation.parallelScavenge.*; import sun.jvm.hotspot.gc_implementation.shared.*; import sun.jvm.hotspot.memory.*; +import sun.jvm.hotspot.oops.*; import sun.jvm.hotspot.runtime.*; public class HeapSummary extends Tool { @@ -134,6 +135,9 @@ } else { throw new RuntimeException("unknown CollectedHeap type : " + heap.getClass()); } + + System.out.println(); + printInternStringStatistics(); } // Helper methods @@ -248,4 +252,41 @@ return -1; } } + + private void printInternStringStatistics() { + class StringStat implements StringTable.StringVisitor { + private int count; + private long size; + private OopField stringValueField; + + StringStat() { + VM vm = VM.getVM(); + SystemDictionary sysDict = vm.getSystemDictionary(); + InstanceKlass strKlass = sysDict.getStringKlass(); + // String has a field named 'value' of type 'char[]'. + stringValueField = (OopField) strKlass.findField("value", "[C"); + } + + private long stringSize(Instance instance) { + // We include String content in size calculation. 
+ return instance.getObjectSize() + + stringValueField.getValue(instance).getObjectSize(); + } + + public void visit(Instance str) { + count++; + size += stringSize(str); + } + + public void print() { + System.out.println(count + + " interned Strings occupying " + size + " bytes."); + } + } + + StringStat stat = new StringStat(); + StringTable strTable = VM.getVM().getStringTable(); + strTable.stringsDo(stat); + stat.print(); + } } diff -r 3c7d67df8d07 -r f9a80a035a4a agent/src/share/classes/sun/jvm/hotspot/tools/PermStat.java --- a/agent/src/share/classes/sun/jvm/hotspot/tools/PermStat.java Thu Nov 10 06:23:48 2011 -0500 +++ b/agent/src/share/classes/sun/jvm/hotspot/tools/PermStat.java Tue Nov 15 12:40:55 2011 -0500 @@ -63,47 +63,9 @@ } public void run() { - printInternStringStatistics(); printClassLoaderStatistics(); } - private void printInternStringStatistics() { - class StringStat implements StringTable.StringVisitor { - private int count; - private long size; - private OopField stringValueField; - - StringStat() { - VM vm = VM.getVM(); - SystemDictionary sysDict = vm.getSystemDictionary(); - InstanceKlass strKlass = sysDict.getStringKlass(); - // String has a field named 'value' of type 'char[]'. - stringValueField = (OopField) strKlass.findField("value", "[C"); - } - - private long stringSize(Instance instance) { - // We include String content in size calculation. 
- return instance.getObjectSize() + - stringValueField.getValue(instance).getObjectSize(); - } - - public void visit(Instance str) { - count++; - size += stringSize(str); - } - - public void print() { - System.out.println(count + - " intern Strings occupying " + size + " bytes."); - } - } - - StringStat stat = new StringStat(); - StringTable strTable = VM.getVM().getStringTable(); - strTable.stringsDo(stat); - stat.print(); - } - private void printClassLoaderStatistics() { final PrintStream out = System.out; final PrintStream err = System.err; diff -r 3c7d67df8d07 -r f9a80a035a4a make/hotspot_version --- a/make/hotspot_version Thu Nov 10 06:23:48 2011 -0500 +++ b/make/hotspot_version Tue Nov 15 12:40:55 2011 -0500 @@ -35,7 +35,7 @@ HS_MAJOR_VER=23 HS_MINOR_VER=0 -HS_BUILD_NUMBER=05 +HS_BUILD_NUMBER=06 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/sparc/vm/assembler_sparc.inline.hpp --- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -597,6 +597,10 @@ inline void MacroAssembler::jmp( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, G0, rspec); } inline bool MacroAssembler::is_far_target(address d) { + if (ForceUnreachable) { + // References outside the code cache should be treated as far + return d < CodeCache::low_bound() || d > CodeCache::high_bound(); + } return !is_in_wdisp30_range(d, CodeCache::low_bound()) || !is_in_wdisp30_range(d, CodeCache::high_bound()); } @@ -679,28 +683,44 @@ inline void MacroAssembler::load_contents(const AddressLiteral& addrlit, Register d, int offset) { assert_not_delayed(); - sethi(addrlit, d); + if (ForceUnreachable) { + patchable_sethi(addrlit, d); + } else { + sethi(addrlit, d); + } ld(d, addrlit.low10() + offset, d); } inline void MacroAssembler::load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset) { assert_not_delayed(); - sethi(addrlit, 
d); + if (ForceUnreachable) { + patchable_sethi(addrlit, d); + } else { + sethi(addrlit, d); + } ld_ptr(d, addrlit.low10() + offset, d); } inline void MacroAssembler::store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) { assert_not_delayed(); - sethi(addrlit, temp); + if (ForceUnreachable) { + patchable_sethi(addrlit, temp); + } else { + sethi(addrlit, temp); + } st(s, temp, addrlit.low10() + offset); } inline void MacroAssembler::store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) { assert_not_delayed(); - sethi(addrlit, temp); + if (ForceUnreachable) { + patchable_sethi(addrlit, temp); + } else { + sethi(addrlit, temp); + } st_ptr(s, temp, addrlit.low10() + offset); } diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp --- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -367,10 +367,10 @@ void DeoptimizeStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); - __ call(SharedRuntime::deopt_blob()->unpack_with_reexecution()); + __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); __ delayed()->nop(); ce->add_call_info_here(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1116,7 +1116,7 @@ } else { __ set(value_hi, O7); } - offset = store(tmp, base, addr->disp() + hi_word_offset_in_bytes, T_INT, wide, false); + store(tmp, base, addr->disp() + hi_word_offset_in_bytes, T_INT, wide, false); break; } case T_OBJECT: { diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/sparc/vm/c1_Runtime1_sparc.cpp --- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Thu Nov 10 06:23:48 2011 
-0500 +++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -766,7 +766,22 @@ __ ret(); __ delayed()->restore(); + } + break; + case deoptimize_id: + { + __ set_info("deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize)); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + AddressLiteral dest(deopt_blob->unpack_with_reexecution()); + __ jump_to(dest, O0); + __ delayed()->restore(); } break; diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/sparc/vm/methodHandles_sparc.cpp --- a/src/cpu/sparc/vm/methodHandles_sparc.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -352,6 +352,7 @@ BLOCK_COMMENT("load_stack_move {"); __ ldsw(G3_amh_conversion, stack_move_reg); __ sra(stack_move_reg, CONV_STACK_MOVE_SHIFT, stack_move_reg); +#ifdef ASSERT if (VerifyMethodHandles) { Label L_ok, L_bad; int32_t stack_move_limit = 0x0800; // extra-large @@ -363,6 +364,7 @@ __ stop("load_stack_move of garbage value"); __ BIND(L_ok); } +#endif BLOCK_COMMENT("} load_stack_move"); } diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/sparc/vm/methodHandles_sparc.hpp --- a/src/cpu/sparc/vm/methodHandles_sparc.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/sparc/vm/methodHandles_sparc.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -27,7 +27,7 @@ // Adapters enum /* platform_dependent_constants */ { - adapter_code_size = NOT_LP64(22000 DEBUG_ONLY(+ 40000)) LP64_ONLY(32000 DEBUG_ONLY(+ 80000)) + adapter_code_size = NOT_LP64(23000 DEBUG_ONLY(+ 40000)) LP64_ONLY(35000 DEBUG_ONLY(+ 50000)) }; public: diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/sparc/vm/sparc.ad --- a/src/cpu/sparc/vm/sparc.ad Thu Nov 10 06:23:48 2011 -0500 +++ 
b/src/cpu/sparc/vm/sparc.ad Tue Nov 15 12:40:55 2011 -0500 @@ -1860,6 +1860,14 @@ // Threshold size for cleararray. const int Matcher::init_array_short_size = 8 * BytesPerLong; +// No additional cost for CMOVL. +const int Matcher::long_cmove_cost() { return 0; } + +// CMOVF/CMOVD are expensive on T4 and on SPARC64. +const int Matcher::float_cmove_cost() { + return (VM_Version::is_T4() || VM_Version::is_sparc64()) ? ConditionalMoveLimit : 0; +} + // Should the Matcher clone shifts on addressing modes, expecting them to // be subsumed into complex addressing expressions or compute them into // registers? True for Intel but false for most RISCs diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/sparc/vm/vm_version_sparc.cpp --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -211,7 +211,7 @@ #ifdef COMPILER2 // T4 and newer Sparc cpus have fast RDPC. if (has_fast_rdpc() && FLAG_IS_DEFAULT(UseRDPCForConstantTableBase)) { -// FLAG_SET_DEFAULT(UseRDPCForConstantTableBase, true); + FLAG_SET_DEFAULT(UseRDPCForConstantTableBase, true); } // Currently not supported anywhere. 
diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/x86/vm/c1_CodeStubs_x86.cpp --- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -387,9 +387,9 @@ void DeoptimizeStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); - __ call(RuntimeAddress(SharedRuntime::deopt_blob()->unpack_with_reexecution())); + __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); ce->add_call_info_here(_info); - debug_only(__ should_not_reach_here()); + DEBUG_ONLY(__ should_not_reach_here()); } diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/x86/vm/c1_Runtime1_x86.cpp --- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1447,7 +1447,22 @@ oop_maps = new OopMapSet(); oop_maps->add_gc_map(call_offset, map); restore_live_registers(sasm, save_fpu_registers); + } + break; + case deoptimize_id: + { + StubFrame f(sasm, "deoptimize", dont_gc_arguments); + const int num_rt_args = 1; // thread + OopMap* oop_map = save_live_registers(sasm, num_rt_args); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize)); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + __ leave(); + __ jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); } break; diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/x86/vm/frame_x86.cpp --- a/src/cpu/x86/vm/frame_x86.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/x86/vm/frame_x86.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -234,10 +234,12 @@ void frame::patch_pc(Thread* thread, address pc) { address* pc_addr = &(((address*) sp())[-1]); if (TracePcPatching) { - tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "] ", + 
tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", pc_addr, *pc_addr, pc); } - assert(_pc == *pc_addr, err_msg("must be: " INTPTR_FORMAT " == " INTPTR_FORMAT, _pc, *pc_addr)); + // Either the return address is the original one or we are going to + // patch in the same address that's already there. + assert(_pc == *pc_addr || pc == *pc_addr, "must be"); *pc_addr = pc; _cb = CodeCache::find_blob(pc); address original_pc = nmethod::get_deopt_original_pc(this); diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/x86/vm/methodHandles_x86.cpp --- a/src/cpu/x86/vm/methodHandles_x86.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/x86/vm/methodHandles_x86.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -382,6 +382,7 @@ __ movslq(rdi_stack_move, rdi_stack_move); } #endif //_LP64 +#ifdef ASSERT if (VerifyMethodHandles) { Label L_ok, L_bad; int32_t stack_move_limit = 0x4000; // extra-large @@ -393,6 +394,7 @@ __ stop("load_stack_move of garbage value"); __ BIND(L_ok); } +#endif BLOCK_COMMENT("} load_stack_move"); } diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/x86/vm/methodHandles_x86.hpp --- a/src/cpu/x86/vm/methodHandles_x86.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/x86/vm/methodHandles_x86.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -27,7 +27,7 @@ // Adapters enum /* platform_dependent_constants */ { - adapter_code_size = NOT_LP64(30000 DEBUG_ONLY(+ 10000)) LP64_ONLY(80000 DEBUG_ONLY(+ 120000)) + adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 15000)) LP64_ONLY(32000 DEBUG_ONLY(+ 80000)) }; public: diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/x86/vm/sharedRuntime_x86_64.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -2797,17 +2797,25 @@ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) // Use rbp because the frames look interpreted now - __ set_last_Java_frame(noreg, rbp, NULL); - + // Save "the_pc" since it 
cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + address the_pc = __ pc(); + __ set_last_Java_frame(noreg, rbp, the_pc); + + __ andptr(rsp, -(StackAlignmentInBytes)); // Fix stack alignment as required by ABI __ mov(c_rarg0, r15_thread); __ movl(c_rarg1, r14); // second arg: exec_mode __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); + // Revert SP alignment after call since we're going to do some SP relative addressing below + __ movptr(rsp, Address(r15_thread, JavaThread::last_Java_sp_offset())); // Set an oopmap for the call site - oop_maps->add_gc_map(__ pc() - start, + // Use the same PC we used for the last java frame + oop_maps->add_gc_map(the_pc - start, new OopMap( frame_size_in_words, 0 )); - __ reset_last_Java_frame(true, false); + // Clear fp AND pc + __ reset_last_Java_frame(true, true); // Collect return values __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes())); @@ -2968,7 +2976,10 @@ // Prolog // Use rbp because the frames look interpreted now - __ set_last_Java_frame(noreg, rbp, NULL); + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + address the_pc = __ pc(); + __ set_last_Java_frame(noreg, rbp, the_pc); // Call C code. Need thread but NOT official VM entry // crud. We cannot block on this call, no GC can happen. 
Call should @@ -2977,14 +2988,17 @@ // // BasicType unpack_frames(JavaThread* thread, int exec_mode); + __ andptr(rsp, -(StackAlignmentInBytes)); // Align SP as required by ABI __ mov(c_rarg0, r15_thread); __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); // Set an oopmap for the call site - oop_maps->add_gc_map(__ pc() - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); - - __ reset_last_Java_frame(true, false); + // Use the same PC we used for the last java frame + oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + + // Clear fp AND pc + __ reset_last_Java_frame(true, true); // Pop self-frame. __ leave(); // Epilog diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/x86/vm/templateInterpreter_x86_32.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1609,6 +1609,12 @@ // and sender_sp is fp+8 intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + interpreter_frame->interpreter_frame_set_locals(locals); BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); BasicObjectLock* monbot = montop - moncount; diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/x86/vm/templateInterpreter_x86_64.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1622,6 +1622,12 @@ // sender_sp is fp+16 XXX intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + 
interpreter_frame->interpreter_frame_set_locals(locals); BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); BasicObjectLock* monbot = montop - moncount; diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/x86/vm/x86_32.ad Tue Nov 15 12:40:55 2011 -0500 @@ -1393,6 +1393,12 @@ // Threshold size for cleararray. const int Matcher::init_array_short_size = 8 * BytesPerLong; +// Needs 2 CMOV's for longs. +const int Matcher::long_cmove_cost() { return 1; } + +// No CMOVF/CMOVD with SSE/SSE2 +const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } + // Should the Matcher clone shifts on addressing modes, expecting them to // be subsumed into complex addressing expressions or compute them into // registers? True for Intel but false for most RISCs @@ -7905,6 +7911,40 @@ //----------Conditional Move--------------------------------------------------- // Conditional move +instruct jmovI_reg(cmpOp cop, eFlagsReg cr, eRegI dst, eRegI src) %{ + predicate(!VM_Version::supports_cmov() ); + match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); + ins_cost(200); + format %{ "J$cop,us skip\t# signed cmove\n\t" + "MOV $dst,$src\n" + "skip:" %} + ins_encode %{ + Label Lskip; + // Invert sense of branch from sense of CMOV + __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); + __ movl($dst$$Register, $src$$Register); + __ bind(Lskip); + %} + ins_pipe( pipe_cmov_reg ); +%} + +instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src) %{ + predicate(!VM_Version::supports_cmov() ); + match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); + ins_cost(200); + format %{ "J$cop,us skip\t# unsigned cmove\n\t" + "MOV $dst,$src\n" + "skip:" %} + ins_encode %{ + Label Lskip; + // Invert sense of branch from sense of CMOV + __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); + __ movl($dst$$Register, $src$$Register); + __ 
bind(Lskip); + %} + ins_pipe( pipe_cmov_reg ); +%} + instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{ predicate(VM_Version::supports_cmov() ); match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); diff -r 3c7d67df8d07 -r f9a80a035a4a src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Thu Nov 10 06:23:48 2011 -0500 +++ b/src/cpu/x86/vm/x86_64.ad Tue Nov 15 12:40:55 2011 -0500 @@ -1993,6 +1993,12 @@ // Threshold size for cleararray. const int Matcher::init_array_short_size = 8 * BytesPerLong; +// No additional cost for CMOVL. +const int Matcher::long_cmove_cost() { return 0; } + +// No CMOVF/CMOVD with SSE2 +const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } + // Should the Matcher clone shifts on addressing modes, expecting them // to be subsumed into complex addressing expressions or compute them // into registers? True for Intel but false for most RISCs diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/asm/codeBuffer.cpp --- a/src/share/vm/asm/codeBuffer.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/asm/codeBuffer.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -26,6 +26,7 @@ #include "asm/codeBuffer.hpp" #include "compiler/disassembler.hpp" #include "utilities/copy.hpp" +#include "utilities/xmlstream.hpp" // The structure of a CodeSection: // @@ -81,7 +82,7 @@ CodeBuffer::CodeBuffer(CodeBlob* blob) { initialize_misc("static buffer"); initialize(blob->content_begin(), blob->content_size()); - assert(verify_section_allocation(), "initial use of buffer OK"); + verify_section_allocation(); } void CodeBuffer::initialize(csize_t code_size, csize_t locs_size) { @@ -108,17 +109,18 @@ _insts.initialize_locs(locs_size / sizeof(relocInfo)); } - assert(verify_section_allocation(), "initial use of blob is OK"); + verify_section_allocation(); } CodeBuffer::~CodeBuffer() { + verify_section_allocation(); + // If we allocate our code buffer from the CodeCache // via a BufferBlob, and it's not permanent, then // free the BufferBlob. 
// The rest of the memory will be freed when the ResourceObj // is released. - assert(verify_section_allocation(), "final storage configuration still OK"); for (CodeBuffer* cb = this; cb != NULL; cb = cb->before_expand()) { // Previous incarnations of this buffer are held live, so that internal // addresses constructed before expansions will not be confused. @@ -484,7 +486,7 @@ // Done calculating sections; did it come out to the right end? assert(buf_offset == total_content_size(), "sanity"); - assert(dest->verify_section_allocation(), "final configuration works"); + dest->verify_section_allocation(); } csize_t CodeBuffer::total_offset_of(CodeSection* cs) const { @@ -632,7 +634,8 @@ // CodeBuffer gets the final layout (consts, insts, stubs in order of // ascending address). void CodeBuffer::relocate_code_to(CodeBuffer* dest) const { - DEBUG_ONLY(address dest_end = dest->_total_start + dest->_total_size); + address dest_end = dest->_total_start + dest->_total_size; + address dest_filled = NULL; for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) { // pull code out of each section const CodeSection* cs = code_section(n); @@ -654,6 +657,8 @@ Copy::fill_to_bytes(dest_cs->end(), dest_cs->remaining(), Assembler::code_fill_byte()); } + // Keep track of the highest filled address + dest_filled = MAX2(dest_filled, dest_cs->end() + dest_cs->remaining()); assert(cs->locs_start() != (relocInfo*)badAddress, "this section carries no reloc storage, but reloc was attempted"); @@ -668,6 +673,14 @@ } } } + + if (dest->blob() == NULL) { + // Destination is a final resting place, not just another buffer. + // Normalize uninitialized bytes in the final padding. + Copy::fill_to_bytes(dest_filled, dest_end - dest_filled, + Assembler::code_fill_byte()); + + } } csize_t CodeBuffer::figure_expanded_capacities(CodeSection* which_cs, @@ -799,7 +812,7 @@ _decode_begin = NULL; // sanity // Make certain that the new sections are all snugly inside the new blob. 
- assert(verify_section_allocation(), "expanded allocation is ship-shape"); + verify_section_allocation(); #ifndef PRODUCT if (PrintNMethods && (WizardMode || Verbose)) { @@ -828,35 +841,48 @@ DEBUG_ONLY(cb->_blob = (BufferBlob*)badAddress); } -#ifdef ASSERT -bool CodeBuffer::verify_section_allocation() { +void CodeBuffer::verify_section_allocation() { address tstart = _total_start; - if (tstart == badAddress) return true; // smashed by set_blob(NULL) + if (tstart == badAddress) return; // smashed by set_blob(NULL) address tend = tstart + _total_size; if (_blob != NULL) { - assert(tstart >= _blob->content_begin(), "sanity"); - assert(tend <= _blob->content_end(), "sanity"); + + guarantee(tstart >= _blob->content_begin(), "sanity"); + guarantee(tend <= _blob->content_end(), "sanity"); } // Verify disjointness. for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) { CodeSection* sect = code_section(n); if (!sect->is_allocated() || sect->is_empty()) continue; - assert((intptr_t)sect->start() % sect->alignment() == 0 + guarantee((intptr_t)sect->start() % sect->alignment() == 0 || sect->is_empty() || _blob == NULL, "start is aligned"); for (int m = (int) SECT_FIRST; m < (int) SECT_LIMIT; m++) { CodeSection* other = code_section(m); if (!other->is_allocated() || other == sect) continue; - assert(!other->contains(sect->start() ), "sanity"); + guarantee(!other->contains(sect->start() ), "sanity"); // limit is an exclusive address and can be the start of another // section. 
- assert(!other->contains(sect->limit() - 1), "sanity"); + guarantee(!other->contains(sect->limit() - 1), "sanity"); } - assert(sect->end() <= tend, "sanity"); + guarantee(sect->end() <= tend, "sanity"); + guarantee(sect->end() <= sect->limit(), "sanity"); } - return true; } -#endif //ASSERT + +void CodeBuffer::log_section_sizes(const char* name) { + if (xtty != NULL) { + // log info about buffer usage + xtty->print_cr("<blob name='%s' size='%d'>", name, _total_size); + for (int n = (int) CodeBuffer::SECT_FIRST; n < (int) CodeBuffer::SECT_LIMIT; n++) { + CodeSection* sect = code_section(n); + if (!sect->is_allocated() || sect->is_empty()) continue; + xtty->print_cr("<sect index='%d' size='" SIZE_FORMAT "' free='" SIZE_FORMAT "'/>", + n, sect->limit() - sect->start(), sect->limit() - sect->end()); + } + xtty->print_cr("</blob>"); + } +} #ifndef PRODUCT @@ -884,7 +910,6 @@ _comments.add_comment(offset, comment); } - class CodeComment: public CHeapObj { private: friend class CodeComments; diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/asm/codeBuffer.hpp --- a/src/share/vm/asm/codeBuffer.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/asm/codeBuffer.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -362,10 +362,8 @@ // helper for CodeBuffer::expand() void take_over_code_from(CodeBuffer* cs); -#ifdef ASSERT // ensure sections are disjoint, ordered, and contained in the blob - bool verify_section_allocation(); -#endif + void verify_section_allocation(); // copies combined relocations to the blob, returns bytes copied // (if target is null, it is a dry run only, just for sizing) @@ -393,7 +391,7 @@ assert(code_start != NULL, "sanity"); initialize_misc("static buffer"); initialize(code_start, code_size); - assert(verify_section_allocation(), "initial use of buffer OK"); + verify_section_allocation(); } // (2) CodeBuffer referring to pre-allocated CodeBlob. 
@@ -545,6 +543,9 @@ void block_comment(intptr_t offset, const char * comment) PRODUCT_RETURN; + // Log a little info about section usage in the CodeBuffer + void log_section_sizes(const char* name); + #ifndef PRODUCT public: // Printing / Decoding diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/c1/c1_Canonicalizer.hpp --- a/src/share/vm/c1/c1_Canonicalizer.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/c1/c1_Canonicalizer.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -51,6 +51,7 @@ public: Canonicalizer(Compilation* c, Value x, int bci) : _compilation(c), _canonical(x), _bci(bci) { + NOT_PRODUCT(x->set_printable_bci(bci)); if (CanonicalizeNodes) x->visit(this); } Value canonical() const { return _canonical; } diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/c1/c1_Runtime1.cpp --- a/src/share/vm/c1/c1_Runtime1.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/c1/c1_Runtime1.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -681,6 +681,23 @@ } JRT_END +// Cf. OptoRuntime::deoptimize_caller_frame +JRT_ENTRY(void, Runtime1::deoptimize(JavaThread* thread)) + // Called from within the owner thread, so no need for safepoint + RegisterMap reg_map(thread, false); + frame stub_frame = thread->last_frame(); + assert(stub_frame.is_runtime_frame(), "sanity check"); + frame caller_frame = stub_frame.sender(&reg_map); + + // We are coming from a compiled method; check this is true. + assert(CodeCache::find_nmethod(caller_frame.pc()) != NULL, "sanity"); + + // Deoptimize the caller frame. + Deoptimization::deoptimize_frame(thread, caller_frame.id()); + + // Return to the now deoptimized frame.
+JRT_END + static klassOop resolve_field_return_klass(methodHandle caller, int bci, TRAPS) { Bytecode_field field_access(caller, bci); diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/c1/c1_Runtime1.hpp --- a/src/share/vm/c1/c1_Runtime1.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/c1/c1_Runtime1.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -63,6 +63,7 @@ stub(monitorenter_nofpu) /* optimized version that does not preserve fpu registers */ \ stub(monitorexit) \ stub(monitorexit_nofpu) /* optimized version that does not preserve fpu registers */ \ + stub(deoptimize) \ stub(access_field_patching) \ stub(load_klass_patching) \ stub(g1_pre_barrier_slow) \ @@ -152,6 +153,8 @@ static void monitorenter(JavaThread* thread, oopDesc* obj, BasicObjectLock* lock); static void monitorexit (JavaThread* thread, BasicObjectLock* lock); + static void deoptimize(JavaThread* thread); + static int access_field_patching(JavaThread* thread); static int move_klass_patching(JavaThread* thread); diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/ci/ciMethodHandle.cpp --- a/src/share/vm/ci/ciMethodHandle.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/ci/ciMethodHandle.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -86,12 +86,12 @@ } -#ifndef PRODUCT +#ifdef ASSERT // ------------------------------------------------------------------ // ciMethodHandle::print_chain_impl // // Implementation of the print method. -void ciMethodHandle::print_chain_impl(outputStream* st) { +void ciMethodHandle::print_chain_impl() { ASSERT_IN_VM; MethodHandleChain::print(get_oop()); } @@ -101,7 +101,7 @@ // ciMethodHandle::print_chain // // Implementation of the print_chain method. 
-void ciMethodHandle::print_chain(outputStream* st) { - GUARDED_VM_ENTRY(print_chain_impl(st);); +void ciMethodHandle::print_chain() { + GUARDED_VM_ENTRY(print_chain_impl();); } #endif diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/ci/ciMethodHandle.hpp --- a/src/share/vm/ci/ciMethodHandle.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/ci/ciMethodHandle.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -45,7 +45,7 @@ ciMethod* get_adapter( bool is_invokedynamic); protected: - void print_chain_impl(outputStream* st) PRODUCT_RETURN; + void print_chain_impl() NOT_DEBUG_RETURN; public: ciMethodHandle(instanceHandle h_i) : @@ -79,7 +79,7 @@ return _invokedynamic_adapter; } - void print_chain(outputStream* st = tty) PRODUCT_RETURN; + void print_chain() NOT_DEBUG_RETURN; }; #endif // SHARE_VM_CI_CIMETHODHANDLE_HPP diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/code/dependencies.cpp --- a/src/share/vm/code/dependencies.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/code/dependencies.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -763,9 +763,14 @@ // Method m is inherited into ctxk. return true; if (lm != NULL) { - if (!(lm->is_public() || lm->is_protected())) + if (!(lm->is_public() || lm->is_protected())) { // Method is [package-]private, so the override story is complex. return true; // Must punt the assertion to true. + } + if (lm->is_static()) { + // Static methods don't override non-static so punt + return true; + } if ( !Dependencies::is_concrete_method(lm) && !Dependencies::is_concrete_method(m) && Klass::cast(lm->method_holder())->is_subtype_of(m->method_holder())) @@ -1091,9 +1096,11 @@ } bool Dependencies::is_concrete_method(methodOop m) { - if (m->is_abstract()) return false; - // %%% We could treat unexecuted methods as virtually abstract also. - // This would require a deoptimization barrier on first execution. + // Statics are irrelevant to virtual call sites. 
+ if (m->is_static()) return false; + + // We could also return false if m does not yet appear to be + // executed, if the VM version supports this distinction also. return !m->is_abstract(); } @@ -1113,7 +1120,7 @@ bool Dependencies::is_concrete_klass(ciInstanceKlass* k) { if (k->is_abstract()) return false; - // We could return also false if k does not yet appear to be + // We could also return false if k does not yet appear to be // instantiated, if the VM version supports this distinction also. //if (k->is_not_instantiated()) return false; return true; @@ -1123,7 +1130,7 @@ // Statics are irrelevant to virtual call sites. if (m->is_static()) return false; - // We could return also false if m does not yet appear to be + // We could also return false if m does not yet appear to be // executed, if the VM version supports this distinction also. return !m->is_abstract(); } diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/compiler/compileBroker.cpp --- a/src/share/vm/compiler/compileBroker.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/compiler/compileBroker.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1722,11 +1722,11 @@ if (PrintCompilation) { const char* reason = ci_env.failure_reason(); if (compilable == ciEnv::MethodCompilable_not_at_tier) { - tty->print_cr("%3d COMPILE SKIPPED: %s (retry at different tier)", compile_id, reason); + tty->print_cr("%4d COMPILE SKIPPED: %s (retry at different tier)", compile_id, reason); } else if (compilable == ciEnv::MethodCompilable_never) { - tty->print_cr("%3d COMPILE SKIPPED: %s (not retryable)", compile_id, reason); + tty->print_cr("%4d COMPILE SKIPPED: %s (not retryable)", compile_id, reason); } else if (compilable == ciEnv::MethodCompilable) { - tty->print_cr("%3d COMPILE SKIPPED: %s", compile_id, reason); + tty->print_cr("%4d COMPILE SKIPPED: %s", compile_id, reason); } } } else { @@ -1743,6 +1743,14 @@ collect_statistics(thread, time, task); + if (PrintCompilation && PrintCompilation2) { + tty->print("%7d ", (int) 
tty->time_stamp().milliseconds()); // print timestamp + tty->print("%4d ", compile_id); // print compilation number + tty->print("%s ", (is_osr ? "%" : " ")); + int code_size = (task->code() == NULL) ? 0 : task->code()->total_size(); + tty->print_cr("size: %d time: %d inlined: %d bytes", code_size, time.milliseconds(), task->num_inlined_bytecodes()); + } + if (compilable == ciEnv::MethodCompilable_never) { if (is_osr) { method->set_not_osr_compilable(); diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/interpreter/bytecode.hpp --- a/src/share/vm/interpreter/bytecode.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/interpreter/bytecode.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -234,6 +234,13 @@ is_invokespecial() || is_invokedynamic(); } + bool is_method_handle_invoke() const { + return (is_invokedynamic() || + (is_invokevirtual() && + method()->constants()->klass_ref_at_noresolve(index()) == vmSymbols::java_lang_invoke_MethodHandle() && + methodOopDesc::is_method_handle_invoke_name(name()))); + } + // Helper to skip verification. 
Used is_valid() to check if the result is really an invoke inline friend Bytecode_invoke Bytecode_invoke_check(methodHandle method, int bci); }; diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/interpreter/bytecodeTracer.cpp --- a/src/share/vm/interpreter/bytecodeTracer.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/interpreter/bytecodeTracer.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -241,7 +241,7 @@ st->print_cr(" not secondary entry?", i); return false; } - i = cache->entry_at(i)->main_entry_index(); + i = cache->entry_at(i)->main_entry_index() + constantPoolOopDesc::CPCACHE_INDEX_TAG; goto check_cache_index; } else { st->print_cr(" not in cache[*]?", i); diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/interpreter/interpreterRuntime.cpp --- a/src/share/vm/interpreter/interpreterRuntime.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/interpreter/interpreterRuntime.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -549,8 +549,8 @@ if (is_put && !is_static && klass->is_subclass_of(SystemDictionary::CallSite_klass()) && (info.name() == vmSymbols::target_name())) { const jint direction = frame::interpreter_frame_expression_stack_direction(); - oop call_site = *((oop*) thread->last_frame().interpreter_frame_tos_at(-1 * direction)); - oop method_handle = *((oop*) thread->last_frame().interpreter_frame_tos_at( 0 * direction)); + Handle call_site (THREAD, *((oop*) thread->last_frame().interpreter_frame_tos_at(-1 * direction))); + Handle method_handle(THREAD, *((oop*) thread->last_frame().interpreter_frame_tos_at( 0 * direction))); assert(call_site ->is_a(SystemDictionary::CallSite_klass()), "must be"); assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be"); diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/libadt/vectset.cpp --- a/src/share/vm/libadt/vectset.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/libadt/vectset.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -350,6 +350,21 @@ return (int)_xor; } 
+//------------------------------iterate---------------------------------------- +// Used by Set::print(). +class VSetI_ : public SetI_ { + VectorSetI vsi; +public: + VSetI_( const VectorSet *vset, uint &elem ) : vsi(vset) { elem = vsi.elem; } + + uint next(void) { ++vsi; return vsi.elem; } + int test(void) { return vsi.test(); } +}; + +SetI_ *VectorSet::iterate(uint &elem) const { + return new(ResourceObj::C_HEAP) VSetI_(this, elem); +} + //============================================================================= //------------------------------next------------------------------------------- // Find and return the next element of a vector set, or return garbage and diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/libadt/vectset.hpp --- a/src/share/vm/libadt/vectset.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/libadt/vectset.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -151,7 +151,7 @@ private: - SetI_ *iterate(uint&) const { ShouldNotCallThis(); return NULL; } // Removed + SetI_ *iterate(uint&) const; }; //------------------------------Iteration-------------------------------------- diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/oops/constantPoolKlass.cpp --- a/src/share/vm/oops/constantPoolKlass.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/oops/constantPoolKlass.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -532,7 +532,7 @@ if (cp->tag_at(i).is_unresolved_klass()) { // This will force loading of the class klassOop klass = cp->klass_at(i, CHECK); - if (klass->is_instance()) { + if (klass->klass_part()->oop_is_instance()) { // Force initialization of class instanceKlass::cast(klass)->initialize(CHECK); } diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/addnode.cpp --- a/src/share/vm/opto/addnode.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/addnode.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -34,8 +34,6 @@ // Portions of code courtesy of Clifford Click -#define MAXFLOAT ((float)3.40282346638528860e+38) - // Classic Add functionality. 
This covers all the usual 'add' behaviors for // an algebraic ring. Add-integer, add-float, add-double, and binary-or are // all inherited from this class. The various identity values are supplied diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/c2_globals.hpp --- a/src/share/vm/opto/c2_globals.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/c2_globals.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -201,7 +201,7 @@ diagnostic(bool, UnrollLimitCheck, true, \ "Additional overflow checks during loop unroll") \ \ - product(bool, OptimizeFill, false, \ + product(bool, OptimizeFill, true, \ "convert fill/copy loops into intrinsic") \ \ develop(bool, TraceOptimizeFill, false, \ @@ -459,7 +459,7 @@ product(bool, UseOptoBiasInlining, true, \ "Generate biased locking code in C2 ideal graph") \ \ - product(bool, OptimizeStringConcat, false, \ + product(bool, OptimizeStringConcat, true, \ "Optimize the construction of Strings by StringBuilder") \ \ notproduct(bool, PrintOptimizeStringConcat, false, \ diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/callGenerator.cpp --- a/src/share/vm/opto/callGenerator.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/callGenerator.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -775,15 +775,15 @@ Node* bol = NULL; int bc = jvms->method()->java_code_at_bci(jvms->bci()); - if (bc == Bytecodes::_invokespecial) { - // This is the selectAlternative idiom for guardWithTest + if (bc != Bytecodes::_invokedynamic) { + // This is the selectAlternative idiom for guardWithTest or + // similar idioms. Node* receiver = kit.argument(0); // Check if the MethodHandle is the expected one Node* cmp = gvn.transform(new(kit.C, 3) CmpPNode(receiver, predicted_mh)); bol = gvn.transform(new(kit.C, 2) BoolNode(cmp, BoolTest::eq) ); } else { - assert(bc == Bytecodes::_invokedynamic, "must be"); // Get the constant pool cache from the caller class. 
ciMethod* caller_method = jvms->method(); ciBytecodeStream str(caller_method); diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/compile.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -346,15 +346,15 @@ // Disconnect all useless nodes by disconnecting those at the boundary. void Compile::remove_useless_nodes(Unique_Node_List &useful) { uint next = 0; - while( next < useful.size() ) { + while (next < useful.size()) { Node *n = useful.at(next++); // Use raw traversal of out edges since this code removes out edges int max = n->outcnt(); - for (int j = 0; j < max; ++j ) { + for (int j = 0; j < max; ++j) { Node* child = n->raw_out(j); - if( ! useful.member(child) ) { - assert( !child->is_top() || child != top(), - "If top is cached in Compile object it is in useful list"); + if (! useful.member(child)) { + assert(!child->is_top() || child != top(), + "If top is cached in Compile object it is in useful list"); // Only need to remove this out-edge to the useless node n->raw_del_out(j); --j; @@ -362,7 +362,14 @@ } } if (n->outcnt() == 1 && n->has_special_unique_user()) { - record_for_igvn( n->unique_out() ); + record_for_igvn(n->unique_out()); + } + } + // Remove useless macro and predicate opaq nodes + for (int i = C->macro_count()-1; i >= 0; i--) { + Node* n = C->macro_node(i); + if (!useful.member(n)) { + remove_macro_node(n); } } debug_only(verify_graph_edges(true/*check for no_dead_code*/);) @@ -719,6 +726,7 @@ while (_late_inlines.length() > 0) { CallGenerator* cg = _late_inlines.pop(); cg->do_late_inline(); + if (failing()) return; } } assert(_late_inlines.length() == 0, "should have been processed"); @@ -1691,13 +1699,20 @@ // Perform escape analysis if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) { + if (has_loops()) { + // Cleanup graph (remove dead nodes). 
+ TracePhase t2("idealLoop", &_t_idealLoop, true); + PhaseIdealLoop ideal_loop( igvn, false, true ); + if (major_progress()) print_method("PhaseIdealLoop before EA", 2); + if (failing()) return; + } TracePhase t2("escapeAnalysis", &_t_escapeAnalysis, true); ConnectionGraph::do_analysis(this, &igvn); if (failing()) return; igvn.optimize(); - print_method("Iter GVN 3", 2); + print_method("Iter GVN after EA", 2); if (failing()) return; diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/escape.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -108,14 +108,16 @@ // Add ConP(#NULL) and ConN(#NULL) nodes. Node* oop_null = igvn->zerocon(T_OBJECT); _oop_null = oop_null->_idx; - assert(_oop_null < C->unique(), "should be created already"); + assert(_oop_null < nodes_size(), "should be created already"); add_node(oop_null, PointsToNode::JavaObject, PointsToNode::NoEscape, true); if (UseCompressedOops) { Node* noop_null = igvn->zerocon(T_NARROWOOP); _noop_null = noop_null->_idx; - assert(_noop_null < C->unique(), "should be created already"); + assert(_noop_null < nodes_size(), "should be created already"); add_node(noop_null, PointsToNode::JavaObject, PointsToNode::NoEscape, true); + } else { + _noop_null = _oop_null; // Should be initialized } } @@ -174,6 +176,9 @@ } void ConnectionGraph::set_escape_state(uint ni, PointsToNode::EscapeState es) { + // Don't change non-escaping state of NULL pointer. 
+ if (ni == _noop_null || ni == _oop_null) + return; PointsToNode *npt = ptnode_adr(ni); PointsToNode::EscapeState old_es = npt->escape_state(); if (es > old_es) @@ -231,8 +236,8 @@ } if (orig_es != es) { // cache the computed escape state - assert(es != PointsToNode::UnknownEscape, "should have computed an escape state"); - ptnode_adr(idx)->set_escape_state(es); + assert(es > orig_es, "should have computed an escape state"); + set_escape_state(idx, es); } // orig_es could be PointsToNode::UnknownEscape return es; } @@ -334,7 +339,7 @@ add_pointsto_edge(ni, etgt); if(etgt == _phantom_object) { // Special case - field set outside (globally escaping). - ptn->set_escape_state(PointsToNode::GlobalEscape); + set_escape_state(ni, PointsToNode::GlobalEscape); } } else if (et == PointsToNode::DeferredEdge) { deferred_edges->append(etgt); @@ -373,16 +378,17 @@ // whose offset matches "offset". void ConnectionGraph::add_deferred_edge_to_fields(uint from_i, uint adr_i, int offs) { PointsToNode* an = ptnode_adr(adr_i); + bool is_alloc = an->_node->is_Allocate(); for (uint fe = 0; fe < an->edge_count(); fe++) { assert(an->edge_type(fe) == PointsToNode::FieldEdge, "expecting a field edge"); int fi = an->edge_target(fe); PointsToNode* pf = ptnode_adr(fi); - int po = pf->offset(); - if (pf->edge_count() == 0) { - // we have not seen any stores to this field, assume it was set outside this method + int offset = pf->offset(); + if (!is_alloc) { + // Assume the field was set outside this method if it is not Allocation add_pointsto_edge(fi, _phantom_object); } - if (po == offs || po == Type::OffsetBot || offs == Type::OffsetBot) { + if (offset == offs || offset == Type::OffsetBot || offs == Type::OffsetBot) { add_deferred_edge(from_i, fi); } } @@ -1036,7 +1042,7 @@ PointsToNode::EscapeState es = escape_state(alloc); // We have an allocation or call which returns a Java object, // see if it is unescaped. 
- if (es != PointsToNode::NoEscape || !ptn->_scalar_replaceable) + if (es != PointsToNode::NoEscape || !ptn->scalar_replaceable()) continue; // Find CheckCastPP for the allocate or for the return value of a call @@ -1085,7 +1091,7 @@ // so it could be eliminated. alloc->as_Allocate()->_is_scalar_replaceable = true; } - set_escape_state(n->_idx, es); + set_escape_state(n->_idx, es); // CheckCastPP escape state // in order for an object to be scalar-replaceable, it must be: // - a direct allocation (not a call returning an object) // - non-escaping @@ -1097,15 +1103,14 @@ set_map(n->_idx, alloc); const TypeOopPtr *t = igvn->type(n)->isa_oopptr(); if (t == NULL) - continue; // not a TypeInstPtr + continue; // not a TypeOopPtr tinst = t->cast_to_exactness(true)->is_oopptr()->cast_to_instance_id(ni); igvn->hash_delete(n); igvn->set_type(n, tinst); n->raise_bottom_type(tinst); igvn->hash_insert(n); record_for_optimizer(n); - if (alloc->is_Allocate() && ptn->_scalar_replaceable && - (t->isa_instptr() || t->isa_aryptr())) { + if (alloc->is_Allocate() && (t->isa_instptr() || t->isa_aryptr())) { // First, put on the worklist all Field edges from Connection Graph // which is more accurate then putting immediate users from Ideal Graph. @@ -1533,7 +1538,8 @@ worklist_init.push(C->root()); } - GrowableArray<int> cg_worklist; + GrowableArray<Node*> alloc_worklist; + GrowableArray<Node*> addp_worklist; PhaseGVN* igvn = _igvn; bool has_allocations = false; @@ -1546,11 +1552,13 @@ if (n->is_Allocate() || n->is_CallStaticJava() && ptnode_adr(n->_idx)->node_type() == PointsToNode::JavaObject) { has_allocations = true; + if (n->is_Allocate()) + alloc_worklist.append(n); } if(n->is_AddP()) { // Collect address nodes. Use them during stage 3 below // to build initial connection graph field edges. - cg_worklist.append(n->_idx); + addp_worklist.append(n); } else if (n->is_MergeMem()) { // Collect all MergeMem nodes to add memory slices for // scalar replaceable objects in split_unique_types().
@@ -1576,10 +1584,9 @@ // 3. Pass to create initial fields edges (JavaObject -F-> AddP) // to reduce number of iterations during stage 4 below. - uint cg_length = cg_worklist.length(); - for( uint next = 0; next < cg_length; ++next ) { - int ni = cg_worklist.at(next); - Node* n = ptnode_adr(ni)->_node; + uint addp_length = addp_worklist.length(); + for( uint next = 0; next < addp_length; ++next ) { + Node* n = addp_worklist.at(next); Node* base = get_addp_base(n); if (base->is_Proj()) base = base->in(0); @@ -1589,7 +1596,7 @@ } } - cg_worklist.clear(); + GrowableArray<int> cg_worklist; cg_worklist.append(_phantom_object); GrowableArray<uint> worklist; @@ -1648,73 +1655,44 @@ Arena* arena = Thread::current()->resource_area(); VectorSet visited(arena); + + // 5. Find fields initializing values for not escaped allocations + uint alloc_length = alloc_worklist.length(); + for (uint next = 0; next < alloc_length; ++next) { + Node* n = alloc_worklist.at(next); + if (ptnode_adr(n->_idx)->escape_state() == PointsToNode::NoEscape) { + find_init_values(n, &visited, igvn); + } + } + worklist.clear(); - // 5. Remove deferred edges from the graph and adjust - // escape state of nonescaping objects. - cg_length = cg_worklist.length(); - for( uint next = 0; next < cg_length; ++next ) { + // 6. Remove deferred edges from the graph. + uint cg_length = cg_worklist.length(); + for (uint next = 0; next < cg_length; ++next) { int ni = cg_worklist.at(next); PointsToNode* ptn = ptnode_adr(ni); PointsToNode::NodeType nt = ptn->node_type(); if (nt == PointsToNode::LocalVar || nt == PointsToNode::Field) { remove_deferred(ni, &worklist, &visited); Node *n = ptn->_node; - if (n->is_AddP()) { - // Search for objects which are not scalar replaceable - // and adjust their escape state. - adjust_escape_state(ni, igvn); - } } } - // 6. Propagate escape states.
- worklist.clear(); - bool has_non_escaping_obj = false; - - // push all GlobalEscape nodes on the worklist - for( uint next = 0; next < cg_length; ++next ) { - int nk = cg_worklist.at(next); - if (ptnode_adr(nk)->escape_state() == PointsToNode::GlobalEscape) - worklist.push(nk); - } - // mark all nodes reachable from GlobalEscape nodes - while(worklist.length() > 0) { - PointsToNode* ptn = ptnode_adr(worklist.pop()); - uint e_cnt = ptn->edge_count(); - for (uint ei = 0; ei < e_cnt; ei++) { - uint npi = ptn->edge_target(ei); - PointsToNode *np = ptnode_adr(npi); - if (np->escape_state() < PointsToNode::GlobalEscape) { - np->set_escape_state(PointsToNode::GlobalEscape); - worklist.push(npi); - } - } + // 7. Adjust escape state of nonescaping objects. + for (uint next = 0; next < addp_length; ++next) { + Node* n = addp_worklist.at(next); + adjust_escape_state(n); } - // push all ArgEscape nodes on the worklist - for( uint next = 0; next < cg_length; ++next ) { - int nk = cg_worklist.at(next); - if (ptnode_adr(nk)->escape_state() == PointsToNode::ArgEscape) - worklist.push(nk); - } + // 8. Propagate escape states. 
+ worklist.clear(); + + // mark all nodes reachable from GlobalEscape nodes + (void)propagate_escape_state(&cg_worklist, &worklist, PointsToNode::GlobalEscape); + // mark all nodes reachable from ArgEscape nodes - while(worklist.length() > 0) { - PointsToNode* ptn = ptnode_adr(worklist.pop()); - if (ptn->node_type() == PointsToNode::JavaObject) - has_non_escaping_obj = true; // Non GlobalEscape - uint e_cnt = ptn->edge_count(); - for (uint ei = 0; ei < e_cnt; ei++) { - uint npi = ptn->edge_target(ei); - PointsToNode *np = ptnode_adr(npi); - if (np->escape_state() < PointsToNode::ArgEscape) { - np->set_escape_state(PointsToNode::ArgEscape); - worklist.push(npi); - } - } - } - - GrowableArray<Node*> alloc_worklist; + bool has_non_escaping_obj = propagate_escape_state(&cg_worklist, &worklist, PointsToNode::ArgEscape); // push all NoEscape nodes on the worklist for( uint next = 0; next < cg_length; ++next ) { @@ -1722,15 +1700,20 @@ if (ptnode_adr(nk)->escape_state() == PointsToNode::NoEscape) worklist.push(nk); } + alloc_worklist.clear(); // mark all nodes reachable from NoEscape nodes while(worklist.length() > 0) { - PointsToNode* ptn = ptnode_adr(worklist.pop()); - if (ptn->node_type() == PointsToNode::JavaObject) - has_non_escaping_obj = true; // Non GlobalEscape + uint nk = worklist.pop(); + PointsToNode* ptn = ptnode_adr(nk); + if (ptn->node_type() == PointsToNode::JavaObject && + !(nk == _noop_null || nk == _oop_null)) + has_non_escaping_obj = true; // Non Escape Node* n = ptn->_node; - if (n->is_Allocate() && ptn->_scalar_replaceable ) { + bool scalar_replaceable = ptn->scalar_replaceable(); + if (n->is_Allocate() && scalar_replaceable) { // Push scalar replaceable allocations on alloc_worklist - // for processing in split_unique_types(). + // for processing in split_unique_types(). Note, + // following code may change scalar_replaceable value.
alloc_worklist.append(n); } uint e_cnt = ptn->edge_count(); @@ -1738,7 +1721,14 @@ uint npi = ptn->edge_target(ei); PointsToNode *np = ptnode_adr(npi); if (np->escape_state() < PointsToNode::NoEscape) { - np->set_escape_state(PointsToNode::NoEscape); + set_escape_state(npi, PointsToNode::NoEscape); + if (!scalar_replaceable) { + np->set_scalar_replaceable(false); + } + worklist.push(npi); + } else if (np->scalar_replaceable() && !scalar_replaceable) { + // Propagate scalar_replaceable value. + np->set_scalar_replaceable(false); worklist.push(npi); } } @@ -1747,7 +1737,12 @@ _collecting = false; assert(C->unique() == nodes_size(), "there should be no new ideal nodes during ConnectionGraph build"); - if (EliminateLocks) { + assert(ptnode_adr(_oop_null)->escape_state() == PointsToNode::NoEscape, "sanity"); + if (UseCompressedOops) { + assert(ptnode_adr(_noop_null)->escape_state() == PointsToNode::NoEscape, "sanity"); + } + + if (EliminateLocks && has_non_escaping_obj) { // Mark locks before changing ideal graph. int cnt = C->macro_count(); for( int i=0; i < cnt; i++ ) { @@ -1772,7 +1767,18 @@ } #endif - bool has_scalar_replaceable_candidates = alloc_worklist.length() > 0; + bool has_scalar_replaceable_candidates = false; + alloc_length = alloc_worklist.length(); + for (uint next = 0; next < alloc_length; ++next) { + Node* n = alloc_worklist.at(next); + PointsToNode* ptn = ptnode_adr(n->_idx); + assert(ptn->escape_state() == PointsToNode::NoEscape, "sanity"); + if (ptn->scalar_replaceable()) { + has_scalar_replaceable_candidates = true; + break; + } + } + if ( has_scalar_replaceable_candidates && C->AliasLevel() >= 3 && EliminateAllocations ) { @@ -1801,53 +1807,32 @@ return has_non_escaping_obj; } -// Adjust escape state after Connection Graph is built. 
-void ConnectionGraph::adjust_escape_state(int nidx, PhaseTransform* phase) { - PointsToNode* ptn = ptnode_adr(nidx); - Node* n = ptn->_node; - assert(n->is_AddP(), "Should be called for AddP nodes only"); - // Search for objects which are not scalar replaceable. - // Mark their escape state as ArgEscape to propagate the state - // to referenced objects. - // Note: currently there are no difference in compiler optimizations - // for ArgEscape objects and NoEscape objects which are not - // scalar replaceable. +// Find fields initializing values for allocations. +void ConnectionGraph::find_init_values(Node* alloc, VectorSet* visited, PhaseTransform* phase) { + assert(alloc->is_Allocate(), "Should be called for Allocate nodes only"); + PointsToNode* pta = ptnode_adr(alloc->_idx); + assert(pta->escape_state() == PointsToNode::NoEscape, "Not escaped Allocate nodes only"); + InitializeNode* ini = alloc->as_Allocate()->initialization(); Compile* C = _compile; - - int offset = ptn->offset(); - Node* base = get_addp_base(n); - VectorSet* ptset = PointsTo(base); - int ptset_size = ptset->Size(); - + visited->Reset(); // Check if a oop field's initializing value is recorded and add // a corresponding NULL field's value if it is not recorded. // Connection Graph does not record a default initialization by NULL // captured by Initialize node. // - // Note: it will disable scalar replacement in some cases: - // - // Point p[] = new Point[1]; - // p[0] = new Point(); // Will be not scalar replaced - // - // but it will save us from incorrect optimizations in next cases: - // - // Point p[] = new Point[1]; - // if ( x ) p[0] = new Point(); // Will be not scalar replaced - // - // Do a simple control flow analysis to distinguish above cases. - // - if (offset != Type::OffsetBot && ptset_size == 1) { - uint elem = ptset->getelem(); // Allocation node's index - // It does not matter if it is not Allocation node since - // only non-escaping allocations are scalar replaced. 
- if (ptnode_adr(elem)->_node->is_Allocate() && - ptnode_adr(elem)->escape_state() == PointsToNode::NoEscape) { - AllocateNode* alloc = ptnode_adr(elem)->_node->as_Allocate(); - InitializeNode* ini = alloc->initialization(); + uint ae_cnt = pta->edge_count(); + for (uint ei = 0; ei < ae_cnt; ei++) { + uint nidx = pta->edge_target(ei); // Field (AddP) + PointsToNode* ptn = ptnode_adr(nidx); + assert(ptn->_node->is_AddP(), "Should be AddP nodes only"); + int offset = ptn->offset(); + if (offset != Type::OffsetBot && + offset != oopDesc::klass_offset_in_bytes() && + !visited->test_set(offset)) { // Check only oop fields. - const Type* adr_type = n->as_AddP()->bottom_type(); + const Type* adr_type = ptn->_node->as_AddP()->bottom_type(); BasicType basic_field_type = T_INT; if (adr_type->isa_instptr()) { ciField* field = C->alias_type(adr_type->isa_instptr())->field(); @@ -1857,12 +1842,20 @@ // Ignore non field load (for example, klass load) } } else if (adr_type->isa_aryptr()) { - const Type* elemtype = adr_type->isa_aryptr()->elem(); - basic_field_type = elemtype->array_element_basic_type(); + if (offset != arrayOopDesc::length_offset_in_bytes()) { + const Type* elemtype = adr_type->isa_aryptr()->elem(); + basic_field_type = elemtype->array_element_basic_type(); + } else { + // Ignore array length load + } +#ifdef ASSERT } else { - // Raw pointers are used for initializing stores so skip it. + // Raw pointers are used for initializing stores so skip it + // since it should be recorded already + Node* base = get_addp_base(ptn->_node); assert(adr_type->isa_rawptr() && base->is_Proj() && (base->in(0) == alloc),"unexpected pointer type"); +#endif } if (basic_field_type == T_OBJECT || basic_field_type == T_NARROWOOP || @@ -1877,18 +1870,33 @@ // Check for a store which follows allocation without branches. // For example, a volatile field store is not collected // by Initialize node. TODO: it would be nice to use idom() here. 
- for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { - store = n->fast_out(i); - if (store->is_Store() && store->in(0) != NULL) { - Node* ctrl = store->in(0); - while(!(ctrl == ini || ctrl == alloc || ctrl == NULL || - ctrl == C->root() || ctrl == C->top() || ctrl->is_Region() || - ctrl->is_IfTrue() || ctrl->is_IfFalse())) { - ctrl = ctrl->in(0); - } - if (ctrl == ini || ctrl == alloc) { - value = store->in(MemNode::ValueIn); - break; + // + // Search all references to the same field which use different + // AddP nodes, for example, in the next case: + // + // Point p[] = new Point[1]; + // if ( x ) { p[0] = new Point(); p[0].x = x; } + // if ( p[0] != null ) { y = p[0].x; } // has CastPP + // + for (uint next = ei; (next < ae_cnt) && (value == NULL); next++) { + uint fpi = pta->edge_target(next); // Field (AddP) + PointsToNode *ptf = ptnode_adr(fpi); + if (ptf->offset() == offset) { + Node* nf = ptf->_node; + for (DUIterator_Fast imax, i = nf->fast_outs(imax); i < imax; i++) { + store = nf->fast_out(i); + if (store->is_Store() && store->in(0) != NULL) { + Node* ctrl = store->in(0); + while(!(ctrl == ini || ctrl == alloc || ctrl == NULL || + ctrl == C->root() || ctrl == C->top() || ctrl->is_Region() || + ctrl->is_IfTrue() || ctrl->is_IfFalse())) { + ctrl = ctrl->in(0); + } + if (ctrl == ini || ctrl == alloc) { + value = store->in(MemNode::ValueIn); + break; + } + } } } } @@ -1897,21 +1905,35 @@ if (value == NULL || value != ptnode_adr(value->_idx)->_node) { // A field's initializing value was not recorded. Add NULL. uint null_idx = UseCompressedOops ? _noop_null : _oop_null; - add_pointsto_edge(nidx, null_idx); + add_edge_from_fields(alloc->_idx, null_idx, offset); } } } } +} + +// Adjust escape state after Connection Graph is built. 
+void ConnectionGraph::adjust_escape_state(Node* n) { + PointsToNode* ptn = ptnode_adr(n->_idx); + assert(n->is_AddP(), "Should be called for AddP nodes only"); + // Search for objects which are not scalar replaceable + // and mark them to propagate the state to referenced objects. + // + + int offset = ptn->offset(); + Node* base = get_addp_base(n); + VectorSet* ptset = PointsTo(base); + int ptset_size = ptset->Size(); // An object is not scalar replaceable if the field which may point // to it has unknown offset (unknown element of an array of objects). // + if (offset == Type::OffsetBot) { uint e_cnt = ptn->edge_count(); for (uint ei = 0; ei < e_cnt; ei++) { uint npi = ptn->edge_target(ei); - set_escape_state(npi, PointsToNode::ArgEscape); - ptnode_adr(npi)->_scalar_replaceable = false; + ptnode_adr(npi)->set_scalar_replaceable(false); } } @@ -1930,20 +1952,62 @@ // to unknown field (unknown element for arrays, offset is OffsetBot). // // Or the address may point to more then one object. This may produce - // the false positive result (set scalar_replaceable to false) + // the false positive result (set not scalar replaceable) // since the flow-insensitive escape analysis can't separate // the case when stores overwrite the field's value from the case // when stores happened on different control branches. 
// + // Note: it will disable scalar replacement in some cases: + // + // Point p[] = new Point[1]; + // p[0] = new Point(); // Will be not scalar replaced + // + // but it will save us from incorrect optimizations in next cases: + // + // Point p[] = new Point[1]; + // if ( x ) p[0] = new Point(); // Will be not scalar replaced + // if (ptset_size > 1 || ptset_size != 0 && (has_LoadStore || offset == Type::OffsetBot)) { for( VectorSetI j(ptset); j.test(); ++j ) { - set_escape_state(j.elem, PointsToNode::ArgEscape); - ptnode_adr(j.elem)->_scalar_replaceable = false; + ptnode_adr(j.elem)->set_scalar_replaceable(false); } } } +// Propagate escape states to referenced nodes. +bool ConnectionGraph::propagate_escape_state(GrowableArray* cg_worklist, + GrowableArray* worklist, + PointsToNode::EscapeState esc_state) { + bool has_java_obj = false; + + // push all nodes with the same escape state on the worklist + uint cg_length = cg_worklist->length(); + for (uint next = 0; next < cg_length; ++next) { + int nk = cg_worklist->at(next); + if (ptnode_adr(nk)->escape_state() == esc_state) + worklist->push(nk); + } + // mark all reachable nodes + while (worklist->length() > 0) { + PointsToNode* ptn = ptnode_adr(worklist->pop()); + if (ptn->node_type() == PointsToNode::JavaObject) { + has_java_obj = true; + } + uint e_cnt = ptn->edge_count(); + for (uint ei = 0; ei < e_cnt; ei++) { + uint npi = ptn->edge_target(ei); + PointsToNode *np = ptnode_adr(npi); + if (np->escape_state() < esc_state) { + set_escape_state(npi, esc_state); + worklist->push(npi); + } + } + } + // Has not escaping java objects + return has_java_obj && (esc_state < PointsToNode::GlobalEscape); +} + void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *phase) { switch (call->Opcode()) { @@ -2100,6 +2164,7 @@ } else { es = PointsToNode::NoEscape; edge_to = call_idx; + assert(ptnode_adr(call_idx)->scalar_replaceable(), "sanity"); } set_escape_state(call_idx, es); 
add_pointsto_edge(resproj_idx, edge_to); @@ -2123,10 +2188,11 @@ } else { es = PointsToNode::NoEscape; edge_to = call_idx; + assert(ptnode_adr(call_idx)->scalar_replaceable(), "sanity"); int length = call->in(AllocateNode::ALength)->find_int_con(-1); if (length < 0 || length > EliminateAllocationArraySizeLimit) { // Not scalar replaceable if the length is not constant or too big. - ptnode_adr(call_idx)->_scalar_replaceable = false; + ptnode_adr(call_idx)->set_scalar_replaceable(false); } } set_escape_state(call_idx, es); @@ -2168,11 +2234,12 @@ // Mark it as NoEscape so that objects referenced by // it's fields will be marked as NoEscape at least. set_escape_state(call_idx, PointsToNode::NoEscape); + ptnode_adr(call_idx)->set_scalar_replaceable(false); add_pointsto_edge(resproj_idx, call_idx); copy_dependencies = true; } else if (call_analyzer->is_return_local()) { // determine whether any arguments are returned - set_escape_state(call_idx, PointsToNode::NoEscape); + set_escape_state(call_idx, PointsToNode::ArgEscape); bool ret_arg = false; for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { const Type* at = d->field_at(i); @@ -2189,7 +2256,6 @@ add_pointsto_edge(resproj_idx, arg->_idx); else add_deferred_edge(resproj_idx, arg->_idx); - arg_esp->_hidden_alias = true; } } } @@ -2198,18 +2264,12 @@ set_escape_state(call_idx, PointsToNode::GlobalEscape); add_pointsto_edge(resproj_idx, _phantom_object); } - copy_dependencies = true; + if (done) { + copy_dependencies = true; + } } else { set_escape_state(call_idx, PointsToNode::GlobalEscape); add_pointsto_edge(resproj_idx, _phantom_object); - for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { - const Type* at = d->field_at(i); - if (at->isa_oopptr() != NULL) { - Node *arg = call->in(i)->uncast(); - PointsToNode *arg_esp = ptnode_adr(arg->_idx); - arg_esp->_hidden_alias = true; - } - } } if (copy_dependencies) call_analyzer->copy_dependencies(_compile->dependencies()); diff -r 3c7d67df8d07 -r f9a80a035a4a 
src/share/vm/opto/escape.hpp --- a/src/share/vm/opto/escape.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/escape.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -74,7 +74,7 @@ // C2 does not have local variables. However for the purposes of constructing // the connection graph, the following IR nodes are treated as local variables: // Phi (pointer values) -// LoadP +// LoadP, LoadN // Proj#5 (value returned from callnodes including allocations) // CheckCastPP, CastPP // @@ -84,7 +84,7 @@ // // The following node types are JavaObject: // -// top() +// phantom_object (general globally escaped object) // Allocate // AllocateArray // Parm (for incoming arguments) @@ -93,6 +93,7 @@ // ConP // LoadKlass // ThreadLocal +// CallStaticJava (which returns Object) // // AddP nodes are fields. // @@ -130,10 +131,12 @@ typedef enum { UnknownEscape = 0, - NoEscape = 1, // A scalar replaceable object with unique type. - ArgEscape = 2, // An object passed as argument or referenced by - // argument (and not globally escape during call). - GlobalEscape = 3 // An object escapes the method and thread. + NoEscape = 1, // An object does not escape method or thread and it is + // not passed to call. It could be replaced with scalar. + ArgEscape = 2, // An object does not escape method or thread but it is + // passed as argument to call or referenced by argument + // and it does not escape during call. + GlobalEscape = 3 // An object escapes the method or thread. } EscapeState; typedef enum { @@ -153,28 +156,25 @@ NodeType _type; EscapeState _escape; - GrowableArray* _edges; // outgoing edges + GrowableArray* _edges; // outgoing edges + Node* _node; // Ideal node corresponding to this PointsTo node. + int _offset; // Object fields offsets. + bool _scalar_replaceable; // Not escaped object could be replaced with scalar public: - Node* _node; // Ideal node corresponding to this PointsTo node. - int _offset; // Object fields offsets. 
- bool _scalar_replaceable;// Not escaped object could be replaced with scalar - bool _hidden_alias; // This node is an argument to a function. - // which may return it creating a hidden alias. - PointsToNode(): _type(UnknownType), _escape(UnknownEscape), _edges(NULL), _node(NULL), _offset(-1), - _scalar_replaceable(true), - _hidden_alias(false) {} + _scalar_replaceable(true) {} EscapeState escape_state() const { return _escape; } NodeType node_type() const { return _type;} int offset() { return _offset;} + bool scalar_replaceable() { return _scalar_replaceable;} void set_offset(int offs) { _offset = offs;} void set_escape_state(EscapeState state) { _escape = state; } @@ -182,6 +182,7 @@ assert(_type == UnknownType || _type == ntype, "Can't change node type"); _type = ntype; } + void set_scalar_replaceable(bool v) { _scalar_replaceable = v; } // count of outgoing edges uint edge_count() const { return (_edges == NULL) ? 0 : _edges->length(); } @@ -233,8 +234,8 @@ // that pointer values loaded from // a field which has not been set // are assumed to point to. - uint _oop_null; // ConP(#NULL) - uint _noop_null; // ConN(#NULL) + uint _oop_null; // ConP(#NULL)->_idx + uint _noop_null; // ConN(#NULL)->_idx Compile * _compile; // Compile object for current compilation PhaseIterGVN * _igvn; // Value numbering @@ -339,8 +340,16 @@ // Set the escape state of a node void set_escape_state(uint ni, PointsToNode::EscapeState es); + // Find fields initializing values for allocations. + void find_init_values(Node* n, VectorSet* visited, PhaseTransform* phase); + // Adjust escape state after Connection Graph is built. - void adjust_escape_state(int nidx, PhaseTransform* phase); + void adjust_escape_state(Node* n); + + // Propagate escape states to referenced nodes. 
+ bool propagate_escape_state(GrowableArray* cg_worklist, + GrowableArray* worklist, + PointsToNode::EscapeState esc_state); // Compute the escape information bool compute_escape(); @@ -357,21 +366,6 @@ // escape state of a node PointsToNode::EscapeState escape_state(Node *n); - // other information we have collected - bool is_scalar_replaceable(Node *n) { - if (_collecting || (n->_idx >= nodes_size())) - return false; - PointsToNode* ptn = ptnode_adr(n->_idx); - return ptn->escape_state() == PointsToNode::NoEscape && ptn->_scalar_replaceable; - } - - bool hidden_alias(Node *n) { - if (_collecting || (n->_idx >= nodes_size())) - return true; - PointsToNode* ptn = ptnode_adr(n->_idx); - return (ptn->escape_state() != PointsToNode::NoEscape) || ptn->_hidden_alias; - } - #ifndef PRODUCT void dump(); #endif diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/loopnode.cpp --- a/src/share/vm/opto/loopnode.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/loopnode.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1883,7 +1883,7 @@ //----------------------------build_and_optimize------------------------------- // Create a PhaseLoop. Build the ideal Loop tree. Map each Ideal Node to // its corresponding LoopNode. If 'optimize' is true, do some loop cleanups. 
-void PhaseIdealLoop::build_and_optimize(bool do_split_ifs) { +void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool skip_loop_opts) { ResourceMark rm; int old_progress = C->major_progress(); @@ -2072,6 +2072,16 @@ } #endif + if (skip_loop_opts) { + // Cleanup any modified bits + _igvn.optimize(); + + if (C->log() != NULL) { + log_loop_tree(_ltree_root, _ltree_root, C->log()); + } + return; + } + if (ReassociateInvariants) { // Reassociate invariants and prep for split_thru_phi for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) { diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/loopnode.hpp --- a/src/share/vm/opto/loopnode.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/loopnode.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -747,11 +747,11 @@ _dom_lca_tags(arena()), // Thread::resource_area _verify_me(NULL), _verify_only(true) { - build_and_optimize(false); + build_and_optimize(false, false); } // build the loop tree and perform any requested optimizations - void build_and_optimize(bool do_split_if); + void build_and_optimize(bool do_split_if, bool skip_loop_opts); public: // Dominators for the sea of nodes @@ -762,13 +762,13 @@ Node *dom_lca_internal( Node *n1, Node *n2 ) const; // Compute the Ideal Node to Loop mapping - PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs) : + PhaseIdealLoop( PhaseIterGVN &igvn, bool do_split_ifs, bool skip_loop_opts = false) : PhaseTransform(Ideal_Loop), _igvn(igvn), _dom_lca_tags(arena()), // Thread::resource_area _verify_me(NULL), _verify_only(false) { - build_and_optimize(do_split_ifs); + build_and_optimize(do_split_ifs, skip_loop_opts); } // Verify that verify_me made the same decisions as a fresh run. @@ -778,7 +778,7 @@ _dom_lca_tags(arena()), // Thread::resource_area _verify_me(verify_me), _verify_only(false) { - build_and_optimize(false); + build_and_optimize(false, false); } // Build and verify the loop tree without modifying the graph. 
This diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/loopopts.cpp --- a/src/share/vm/opto/loopopts.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/loopopts.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -28,6 +28,7 @@ #include "opto/connode.hpp" #include "opto/divnode.hpp" #include "opto/loopnode.hpp" +#include "opto/matcher.hpp" #include "opto/mulnode.hpp" #include "opto/rootnode.hpp" #include "opto/subnode.hpp" @@ -472,46 +473,50 @@ // 1 or 2 items with a total of 1 or 2 ops executed speculatively. Node *PhaseIdealLoop::conditional_move( Node *region ) { - assert( region->is_Region(), "sanity check" ); - if( region->req() != 3 ) return NULL; + assert(region->is_Region(), "sanity check"); + if (region->req() != 3) return NULL; // Check for CFG diamond Node *lp = region->in(1); Node *rp = region->in(2); - if( !lp || !rp ) return NULL; + if (!lp || !rp) return NULL; Node *lp_c = lp->in(0); - if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL; + if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL; IfNode *iff = lp_c->as_If(); - // Check for highly predictable branch. No point in CMOV'ing if - // we are going to predict accurately all the time. - // %%% This hides patterns produced by utility methods like Math.min. - if( iff->_prob < PROB_UNLIKELY_MAG(3) || - iff->_prob > PROB_LIKELY_MAG(3) ) - return NULL; - // Check for ops pinned in an arm of the diamond. // Can't remove the control flow in this case - if( lp->outcnt() > 1 ) return NULL; - if( rp->outcnt() > 1 ) return NULL; + if (lp->outcnt() > 1) return NULL; + if (rp->outcnt() > 1) return NULL; + + IdealLoopTree* r_loop = get_loop(region); + assert(r_loop == get_loop(iff), "sanity"); + // Always convert to CMOVE if all results are used only outside this loop. 
+ bool used_inside_loop = (r_loop == _ltree_root); // Check profitability int cost = 0; int phis = 0; for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { Node *out = region->fast_out(i); - if( !out->is_Phi() ) continue; // Ignore other control edges, etc + if (!out->is_Phi()) continue; // Ignore other control edges, etc phis++; PhiNode* phi = out->as_Phi(); - switch (phi->type()->basic_type()) { - case T_LONG: - cost++; // Probably encodes as 2 CMOV's + BasicType bt = phi->type()->basic_type(); + switch (bt) { + case T_FLOAT: + case T_DOUBLE: { + cost += Matcher::float_cmove_cost(); // Could be very expensive + break; + } + case T_LONG: { + cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's + } case T_INT: // These all CMOV fine - case T_FLOAT: - case T_DOUBLE: - case T_ADDRESS: // (RawPtr) + case T_ADDRESS: { // (RawPtr) cost++; break; + } case T_NARROWOOP: // Fall through case T_OBJECT: { // Base oops are OK, but not derived oops const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr(); @@ -524,7 +529,7 @@ // relevant bases. This puts the allocator in the business of // manufacturing expensive instructions, generally a bad plan. // Just Say No to Conditionally-Moved Derived Pointers. - if( tp && tp->offset() != 0 ) + if (tp && tp->offset() != 0) return NULL; cost++; break; @@ -533,39 +538,64 @@ return NULL; // In particular, can't do memory or I/O } // Add in cost any speculative ops - for( uint j = 1; j < region->req(); j++ ) { + for (uint j = 1; j < region->req(); j++) { Node *proj = region->in(j); Node *inp = phi->in(j); if (get_ctrl(inp) == proj) { // Found local op cost++; // Check for a chain of dependent ops; these will all become // speculative in a CMOV. 
- for( uint k = 1; k < inp->req(); k++ ) + for (uint k = 1; k < inp->req(); k++) if (get_ctrl(inp->in(k)) == proj) - return NULL; // Too much speculative goo + cost += ConditionalMoveLimit; // Too much speculative goo } } // See if the Phi is used by a Cmp or Narrow oop Decode/Encode. // This will likely Split-If, a higher-payoff operation. for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) { Node* use = phi->fast_out(k); - if( use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP() ) - return NULL; + if (use->is_Cmp() || use->is_DecodeN() || use->is_EncodeP()) + cost += ConditionalMoveLimit; + // Is there a use inside the loop? + // Note: check only basic types since CMoveP is pinned. + if (!used_inside_loop && is_java_primitive(bt)) { + IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use); + if (r_loop == u_loop || r_loop->is_member(u_loop)) { + used_inside_loop = true; + } + } } } - if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo Node* bol = iff->in(1); - assert( bol->Opcode() == Op_Bool, "" ); + assert(bol->Opcode() == Op_Bool, ""); int cmp_op = bol->in(1)->Opcode(); // It is expensive to generate flags from a float compare. // Avoid duplicated float compare. - if( phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL; + if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL; + + float infrequent_prob = PROB_UNLIKELY_MAG(3); + // Ignore cost and blocks frequency if CMOVE can be moved outside the loop. + if (used_inside_loop) { + if (cost >= ConditionalMoveLimit) return NULL; // Too much goo + + // BlockLayoutByFrequency optimization moves infrequent branch + // from hot path. No point in CMOV'ing in such case (110 is used + // instead of 100 to take into account not exactness of float value). + if (BlockLayoutByFrequency) { + infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f); + } + } + // Check for highly predictable branch. 
No point in CMOV'ing if + // we are going to predict accurately all the time. + if (iff->_prob < infrequent_prob || + iff->_prob > (1.0f - infrequent_prob)) + return NULL; // -------------- // Now replace all Phis with CMOV's Node *cmov_ctrl = iff->in(0); uint flip = (lp->Opcode() == Op_IfTrue); - while( 1 ) { + while (1) { PhiNode* phi = NULL; for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { Node *out = region->fast_out(i); @@ -576,15 +606,15 @@ } if (phi == NULL) break; #ifndef PRODUCT - if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV"); + if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV"); #endif // Move speculative ops - for( uint j = 1; j < region->req(); j++ ) { + for (uint j = 1; j < region->req(); j++) { Node *proj = region->in(j); Node *inp = phi->in(j); if (get_ctrl(inp) == proj) { // Found local op #ifndef PRODUCT - if( PrintOpto && VerifyLoopOptimizations ) { + if (PrintOpto && VerifyLoopOptimizations) { tty->print(" speculate: "); inp->dump(); } @@ -596,7 +626,15 @@ register_new_node( cmov, cmov_ctrl ); _igvn.replace_node( phi, cmov ); #ifndef PRODUCT - if( VerifyLoopOptimizations ) verify(); + if (TraceLoopOpts) { + tty->print("CMOV "); + r_loop->dump_head(); + if (Verbose) { + bol->in(1)->dump(1); + cmov->dump(1); + } + } + if (VerifyLoopOptimizations) verify(); #endif } @@ -676,14 +714,14 @@ // Split 'n' through the merge point if it is profitable Node *phi = split_thru_phi( n, n_blk, policy ); - if( !phi ) return n; + if (!phi) return n; // Found a Phi to split thru! // Replace 'n' with the new phi _igvn.replace_node( n, phi ); // Moved a load around the loop, 'en-registering' something. 
- if( n_blk->Opcode() == Op_Loop && n->is_Load() && - !phi->in(LoopNode::LoopBackControl)->is_Load() ) + if (n_blk->is_Loop() && n->is_Load() && + !phi->in(LoopNode::LoopBackControl)->is_Load()) C->set_major_progress(); return phi; diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/machnode.cpp --- a/src/share/vm/opto/machnode.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/machnode.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -484,6 +484,13 @@ // Bind the offset lazily. if (offset == -1) { Compile::ConstantTable& constant_table = Compile::current()->constant_table(); + // If called from Compile::scratch_emit_size assume the worst-case + // for load offsets: half the constant table size. + // NOTE: Don't return or calculate the actual offset (which might + // be zero) because that leads to problems with e.g. jumpXtnd on + // some architectures (cf. add-optimization in SPARC jumpXtnd). + if (Compile::current()->in_scratch_emit_size()) + return constant_table.size() / 2; offset = constant_table.table_base_offset() + constant_table.find_offset(_constant); _constant.set_offset(offset); } diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/matcher.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1915,7 +1915,7 @@ set_dontcare(n); break; case Op_Jump: - mstack.push(n->in(1), Visit); // Switch Value + mstack.push(n->in(1), Pre_Visit); // Switch Value (could be shared) mstack.push(n->in(0), Pre_Visit); // Visit Control input continue; // while (mstack.is_nonempty()) case Op_StrComp: diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/matcher.hpp --- a/src/share/vm/opto/matcher.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/matcher.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -360,6 +360,12 @@ // Anything this size or smaller may get converted to discrete scalar stores. static const int init_array_short_size; + // Some hardware needs 2 CMOV's for longs. 
+ static const int long_cmove_cost(); + + // Some hardware have expensive CMOV for float and double. + static const int float_cmove_cost(); + // Should the Matcher clone shifts on addressing modes, expecting them to // be subsumed into complex addressing expressions or compute them into // registers? True for Intel but false for most RISCs diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/memnode.cpp --- a/src/share/vm/opto/memnode.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/memnode.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1421,6 +1421,12 @@ const TypeOopPtr *t_oop = addr_t->isa_oopptr(); if (can_reshape && opt_mem->is_Phi() && (t_oop != NULL) && t_oop->is_known_instance_field()) { + PhaseIterGVN *igvn = phase->is_IterGVN(); + if (igvn != NULL && igvn->_worklist.member(opt_mem)) { + // Delay this transformation until memory Phi is processed. + phase->is_IterGVN()->_worklist.push(this); + return NULL; + } // Split instance field load through Phi. Node* result = split_through_phi(phase); if (result != NULL) return result; diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/phaseX.cpp --- a/src/share/vm/opto/phaseX.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/phaseX.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -322,11 +322,12 @@ void NodeHash::dump() { _total_inserts += _inserts; _total_insert_probes += _insert_probes; - if( PrintCompilation && PrintOptoStatistics && Verbose && (_inserts > 0) ) { // PrintOptoGVN - if( PrintCompilation2 ) { - for( uint i=0; i<_max; i++ ) - if( _table[i] ) - tty->print("%d/%d/%d ",i,_table[i]->hash()&(_max-1),_table[i]->_idx); + if (PrintCompilation && PrintOptoStatistics && Verbose && (_inserts > 0)) { + if (WizardMode) { + for (uint i=0; i<_max; i++) { + if (_table[i]) + tty->print("%d/%d/%d ",i,_table[i]->hash()&(_max-1),_table[i]->_idx); + } } tty->print("\nGVN Hash stats: %d grows to %d max_size\n", _grows, _max); tty->print(" %d/%d (%8.1f%% full)\n", _inserts, _max, (double)_inserts/_max*100.0); diff -r 
3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/runtime.cpp --- a/src/share/vm/opto/runtime.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/runtime.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -338,6 +338,24 @@ // inform GC that we won't do card marks for initializing writes. new_store_pre_barrier(thread); } + + oop result = thread->vm_result(); + if ((len > 0) && (result != NULL) && + is_deoptimized_caller_frame(thread)) { + // Zero array here if the caller is deoptimized. + int size = ((typeArrayOop)result)->object_size(); + BasicType elem_type = typeArrayKlass::cast(array_type)->element_type(); + const size_t hs = arrayOopDesc::header_size(elem_type); + // Align to next 8 bytes to avoid trashing arrays's length. + const size_t aligned_hs = align_object_offset(hs); + HeapWord* obj = (HeapWord*)result; + if (aligned_hs > hs) { + Copy::zero_to_words(obj+hs, aligned_hs-hs); + } + // Optimized zeroing. + Copy::fill_to_aligned_words(obj+aligned_hs, size-aligned_hs); + } + JRT_END // Note: multianewarray for one dimension is handled inline by GraphKit::new_array. @@ -1130,12 +1148,22 @@ assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check"); frame caller_frame = stub_frame.sender(&reg_map); - // bypass VM_DeoptimizeFrame and deoptimize the frame directly + // Deoptimize the caller frame.
+ Deoptimization::deoptimize_frame(thread, caller_frame.id()); } } +bool OptoRuntime::is_deoptimized_caller_frame(JavaThread *thread) { + // Called from within the owner thread, so no need for safepoint + RegisterMap reg_map(thread); + frame stub_frame = thread->last_frame(); + assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check"); + frame caller_frame = stub_frame.sender(&reg_map); + return caller_frame.is_deoptimized_frame(); +} + + const TypeFunc *OptoRuntime::register_finalizer_Type() { // create input type (domain) const Type **fields = TypeTuple::fields(1); diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/opto/runtime.hpp --- a/src/share/vm/opto/runtime.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/opto/runtime.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -175,6 +175,7 @@ static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm); static address rethrow_C (oopDesc* exception, JavaThread *thread, address return_pc ); static void deoptimize_caller_frame (JavaThread *thread, bool doit); + static bool is_deoptimized_caller_frame (JavaThread *thread); // CodeBlob support // =================================================================== diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/prims/jniCheck.cpp --- a/src/share/vm/prims/jniCheck.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/prims/jniCheck.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -107,7 +107,7 @@ if (env != xenv) { \ NativeReportJNIFatalError(thr, warn_wrong_jnienv); \ } \ - __ENTRY(result_type, header, thr) + VM_ENTRY_BASE(result_type, header, thr) #define UNCHECKED() (unchecked_jni_NativeInterface) diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/prims/jvmtiEnter.xsl --- a/src/share/vm/prims/jvmtiEnter.xsl Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/prims/jvmtiEnter.xsl Tue Nov 15 12:40:55 2011 -0500 @@ -426,7 +426,7 @@ ThreadInVMfromNative __tiv(current_thread); - __ENTRY(jvmtiError, + VM_ENTRY_BASE(jvmtiError, , current_thread)
diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/prims/jvmtiEnv.cpp --- a/src/share/vm/prims/jvmtiEnv.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/prims/jvmtiEnv.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -173,7 +173,7 @@ // from native so as to resolve the jthread. ThreadInVMfromNative __tiv(current_thread); - __ENTRY(jvmtiError, JvmtiEnv::GetThreadLocalStorage , current_thread) + VM_ENTRY_BASE(jvmtiError, JvmtiEnv::GetThreadLocalStorage , current_thread) debug_only(VMNativeEntryWrapper __vew;) oop thread_oop = JNIHandles::resolve_external_guard(thread); diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/prims/jvmtiExport.cpp --- a/src/share/vm/prims/jvmtiExport.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/prims/jvmtiExport.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -373,7 +373,7 @@ JavaThread* current_thread = (JavaThread*) ThreadLocalStorage::thread(); // transition code: native to VM ThreadInVMfromNative __tiv(current_thread); - __ENTRY(jvmtiEnv*, JvmtiExport::get_jvmti_interface, current_thread) + VM_ENTRY_BASE(jvmtiEnv*, JvmtiExport::get_jvmti_interface, current_thread) debug_only(VMNativeEntryWrapper __vew;) JvmtiEnv *jvmti_env = JvmtiEnv::create_a_jvmti(version); diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/prims/methodHandles.cpp --- a/src/share/vm/prims/methodHandles.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/prims/methodHandles.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -206,9 +206,12 @@ _adapter_code = MethodHandlesAdapterBlob::create(adapter_code_size); if (_adapter_code == NULL) vm_exit_out_of_memory(adapter_code_size, "CodeCache: no room for MethodHandles adapters"); - CodeBuffer code(_adapter_code); - MethodHandlesAdapterGenerator g(&code); - g.generate(); + { + CodeBuffer code(_adapter_code); + MethodHandlesAdapterGenerator g(&code); + g.generate(); + code.log_section_sizes("MethodHandlesAdapterBlob"); + } } //------------------------------------------------------------------------------ @@ -3079,26 +3082,26 @@ JVM_END 
JVM_ENTRY(void, MHN_setCallSiteTargetNormal(JNIEnv* env, jobject igcls, jobject call_site_jh, jobject target_jh)) { - oop call_site = JNIHandles::resolve_non_null(call_site_jh); - oop target = JNIHandles::resolve(target_jh); + Handle call_site(THREAD, JNIHandles::resolve_non_null(call_site_jh)); + Handle target (THREAD, JNIHandles::resolve(target_jh)); { // Walk all nmethods depending on this call site. MutexLocker mu(Compile_lock, thread); Universe::flush_dependents_on(call_site, target); } - java_lang_invoke_CallSite::set_target(call_site, target); + java_lang_invoke_CallSite::set_target(call_site(), target()); } JVM_END JVM_ENTRY(void, MHN_setCallSiteTargetVolatile(JNIEnv* env, jobject igcls, jobject call_site_jh, jobject target_jh)) { - oop call_site = JNIHandles::resolve_non_null(call_site_jh); - oop target = JNIHandles::resolve(target_jh); + Handle call_site(THREAD, JNIHandles::resolve_non_null(call_site_jh)); + Handle target (THREAD, JNIHandles::resolve(target_jh)); { // Walk all nmethods depending on this call site. MutexLocker mu(Compile_lock, thread); Universe::flush_dependents_on(call_site, target); } - java_lang_invoke_CallSite::set_target_volatile(call_site, target); + java_lang_invoke_CallSite::set_target_volatile(call_site(), target()); } JVM_END diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/prims/unsafe.cpp --- a/src/share/vm/prims/unsafe.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/prims/unsafe.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -302,21 +302,24 @@ UNSAFE_ENTRY(void, Unsafe_SetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h)) UnsafeWrapper("Unsafe_SetObjectVolatile"); + { + // Catch VolatileCallSite.target stores (via + // CallSite.setTargetVolatile) and check call site dependencies. 
+ oop p = JNIHandles::resolve(obj); + if ((offset == java_lang_invoke_CallSite::target_offset_in_bytes()) && p->is_a(SystemDictionary::CallSite_klass())) { + Handle call_site (THREAD, p); + Handle method_handle(THREAD, JNIHandles::resolve(x_h)); + assert(call_site ->is_a(SystemDictionary::CallSite_klass()), "must be"); + assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be"); + { + // Walk all nmethods depending on this call site. + MutexLocker mu(Compile_lock, thread); + Universe::flush_dependents_on(call_site(), method_handle()); + } + } + } oop x = JNIHandles::resolve(x_h); oop p = JNIHandles::resolve(obj); - // Catch VolatileCallSite.target stores (via - // CallSite.setTargetVolatile) and check call site dependencies. - if ((offset == java_lang_invoke_CallSite::target_offset_in_bytes()) && p->is_a(SystemDictionary::CallSite_klass())) { - oop call_site = p; - oop method_handle = x; - assert(call_site ->is_a(SystemDictionary::CallSite_klass()), "must be"); - assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "must be"); - { - // Walk all nmethods depending on this call site. 
- MutexLocker mu(Compile_lock, thread); - Universe::flush_dependents_on(call_site, method_handle); - } - } void* addr = index_oop_from_field_offset_long(p, offset); OrderAccess::release(); if (UseCompressedOops) { diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/runtime/arguments.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1577,18 +1577,9 @@ sprintf(buffer, "java.lang.Integer.IntegerCache.high=" INTX_FORMAT, AutoBoxCacheMax); add_property(buffer); } - if (AggressiveOpts && FLAG_IS_DEFAULT(DoEscapeAnalysis)) { - FLAG_SET_DEFAULT(DoEscapeAnalysis, true); - } if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) { FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500); } - if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeStringConcat)) { - FLAG_SET_DEFAULT(OptimizeStringConcat, true); - } - if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeFill)) { - FLAG_SET_DEFAULT(OptimizeFill, true); - } #endif if (AggressiveOpts) { diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/runtime/deoptimization.cpp --- a/src/share/vm/runtime/deoptimization.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/runtime/deoptimization.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -362,8 +362,6 @@ intptr_t* frame_sizes = NEW_C_HEAP_ARRAY(intptr_t, number_of_frames); // +1 because we always have an interpreter return address for the final slot. address* frame_pcs = NEW_C_HEAP_ARRAY(address, number_of_frames + 1); - int callee_parameters = 0; - int callee_locals = 0; int popframe_extra_args = 0; // Create an interpreter return address for the stub to use as its return // address so the skeletal frames are perfectly walkable @@ -387,14 +385,16 @@ // handles are used. If the caller is interpreted get the real // value so that the proper amount of space can be added to it's // frame. 
- int caller_actual_parameters = callee_parameters; + bool caller_was_method_handle = false; if (deopt_sender.is_interpreted_frame()) { methodHandle method = deopt_sender.interpreter_frame_method(); - Bytecode_invoke cur = Bytecode_invoke_check(method, - deopt_sender.interpreter_frame_bci()); - Symbol* signature = method->constants()->signature_ref_at(cur.index()); - ArgumentSizeComputer asc(signature); - caller_actual_parameters = asc.size() + (cur.has_receiver() ? 1 : 0); + Bytecode_invoke cur = Bytecode_invoke_check(method, deopt_sender.interpreter_frame_bci()); + if (cur.is_method_handle_invoke()) { + // Method handle invokes may involve fairly arbitrary chains of + // calls so it's impossible to know how much actual space the + // caller has for locals. + caller_was_method_handle = true; + } } // @@ -411,14 +411,15 @@ // in the frame_sizes/frame_pcs so the assembly code can do a trivial walk. // so things look a little strange in this loop. // + int callee_parameters = 0; + int callee_locals = 0; for (int index = 0; index < array->frames(); index++ ) { // frame[number_of_frames - 1 ] = on_stack_size(youngest) // frame[number_of_frames - 2 ] = on_stack_size(sender(youngest)) // frame[number_of_frames - 3 ] = on_stack_size(sender(sender(youngest))) int caller_parms = callee_parameters; - if (index == array->frames() - 1) { - // Use the value from the interpreted caller - caller_parms = caller_actual_parameters; + if ((index == array->frames() - 1) && caller_was_method_handle) { + caller_parms = 0; } frame_sizes[number_of_frames - 1 - index] = BytesPerWord * array->element(index)->on_stack_size(caller_parms, callee_parameters, @@ -460,13 +461,13 @@ // QQQ I'd rather see this pushed down into last_frame_adjust // and have it take the sender (aka caller). 
- if (deopt_sender.is_compiled_frame()) { + if (deopt_sender.is_compiled_frame() || caller_was_method_handle) { caller_adjustment = last_frame_adjust(0, callee_locals); - } else if (callee_locals > caller_actual_parameters) { + } else if (callee_locals > callee_parameters) { // The caller frame may need extending to accommodate // non-parameter locals of the first unpacked interpreted frame. // Compute that adjustment. - caller_adjustment = last_frame_adjust(caller_actual_parameters, callee_locals); + caller_adjustment = last_frame_adjust(callee_parameters, callee_locals); } // If the sender is deoptimized the we must retrieve the address of the handler @@ -481,7 +482,7 @@ UnrollBlock* info = new UnrollBlock(array->frame_size() * BytesPerWord, caller_adjustment * BytesPerWord, - caller_actual_parameters, + caller_was_method_handle ? 0 : callee_parameters, number_of_frames, frame_sizes, frame_pcs, diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/runtime/frame.cpp --- a/src/share/vm/runtime/frame.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/runtime/frame.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1338,7 +1338,11 @@ // Label values common to most frames values.describe(-1, unextended_sp(), err_msg("unextended_sp for #%d", frame_no)); values.describe(-1, sp(), err_msg("sp for #%d", frame_no)); - values.describe(-1, fp(), err_msg("fp for #%d", frame_no)); + if (is_compiled_frame()) { + values.describe(-1, sp() + _cb->frame_size(), err_msg("computed fp for #%d", frame_no)); + } else { + values.describe(-1, fp(), err_msg("fp for #%d", frame_no)); + } } if (is_interpreted_frame()) { methodOop m = interpreter_frame_method(); @@ -1450,9 +1454,8 @@ } -void FrameValues::print() { +void FrameValues::print(JavaThread* thread) { _values.sort(compare); - JavaThread* thread = JavaThread::current(); // Sometimes values like the fp can be invalid values if the // register map wasn't updated during the walk. 
Trim out values @@ -1460,12 +1463,22 @@ int min_index = 0; int max_index = _values.length() - 1; intptr_t* v0 = _values.at(min_index).location; - while (!thread->is_in_stack((address)v0)) { - v0 = _values.at(++min_index).location; - } intptr_t* v1 = _values.at(max_index).location; - while (!thread->is_in_stack((address)v1)) { - v1 = _values.at(--max_index).location; + + if (thread == Thread::current()) { + while (!thread->is_in_stack((address)v0)) { + v0 = _values.at(++min_index).location; + } + while (!thread->is_in_stack((address)v1)) { + v1 = _values.at(--max_index).location; + } + } else { + while (!thread->on_local_stack((address)v0)) { + v0 = _values.at(++min_index).location; + } + while (!thread->on_local_stack((address)v1)) { + v1 = _values.at(--max_index).location; + } } intptr_t* min = MIN2(v0, v1); intptr_t* max = MAX2(v0, v1); diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/runtime/frame.hpp --- a/src/share/vm/runtime/frame.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/runtime/frame.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -516,7 +516,7 @@ void describe(int owner, intptr_t* location, const char* description, int priority = 0); void validate(); - void print(); + void print(JavaThread* thread); }; #endif diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/runtime/globals.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -577,8 +577,8 @@ develop(bool, VerifyStack, false, \ "Verify stack of each thread when it is entering a runtime call") \ \ - develop(bool, ForceUnreachable, false, \ - "(amd64) Make all non code cache addresses to be unreachable with rip-rel forcing use of 64bit literal fixups") \ + diagnostic(bool, ForceUnreachable, false, \ + "Make all non code cache addresses to be unreachable with forcing use of 64bit literal fixups") \ \ notproduct(bool, StressDerivedPointers, false, \ "Force scavenge when a derived pointers is detected on stack " \ 
@@ -904,7 +904,7 @@ product(bool, AlwaysRestoreFPU, false, \ "Restore the FPU control word after every JNI call (expensive)") \ \ - notproduct(bool, PrintCompilation2, false, \ + diagnostic(bool, PrintCompilation2, false, \ "Print additional statistics per compilation") \ \ diagnostic(bool, PrintAdapterHandlers, false, \ @@ -3364,7 +3364,7 @@ notproduct(bool, ExitOnFullCodeCache, false, \ "Exit the VM if we fill the code cache.") \ \ - product(bool, UseCodeCacheFlushing, false, \ + product(bool, UseCodeCacheFlushing, true, \ "Attempt to clean the code cache before shutting off compiler") \ \ product(intx, MinCodeCacheFlushingInterval, 30, \ diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/runtime/interfaceSupport.hpp --- a/src/share/vm/runtime/interfaceSupport.hpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/runtime/interfaceSupport.hpp Tue Nov 15 12:40:55 2011 -0500 @@ -72,9 +72,9 @@ } }; -// InterfaceSupport provides functionality used by the __LEAF and __ENTRY -// macros. These macros are used to guard entry points into the VM and -// perform checks upon leave of the VM. +// InterfaceSupport provides functionality used by the VM_LEAF_BASE and +// VM_ENTRY_BASE macros. These macros are used to guard entry points into +// the VM and perform checks upon leave of the VM. 
class InterfaceSupport: AllStatic { @@ -433,7 +433,7 @@ // LEAF routines do not lock, GC or throw exceptions -#define __LEAF(result_type, header) \ +#define VM_LEAF_BASE(result_type, header) \ TRACE_CALL(result_type, header) \ debug_only(NoHandleMark __hm;) \ /* begin of body */ @@ -441,7 +441,7 @@ // ENTRY routines may lock, GC and throw exceptions -#define __ENTRY(result_type, header, thread) \ +#define VM_ENTRY_BASE(result_type, header, thread) \ TRACE_CALL(result_type, header) \ HandleMarkCleaner __hm(thread); \ Thread* THREAD = thread; \ @@ -450,7 +450,7 @@ // QUICK_ENTRY routines behave like ENTRY but without a handle mark -#define __QUICK_ENTRY(result_type, header, thread) \ +#define VM_QUICK_ENTRY_BASE(result_type, header, thread) \ TRACE_CALL(result_type, header) \ debug_only(NoHandleMark __hm;) \ Thread* THREAD = thread; \ @@ -463,20 +463,20 @@ #define IRT_ENTRY(result_type, header) \ result_type header { \ ThreadInVMfromJava __tiv(thread); \ - __ENTRY(result_type, header, thread) \ + VM_ENTRY_BASE(result_type, header, thread) \ debug_only(VMEntryWrapper __vew;) #define IRT_LEAF(result_type, header) \ result_type header { \ - __LEAF(result_type, header) \ + VM_LEAF_BASE(result_type, header) \ debug_only(No_Safepoint_Verifier __nspv(true);) #define IRT_ENTRY_NO_ASYNC(result_type, header) \ result_type header { \ ThreadInVMfromJavaNoAsyncException __tiv(thread); \ - __ENTRY(result_type, header, thread) \ + VM_ENTRY_BASE(result_type, header, thread) \ debug_only(VMEntryWrapper __vew;) // Another special case for nmethod_entry_point so the nmethod that the @@ -487,7 +487,7 @@ result_type header { \ nmethodLocker _nmlock(nm); \ ThreadInVMfromJavaNoAsyncException __tiv(thread); \ - __ENTRY(result_type, header, thread) + VM_ENTRY_BASE(result_type, header, thread) #define IRT_END } @@ -497,20 +497,20 @@ #define JRT_ENTRY(result_type, header) \ result_type header { \ ThreadInVMfromJava __tiv(thread); \ - __ENTRY(result_type, header, thread) \ + 
VM_ENTRY_BASE(result_type, header, thread) \ debug_only(VMEntryWrapper __vew;) #define JRT_LEAF(result_type, header) \ result_type header { \ - __LEAF(result_type, header) \ + VM_LEAF_BASE(result_type, header) \ debug_only(JRT_Leaf_Verifier __jlv;) #define JRT_ENTRY_NO_ASYNC(result_type, header) \ result_type header { \ ThreadInVMfromJavaNoAsyncException __tiv(thread); \ - __ENTRY(result_type, header, thread) \ + VM_ENTRY_BASE(result_type, header, thread) \ debug_only(VMEntryWrapper __vew;) // Same as JRT Entry but allows for return value after the safepoint @@ -543,11 +543,11 @@ assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \ ThreadInVMfromNative __tiv(thread); \ debug_only(VMNativeEntryWrapper __vew;) \ - __ENTRY(result_type, header, thread) + VM_ENTRY_BASE(result_type, header, thread) // Ensure that the VMNativeEntryWrapper constructor, which can cause -// a GC, is called outside the NoHandleMark (set via __QUICK_ENTRY). +// a GC, is called outside the NoHandleMark (set via VM_QUICK_ENTRY_BASE). 
#define JNI_QUICK_ENTRY(result_type, header) \ extern "C" { \ result_type JNICALL header { \ @@ -555,7 +555,7 @@ assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \ ThreadInVMfromNative __tiv(thread); \ debug_only(VMNativeEntryWrapper __vew;) \ - __QUICK_ENTRY(result_type, header, thread) + VM_QUICK_ENTRY_BASE(result_type, header, thread) #define JNI_LEAF(result_type, header) \ @@ -563,7 +563,7 @@ result_type JNICALL header { \ JavaThread* thread=JavaThread::thread_from_jni_environment(env); \ assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \ - __LEAF(result_type, header) + VM_LEAF_BASE(result_type, header) // Close the routine and the extern "C" @@ -579,7 +579,7 @@ JavaThread* thread=JavaThread::thread_from_jni_environment(env); \ ThreadInVMfromNative __tiv(thread); \ debug_only(VMNativeEntryWrapper __vew;) \ - __ENTRY(result_type, header, thread) + VM_ENTRY_BASE(result_type, header, thread) #define JVM_ENTRY_NO_ENV(result_type, header) \ @@ -588,7 +588,7 @@ JavaThread* thread = (JavaThread*)ThreadLocalStorage::thread(); \ ThreadInVMfromNative __tiv(thread); \ debug_only(VMNativeEntryWrapper __vew;) \ - __ENTRY(result_type, header, thread) + VM_ENTRY_BASE(result_type, header, thread) #define JVM_QUICK_ENTRY(result_type, header) \ @@ -597,14 +597,14 @@ JavaThread* thread=JavaThread::thread_from_jni_environment(env); \ ThreadInVMfromNative __tiv(thread); \ debug_only(VMNativeEntryWrapper __vew;) \ - __QUICK_ENTRY(result_type, header, thread) + VM_QUICK_ENTRY_BASE(result_type, header, thread) #define JVM_LEAF(result_type, header) \ extern "C" { \ result_type JNICALL header { \ VM_Exit::block_if_vm_exited(); \ - __LEAF(result_type, header) + VM_LEAF_BASE(result_type, header) #define JVM_END } } diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/runtime/sharedRuntime.cpp --- a/src/share/vm/runtime/sharedRuntime.cpp Thu Nov 10 06:23:48 2011 -0500 +++ 
b/src/share/vm/runtime/sharedRuntime.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -1672,9 +1672,12 @@ nmethod* nm = cb->as_nmethod_or_null(); assert(nm, "must be"); - // Don't fixup MethodHandle call sites as c2i/i2c adapters are used - // to implement MethodHandle actions. - if (nm->is_method_handle_return(caller_pc)) { + // Get the return PC for the passed caller PC. + address return_pc = caller_pc + frame::pc_return_offset; + + // Don't fixup method handle call sites as the executed method + // handle adapters are doing the required MethodHandle chain work. + if (nm->is_method_handle_return(return_pc)) { return; } @@ -1693,8 +1696,8 @@ // Expect to find a native call there (unless it was no-inline cache vtable dispatch) MutexLockerEx ml_patch(Patching_lock, Mutex::_no_safepoint_check_flag); - if (NativeCall::is_call_before(caller_pc + frame::pc_return_offset)) { - NativeCall *call = nativeCall_before(caller_pc + frame::pc_return_offset); + if (NativeCall::is_call_before(return_pc)) { + NativeCall *call = nativeCall_before(return_pc); // // bug 6281185. We might get here after resolving a call site to a vanilla // virtual call. 
Because the resolvee uses the verified entry it may then @@ -1744,7 +1747,6 @@ } } } - IRT_END diff -r 3c7d67df8d07 -r f9a80a035a4a src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Thu Nov 10 06:23:48 2011 -0500 +++ b/src/share/vm/runtime/thread.cpp Tue Nov 15 12:40:55 2011 -0500 @@ -2947,7 +2947,7 @@ values.validate(); } else { tty->print_cr("[Describe stack layout]"); - values.print(); + values.print(this); } } #endif diff -r 3c7d67df8d07 -r f9a80a035a4a test/compiler/6865265/StackOverflowBug.java --- a/test/compiler/6865265/StackOverflowBug.java Thu Nov 10 06:23:48 2011 -0500 +++ b/test/compiler/6865265/StackOverflowBug.java Tue Nov 15 12:40:55 2011 -0500 @@ -28,7 +28,7 @@ * @summary JVM crashes with "missing exception handler" error * @author volker.simonis@sap.com * - * @run main/othervm -XX:CompileThreshold=100 -Xbatch -Xss128k StackOverflowBug + * @run main/othervm -XX:CompileThreshold=100 -Xbatch -Xss224k StackOverflowBug */ diff -r 3c7d67df8d07 -r f9a80a035a4a test/compiler/7103261/Test7103261.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/7103261/Test7103261.java Tue Nov 15 12:40:55 2011 -0500 @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 7103261 + * @summary crash with jittester on sparc + * + * @run main Test7103261 + */ + +// exercise implicit null checking in the compiler for various field types +public class Test7103261 { + static Test7103261 null_value; + static Test7103261 nonnull_value = new Test7103261(); + static Test7103261 nonnull_value2 = new Test7103261(); + + long l; + int i; + float f; + double d; + byte b; + char c; + short s; + boolean z; + + public static void main(String[] args) { + constantStore(); + valueTest(false); + valueTest(true); + } + static void constantStore() { + for (int field = 0; field < 8; field++) { + try { + Test7103261 o = nonnull_value; + for (int i = 0; i < 100000; i++) { + switch (field) { + case 0: o.l = 0; break; + case 1: o.i = 0; break; + case 2: o.f = 0; break; + case 3: o.d = 0; break; + case 4: o.b = 0; break; + case 5: o.c = 0; break; + case 6: o.s = 0; break; + case 7: o.z = false; break; + default: throw new InternalError(); + } + if (i == 90000) { + // hide nullness from optimizer + o = null_value; + } + } + } catch (NullPointerException npe) { + } + } + } + static void valueTest(boolean store) { + for (int field = 0; field < 8; field++) { + try { + Test7103261 o = nonnull_value; + Test7103261 o2 = nonnull_value2; + for (int i = 0; i < 100000; i++) { + switch (field) { + case 0: o.l = o2.l; break; + case 1: o.i = o2.i; break; + case 2: o.f = o2.f; break; + case 3: o.d = o2.d; break; + case 4: o.b = o2.b; break; + case 5: o.c = o2.c; break; + case 6: o.s = o2.s; break; + case 7: o.z = o2.z; break; + default: throw new 
InternalError(); + } + if (i == 90000) { + // hide nullness from optimizer + if (store) + o = null_value; + else + o2 = null_value; + } + } + } catch (NullPointerException npe) { + } + } + } +}