# HG changeset patch # User trims # Date 1236906996 25200 # Node ID 7bb995fbd3c037dafbe7b4f956a029dedd9cf9c2 # Parent ce2272390558acf9c43039a4aec409da2af13ad7# Parent fcf566137dbf7a5765ce8091eae26dc8e824a57c Merge diff -r ce2272390558 -r 7bb995fbd3c0 make/linux/makefiles/adlc.make --- a/make/linux/makefiles/adlc.make Mon Mar 09 13:34:00 2009 -0700 +++ b/make/linux/makefiles/adlc.make Thu Mar 12 18:16:36 2009 -0700 @@ -61,8 +61,8 @@ CPPFLAGS += -DASSERT # CFLAGS_WARN holds compiler options to suppress/enable warnings. -# Suppress warnings (for now) -CFLAGS_WARN = -w +# Compiler warnings are treated as errors +CFLAGS_WARN = -Werror CFLAGS += $(CFLAGS_WARN) OBJECTNAMES = \ diff -r ce2272390558 -r 7bb995fbd3c0 make/solaris/makefiles/adlc.make --- a/make/solaris/makefiles/adlc.make Mon Mar 09 13:34:00 2009 -0700 +++ b/make/solaris/makefiles/adlc.make Thu Mar 12 18:16:36 2009 -0700 @@ -67,6 +67,8 @@ endif # CFLAGS_WARN holds compiler options to suppress/enable warnings. +# Compiler warnings are treated as errors +CFLAGS_WARN = +w -errwarn CFLAGS += $(CFLAGS_WARN) ifeq ("${Platform_compiler}", "sparcWorks") diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/sparc/vm/assembler_sparc.cpp --- a/src/cpu/sparc/vm/assembler_sparc.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -2615,6 +2615,158 @@ } } +RegisterConstant MacroAssembler::delayed_value(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) + return RegisterConstant(value + offset); + + // load indirectly to solve generation ordering problem + Address a(tmp, (address) delayed_value_addr); + load_ptr_contents(a, tmp); + +#ifdef ASSERT + tst(tmp); + breakpoint_trap(zero, xcc); +#endif + + if (offset != 0) + add(tmp, offset, tmp); + + return RegisterConstant(tmp); +} + + +void MacroAssembler::regcon_inc_ptr( RegisterConstant& dest, RegisterConstant src, Register temp ) { + assert(dest.register_or_noreg() != G0, "lost side effect"); + if ((src.is_constant() && src.as_constant() == 0) || + (src.is_register() && src.as_register() == G0)) { + // do nothing + } else if (dest.is_register()) { + add(dest.as_register(), ensure_rs2(src, temp), dest.as_register()); + } else if (src.is_constant()) { + intptr_t res = dest.as_constant() + src.as_constant(); + dest = RegisterConstant(res); // side effect seen by caller + } else { + assert(temp != noreg, "cannot handle constant += register"); + add(src.as_register(), ensure_rs2(dest, temp), temp); + dest = RegisterConstant(temp); // side effect seen by caller + } +} + +void MacroAssembler::regcon_sll_ptr( RegisterConstant& dest, RegisterConstant src, Register temp ) { + assert(dest.register_or_noreg() != G0, "lost side effect"); + if (!is_simm13(src.constant_or_zero())) + src = (src.as_constant() & 0xFF); + if ((src.is_constant() && src.as_constant() == 0) || + (src.is_register() && src.as_register() == G0)) { + // do nothing + } else if (dest.is_register()) { + sll_ptr(dest.as_register(), src, dest.as_register()); + } else if (src.is_constant()) { + intptr_t res = dest.as_constant() << src.as_constant(); + dest = RegisterConstant(res); // side effect seen by caller + } else { + assert(temp != noreg, "cannot handle constant <<= register"); + set(dest.as_constant(), temp); + sll_ptr(temp, src, temp); + dest = RegisterConstant(temp); // side effect seen by caller + } +} + + +// Look up the method for a megamorphic invokeinterface call. +// The target method is determined by . +// The receiver klass is in recv_klass. +// On success, the result will be in method_result, and execution falls through. +// On failure, execution transfers to the given label. +void MacroAssembler::lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterConstant itable_index, + Register method_result, + Register scan_temp, + Register sethi_temp, + Label& L_no_such_interface) { + assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); + assert(itable_index.is_constant() || itable_index.as_register() == method_result, + "caller must use same register for non-constant itable index as for method"); + + // Compute start of first itableOffsetEntry (which is at the end of the vtable) + int vtable_base = instanceKlass::vtable_start_offset() * wordSize; + int scan_step = itableOffsetEntry::size() * wordSize; + int vte_size = vtableEntry::size() * wordSize; + + lduw(recv_klass, instanceKlass::vtable_length_offset() * wordSize, scan_temp); + // %%% We should store the aligned, prescaled offset in the klassoop. + // Then the next several instructions would fold away. + + int round_to_unit = ((HeapWordsPerLong > 1) ? BytesPerLong : 0); + int itb_offset = vtable_base; + if (round_to_unit != 0) { + // hoist first instruction of round_to(scan_temp, BytesPerLong): + itb_offset += round_to_unit - wordSize; + } + int itb_scale = exact_log2(vtableEntry::size() * wordSize); + sll(scan_temp, itb_scale, scan_temp); + add(scan_temp, itb_offset, scan_temp); + if (round_to_unit != 0) { + // Round up to align_object_offset boundary + // see code for instanceKlass::start_of_itable! + // Was: round_to(scan_temp, BytesPerLong); + // Hoisted: add(scan_temp, BytesPerLong-1, scan_temp); + and3(scan_temp, -round_to_unit, scan_temp); + } + add(recv_klass, scan_temp, scan_temp); + + // Adjust recv_klass by scaled itable_index, so we can free itable_index. + RegisterConstant itable_offset = itable_index; + regcon_sll_ptr(itable_offset, exact_log2(itableMethodEntry::size() * wordSize)); + regcon_inc_ptr(itable_offset, itableMethodEntry::method_offset_in_bytes()); + add(recv_klass, ensure_rs2(itable_offset, sethi_temp), recv_klass); + + // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { + // if (scan->interface() == intf) { + // result = (klass + scan->offset() + itable_index); + // } + // } + Label search, found_method; + + for (int peel = 1; peel >= 0; peel--) { + // %%%% Could load both offset and interface in one ldx, if they were + // in the opposite order. This would save a load. + ld_ptr(scan_temp, itableOffsetEntry::interface_offset_in_bytes(), method_result); + + // Check that this entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + bpr(Assembler::rc_z, false, Assembler::pn, method_result, L_no_such_interface); + delayed()->cmp(method_result, intf_klass); + + if (peel) { + brx(Assembler::equal, false, Assembler::pt, found_method); + } else { + brx(Assembler::notEqual, false, Assembler::pn, search); + // (invert the test to fall through to found_method...) + } + delayed()->add(scan_temp, scan_step, scan_temp); + + if (!peel) break; + + bind(search); + } + + bind(found_method); + + // Got a hit. + int ito_offset = itableOffsetEntry::offset_offset_in_bytes(); + // scan_temp[-scan_step] points to the vtable offset we need + ito_offset -= scan_step; + lduw(scan_temp, ito_offset, scan_temp); + ld_ptr(recv_klass, scan_temp, method_result); +} + + void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg, Label& done, Label* slow_case, @@ -4057,6 +4209,24 @@ card_table_write(bs->byte_map_base, tmp, store_addr); } +// Loading values by size and signed-ness +void MacroAssembler::load_sized_value(Register s1, RegisterConstant s2, Register d, + int size_in_bytes, bool is_signed) { + switch (size_in_bytes ^ (is_signed ? -1 : 0)) { + case ~8: // fall through: + case 8: ld_long( s1, s2, d ); break; + case ~4: ldsw( s1, s2, d ); break; + case 4: lduw( s1, s2, d ); break; + case ~2: ldsh( s1, s2, d ); break; + case 2: lduh( s1, s2, d ); break; + case ~1: ldsb( s1, s2, d ); break; + case 1: ldub( s1, s2, d ); break; + default: ShouldNotReachHere(); + } +} + + + void MacroAssembler::load_klass(Register src_oop, Register klass) { // The number of bytes in this code is used by // MachCallDynamicJavaNode::ret_addr_offset() diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/sparc/vm/assembler_sparc.hpp --- a/src/cpu/sparc/vm/assembler_sparc.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -384,6 +384,12 @@ inline bool is_simm13(int offset = 0); // check disp+offset for overflow + Address plus_disp(int disp) const { // bump disp by a small amount + Address a = (*this); + a._disp += disp; + return a; + } + Address split_disp() const { // deal with disp overflow Address a = (*this); int hi_disp = _disp & ~0x3ff; @@ -1082,6 +1088,7 @@ inline void add( Register s1, Register s2, Register d ); inline void add( Register s1, int simm13a, Register d, relocInfo::relocType rtype = relocInfo::none); inline void add( Register s1, int simm13a, Register d, RelocationHolder const& rspec); + inline void add( Register s1, RegisterConstant s2, Register d, int offset = 0); inline void add( const Address& a, Register d, int offset = 0); void addcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); } @@ -1298,6 +1305,16 @@ inline void ld( const Address& a, Register d, int offset = 0 ); inline void ldd( const Address& a, Register d, int offset = 0 ); + inline void ldub( Register s1, RegisterConstant s2, Register d ); + inline void ldsb( Register s1, RegisterConstant s2, Register d ); + inline void lduh( Register s1, RegisterConstant s2, Register d ); + inline void ldsh( Register s1, RegisterConstant s2, Register d ); + inline void lduw( Register s1, RegisterConstant s2, Register d ); + inline void ldsw( Register s1, RegisterConstant s2, Register d ); + inline void ldx( Register s1, RegisterConstant s2, Register d ); + inline void ld( Register s1, RegisterConstant s2, Register d ); + inline void ldd( Register s1, RegisterConstant s2, Register d ); + // pp 177 void ldsba( Register s1, Register s2, int ia, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); } @@ -1518,6 +1535,13 @@ inline void st( Register d, const Address& a, int offset = 0 ); inline void std( Register d, const Address& a, int offset = 0 ); + inline void stb( Register d, Register s1, RegisterConstant s2 ); + inline void sth( Register d, Register s1, RegisterConstant s2 ); + inline void stw( Register d, Register s1, RegisterConstant s2 ); + inline void stx( Register d, Register s1, RegisterConstant s2 ); + inline void std( Register d, Register s1, RegisterConstant s2 ); + inline void st( Register d, Register s1, RegisterConstant s2 ); + // pp 177 void stba( Register d, Register s1, Register s2, int ia ) { emit_long( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); } @@ -1835,6 +1859,7 @@ // Functions for isolating 64 bit shifts for LP64 inline void sll_ptr( Register s1, Register s2, Register d ); inline void sll_ptr( Register s1, int imm6a, Register d ); + inline void sll_ptr( Register s1, RegisterConstant s2, Register d ); inline void srl_ptr( Register s1, Register s2, Register d ); inline void srl_ptr( Register s1, int imm6a, Register d ); @@ -1940,20 +1965,47 @@ // st_ptr will perform st for 32 bit VM's and stx for 64 bit VM's inline void ld_ptr( Register s1, Register s2, Register d ); inline void ld_ptr( Register s1, int simm13a, Register d); + inline void ld_ptr( Register s1, RegisterConstant s2, Register d ); inline void ld_ptr( const Address& a, Register d, int offset = 0 ); inline void st_ptr( Register d, Register s1, Register s2 ); inline void st_ptr( Register d, Register s1, int simm13a); + inline void st_ptr( Register d, Register s1, RegisterConstant s2 ); inline void st_ptr( Register d, const Address& a, int offset = 0 ); // ld_long will perform ld for 32 bit VM's and ldx for 64 bit VM's // st_long will perform st for 32 bit VM's and stx for 64 bit VM's inline void ld_long( Register s1, Register s2, Register d ); inline void ld_long( Register s1, int simm13a, Register d ); + inline void ld_long( Register s1, RegisterConstant s2, Register d ); inline void ld_long( const Address& a, Register d, int offset = 0 ); inline void st_long( Register d, Register s1, Register s2 ); inline void st_long( Register d, Register s1, int simm13a ); + inline void st_long( Register d, Register s1, RegisterConstant s2 ); inline void st_long( Register d, const Address& a, int offset = 0 ); + // Loading values by size and signed-ness + void load_sized_value(Register s1, RegisterConstant s2, Register d, + int size_in_bytes, bool is_signed); + + // Helpers for address formation. + // They update the dest in place, whether it is a register or constant. + // They emit no code at all if src is a constant zero. + // If dest is a constant and src is a register, the temp argument + // is required, and becomes the result. + // If dest is a register and src is a non-simm13 constant, + // the temp argument is required, and is used to materialize the constant. + void regcon_inc_ptr( RegisterConstant& dest, RegisterConstant src, + Register temp = noreg ); + void regcon_sll_ptr( RegisterConstant& dest, RegisterConstant src, + Register temp = noreg ); + RegisterConstant ensure_rs2(RegisterConstant rs2, Register sethi_temp) { + guarantee(sethi_temp != noreg, "constant offset overflow"); + if (is_simm13(rs2.constant_or_zero())) + return rs2; // register or short constant + set(rs2.as_constant(), sethi_temp); + return sethi_temp; + } + // -------------------------------------------------- public: @@ -2267,6 +2319,14 @@ ); void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterConstant itable_index, + Register method_result, + Register temp_reg, Register temp2_reg, + Label& no_such_interface); + // Stack overflow checking // Note: this clobbers G3_scratch @@ -2281,6 +2341,8 @@ // stack overflow + shadow pages. Clobbers tsp and scratch registers. void bang_stack_size(Register Rsize, Register Rtsp, Register Rscratch); + virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr, Register tmp, int offset); + void verify_tlab(); Condition negate_condition(Condition cond); diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/sparc/vm/assembler_sparc.inline.hpp --- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -143,6 +143,49 @@ inline void Assembler::ld( Register s1, int simm13a, Register d) { lduw( s1, simm13a, d); } #endif +inline void Assembler::ldub( Register s1, RegisterConstant s2, Register d) { + if (s2.is_register()) ldsb(s1, s2.as_register(), d); + else ldsb(s1, s2.as_constant(), d); +} +inline void Assembler::ldsb( Register s1, RegisterConstant s2, Register d) { + if (s2.is_register()) ldsb(s1, s2.as_register(), d); + else ldsb(s1, s2.as_constant(), d); +} +inline void Assembler::lduh( Register s1, RegisterConstant s2, Register d) { + if (s2.is_register()) ldsh(s1, s2.as_register(), d); + else ldsh(s1, s2.as_constant(), d); +} +inline void Assembler::ldsh( Register s1, RegisterConstant s2, Register d) { + if (s2.is_register()) ldsh(s1, s2.as_register(), d); + else ldsh(s1, s2.as_constant(), d); +} +inline void Assembler::lduw( Register s1, RegisterConstant s2, Register d) { + if (s2.is_register()) ldsw(s1, s2.as_register(), d); + else ldsw(s1, s2.as_constant(), d); +} +inline void Assembler::ldsw( Register s1, RegisterConstant s2, Register d) { + if (s2.is_register()) ldsw(s1, s2.as_register(), d); + else ldsw(s1, s2.as_constant(), d); +} +inline void Assembler::ldx( Register s1, RegisterConstant s2, Register d) { + if (s2.is_register()) ldx(s1, s2.as_register(), d); + else ldx(s1, s2.as_constant(), d); +} +inline void Assembler::ld( Register s1, RegisterConstant s2, Register d) { + if (s2.is_register()) ld(s1, s2.as_register(), d); + else ld(s1, s2.as_constant(), d); +} +inline void Assembler::ldd( Register s1, RegisterConstant s2, Register d) { + if (s2.is_register()) ldd(s1, s2.as_register(), d); + else ldd(s1, s2.as_constant(), d); +} + +// form effective addresses this way: +inline void Assembler::add( Register s1, RegisterConstant s2, Register d, int offset) { + if (s2.is_register()) add(s1, s2.as_register(), d); + else { add(s1, s2.as_constant() + offset, d); offset = 0; } + if (offset != 0) add(d, offset, d); +} inline void Assembler::ld( const Address& a, Register d, int offset ) { relocate(a.rspec(offset)); ld( a.base(), a.disp() + offset, d ); } inline void Assembler::ldsb( const Address& a, Register d, int offset ) { relocate(a.rspec(offset)); ldsb( a.base(), a.disp() + offset, d ); } @@ -200,6 +243,27 @@ inline void Assembler::st( Register d, Register s1, Register s2) { stw(d, s1, s2); } inline void Assembler::st( Register d, Register s1, int simm13a) { stw(d, s1, simm13a); } +inline void Assembler::stb( Register d, Register s1, RegisterConstant s2) { + if (s2.is_register()) stb(d, s1, s2.as_register()); + else stb(d, s1, s2.as_constant()); +} +inline void Assembler::sth( Register d, Register s1, RegisterConstant s2) { + if (s2.is_register()) sth(d, s1, s2.as_register()); + else sth(d, s1, s2.as_constant()); +} +inline void Assembler::stx( Register d, Register s1, RegisterConstant s2) { + if (s2.is_register()) stx(d, s1, s2.as_register()); + else stx(d, s1, s2.as_constant()); +} +inline void Assembler::std( Register d, Register s1, RegisterConstant s2) { + if (s2.is_register()) std(d, s1, s2.as_register()); + else std(d, s1, s2.as_constant()); +} +inline void Assembler::st( Register d, Register s1, RegisterConstant s2) { + if (s2.is_register()) st(d, s1, s2.as_register()); + else st(d, s1, s2.as_constant()); +} + inline void Assembler::stb( Register d, const Address& a, int offset) { relocate(a.rspec(offset)); stb( d, a.base(), a.disp() + offset); } inline void Assembler::sth( Register d, const Address& a, int offset) { relocate(a.rspec(offset)); sth( d, a.base(), a.disp() + offset); } inline void Assembler::stw( Register d, const Address& a, int offset) { relocate(a.rspec(offset)); stw( d, a.base(), a.disp() + offset); } @@ -244,6 +308,14 @@ #endif } +inline void MacroAssembler::ld_ptr( Register s1, RegisterConstant s2, Register d ) { +#ifdef _LP64 + Assembler::ldx( s1, s2, d); +#else + Assembler::ld( s1, s2, d); +#endif +} + inline void MacroAssembler::ld_ptr( const Address& a, Register d, int offset ) { #ifdef _LP64 Assembler::ldx( a, d, offset ); @@ -268,6 +340,14 @@ #endif } +inline void MacroAssembler::st_ptr( Register d, Register s1, RegisterConstant s2 ) { +#ifdef _LP64 + Assembler::stx( d, s1, s2); +#else + Assembler::st( d, s1, s2); +#endif +} + inline void MacroAssembler::st_ptr( Register d, const Address& a, int offset) { #ifdef _LP64 Assembler::stx( d, a, offset); @@ -293,6 +373,14 @@ #endif } +inline void MacroAssembler::ld_long( Register s1, RegisterConstant s2, Register d ) { +#ifdef _LP64 + Assembler::ldx(s1, s2, d); +#else + Assembler::ldd(s1, s2, d); +#endif +} + inline void MacroAssembler::ld_long( const Address& a, Register d, int offset ) { #ifdef _LP64 Assembler::ldx(a, d, offset ); @@ -317,6 +405,14 @@ #endif } +inline void MacroAssembler::st_long( Register d, Register s1, RegisterConstant s2 ) { +#ifdef _LP64 + Assembler::stx(d, s1, s2); +#else + Assembler::std(d, s1, s2); +#endif +} + inline void MacroAssembler::st_long( Register d, const Address& a, int offset ) { #ifdef _LP64 Assembler::stx(d, a, offset); @@ -359,6 +455,11 @@ #endif } +inline void MacroAssembler::sll_ptr( Register s1, RegisterConstant s2, Register d ) { + if (s2.is_register()) sll_ptr(s1, s2.as_register(), d); + else sll_ptr(s1, s2.as_constant(), d); +} + // Use the right branch for the platform inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) { diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/sparc/vm/interp_masm_sparc.cpp --- a/src/cpu/sparc/vm/interp_masm_sparc.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -2465,7 +2465,10 @@ // InterpreterRuntime::post_method_entry(); // } // if (DTraceMethodProbes) { -// SharedRuntime::dtrace_method_entry(method, reciever); +// SharedRuntime::dtrace_method_entry(method, receiver); +// } +// if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { +// SharedRuntime::rc_trace_method_entry(method, receiver); // } void InterpreterMacroAssembler::notify_method_entry() { @@ -2497,6 +2500,13 @@ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), G2_thread, Lmethod); } + + // RedefineClasses() tracing support for obsolete method entry + if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { + call_VM_leaf(noreg, + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + G2_thread, Lmethod); + } } diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/sparc/vm/nativeInst_sparc.hpp --- a/src/cpu/sparc/vm/nativeInst_sparc.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/sparc/vm/nativeInst_sparc.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -243,7 +243,7 @@ // Regenerate the instruction sequence that performs the 64 bit // sethi. This only does the sethi. The disp field (bottom 10 bits) - // must be handled seperately. + // must be handled separately. static void set_data64_sethi(address instaddr, intptr_t x); // combine the fields of a sethi/simm13 pair (simm13 = or, add, jmpl, ld/st) diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/sparc/vm/sharedRuntime_sparc.cpp --- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -2161,6 +2161,18 @@ __ restore(); } + // RedefineClasses() tracing support for obsolete method entry + if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { + // create inner frame + __ save_frame(0); + __ mov(G2_thread, L7_thread_cache); + __ set_oop_constant(JNIHandles::make_local(method()), O1); + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + G2_thread, O1); + __ restore(); + } + // We are in the jni frame unless saved_frame is true in which case // we are in one frame deeper (the "inner" frame). If we are in the // "inner" frames the args are in the Iregs and if the jni frame then diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/sparc/vm/sparc.ad --- a/src/cpu/sparc/vm/sparc.ad Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/sparc/vm/sparc.ad Thu Mar 12 18:16:36 2009 -0700 @@ -189,7 +189,7 @@ // double fp register numbers. FloatRegisterImpl in register_sparc.hpp // wants 0-63, so we have to convert every time we want to use fp regs // with the macroassembler, using reg_to_DoubleFloatRegister_object(). -// 255 is a flag meaning 'dont go here'. +// 255 is a flag meaning "don't go here". // I believe we can't handle callee-save doubles D32 and up until // the place in the sparc stack crawler that asserts on the 255 is // fixed up. @@ -462,7 +462,7 @@ // Macros to extract hi & lo halves from a long pair. // G0 is not part of any long pair, so assert on that. -// Prevents accidently using G1 instead of G0. +// Prevents accidentally using G1 instead of G0. #define LONG_HI_REG(x) (x) #define LONG_LO_REG(x) (x) @@ -1431,7 +1431,7 @@ #ifndef _LP64 // In the LP64 build, all registers can be moved as aligned/adjacent - // pairs, so there's never any need to move the high bits seperately. + // pairs, so there's never any need to move the high bits separately. // The 32-bit builds have to deal with the 32-bit ABI which can force // all sorts of silly alignment problems. @@ -1624,7 +1624,7 @@ Register temp_reg = G3; assert( G5_ic_reg != temp_reg, "conflicting registers" ); - // Load klass from reciever + // Load klass from receiver __ load_klass(O0, temp_reg); // Compare against expected klass __ cmp(temp_reg, G5_ic_reg); @@ -4149,7 +4149,7 @@ //----------OPERAND CLASSES---------------------------------------------------- // Operand Classes are groups of operands that are used to simplify -// instruction definitions by not requiring the AD writer to specify seperate +// instruction definitions by not requiring the AD writer to specify separate // instructions for every form of operand when the instruction accepts // multiple operand types with the same basic encoding and format. The classic // case of this is memory operands. @@ -5286,55 +5286,91 @@ ins_cost(MEMORY_REF_COST); size(4); - format %{ "LDSB $mem,$dst" %} + format %{ "LDSB $mem,$dst\t! byte" %} + opcode(Assembler::ldsb_op3); + ins_encode(simple_form3_mem_reg( mem, dst ) ); + ins_pipe(iload_mask_mem); +%} + +// Load Byte (8bit signed) into a Long Register +instruct loadB2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadB mem))); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDSB $mem,$dst\t! byte -> long" %} opcode(Assembler::ldsb_op3); ins_encode(simple_form3_mem_reg( mem, dst ) ); ins_pipe(iload_mask_mem); %} -// Load Byte (8bit UNsigned) into an int reg -instruct loadUB(iRegI dst, memory mem, immI_255 bytemask) %{ - match(Set dst (AndI (LoadB mem) bytemask)); +// Load Unsigned Byte (8bit UNsigned) into an int reg +instruct loadUB(iRegI dst, memory mem) %{ + match(Set dst (LoadUB mem)); ins_cost(MEMORY_REF_COST); size(4); - format %{ "LDUB $mem,$dst" %} + format %{ "LDUB $mem,$dst\t! ubyte" %} + opcode(Assembler::ldub_op3); + ins_encode(simple_form3_mem_reg( mem, dst ) ); + ins_pipe(iload_mask_mem); +%} + +// Load Unsigned Byte (8bit UNsigned) into a Long Register +instruct loadUB2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUB mem))); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUB $mem,$dst\t! ubyte -> long" %} opcode(Assembler::ldub_op3); ins_encode(simple_form3_mem_reg( mem, dst ) ); ins_pipe(iload_mask_mem); %} -// Load Byte (8bit UNsigned) into a Long Register -instruct loadUBL(iRegL dst, memory mem, immL_FF bytemask) %{ - match(Set dst (AndL (ConvI2L (LoadB mem)) bytemask)); +// Load Short (16bit signed) +instruct loadS(iRegI dst, memory mem) %{ + match(Set dst (LoadS mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDSH $mem,$dst\t! short" %} + opcode(Assembler::ldsh_op3); + ins_encode(simple_form3_mem_reg( mem, dst ) ); + ins_pipe(iload_mask_mem); +%} + +// Load Short (16bit signed) into a Long Register +instruct loadS2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadS mem))); ins_cost(MEMORY_REF_COST); size(4); - format %{ "LDUB $mem,$dst" %} - opcode(Assembler::ldub_op3); + format %{ "LDSH $mem,$dst\t! short -> long" %} + opcode(Assembler::ldsh_op3); + ins_encode(simple_form3_mem_reg( mem, dst ) ); + ins_pipe(iload_mask_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) +instruct loadUS(iRegI dst, memory mem) %{ + match(Set dst (LoadUS mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUH $mem,$dst\t! ushort/char" %} + opcode(Assembler::lduh_op3); ins_encode(simple_form3_mem_reg( mem, dst ) ); ins_pipe(iload_mask_mem); %} // Load Unsigned Short/Char (16bit UNsigned) into a Long Register -instruct loadUS2L(iRegL dst, memory mem, immL_FFFF bytemask) %{ - match(Set dst (AndL (ConvI2L (LoadUS mem)) bytemask)); +instruct loadUS2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUS mem))); ins_cost(MEMORY_REF_COST); size(4); - format %{ "LDUH $mem,$dst" %} - opcode(Assembler::lduh_op3); - ins_encode(simple_form3_mem_reg( mem, dst ) ); - ins_pipe(iload_mask_mem); -%} - -// Load Unsigned Short/Char (16bit unsigned) -instruct loadUS(iRegI dst, memory mem) %{ - match(Set dst (LoadUS mem)); - ins_cost(MEMORY_REF_COST); - - size(4); - format %{ "LDUH $mem,$dst" %} + format %{ "LDUH $mem,$dst\t! ushort/char -> long" %} opcode(Assembler::lduh_op3); ins_encode(simple_form3_mem_reg( mem, dst ) ); ins_pipe(iload_mask_mem); @@ -5344,9 +5380,33 @@ instruct loadI(iRegI dst, memory mem) %{ match(Set dst (LoadI mem)); ins_cost(MEMORY_REF_COST); - size(4); - - format %{ "LDUW $mem,$dst" %} + + size(4); + format %{ "LDUW $mem,$dst\t! int" %} + opcode(Assembler::lduw_op3); + ins_encode(simple_form3_mem_reg( mem, dst ) ); + ins_pipe(iload_mem); +%} + +// Load Integer into a Long Register +instruct loadI2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadI mem))); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDSW $mem,$dst\t! int -> long" %} + opcode(Assembler::ldsw_op3); + ins_encode(simple_form3_mem_reg( mem, dst ) ); + ins_pipe(iload_mem); +%} + +// Load Unsigned Integer into a Long Register +instruct loadUI2L(iRegL dst, memory mem) %{ + match(Set dst (LoadUI2L mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDUW $mem,$dst\t! uint -> long" %} opcode(Assembler::lduw_op3); ins_encode(simple_form3_mem_reg( mem, dst ) ); ins_pipe(iload_mem); @@ -5356,6 +5416,7 @@ instruct loadL(iRegL dst, memory mem ) %{ match(Set dst (LoadL mem)); ins_cost(MEMORY_REF_COST); + size(4); format %{ "LDX $mem,$dst\t! long" %} opcode(Assembler::ldx_op3); @@ -5471,13 +5532,11 @@ format %{ "LDUW $mem,$dst\t! compressed ptr" %} ins_encode %{ - Register base = as_Register($mem$$base); - Register index = as_Register($mem$$index); - Register dst = $dst$$Register; + Register index = $mem$$index$$Register; if (index != G0) { - __ lduw(base, index, dst); + __ lduw($mem$$base$$Register, index, $dst$$Register); } else { - __ lduw(base, $mem$$disp, dst); + __ lduw($mem$$base$$Register, $mem$$disp, $dst$$Register); } %} ins_pipe(iload_mem); @@ -5521,18 +5580,6 @@ ins_pipe(iload_mem); %} -// Load Short (16bit signed) -instruct loadS(iRegI dst, memory mem) %{ - match(Set dst (LoadS mem)); - ins_cost(MEMORY_REF_COST); - - size(4); - format %{ "LDSH $mem,$dst" %} - opcode(Assembler::ldsh_op3); - ins_encode(simple_form3_mem_reg( mem, dst ) ); - ins_pipe(iload_mask_mem); -%} - // Load Double instruct loadD(regD dst, memory mem) %{ match(Set dst (LoadD mem)); @@ -6847,7 +6894,7 @@ ins_pipe(sdiv_reg_reg); %} -// Magic constant, reciprical of 10 +// Magic constant, reciprocal of 10 instruct loadConI_x66666667(iRegIsafe dst) %{ effect( DEF dst ); @@ -6857,7 +6904,7 @@ ins_pipe(ialu_hi_lo_reg); %} -// Register Shift Right Arithmatic Long by 32-63 +// Register Shift Right Arithmetic Long by 32-63 instruct sra_31( iRegI dst, iRegI src ) %{ effect( DEF dst, USE src ); format %{ "SRA $src,31,$dst\t! Used in div-by-10" %} @@ -9048,7 +9095,7 @@ // These must follow all instruction definitions as they use the names // defined in the instructions definitions. // -// peepmatch ( root_instr_name [preceeding_instruction]* ); +// peepmatch ( root_instr_name [preceding_instruction]* ); // // peepconstraint %{ // (instruction_number.operand_name relational_op instruction_number.operand_name diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/sparc/vm/templateTable_sparc.cpp --- a/src/cpu/sparc/vm/templateTable_sparc.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/sparc/vm/templateTable_sparc.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1545,7 +1545,7 @@ // Handle all the JSR stuff here, then exit. // It's much shorter and cleaner than intermingling with the - // non-JSR normal-branch stuff occuring below. + // non-JSR normal-branch stuff occurring below. if( is_jsr ) { // compute return address as bci in Otos_i __ ld_ptr(Address(Lmethod, 0, in_bytes(methodOopDesc::const_offset())), G3_scratch); @@ -3079,7 +3079,7 @@ Label ok; // Check that entry is non-null. Null entries are probably a bytecode - // problem. If the interface isn't implemented by the reciever class, + // problem. If the interface isn't implemented by the receiver class, // the VM should throw IncompatibleClassChangeError. linkResolver checks // this too but that's only if the entry isn't already resolved, so we // need to check again. diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/sparc/vm/vtableStubs_sparc.cpp --- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -106,6 +106,15 @@ __ delayed()->nop(); masm->flush(); + + if (PrintMiscellaneous && (WizardMode || Verbose)) { + tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d", + vtable_index, s->entry_point(), + (int)(s->code_end() - s->entry_point()), + (int)(s->code_end() - __ pc())); + } + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + s->set_exception_points(npe_addr, ame_addr); return s; } @@ -113,9 +122,9 @@ // NOTE: %%%% if any change is made to this stub make sure that the function // pd_code_size_limit is changed to ensure the correct size for VtableStub -VtableStub* VtableStubs::create_itable_stub(int vtable_index) { +VtableStub* VtableStubs::create_itable_stub(int itable_index) { const int sparc_code_length = VtableStub::pd_code_size_limit(false); - VtableStub* s = new(sparc_code_length) VtableStub(false, vtable_index); + VtableStub* s = new(sparc_code_length) VtableStub(false, itable_index); ResourceMark rm; CodeBuffer cb(s->entry_point(), sparc_code_length); MacroAssembler* masm = new MacroAssembler(&cb); @@ -139,7 +148,6 @@ // are passed in the %o registers. Instead, longs are passed in G1 and G4 // and so those registers are not available here. __ save(SP,-frame::register_save_words*wordSize,SP); - Register I0_receiver = I0; // Location of receiver after save #ifndef PRODUCT if (CountCompiledCalls) { @@ -151,63 +159,31 @@ } #endif /* PRODUCT */ - // load start of itable entries into L0 register - const int base = instanceKlass::vtable_start_offset() * wordSize; - __ ld(Address(G3_klassOop, 0, instanceKlass::vtable_length_offset() * wordSize), L0); - - // %%% Could store the aligned, prescaled offset in the klassoop. - __ sll(L0, exact_log2(vtableEntry::size() * wordSize), L0); - // see code for instanceKlass::start_of_itable! - const int vtable_alignment = align_object_offset(1); - assert(vtable_alignment == 1 || vtable_alignment == 2, ""); - const int odd_bit = vtableEntry::size() * wordSize; - if (vtable_alignment == 2) { - __ and3(L0, odd_bit, L1); // isolate the odd bit - } - __ add(G3_klassOop, L0, L0); - if (vtable_alignment == 2) { - __ add(L0, L1, L0); // double the odd bit, to align up - } + Label throw_icce; - // Loop over all itable entries until desired interfaceOop (G5_interface) found - __ bind(search); - - // %%%% Could load both offset and interface in one ldx, if they were - // in the opposite order. This would save a load. - __ ld_ptr(L0, base + itableOffsetEntry::interface_offset_in_bytes(), L1); - - // If the entry is NULL then we've reached the end of the table - // without finding the expected interface, so throw an exception - Label throw_icce; - __ bpr(Assembler::rc_z, false, Assembler::pn, L1, throw_icce); - __ delayed()->cmp(G5_interface, L1); - __ brx(Assembler::notEqual, true, Assembler::pn, search); - __ delayed()->add(L0, itableOffsetEntry::size() * wordSize, L0); - - // entry found and L0 points to it, move offset of vtable for interface into L0 - __ ld(L0, base + itableOffsetEntry::offset_offset_in_bytes(), L0); - - // Compute itableMethodEntry and get methodOop(G5_method) and entrypoint(L0) for compiler - const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) + itableMethodEntry::method_offset_in_bytes(); - __ add(G3_klassOop, L0, L1); - __ ld_ptr(L1, method_offset, G5_method); + Register L5_method = L5; + __ lookup_interface_method(// inputs: rec. class, interface, itable index + G3_klassOop, G5_interface, itable_index, + // outputs: method, scan temp. reg + L5_method, L2, L3, + throw_icce); #ifndef PRODUCT if (DebugVtables) { Label L01; - __ ld_ptr(L1, method_offset, G5_method); - __ bpr(Assembler::rc_nz, false, Assembler::pt, G5_method, L01); + __ bpr(Assembler::rc_nz, false, Assembler::pt, L5_method, L01); __ delayed()->nop(); __ stop("methodOop is null"); __ bind(L01); - __ verify_oop(G5_method); + __ verify_oop(L5_method); } #endif // If the following load is through a NULL pointer, we'll take an OS // exception that should translate into an AbstractMethodError. We need the // window count to be correct at that time. - __ restore(); // Restore registers BEFORE the AME point + __ restore(L5_method, 0, G5_method); + // Restore registers *before* the AME point. address ame_addr = __ pc(); // if the vtable entry is null, the method is abstract __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3_scratch); @@ -225,6 +201,12 @@ masm->flush(); + if (PrintMiscellaneous && (WizardMode || Verbose)) { + tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d", + itable_index, s->entry_point(), + (int)(s->code_end() - s->entry_point()), + (int)(s->code_end() - __ pc())); + } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); s->set_exception_points(npe_addr, ame_addr); @@ -243,8 +225,7 @@ (UseCompressedOops ? 2*BytesPerInstWord : 0); return basic + slop; } else { - // save, ld, ld, sll, and, add, add, ld, cmp, br, add, ld, add, ld, ld, jmp, restore, sethi, jmpl, restore - const int basic = (20 LP64_ONLY(+ 6)) * BytesPerInstWord + + const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord + // shift;add for load_klass (UseCompressedOops ? 2*BytesPerInstWord : 0); return (basic + slop); diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/assembler_x86.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -129,13 +129,19 @@ // Convert the raw encoding form into the form expected by the constructor for // Address. An index of 4 (rsp) corresponds to having no index, so convert // that to noreg for the Address constructor. -Address Address::make_raw(int base, int index, int scale, int disp) { +Address Address::make_raw(int base, int index, int scale, int disp, bool disp_is_oop) { + RelocationHolder rspec; + if (disp_is_oop) { + rspec = Relocation::spec_simple(relocInfo::oop_type); + } bool valid_index = index != rsp->encoding(); if (valid_index) { Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); + madr._rspec = rspec; return madr; } else { Address madr(as_Register(base), noreg, Address::no_scale, in_ByteSize(disp)); + madr._rspec = rspec; return madr; } } @@ -3892,6 +3898,21 @@ emit_operand(src, dst); } +void Assembler::movsbq(Register dst, Address src) { + InstructionMark im(this); + prefixq(src, dst); + emit_byte(0x0F); + emit_byte(0xBE); + emit_operand(dst, src); +} + +void Assembler::movsbq(Register dst, Register src) { + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBE); + emit_byte(0xC0 | encode); +} + void Assembler::movslq(Register dst, int32_t imm32) { // dbx shows movslq(rcx, 3) as movq $0x0000000049000000,(%rbx) // and movslq(r8, 3); as movl $0x0000000048000000,(%rbx) @@ -3925,6 +3946,51 @@ emit_byte(0xC0 | encode); } +void Assembler::movswq(Register dst, Address src) { + InstructionMark im(this); + prefixq(src, dst); + emit_byte(0x0F); + emit_byte(0xBF); + emit_operand(dst, src); +} + +void Assembler::movswq(Register dst, Register src) { + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xBF); + emit_byte(0xC0 | encode); +} + +void Assembler::movzbq(Register dst, Address src) { + InstructionMark im(this); + prefixq(src, dst); + emit_byte(0x0F); + emit_byte(0xB6); + emit_operand(dst, src); +} + +void Assembler::movzbq(Register dst, Register src) { + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xB6); + emit_byte(0xC0 | encode); +} + +void Assembler::movzwq(Register dst, Address src) { + InstructionMark im(this); + prefixq(src, dst); + emit_byte(0x0F); + emit_byte(0xB7); + emit_operand(dst, src); +} + +void Assembler::movzwq(Register dst, Register src) { + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xB7); + emit_byte(0xC0 | encode); +} + void Assembler::negq(Register dst) { int encode = prefixq_and_encode(dst->encoding()); emit_byte(0xF7); @@ -6197,8 +6263,11 @@ return off; } -// word => int32 which seems bad for 64bit -int MacroAssembler::load_signed_word(Register dst, Address src) { +// Note: load_signed_short used to be called load_signed_word. +// Although the 'w' in x86 opcodes refers to the term "word" in the assembler +// manual, which means 16 bits, that usage is found nowhere in HotSpot code. +// The term "word" in HotSpot means a 32- or 64-bit machine word. +int MacroAssembler::load_signed_short(Register dst, Address src) { int off; if (LP64_ONLY(true ||) VM_Version::is_P6()) { // This is dubious to me since it seems safe to do a signed 16 => 64 bit @@ -6207,7 +6276,7 @@ off = offset(); movswl(dst, src); // movsxw } else { - off = load_unsigned_word(dst, src); + off = load_unsigned_short(dst, src); shll(dst, 16); sarl(dst, 16); } @@ -6229,7 +6298,8 @@ return off; } -int MacroAssembler::load_unsigned_word(Register dst, Address src) { +// Note: load_unsigned_short used to be called load_unsigned_word. +int MacroAssembler::load_unsigned_short(Register dst, Address src) { // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, // and "3.9 Partial Register Penalties", p. 22). int off; @@ -6244,6 +6314,28 @@ return off; } +void MacroAssembler::load_sized_value(Register dst, Address src, + int size_in_bytes, bool is_signed) { + switch (size_in_bytes ^ (is_signed ? -1 : 0)) { +#ifndef _LP64 + // For case 8, caller is responsible for manually loading + // the second word into another register. + case ~8: // fall through: + case 8: movl( dst, src ); break; +#else + case ~8: // fall through: + case 8: movq( dst, src ); break; +#endif + case ~4: // fall through: + case 4: movl( dst, src ); break; + case ~2: load_signed_short( dst, src ); break; + case 2: load_unsigned_short( dst, src ); break; + case ~1: load_signed_byte( dst, src ); break; + case 1: load_unsigned_byte( dst, src ); break; + default: ShouldNotReachHere(); + } +} + void MacroAssembler::mov32(AddressLiteral dst, Register src) { if (reachable(dst)) { movl(as_Address(dst), src); @@ -6463,7 +6555,8 @@ Address index(noreg, tmp, Address::times_1); ExternalAddress page(os::get_memory_serialize_page()); - movptr(ArrayAddress(page, index), tmp); + // Size of store must match masking code above + movl(as_Address(ArrayAddress(page, index)), tmp); } // Calls to C land @@ -7049,6 +7142,81 @@ } +// Look up the method for a megamorphic invokeinterface call. +// The target method is determined by . +// The receiver klass is in recv_klass. +// On success, the result will be in method_result, and execution falls through. +// On failure, execution transfers to the given label. +void MacroAssembler::lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterConstant itable_index, + Register method_result, + Register scan_temp, + Label& L_no_such_interface) { + assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); + assert(itable_index.is_constant() || itable_index.as_register() == method_result, + "caller must use same register for non-constant itable index as for method"); + + // Compute start of first itableOffsetEntry (which is at the end of the vtable) + int vtable_base = instanceKlass::vtable_start_offset() * wordSize; + int itentry_off = itableMethodEntry::method_offset_in_bytes(); + int scan_step = itableOffsetEntry::size() * wordSize; + int vte_size = vtableEntry::size() * wordSize; + Address::ScaleFactor times_vte_scale = Address::times_ptr; + assert(vte_size == wordSize, "else adjust times_vte_scale"); + + movl(scan_temp, Address(recv_klass, instanceKlass::vtable_length_offset() * wordSize)); + + // %%% Could store the aligned, prescaled offset in the klassoop. + lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); + if (HeapWordsPerLong > 1) { + // Round up to align_object_offset boundary + // see code for instanceKlass::start_of_itable! + round_to(scan_temp, BytesPerLong); + } + + // Adjust recv_klass by scaled itable_index, so we can free itable_index. + assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); + + // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { + // if (scan->interface() == intf) { + // result = (klass + scan->offset() + itable_index); + // } + // } + Label search, found_method; + + for (int peel = 1; peel >= 0; peel--) { + movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + cmpptr(intf_klass, method_result); + + if (peel) { + jccb(Assembler::equal, found_method); + } else { + jccb(Assembler::notEqual, search); + // (invert the test to fall through to found_method...) + } + + if (!peel) break; + + bind(search); + + // Check that the previous entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + testptr(method_result, method_result); + jcc(Assembler::zero, L_no_such_interface); + addptr(scan_temp, scan_step); + } + + bind(found_method); + + // Got a hit. + movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); + movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); +} + + void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { ucomisd(dst, as_Address(src)); } @@ -7094,6 +7262,31 @@ } +RegisterConstant MacroAssembler::delayed_value(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) + return RegisterConstant(value + offset); + + // load indirectly to solve generation ordering problem + movptr(tmp, ExternalAddress((address) delayed_value_addr)); + +#ifdef ASSERT + Label L; + testl(tmp, tmp); + jccb(Assembler::notZero, L); + hlt(); + bind(L); +#endif + + if (offset != 0) + addptr(tmp, offset); + + return RegisterConstant(tmp); +} + + void MacroAssembler::verify_oop_addr(Address addr, const char* s) { if (!VerifyOops) return; diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/assembler_x86.hpp --- a/src/cpu/x86/vm/assembler_x86.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/assembler_x86.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -153,6 +153,21 @@ times_8 = 3, times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4) }; + static ScaleFactor times(int size) { + assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); + if (size == 8) return times_8; + if (size == 4) return times_4; + if (size == 2) return times_2; + return times_1; + } + static int scale_size(ScaleFactor scale) { + assert(scale != no_scale, ""); + assert(((1 << (int)times_1) == 1 && + (1 << (int)times_2) == 2 && + (1 << (int)times_4) == 4 && + (1 << (int)times_8) == 8), ""); + return (1 << (int)scale); + } private: Register _base; @@ -197,6 +212,22 @@ "inconsistent address"); } + Address(Register base, RegisterConstant index, ScaleFactor scale = times_1, int disp = 0) + : _base (base), + _index(index.register_or_noreg()), + _scale(scale), + _disp (disp + (index.constant_or_zero() * scale_size(scale))) { + if (!index.is_register()) scale = Address::no_scale; + assert(!_index->is_valid() == (scale == Address::no_scale), + "inconsistent address"); + } + + Address plus_disp(int disp) const { + Address a = (*this); + a._disp += disp; + return a; + } + // The following two overloads are used in connection with the // ByteSize type (see sizes.hpp). They simplify the use of // ByteSize'd arguments in assembly code. Note that their equivalent @@ -224,6 +255,17 @@ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); } + + Address(Register base, RegisterConstant index, ScaleFactor scale, ByteSize disp) + : _base (base), + _index(index.register_or_noreg()), + _scale(scale), + _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))) { + if (!index.is_register()) scale = Address::no_scale; + assert(!_index->is_valid() == (scale == Address::no_scale), + "inconsistent address"); + } + #endif // ASSERT // accessors @@ -236,11 +278,10 @@ // Convert the raw encoding form into the form expected by the constructor for // Address. An index of 4 (rsp) corresponds to having no index, so convert // that to noreg for the Address constructor. - static Address make_raw(int base, int index, int scale, int disp); + static Address make_raw(int base, int index, int scale, int disp, bool disp_is_oop); static Address make_array(ArrayAddress); - private: bool base_needs_rex() const { return _base != noreg && _base->encoding() >= 8; @@ -1097,6 +1138,9 @@ void movsbl(Register dst, Register src); #ifdef _LP64 + void movsbq(Register dst, Address src); + void movsbq(Register dst, Register src); + // Move signed 32bit immediate to 64bit extending sign void movslq(Address dst, int32_t imm64); void movslq(Register dst, int32_t imm64); @@ -1109,6 +1153,11 @@ void movswl(Register dst, Address src); void movswl(Register dst, Register src); +#ifdef _LP64 + void movswq(Register dst, Address src); + void movswq(Register dst, Register src); +#endif + void movw(Address dst, int imm16); void movw(Register dst, Address src); void movw(Address dst, Register src); @@ -1116,9 +1165,19 @@ void movzbl(Register dst, Address src); void movzbl(Register dst, Register src); +#ifdef _LP64 + void movzbq(Register dst, Address src); + void movzbq(Register dst, Register src); +#endif + void movzwl(Register dst, Address src); void movzwl(Register dst, Register src); +#ifdef _LP64 + void movzwq(Register dst, Address src); + void movzwq(Register dst, Register src); +#endif + void mull(Address src); void mull(Register src); @@ -1393,17 +1452,20 @@ // The following 4 methods return the offset of the appropriate move instruction - // Support for fast byte/word loading with zero extension (depending on particular CPU) + // Support for fast byte/short loading with zero extension (depending on particular CPU) int load_unsigned_byte(Register dst, Address src); - int load_unsigned_word(Register dst, Address src); - - // Support for fast byte/word loading with sign extension (depending on particular CPU) + int load_unsigned_short(Register dst, Address src); + + // Support for fast byte/short loading with sign extension (depending on particular CPU) int load_signed_byte(Register dst, Address src); - int load_signed_word(Register dst, Address src); + int load_signed_short(Register dst, Address src); // Support for sign-extension (hi:lo = extend_sign(lo)) void extend_sign(Register hi, Register lo); + // Loading values by size and signed-ness + void load_sized_value(Register dst, Address src, int size_in_bytes, bool is_signed); + // Support for inc/dec with optimal instruction selection depending on value void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; } @@ -1721,6 +1783,14 @@ ); void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterConstant itable_index, + Register method_result, + Register scan_temp, + Label& no_such_interface); + //---- void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0 @@ -1763,6 +1833,10 @@ // stack overflow + shadow pages. Also, clobbers tmp void bang_stack_size(Register size, Register tmp); + virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr, + Register tmp, + int offset); + // Support for serializing memory accesses between threads void serialize_memory(Register thread, Register tmp); diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -554,8 +554,8 @@ __ jcc (Assembler::zero, noLoop); // compare first characters - __ load_unsigned_word(rcx, Address(rdi, 0)); - __ load_unsigned_word(rbx, Address(rsi, 0)); + __ load_unsigned_short(rcx, Address(rdi, 0)); + __ load_unsigned_short(rbx, Address(rsi, 0)); __ subl(rcx, rbx); __ jcc(Assembler::notZero, haveResult); // starting loop @@ -574,8 +574,8 @@ Label loop; __ align(wordSize); __ bind(loop); - __ load_unsigned_word(rcx, Address(rdi, rax, Address::times_2, 0)); - __ load_unsigned_word(rbx, Address(rsi, rax, Address::times_2, 0)); + __ load_unsigned_short(rcx, Address(rdi, rax, Address::times_2, 0)); + __ load_unsigned_short(rbx, Address(rsi, rax, Address::times_2, 0)); __ subl(rcx, rbx); __ jcc(Assembler::notZero, haveResult); __ increment(rax); diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/c1_LIRGenerator_x86.cpp --- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -501,7 +501,7 @@ LIRItem right(x->y(), this); left.load_item(); - // dont load constants to save register + // don't load constants to save register right.load_nonconstant(); rlock_result(x); arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/cppInterpreter_x86.cpp --- a/src/cpu/x86/vm/cppInterpreter_x86.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -513,7 +513,7 @@ // compute full expression stack limit const Address size_of_stack (rbx, methodOopDesc::max_stack_offset()); - __ load_unsigned_word(rdx, size_of_stack); // get size of expression stack in words + __ load_unsigned_short(rdx, size_of_stack); // get size of expression stack in words __ negptr(rdx); // so we can subtract in next step // Allocate expression stack __ lea(rsp, Address(rsp, rdx, Address::times_ptr)); @@ -523,7 +523,7 @@ #ifdef _LP64 // Make sure stack is properly aligned and sized for the abi __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows - __ andptr(rsp, -16); // must be 16 byte boundry (see amd64 ABI) + __ andptr(rsp, -16); // must be 16 byte boundary (see amd64 ABI) #endif // _LP64 @@ -659,7 +659,7 @@ // Always give one monitor to allow us to start interp if sync method. // Any additional monitors need a check when moving the expression stack const int one_monitor = frame::interpreter_frame_monitor_size() * wordSize; - __ load_unsigned_word(rax, size_of_stack); // get size of expression stack in words + __ load_unsigned_short(rax, size_of_stack); // get size of expression stack in words __ lea(rax, Address(noreg, rax, Interpreter::stackElementScale(), one_monitor)); __ lea(rax, Address(rax, rdx, Interpreter::stackElementScale(), overhead_size)); @@ -863,13 +863,13 @@ __ bind(notByte); __ cmpl(rdx, stos); __ jcc(Assembler::notEqual, notShort); - __ load_signed_word(rax, field_address); + __ load_signed_short(rax, field_address); __ jmp(xreturn_path); __ bind(notShort); __ cmpl(rdx, ctos); __ jcc(Assembler::notEqual, notChar); - __ load_unsigned_word(rax, field_address); + __ load_unsigned_short(rax, field_address); __ jmp(xreturn_path); __ bind(notChar); @@ -937,7 +937,7 @@ const Register locals = rdi; // get parameter size (always needed) - __ load_unsigned_word(rcx, size_of_parameters); + __ load_unsigned_short(rcx, size_of_parameters); // rbx: methodOop // rcx: size of parameters @@ -970,7 +970,7 @@ #ifdef _LP64 // duplicate the alignment rsp got after setting stack_base __ subptr(rax, frame::arg_reg_save_area_bytes); // windows - __ andptr(rax, -16); // must be 16 byte boundry (see amd64 ABI) + __ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI) #endif // _LP64 __ cmpptr(rax, rsp); __ jcc(Assembler::equal, L); @@ -1062,12 +1062,12 @@ // allocate space for parameters __ movptr(method, STATE(_method)); __ verify_oop(method); - __ load_unsigned_word(t, Address(method, methodOopDesc::size_of_parameters_offset())); + __ load_unsigned_short(t, Address(method, methodOopDesc::size_of_parameters_offset())); __ shll(t, 2); #ifdef _LP64 __ subptr(rsp, t); __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows - __ andptr(rsp, -16); // must be 16 byte boundry (see amd64 ABI) + __ andptr(rsp, -16); // must be 16 byte boundary (see amd64 ABI) #else __ addptr(t, 2*wordSize); // allocate two more slots for JNIEnv and possible mirror __ subptr(rsp, t); @@ -1659,11 +1659,11 @@ // const Address monitor(rbp, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); // get parameter size (always needed) - __ load_unsigned_word(rcx, size_of_parameters); + __ load_unsigned_short(rcx, size_of_parameters); // rbx: methodOop // rcx: size of parameters - __ load_unsigned_word(rdx, size_of_locals); // get size of locals in words + __ load_unsigned_short(rdx, size_of_locals); // get size of locals in words __ subptr(rdx, rcx); // rdx = no. of additional locals @@ -1949,7 +1949,7 @@ __ movptr(rbx, STATE(_result._to_call._callee)); // callee left args on top of expression stack, remove them - __ load_unsigned_word(rcx, Address(rbx, methodOopDesc::size_of_parameters_offset())); + __ load_unsigned_short(rcx, Address(rbx, methodOopDesc::size_of_parameters_offset())); __ lea(rsp, Address(rsp, rcx, Address::times_ptr)); __ movl(rcx, Address(rbx, methodOopDesc::result_index_offset())); @@ -2119,7 +2119,7 @@ // Make it look like call_stub calling conventions // Get (potential) receiver - __ load_unsigned_word(rcx, size_of_parameters); // get size of parameters in words + __ load_unsigned_short(rcx, size_of_parameters); // get size of parameters in words ExternalAddress recursive(CAST_FROM_FN_PTR(address, RecursiveInterpreterActivation)); __ pushptr(recursive.addr()); // make it look good in the debugger diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/interp_masm_x86_32.cpp --- a/src/cpu/x86/vm/interp_masm_x86_32.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -192,7 +192,7 @@ void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset) { assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); assert(cache != index, "must use different registers"); - load_unsigned_word(index, Address(rsi, bcp_offset)); + load_unsigned_short(index, Address(rsi, bcp_offset)); movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize)); assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below"); shlptr(index, 2); // convert from field index to ConstantPoolCacheEntry index @@ -202,7 +202,7 @@ void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset) { assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); assert(cache != tmp, "must use different register"); - load_unsigned_word(tmp, Address(rsi, bcp_offset)); + load_unsigned_short(tmp, Address(rsi, bcp_offset)); assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below"); // convert from field index to ConstantPoolCacheEntry index // and from word offset to byte offset @@ -1031,7 +1031,7 @@ // If the mdp is valid, it will point to a DataLayout header which is // consistent with the bcp. The converse is highly probable also. - load_unsigned_word(rdx, Address(rcx, in_bytes(DataLayout::bci_offset()))); + load_unsigned_short(rdx, Address(rcx, in_bytes(DataLayout::bci_offset()))); addptr(rdx, Address(rbx, methodOopDesc::const_offset())); lea(rdx, Address(rdx, constMethodOopDesc::codes_offset())); cmpptr(rdx, rsi); @@ -1512,6 +1512,15 @@ call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), rcx, rbx); } + + // RedefineClasses() tracing support for obsolete method entry + if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { + get_thread(rcx); + get_method(rbx); + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + rcx, rbx); + } } diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/interp_masm_x86_64.cpp --- a/src/cpu/x86/vm/interp_masm_x86_64.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -190,7 +190,7 @@ int bcp_offset) { assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); assert(cache != index, "must use different registers"); - load_unsigned_word(index, Address(r13, bcp_offset)); + load_unsigned_short(index, Address(r13, bcp_offset)); movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize)); assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); // convert from field index to ConstantPoolCacheEntry index @@ -203,7 +203,7 @@ int bcp_offset) { assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); assert(cache != tmp, "must use different register"); - load_unsigned_word(tmp, Address(r13, bcp_offset)); + load_unsigned_short(tmp, Address(r13, bcp_offset)); assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); // convert from field index to ConstantPoolCacheEntry index // and from word offset to byte offset @@ -1063,8 +1063,8 @@ // If the mdp is valid, it will point to a DataLayout header which is // consistent with the bcp. The converse is highly probable also. - load_unsigned_word(c_rarg2, - Address(c_rarg3, in_bytes(DataLayout::bci_offset()))); + load_unsigned_short(c_rarg2, + Address(c_rarg3, in_bytes(DataLayout::bci_offset()))); addptr(c_rarg2, Address(rbx, methodOopDesc::const_offset())); lea(c_rarg2, Address(c_rarg2, constMethodOopDesc::codes_offset())); cmpptr(c_rarg2, r13); @@ -1593,6 +1593,14 @@ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), r15_thread, c_rarg1); } + + // RedefineClasses() tracing support for obsolete method entry + if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { + get_method(c_rarg1); + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + r15_thread, c_rarg1); + } } diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/sharedRuntime_x86_32.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1534,6 +1534,13 @@ thread, rax); } + // RedefineClasses() tracing support for obsolete method entry + if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { + __ movoop(rax, JNIHandles::make_local(method())); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + thread, rax); + } // These are register definitions we need for locking/unlocking const Register swap_reg = rax; // Must use rax, for cmpxchg instruction diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/sharedRuntime_x86_64.cpp --- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1350,7 +1350,7 @@ { Label L; __ mov(rax, rsp); - __ andptr(rax, -16); // must be 16 byte boundry (see amd64 ABI) + __ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI) __ cmpptr(rax, rsp); __ jcc(Assembler::equal, L); __ stop("improperly aligned stack"); @@ -1508,6 +1508,17 @@ restore_args(masm, total_c_args, c_arg, out_regs); } + // RedefineClasses() tracing support for obsolete method entry + if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + __ movoop(c_rarg1, JNIHandles::make_local(method())); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + r15_thread, c_rarg1); + restore_args(masm, total_c_args, c_arg, out_regs); + } + // Lock a synchronized method // Register definitions used by locking and unlocking diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/templateInterpreter_x86_32.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -662,13 +662,13 @@ __ bind(notByte); __ cmpl(rdx, stos); __ jcc(Assembler::notEqual, notShort); - __ load_signed_word(rax, field_address); + __ load_signed_short(rax, field_address); __ jmp(xreturn_path); __ bind(notShort); __ cmpl(rdx, ctos); __ jcc(Assembler::notEqual, notChar); - __ load_unsigned_word(rax, field_address); + __ load_unsigned_short(rax, field_address); __ jmp(xreturn_path); __ bind(notChar); @@ -723,7 +723,7 @@ const Address access_flags (rbx, methodOopDesc::access_flags_offset()); // get parameter size (always needed) - __ load_unsigned_word(rcx, size_of_parameters); + __ load_unsigned_short(rcx, size_of_parameters); // native calls don't need the stack size check since they have no expression stack // and the arguments are already on the stack and we only add a handful of words @@ -838,7 +838,7 @@ // allocate space for parameters __ get_method(method); __ verify_oop(method); - __ load_unsigned_word(t, Address(method, methodOopDesc::size_of_parameters_offset())); + __ load_unsigned_short(t, Address(method, methodOopDesc::size_of_parameters_offset())); __ shlptr(t, Interpreter::logStackElementSize()); __ addptr(t, 2*wordSize); // allocate two more slots for JNIEnv and possible mirror __ subptr(rsp, t); @@ -1155,14 +1155,14 @@ const Address access_flags (rbx, methodOopDesc::access_flags_offset()); // get parameter size (always needed) - __ load_unsigned_word(rcx, size_of_parameters); + __ load_unsigned_short(rcx, size_of_parameters); // rbx,: methodOop // rcx: size of parameters // rsi: sender_sp (could differ from sp+wordSize if we were called via c2i ) - __ load_unsigned_word(rdx, size_of_locals); // get size of locals in words + __ load_unsigned_short(rdx, size_of_locals); // get size of locals in words __ subl(rdx, rcx); // rdx = no. of additional locals // see if we've got enough room on the stack for locals plus overhead. @@ -1558,7 +1558,7 @@ // Compute size of arguments for saving when returning to deoptimized caller __ get_method(rax); __ verify_oop(rax); - __ load_unsigned_word(rax, Address(rax, in_bytes(methodOopDesc::size_of_parameters_offset()))); + __ load_unsigned_short(rax, Address(rax, in_bytes(methodOopDesc::size_of_parameters_offset()))); __ shlptr(rax, Interpreter::logStackElementSize()); __ restore_locals(); __ subptr(rdi, rax); diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/templateInterpreter_x86_64.cpp --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -650,7 +650,7 @@ __ cmpl(rdx, stos); __ jcc(Assembler::notEqual, notShort); // stos - __ load_signed_word(rax, field_address); + __ load_signed_short(rax, field_address); __ jmp(xreturn_path); __ bind(notShort); @@ -662,7 +662,7 @@ __ bind(okay); #endif // ctos - __ load_unsigned_word(rax, field_address); + __ load_unsigned_short(rax, field_address); __ bind(xreturn_path); @@ -702,7 +702,7 @@ const Address access_flags (rbx, methodOopDesc::access_flags_offset()); // get parameter size (always needed) - __ load_unsigned_word(rcx, size_of_parameters); + __ load_unsigned_short(rcx, size_of_parameters); // native calls don't need the stack size check since they have no // expression stack and the arguments are already on the stack and @@ -819,14 +819,14 @@ // allocate space for parameters __ get_method(method); __ verify_oop(method); - __ load_unsigned_word(t, - Address(method, - methodOopDesc::size_of_parameters_offset())); + __ load_unsigned_short(t, + Address(method, + methodOopDesc::size_of_parameters_offset())); __ shll(t, Interpreter::logStackElementSize()); __ subptr(rsp, t); __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows - __ andptr(rsp, -16); // must be 16 byte boundry (see amd64 ABI) + __ andptr(rsp, -16); // must be 16 byte boundary (see amd64 ABI) // get signature handler { @@ -1165,13 +1165,13 @@ const Address access_flags(rbx, methodOopDesc::access_flags_offset()); // get parameter size (always needed) - __ load_unsigned_word(rcx, size_of_parameters); + __ load_unsigned_short(rcx, size_of_parameters); // rbx: methodOop // rcx: size of parameters // r13: sender_sp (could differ from sp+wordSize if we were called via c2i ) - __ load_unsigned_word(rdx, size_of_locals); // get size of locals in words + __ load_unsigned_short(rdx, size_of_locals); // get size of locals in words __ subl(rdx, rcx); // rdx = no. of additional locals // YYY @@ -1583,7 +1583,7 @@ // Compute size of arguments for saving when returning to // deoptimized caller __ get_method(rax); - __ load_unsigned_word(rax, Address(rax, in_bytes(methodOopDesc:: + __ load_unsigned_short(rax, Address(rax, in_bytes(methodOopDesc:: size_of_parameters_offset()))); __ shll(rax, Interpreter::logStackElementSize()); __ restore_locals(); // XXX do we need this? diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/templateTable_x86_32.cpp --- a/src/cpu/x86/vm/templateTable_x86_32.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/templateTable_x86_32.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -296,7 +296,7 @@ void TemplateTable::sipush() { transition(vtos, itos); - __ load_unsigned_word(rax, at_bcp(1)); + __ load_unsigned_short(rax, at_bcp(1)); __ bswapl(rax); __ sarl(rax, 16); } @@ -662,7 +662,7 @@ index_check(rdx, rax); // kills rbx, // rax,: index // can do better code for P5 - may want to improve this at some point - __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR))); + __ load_unsigned_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR))); __ mov(rax, rbx); } @@ -677,7 +677,7 @@ // rdx: array index_check(rdx, rax); // rax,: index - __ load_unsigned_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR))); + __ load_unsigned_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR))); __ mov(rax, rbx); } @@ -687,7 +687,7 @@ index_check(rdx, rax); // kills rbx, // rax,: index // can do better code for P5 - may want to improve this at some point - __ load_signed_word(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT))); + __ load_signed_short(rbx, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT))); __ mov(rax, rbx); } @@ -1586,7 +1586,7 @@ // Handle all the JSR stuff here, then exit. // It's much shorter and cleaner than intermingling with the - // non-JSR normal-branch stuff occuring below. + // non-JSR normal-branch stuff occurring below. if (is_jsr) { // Pre-load the next target bytecode into EBX __ load_unsigned_byte(rbx, Address(rsi, rdx, Address::times_1, 0)); @@ -2310,7 +2310,7 @@ __ cmpl(flags, ctos ); __ jcc(Assembler::notEqual, notChar); - __ load_unsigned_word(rax, lo ); + __ load_unsigned_short(rax, lo ); __ push(ctos); if (!is_static) { patch_bytecode(Bytecodes::_fast_cgetfield, rcx, rbx); @@ -2322,7 +2322,7 @@ __ cmpl(flags, stos ); __ jcc(Assembler::notEqual, notShort); - __ load_signed_word(rax, lo ); + __ load_signed_short(rax, lo ); __ push(stos); if (!is_static) { patch_bytecode(Bytecodes::_fast_sgetfield, rcx, rbx); @@ -2830,8 +2830,8 @@ // access field switch (bytecode()) { case Bytecodes::_fast_bgetfield: __ movsbl(rax, lo ); break; - case Bytecodes::_fast_sgetfield: __ load_signed_word(rax, lo ); break; - case Bytecodes::_fast_cgetfield: __ load_unsigned_word(rax, lo ); break; + case Bytecodes::_fast_sgetfield: __ load_signed_short(rax, lo ); break; + case Bytecodes::_fast_cgetfield: __ load_unsigned_short(rax, lo ); break; case Bytecodes::_fast_igetfield: __ movl(rax, lo); break; case Bytecodes::_fast_lgetfield: __ stop("should not be rewritten"); break; case Bytecodes::_fast_fgetfield: __ fld_s(lo); break; @@ -3055,35 +3055,44 @@ // profile this call __ profile_virtual_call(rdx, rsi, rdi); - __ mov(rdi, rdx); // Save klassOop in rdi - - // Compute start of first itableOffsetEntry (which is at the end of the vtable) - const int base = instanceKlass::vtable_start_offset() * wordSize; - assert(vtableEntry::size() * wordSize == (1 << (int)Address::times_ptr), "adjust the scaling in the code below"); - __ movl(rsi, Address(rdx, instanceKlass::vtable_length_offset() * wordSize)); // Get length of vtable - __ lea(rdx, Address(rdx, rsi, Address::times_4, base)); - if (HeapWordsPerLong > 1) { - // Round up to align_object_offset boundary - __ round_to(rdx, BytesPerLong); - } - - Label entry, search, interface_ok; - - __ jmpb(entry); - __ bind(search); - __ addptr(rdx, itableOffsetEntry::size() * wordSize); - - __ bind(entry); - - // Check that the entry is non-null. A null entry means that the receiver - // class doesn't implement the interface, and wasn't the same as the - // receiver class checked when the interface was resolved. - __ push(rdx); - __ movptr(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes())); - __ testptr(rdx, rdx); - __ jcc(Assembler::notZero, interface_ok); + Label no_such_interface, no_such_method; + + __ lookup_interface_method(// inputs: rec. class, interface, itable index + rdx, rax, rbx, + // outputs: method, scan temp. reg + rbx, rsi, + no_such_interface); + + // rbx,: methodOop to call + // rcx: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ testptr(rbx, rbx); + __ jcc(Assembler::zero, no_such_method); + + // do the call + // rcx: receiver + // rbx,: methodOop + __ jump_from_interpreted(rbx, rdx); + __ should_not_reach_here(); + + // exception handling code follows... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); // throw exception - __ pop(rdx); // pop saved register first. + __ pop(rbx); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); // rsi must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + __ bind(no_such_interface); + // throw exception __ pop(rbx); // pop return address (pushed by prepare_invoke) __ restore_bcp(); // rsi must be correct for exception handler (was destroyed) __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) @@ -3091,42 +3100,6 @@ InterpreterRuntime::throw_IncompatibleClassChangeError)); // the call_VM checks for exception, so we should never return here. __ should_not_reach_here(); - __ bind(interface_ok); - - __ pop(rdx); - - __ cmpptr(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes())); - __ jcc(Assembler::notEqual, search); - - __ movl(rdx, Address(rdx, itableOffsetEntry::offset_offset_in_bytes())); - __ addptr(rdx, rdi); // Add offset to klassOop - assert(itableMethodEntry::size() * wordSize == (1 << (int)Address::times_ptr), "adjust the scaling in the code below"); - __ movptr(rbx, Address(rdx, rbx, Address::times_ptr)); - // rbx,: methodOop to call - // rcx: receiver - // Check for abstract method error - // Note: This should be done more efficiently via a throw_abstract_method_error - // interpreter entry point and a conditional jump to it in case of a null - // method. - { Label L; - __ testptr(rbx, rbx); - __ jcc(Assembler::notZero, L); - // throw exception - // note: must restore interpreter registers to canonical - // state for exception handling to work correctly! - __ pop(rbx); // pop return address (pushed by prepare_invoke) - __ restore_bcp(); // rsi must be correct for exception handler (was destroyed) - __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); - // the call_VM checks for exception, so we should never return here. - __ should_not_reach_here(); - __ bind(L); - } - - // do the call - // rcx: receiver - // rbx,: methodOop - __ jump_from_interpreted(rbx, rdx); } //---------------------------------------------------------------------------------------------------- diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/templateTable_x86_64.cpp --- a/src/cpu/x86/vm/templateTable_x86_64.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/templateTable_x86_64.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -307,7 +307,7 @@ void TemplateTable::sipush() { transition(vtos, itos); - __ load_unsigned_word(rax, at_bcp(1)); + __ load_unsigned_short(rax, at_bcp(1)); __ bswapl(rax); __ sarl(rax, 16); } @@ -645,10 +645,10 @@ // eax: index // rdx: array index_check(rdx, rax); // kills rbx - __ load_unsigned_word(rax, - Address(rdx, rax, - Address::times_2, - arrayOopDesc::base_offset_in_bytes(T_CHAR))); + __ load_unsigned_short(rax, + Address(rdx, rax, + Address::times_2, + arrayOopDesc::base_offset_in_bytes(T_CHAR))); } // iload followed by caload frequent pair @@ -663,10 +663,10 @@ // rdx: array __ pop_ptr(rdx); index_check(rdx, rax); // kills rbx - __ load_unsigned_word(rax, - Address(rdx, rax, - Address::times_2, - arrayOopDesc::base_offset_in_bytes(T_CHAR))); + __ load_unsigned_short(rax, + Address(rdx, rax, + Address::times_2, + arrayOopDesc::base_offset_in_bytes(T_CHAR))); } void TemplateTable::saload() { @@ -675,10 +675,10 @@ // eax: index // rdx: array index_check(rdx, rax); // kills rbx - __ load_signed_word(rax, - Address(rdx, rax, - Address::times_2, - arrayOopDesc::base_offset_in_bytes(T_SHORT))); + __ load_signed_short(rax, + Address(rdx, rax, + Address::times_2, + arrayOopDesc::base_offset_in_bytes(T_SHORT))); } void TemplateTable::iload(int n) { @@ -1559,7 +1559,7 @@ // Handle all the JSR stuff here, then exit. // It's much shorter and cleaner than intermingling with the non-JSR - // normal-branch stuff occuring below. + // normal-branch stuff occurring below. if (is_jsr) { // Pre-load the next target bytecode into rbx __ load_unsigned_byte(rbx, Address(r13, rdx, Address::times_1, 0)); @@ -2276,7 +2276,7 @@ __ cmpl(flags, ctos); __ jcc(Assembler::notEqual, notChar); // ctos - __ load_unsigned_word(rax, field); + __ load_unsigned_short(rax, field); __ push(ctos); // Rewrite bytecode to be faster if (!is_static) { @@ -2288,7 +2288,7 @@ __ cmpl(flags, stos); __ jcc(Assembler::notEqual, notShort); // stos - __ load_signed_word(rax, field); + __ load_signed_short(rax, field); __ push(stos); // Rewrite bytecode to be faster if (!is_static) { @@ -2751,10 +2751,10 @@ __ movsbl(rax, field); break; case Bytecodes::_fast_sgetfield: - __ load_signed_word(rax, field); + __ load_signed_short(rax, field); break; case Bytecodes::_fast_cgetfield: - __ load_unsigned_word(rax, field); + __ load_unsigned_short(rax, field); break; case Bytecodes::_fast_fgetfield: __ movflt(xmm0, field); @@ -3010,97 +3010,55 @@ // profile this call __ profile_virtual_call(rdx, r13, r14); - __ mov(r14, rdx); // Save klassOop in r14 - - // Compute start of first itableOffsetEntry (which is at the end of - // the vtable) - const int base = instanceKlass::vtable_start_offset() * wordSize; - // Get length of vtable - assert(vtableEntry::size() * wordSize == 8, - "adjust the scaling in the code below"); - __ movl(r13, Address(rdx, - instanceKlass::vtable_length_offset() * wordSize)); - __ lea(rdx, Address(rdx, r13, Address::times_8, base)); - - if (HeapWordsPerLong > 1) { - // Round up to align_object_offset boundary - __ round_to(rdx, BytesPerLong); - } - - Label entry, search, interface_ok; - - __ jmpb(entry); - __ bind(search); - __ addptr(rdx, itableOffsetEntry::size() * wordSize); - - __ bind(entry); - - // Check that the entry is non-null. A null entry means that the - // receiver class doesn't implement the interface, and wasn't the - // same as the receiver class checked when the interface was - // resolved. - __ push(rdx); - __ movptr(rdx, Address(rdx, itableOffsetEntry::interface_offset_in_bytes())); - __ testptr(rdx, rdx); - __ jcc(Assembler::notZero, interface_ok); + Label no_such_interface, no_such_method; + + __ lookup_interface_method(// inputs: rec. class, interface, itable index + rdx, rax, rbx, + // outputs: method, scan temp. reg + rbx, r13, + no_such_interface); + + // rbx,: methodOop to call + // rcx: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ testptr(rbx, rbx); + __ jcc(Assembler::zero, no_such_method); + + // do the call + // rcx: receiver + // rbx,: methodOop + __ jump_from_interpreted(rbx, rdx); + __ should_not_reach_here(); + + // exception handling code follows... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); // throw exception - __ pop(rdx); // pop saved register first. - __ pop(rbx); // pop return address (pushed by prepare_invoke) - __ restore_bcp(); // r13 must be correct for exception handler (was - // destroyed) - __ restore_locals(); // make sure locals pointer is correct as well - // (was destroyed) + __ pop(rbx); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); // r13 must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + __ bind(no_such_interface); + // throw exception + __ pop(rbx); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); // r13 must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError)); // the call_VM checks for exception, so we should never return here. __ should_not_reach_here(); - __ bind(interface_ok); - - __ pop(rdx); - - __ cmpptr(rax, Address(rdx, itableOffsetEntry::interface_offset_in_bytes())); - __ jcc(Assembler::notEqual, search); - - __ movl(rdx, Address(rdx, itableOffsetEntry::offset_offset_in_bytes())); - - __ addptr(rdx, r14); // Add offset to klassOop - assert(itableMethodEntry::size() * wordSize == 8, - "adjust the scaling in the code below"); - __ movptr(rbx, Address(rdx, rbx, Address::times_8)); - // rbx: methodOop to call - // rcx: receiver - // Check for abstract method error - // Note: This should be done more efficiently via a - // throw_abstract_method_error interpreter entry point and a - // conditional jump to it in case of a null method. - { - Label L; - __ testptr(rbx, rbx); - __ jcc(Assembler::notZero, L); - // throw exception - // note: must restore interpreter registers to canonical - // state for exception handling to work correctly! - __ pop(rbx); // pop return address (pushed by prepare_invoke) - __ restore_bcp(); // r13 must be correct for exception handler - // (was destroyed) - __ restore_locals(); // make sure locals pointer is correct as - // well (was destroyed) - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::throw_AbstractMethodError)); - // the call_VM checks for exception, so we should never return here. - __ should_not_reach_here(); - __ bind(L); - } - - __ movptr(rcx, Address(rbx, methodOopDesc::interpreter_entry_offset())); - - // do the call - // rcx: receiver - // rbx: methodOop - __ jump_from_interpreted(rbx, rdx); + return; } + //----------------------------------------------------------------------------- // Allocation diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/vm_version_x86.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/x86/vm/vm_version_x86.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,514 @@ +/* + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +# include "incls/_precompiled.incl" +# include "incls/_vm_version_x86.cpp.incl" + + +int VM_Version::_cpu; +int VM_Version::_model; +int VM_Version::_stepping; +int VM_Version::_cpuFeatures; +const char* VM_Version::_features_str = ""; +VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; + +static BufferBlob* stub_blob; +static const int stub_size = 300; + +extern "C" { + typedef void (*getPsrInfo_stub_t)(void*); +} +static getPsrInfo_stub_t getPsrInfo_stub = NULL; + + +class VM_Version_StubGenerator: public StubCodeGenerator { + public: + + VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} + + address generate_getPsrInfo() { + // Flags to test CPU type. + const uint32_t EFL_AC = 0x40000; + const uint32_t EFL_ID = 0x200000; + // Values for when we don't have a CPUID instruction. + const int CPU_FAMILY_SHIFT = 8; + const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); + const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); + + Label detect_486, cpu486, detect_586, std_cpuid1; + Label ext_cpuid1, ext_cpuid5, done; + + StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); +# define __ _masm-> + + address start = __ pc(); + + // + // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); + // + // LP64: rcx and rdx are first and second argument registers on windows + + __ push(rbp); +#ifdef _LP64 + __ mov(rbp, c_rarg0); // cpuid_info address +#else + __ movptr(rbp, Address(rsp, 8)); // cpuid_info address +#endif + __ push(rbx); + __ push(rsi); + __ pushf(); // preserve rbx, and flags + __ pop(rax); + __ push(rax); + __ mov(rcx, rax); + // + // if we are unable to change the AC flag, we have a 386 + // + __ xorl(rax, EFL_AC); + __ push(rax); + __ popf(); + __ pushf(); + __ pop(rax); + __ cmpptr(rax, rcx); + __ jccb(Assembler::notEqual, detect_486); + + __ movl(rax, CPU_FAMILY_386); + __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); + __ jmp(done); + + // + // If we are unable to change the ID flag, we have a 486 which does + // not support the "cpuid" instruction. + // + __ bind(detect_486); + __ mov(rax, rcx); + __ xorl(rax, EFL_ID); + __ push(rax); + __ popf(); + __ pushf(); + __ pop(rax); + __ cmpptr(rcx, rax); + __ jccb(Assembler::notEqual, detect_586); + + __ bind(cpu486); + __ movl(rax, CPU_FAMILY_486); + __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); + __ jmp(done); + + // + // At this point, we have a chip which supports the "cpuid" instruction + // + __ bind(detect_586); + __ xorl(rax, rax); + __ cpuid(); + __ orl(rax, rax); + __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input + // value of at least 1, we give up and + // assume a 486 + __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + __ cmpl(rax, 3); // Is cpuid(0x4) supported? + __ jccb(Assembler::belowEqual, std_cpuid1); + + // + // cpuid(0x4) Deterministic cache params + // + __ movl(rax, 4); + __ xorl(rcx, rcx); // L1 cache + __ cpuid(); + __ push(rax); + __ andl(rax, 0x1f); // Determine if valid cache parameters used + __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache + __ pop(rax); + __ jccb(Assembler::equal, std_cpuid1); + + __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + // + // Standard cpuid(0x1) + // + __ bind(std_cpuid1); + __ movl(rax, 1); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + __ movl(rax, 0x80000000); + __ cpuid(); + __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? + __ jcc(Assembler::belowEqual, done); + __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? + __ jccb(Assembler::belowEqual, ext_cpuid1); + __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? + __ jccb(Assembler::belowEqual, ext_cpuid5); + // + // Extended cpuid(0x80000008) + // + __ movl(rax, 0x80000008); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + // + // Extended cpuid(0x80000005) + // + __ bind(ext_cpuid5); + __ movl(rax, 0x80000005); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + // + // Extended cpuid(0x80000001) + // + __ bind(ext_cpuid1); + __ movl(rax, 0x80000001); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + + // + // return + // + __ bind(done); + __ popf(); + __ pop(rsi); + __ pop(rbx); + __ pop(rbp); + __ ret(0); + +# undef __ + + return start; + }; +}; + + +void VM_Version::get_processor_features() { + + _cpu = 4; // 486 by default + _model = 0; + _stepping = 0; + _cpuFeatures = 0; + _logical_processors_per_package = 1; + + if (!Use486InstrsOnly) { + // Get raw processor info + getPsrInfo_stub(&_cpuid_info); + assert_is_initialized(); + _cpu = extended_cpu_family(); + _model = extended_cpu_model(); + _stepping = cpu_stepping(); + + if (cpu_family() > 4) { // it supports CPUID + _cpuFeatures = feature_flags(); + // Logical processors are only available on P4s and above, + // and only if hyperthreading is available. + _logical_processors_per_package = logical_processor_count(); + } + } + + _supports_cx8 = supports_cmpxchg8(); + +#ifdef _LP64 + // OS should support SSE for x64 and hardware should support at least SSE2. + if (!VM_Version::supports_sse2()) { + vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); + } +#endif + + // If the OS doesn't support SSE, we can't use this feature even if the HW does + if (!os::supports_sse()) + _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); + + if (UseSSE < 4) { + _cpuFeatures &= ~CPU_SSE4_1; + _cpuFeatures &= ~CPU_SSE4_2; + } + + if (UseSSE < 3) { + _cpuFeatures &= ~CPU_SSE3; + _cpuFeatures &= ~CPU_SSSE3; + _cpuFeatures &= ~CPU_SSE4A; + } + + if (UseSSE < 2) + _cpuFeatures &= ~CPU_SSE2; + + if (UseSSE < 1) + _cpuFeatures &= ~CPU_SSE; + + if (logical_processors_per_package() == 1) { + // HT processor could be installed on a system which doesn't support HT. + _cpuFeatures &= ~CPU_HT; + } + + char buf[256]; + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + cores_per_cpu(), threads_per_core(), + cpu_family(), _model, _stepping, + (supports_cmov() ? ", cmov" : ""), + (supports_cmpxchg8() ? ", cx8" : ""), + (supports_fxsr() ? ", fxsr" : ""), + (supports_mmx() ? ", mmx" : ""), + (supports_sse() ? ", sse" : ""), + (supports_sse2() ? ", sse2" : ""), + (supports_sse3() ? ", sse3" : ""), + (supports_ssse3()? ", ssse3": ""), + (supports_sse4_1() ? ", sse4.1" : ""), + (supports_sse4_2() ? ", sse4.2" : ""), + (supports_mmx_ext() ? ", mmxext" : ""), + (supports_3dnow() ? ", 3dnow" : ""), + (supports_3dnow2() ? ", 3dnowext" : ""), + (supports_sse4a() ? ", sse4a": ""), + (supports_ht() ? ", ht": "")); + _features_str = strdup(buf); + + // UseSSE is set to the smaller of what hardware supports and what + // the command line requires. I.e., you cannot set UseSSE to 2 on + // older Pentiums which do not support it. + if( UseSSE > 4 ) UseSSE=4; + if( UseSSE < 0 ) UseSSE=0; + if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support + UseSSE = MIN2((intx)3,UseSSE); + if( !supports_sse3() ) // Drop to 2 if no SSE3 support + UseSSE = MIN2((intx)2,UseSSE); + if( !supports_sse2() ) // Drop to 1 if no SSE2 support + UseSSE = MIN2((intx)1,UseSSE); + if( !supports_sse () ) // Drop to 0 if no SSE support + UseSSE = 0; + + // On new cpus instructions which update whole XMM register should be used + // to prevent partial register stall due to dependencies on high half. + // + // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) + // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) + // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). + // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). + + if( is_amd() ) { // AMD cpus specific settings + if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { + // Use it on new AMD cpus starting from Opteron. + UseAddressNop = true; + } + if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { + // Use it on new AMD cpus starting from Opteron. + UseNewLongLShift = true; + } + if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { + if( supports_sse4a() ) { + UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron + } else { + UseXmmLoadAndClearUpper = false; + } + } + if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { + if( supports_sse4a() ) { + UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' + } else { + UseXmmRegToRegMoveAll = false; + } + } + if( FLAG_IS_DEFAULT(UseXmmI2F) ) { + if( supports_sse4a() ) { + UseXmmI2F = true; + } else { + UseXmmI2F = false; + } + } + if( FLAG_IS_DEFAULT(UseXmmI2D) ) { + if( supports_sse4a() ) { + UseXmmI2D = true; + } else { + UseXmmI2D = false; + } + } + } + + if( is_intel() ) { // Intel cpus specific settings + if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { + UseStoreImmI16 = false; // don't use it on Intel cpus + } + if( cpu_family() == 6 || cpu_family() == 15 ) { + if( FLAG_IS_DEFAULT(UseAddressNop) ) { + // Use it on all Intel cpus starting from PentiumPro + UseAddressNop = true; + } + } + if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { + UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus + } + if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { + if( supports_sse3() ) { + UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus + } else { + UseXmmRegToRegMoveAll = false; + } + } + if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus +#ifdef COMPILER2 + if( FLAG_IS_DEFAULT(MaxLoopPad) ) { + // For new Intel cpus do the next optimization: + // don't align the beginning of a loop if there are enough instructions + // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) + // in current fetch line (OptoLoopAlignment) or the padding + // is big (> MaxLoopPad). + // Set MaxLoopPad to 11 for new Intel cpus to reduce number of + // generated NOP instructions. 11 is the largest size of one + // address NOP instruction '0F 1F' (see Assembler::nop(i)). + MaxLoopPad = 11; + } +#endif // COMPILER2 + if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { + UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus + } + if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus + if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { + UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus + } + } + } + } + + assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); + assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); + + // set valid Prefetch instruction + if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; + if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; + if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; + if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3; + + if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; + if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; + if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; + if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3; + + // Allocation prefetch settings + intx cache_line_size = L1_data_cache_line_size(); + if( cache_line_size > AllocatePrefetchStepSize ) + AllocatePrefetchStepSize = cache_line_size; + if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) + AllocatePrefetchLines = 3; // Optimistic value + assert(AllocatePrefetchLines > 0, "invalid value"); + if( AllocatePrefetchLines < 1 ) // set valid value in product VM + AllocatePrefetchLines = 1; // Conservative value + + AllocatePrefetchDistance = allocate_prefetch_distance(); + AllocatePrefetchStyle = allocate_prefetch_style(); + + if( AllocatePrefetchStyle == 2 && is_intel() && + cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core +#ifdef _LP64 + AllocatePrefetchDistance = 384; +#else + AllocatePrefetchDistance = 320; +#endif + } + assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); + +#ifdef _LP64 + // Prefetch settings + PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); + PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); + PrefetchFieldsAhead = prefetch_fields_ahead(); +#endif + +#ifndef PRODUCT + if (PrintMiscellaneous && Verbose) { + tty->print_cr("Logical CPUs per core: %u", + logical_processors_per_package()); + tty->print_cr("UseSSE=%d",UseSSE); + tty->print("Allocation: "); + if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { + tty->print_cr("no prefetching"); + } else { + if (UseSSE == 0 && supports_3dnow()) { + tty->print("PREFETCHW"); + } else if (UseSSE >= 1) { + if (AllocatePrefetchInstr == 0) { + tty->print("PREFETCHNTA"); + } else if (AllocatePrefetchInstr == 1) { + tty->print("PREFETCHT0"); + } else if (AllocatePrefetchInstr == 2) { + tty->print("PREFETCHT2"); + } else if (AllocatePrefetchInstr == 3) { + tty->print("PREFETCHW"); + } + } + if (AllocatePrefetchLines > 1) { + tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); + } else { + tty->print_cr(" %d, one line", AllocatePrefetchDistance); + } + } + + if (PrefetchCopyIntervalInBytes > 0) { + tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes); + } + if (PrefetchScanIntervalInBytes > 0) { + tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes); + } + if (PrefetchFieldsAhead > 0) { + tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead); + } + } +#endif // !PRODUCT +} + +void VM_Version::initialize() { + ResourceMark rm; + // Making this stub must be FIRST use of assembler + + stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); + if (stub_blob == NULL) { + vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); + } + CodeBuffer c(stub_blob->instructions_begin(), + stub_blob->instructions_size()); + VM_Version_StubGenerator g(&c); + getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, + g.generate_getPsrInfo()); + + get_processor_features(); +} diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/vm_version_x86.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/x86/vm/vm_version_x86.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,459 @@ +/* + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class VM_Version : public Abstract_VM_Version { +public: + // cpuid result register layouts. These are all unions of a uint32_t + // (in case anyone wants access to the register as a whole) and a bitfield. + + union StdCpuid1Eax { + uint32_t value; + struct { + uint32_t stepping : 4, + model : 4, + family : 4, + proc_type : 2, + : 2, + ext_model : 4, + ext_family : 8, + : 4; + } bits; + }; + + union StdCpuid1Ebx { // example, unused + uint32_t value; + struct { + uint32_t brand_id : 8, + clflush_size : 8, + threads_per_cpu : 8, + apic_id : 8; + } bits; + }; + + union StdCpuid1Ecx { + uint32_t value; + struct { + uint32_t sse3 : 1, + : 2, + monitor : 1, + : 1, + vmx : 1, + : 1, + est : 1, + : 1, + ssse3 : 1, + cid : 1, + : 2, + cmpxchg16: 1, + : 4, + dca : 1, + sse4_1 : 1, + sse4_2 : 1, + : 11; + } bits; + }; + + union StdCpuid1Edx { + uint32_t value; + struct { + uint32_t : 4, + tsc : 1, + : 3, + cmpxchg8 : 1, + : 6, + cmov : 1, + : 7, + mmx : 1, + fxsr : 1, + sse : 1, + sse2 : 1, + : 1, + ht : 1, + : 3; + } bits; + }; + + union DcpCpuid4Eax { + uint32_t value; + struct { + uint32_t cache_type : 5, + : 21, + cores_per_cpu : 6; + } bits; + }; + + union DcpCpuid4Ebx { + uint32_t value; + struct { + uint32_t L1_line_size : 12, + partitions : 10, + associativity : 10; + } bits; + }; + + union ExtCpuid1Ecx { + uint32_t value; + struct { + uint32_t LahfSahf : 1, + CmpLegacy : 1, + : 4, + abm : 1, + sse4a : 1, + misalignsse : 1, + prefetchw : 1, + : 22; + } bits; + }; + + union ExtCpuid1Edx { + uint32_t value; + struct { + uint32_t : 22, + mmx_amd : 1, + mmx : 1, + fxsr : 1, + : 4, + long_mode : 1, + tdnow2 : 1, + tdnow : 1; + } bits; + }; + + union ExtCpuid5Ex { + uint32_t value; + struct { + uint32_t L1_line_size : 8, + L1_tag_lines : 8, + L1_assoc : 8, + L1_size : 8; + } bits; + }; + + union ExtCpuid8Ecx { + uint32_t value; + struct { + uint32_t cores_per_cpu : 8, + : 24; + } bits; + }; + +protected: + static int _cpu; + static int _model; + static int _stepping; + static int _cpuFeatures; // features returned by the "cpuid" instruction + // 0 if this instruction is not available + static const char* _features_str; + + enum { + CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) + CPU_CMOV = (1 << 1), + CPU_FXSR = (1 << 2), + CPU_HT = (1 << 3), + CPU_MMX = (1 << 4), + CPU_3DNOW = (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX) + CPU_SSE = (1 << 6), + CPU_SSE2 = (1 << 7), + CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX) + CPU_SSSE3 = (1 << 9), + CPU_SSE4A = (1 << 10), + CPU_SSE4_1 = (1 << 11), + CPU_SSE4_2 = (1 << 12) + } cpuFeatureFlags; + + // cpuid information block. All info derived from executing cpuid with + // various function numbers is stored here. Intel and AMD info is + // merged in this block: accessor methods disentangle it. + // + // The info block is laid out in subblocks of 4 dwords corresponding to + // eax, ebx, ecx and edx, whether or not they contain anything useful. + struct CpuidInfo { + // cpuid function 0 + uint32_t std_max_function; + uint32_t std_vendor_name_0; + uint32_t std_vendor_name_1; + uint32_t std_vendor_name_2; + + // cpuid function 1 + StdCpuid1Eax std_cpuid1_eax; + StdCpuid1Ebx std_cpuid1_ebx; + StdCpuid1Ecx std_cpuid1_ecx; + StdCpuid1Edx std_cpuid1_edx; + + // cpuid function 4 (deterministic cache parameters) + DcpCpuid4Eax dcp_cpuid4_eax; + DcpCpuid4Ebx dcp_cpuid4_ebx; + uint32_t dcp_cpuid4_ecx; // unused currently + uint32_t dcp_cpuid4_edx; // unused currently + + // cpuid function 0x80000000 // example, unused + uint32_t ext_max_function; + uint32_t ext_vendor_name_0; + uint32_t ext_vendor_name_1; + uint32_t ext_vendor_name_2; + + // cpuid function 0x80000001 + uint32_t ext_cpuid1_eax; // reserved + uint32_t ext_cpuid1_ebx; // reserved + ExtCpuid1Ecx ext_cpuid1_ecx; + ExtCpuid1Edx ext_cpuid1_edx; + + // cpuid functions 0x80000002 thru 0x80000004: example, unused + uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; + uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; + uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; + + // cpuid function 0x80000005 //AMD L1, Intel reserved + uint32_t ext_cpuid5_eax; // unused currently + uint32_t ext_cpuid5_ebx; // reserved + ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) + ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) + + // cpuid function 0x80000008 + uint32_t ext_cpuid8_eax; // unused currently + uint32_t ext_cpuid8_ebx; // reserved + ExtCpuid8Ecx ext_cpuid8_ecx; + uint32_t ext_cpuid8_edx; // reserved + }; + + // The actual cpuid info block + static CpuidInfo _cpuid_info; + + // Extractors and predicates + static uint32_t extended_cpu_family() { + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; + result += _cpuid_info.std_cpuid1_eax.bits.ext_family; + return result; + } + static uint32_t extended_cpu_model() { + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; + result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; + return result; + } + static uint32_t cpu_stepping() { + uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; + return result; + } + static uint logical_processor_count() { + uint result = threads_per_core(); + return result; + } + static uint32_t feature_flags() { + uint32_t result = 0; + if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) + result |= CPU_CX8; + if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) + result |= CPU_CMOV; + if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() && + _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0) + result |= CPU_FXSR; + // HT flag is set for multi-core processors also. + if (threads_per_core() > 1) + result |= CPU_HT; + if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && + _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) + result |= CPU_MMX; + if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) + result |= CPU_3DNOW; + if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) + result |= CPU_SSE; + if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) + result |= CPU_SSE2; + if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) + result |= CPU_SSE3; + if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) + result |= CPU_SSSE3; + if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) + result |= CPU_SSE4A; + if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) + result |= CPU_SSE4_1; + if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) + result |= CPU_SSE4_2; + return result; + } + + static void get_processor_features(); + +public: + // Offsets for cpuid asm stub + static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } + static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } + static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } + static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } + static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } + static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } + + // Initialization + static void initialize(); + + // Asserts + static void assert_is_initialized() { + assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); + } + + // + // Processor family: + // 3 - 386 + // 4 - 486 + // 5 - Pentium + // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, + // Pentium M, Core Solo, Core Duo, Core2 Duo + // family 6 model: 9, 13, 14, 15 + // 0x0f - Pentium 4, Opteron + // + // Note: The cpu family should be used to select between + // instruction sequences which are valid on all Intel + // processors. Use the feature test functions below to + // determine whether a particular instruction is supported. + // + static int cpu_family() { return _cpu;} + static bool is_P6() { return cpu_family() >= 6; } + + static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' + static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' + + static uint cores_per_cpu() { + uint result = 1; + if (is_intel()) { + result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); + } else if (is_amd()) { + result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); + } + return result; + } + + static uint threads_per_core() { + uint result = 1; + if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { + result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / + cores_per_cpu(); + } + return result; + } + + static intx L1_data_cache_line_size() { + intx result = 0; + if (is_intel()) { + result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); + } else if (is_amd()) { + result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; + } + if (result < 32) // not defined ? + result = 32; // 32 bytes by default on x86 and other x64 + return result; + } + + // + // Feature identification + // + static bool supports_cpuid() { return _cpuFeatures != 0; } + static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } + static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } + static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } + static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } + static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } + static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } + static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } + static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } + static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } + static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } + static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } + // + // AMD features + // + static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } + static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } + static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } + static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } + + static bool supports_compare_and_exchange() { return true; } + + static const char* cpu_features() { return _features_str; } + + static intx allocate_prefetch_distance() { + // This method should be called before allocate_prefetch_style(). + // + // Hardware prefetching (distance/size in bytes): + // Pentium 3 - 64 / 32 + // Pentium 4 - 256 / 128 + // Athlon - 64 / 32 ???? + // Opteron - 128 / 64 only when 2 sequential cache lines accessed + // Core - 128 / 64 + // + // Software prefetching (distance in bytes / instruction with best score): + // Pentium 3 - 128 / prefetchnta + // Pentium 4 - 512 / prefetchnta + // Athlon - 128 / prefetchnta + // Opteron - 256 / prefetchnta + // Core - 256 / prefetchnta + // It will be used only when AllocatePrefetchStyle > 0 + + intx count = AllocatePrefetchDistance; + if (count < 0) { // default ? + if (is_amd()) { // AMD + if (supports_sse2()) + count = 256; // Opteron + else + count = 128; // Athlon + } else { // Intel + if (supports_sse2()) + if (cpu_family() == 6) { + count = 256; // Pentium M, Core, Core2 + } else { + count = 512; // Pentium 4 + } + else + count = 128; // Pentium 3 (and all other old CPUs) + } + } + return count; + } + static intx allocate_prefetch_style() { + assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); + // Return 0 if AllocatePrefetchDistance was not defined. + return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; + } + + // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from + // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. + // Tested intervals from 128 to 2048 in increments of 64 == one cache line. + // 256 bytes (4 dcache lines) was the nearest runner-up to 576. + + // gc copy/scan is disabled if prefetchw isn't supported, because + // Prefetch::write emits an inlined prefetchw on Linux. + // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. + // The used prefetcht0 instruction works for both amd64 and em64t. + static intx prefetch_copy_interval_in_bytes() { + intx interval = PrefetchCopyIntervalInBytes; + return interval >= 0 ? interval : 576; + } + static intx prefetch_scan_interval_in_bytes() { + intx interval = PrefetchScanIntervalInBytes; + return interval >= 0 ? interval : 576; + } + static intx prefetch_fields_ahead() { + intx count = PrefetchFieldsAhead; + return count >= 0 ? count : 1; + } +}; diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/vm_version_x86_32.cpp --- a/src/cpu/x86/vm/vm_version_x86_32.cpp Mon Mar 09 13:34:00 2009 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,472 +0,0 @@ -/* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - */ - -# include "incls/_precompiled.incl" -# include "incls/_vm_version_x86_32.cpp.incl" - - -int VM_Version::_cpu; -int VM_Version::_model; -int VM_Version::_stepping; -int VM_Version::_cpuFeatures; -const char* VM_Version::_features_str = ""; -VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; - -static BufferBlob* stub_blob; -static const int stub_size = 300; - -extern "C" { - typedef void (*getPsrInfo_stub_t)(void*); -} -static getPsrInfo_stub_t getPsrInfo_stub = NULL; - - -class VM_Version_StubGenerator: public StubCodeGenerator { - public: - - VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} - - address generate_getPsrInfo() { - // Flags to test CPU type. - const uint32_t EFL_AC = 0x40000; - const uint32_t EFL_ID = 0x200000; - // Values for when we don't have a CPUID instruction. - const int CPU_FAMILY_SHIFT = 8; - const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); - const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); - - Label detect_486, cpu486, detect_586, std_cpuid1; - Label ext_cpuid1, ext_cpuid5, done; - - StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); -# define __ _masm-> - - address start = __ pc(); - - // - // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); - // - __ push(rbp); - __ movptr(rbp, Address(rsp, 8)); // cpuid_info address - __ push(rbx); - __ push(rsi); - __ pushf(); // preserve rbx, and flags - __ pop(rax); - __ push(rax); - __ mov(rcx, rax); - // - // if we are unable to change the AC flag, we have a 386 - // - __ xorl(rax, EFL_AC); - __ push(rax); - __ popf(); - __ pushf(); - __ pop(rax); - __ cmpptr(rax, rcx); - __ jccb(Assembler::notEqual, detect_486); - - __ movl(rax, CPU_FAMILY_386); - __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); - __ jmp(done); - - // - // If we are unable to change the ID flag, we have a 486 which does - // not support the "cpuid" instruction. - // - __ bind(detect_486); - __ mov(rax, rcx); - __ xorl(rax, EFL_ID); - __ push(rax); - __ popf(); - __ pushf(); - __ pop(rax); - __ cmpptr(rcx, rax); - __ jccb(Assembler::notEqual, detect_586); - - __ bind(cpu486); - __ movl(rax, CPU_FAMILY_486); - __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); - __ jmp(done); - - // - // at this point, we have a chip which supports the "cpuid" instruction - // - __ bind(detect_586); - __ xorptr(rax, rax); - __ cpuid(); - __ orptr(rax, rax); - __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input - // value of at least 1, we give up and - // assume a 486 - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - __ cmpl(rax, 3); // Is cpuid(0x4) supported? - __ jccb(Assembler::belowEqual, std_cpuid1); - - // - // cpuid(0x4) Deterministic cache params - // - __ movl(rax, 4); // and rcx already set to 0x0 - __ xorl(rcx, rcx); - __ cpuid(); - __ push(rax); - __ andl(rax, 0x1f); // Determine if valid cache parameters used - __ orl(rax, rax); // rax,[4:0] == 0 indicates invalid cache - __ pop(rax); - __ jccb(Assembler::equal, std_cpuid1); - - __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Standard cpuid(0x1) - // - __ bind(std_cpuid1); - __ movl(rax, 1); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - __ movl(rax, 0x80000000); - __ cpuid(); - __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? - __ jcc(Assembler::belowEqual, done); - __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? - __ jccb(Assembler::belowEqual, ext_cpuid1); - __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? - __ jccb(Assembler::belowEqual, ext_cpuid5); - // - // Extended cpuid(0x80000008) - // - __ movl(rax, 0x80000008); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Extended cpuid(0x80000005) - // - __ bind(ext_cpuid5); - __ movl(rax, 0x80000005); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Extended cpuid(0x80000001) - // - __ bind(ext_cpuid1); - __ movl(rax, 0x80000001); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // return - // - __ bind(done); - __ popf(); - __ pop(rsi); - __ pop(rbx); - __ pop(rbp); - __ ret(0); - -# undef __ - - return start; - }; -}; - - -void VM_Version::get_processor_features() { - - _cpu = 4; // 486 by default - _model = 0; - _stepping = 0; - _cpuFeatures = 0; - _logical_processors_per_package = 1; - if (!Use486InstrsOnly) { - // Get raw processor info - getPsrInfo_stub(&_cpuid_info); - assert_is_initialized(); - _cpu = extended_cpu_family(); - _model = extended_cpu_model(); - _stepping = cpu_stepping(); - if (cpu_family() > 4) { // it supports CPUID - _cpuFeatures = feature_flags(); - // Logical processors are only available on P4s and above, - // and only if hyperthreading is available. - _logical_processors_per_package = logical_processor_count(); - } - } - _supports_cx8 = supports_cmpxchg8(); - // if the OS doesn't support SSE, we can't use this feature even if the HW does - if( !os::supports_sse()) - _cpuFeatures &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2); - if (UseSSE < 4) { - _cpuFeatures &= ~CPU_SSE4_1; - _cpuFeatures &= ~CPU_SSE4_2; - } - if (UseSSE < 3) { - _cpuFeatures &= ~CPU_SSE3; - _cpuFeatures &= ~CPU_SSSE3; - _cpuFeatures &= ~CPU_SSE4A; - } - if (UseSSE < 2) - _cpuFeatures &= ~CPU_SSE2; - if (UseSSE < 1) - _cpuFeatures &= ~CPU_SSE; - - if (logical_processors_per_package() == 1) { - // HT processor could be installed on a system which doesn't support HT. - _cpuFeatures &= ~CPU_HT; - } - - char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - cores_per_cpu(), threads_per_core(), - cpu_family(), _model, _stepping, - (supports_cmov() ? ", cmov" : ""), - (supports_cmpxchg8() ? ", cx8" : ""), - (supports_fxsr() ? ", fxsr" : ""), - (supports_mmx() ? ", mmx" : ""), - (supports_sse() ? ", sse" : ""), - (supports_sse2() ? ", sse2" : ""), - (supports_sse3() ? ", sse3" : ""), - (supports_ssse3()? ", ssse3": ""), - (supports_sse4_1() ? ", sse4.1" : ""), - (supports_sse4_2() ? ", sse4.2" : ""), - (supports_mmx_ext() ? ", mmxext" : ""), - (supports_3dnow() ? ", 3dnow" : ""), - (supports_3dnow2() ? ", 3dnowext" : ""), - (supports_sse4a() ? ", sse4a": ""), - (supports_ht() ? ", ht": "")); - _features_str = strdup(buf); - - // UseSSE is set to the smaller of what hardware supports and what - // the command line requires. I.e., you cannot set UseSSE to 2 on - // older Pentiums which do not support it. - if( UseSSE > 4 ) UseSSE=4; - if( UseSSE < 0 ) UseSSE=0; - if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support - UseSSE = MIN2((intx)3,UseSSE); - if( !supports_sse3() ) // Drop to 2 if no SSE3 support - UseSSE = MIN2((intx)2,UseSSE); - if( !supports_sse2() ) // Drop to 1 if no SSE2 support - UseSSE = MIN2((intx)1,UseSSE); - if( !supports_sse () ) // Drop to 0 if no SSE support - UseSSE = 0; - - // On new cpus instructions which update whole XMM register should be used - // to prevent partial register stall due to dependencies on high half. - // - // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) - // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) - // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). - // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). - - if( is_amd() ) { // AMD cpus specific settings - if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) { - // Use it on new AMD cpus starting from Opteron. - UseAddressNop = true; - } - if( supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift) ) { - // Use it on new AMD cpus starting from Opteron. - UseNewLongLShift = true; - } - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { - if( supports_sse4a() ) { - UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron - } else { - UseXmmLoadAndClearUpper = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { - if( supports_sse4a() ) { - UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' - } else { - UseXmmRegToRegMoveAll = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmI2F) ) { - if( supports_sse4a() ) { - UseXmmI2F = true; - } else { - UseXmmI2F = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmI2D) ) { - if( supports_sse4a() ) { - UseXmmI2D = true; - } else { - UseXmmI2D = false; - } - } - } - - if( is_intel() ) { // Intel cpus specific settings - if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { - UseStoreImmI16 = false; // don't use it on Intel cpus - } - if( cpu_family() == 6 || cpu_family() == 15 ) { - if( FLAG_IS_DEFAULT(UseAddressNop) ) { - // Use it on all Intel cpus starting from PentiumPro - UseAddressNop = true; - } - } - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { - UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus - } - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { - if( supports_sse3() ) { - UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus - } else { - UseXmmRegToRegMoveAll = false; - } - } - if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus -#ifdef COMPILER2 - if( FLAG_IS_DEFAULT(MaxLoopPad) ) { - // For new Intel cpus do the next optimization: - // don't align the beginning of a loop if there are enough instructions - // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) - // in current fetch line (OptoLoopAlignment) or the padding - // is big (> MaxLoopPad). - // Set MaxLoopPad to 11 for new Intel cpus to reduce number of - // generated NOP instructions. 11 is the largest size of one - // address NOP instruction '0F 1F' (see Assembler::nop(i)). - MaxLoopPad = 11; - } -#endif // COMPILER2 - if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { - UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus - } - if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus - if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { - UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus - } - } - } - } - - assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); - assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); - - // set valid Prefetch instruction - if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; - if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; - if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; - if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3; - - if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; - if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; - if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; - if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3; - - // Allocation prefetch settings - intx cache_line_size = L1_data_cache_line_size(); - if( cache_line_size > AllocatePrefetchStepSize ) - AllocatePrefetchStepSize = cache_line_size; - if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) - AllocatePrefetchLines = 3; // Optimistic value - assert(AllocatePrefetchLines > 0, "invalid value"); - if( AllocatePrefetchLines < 1 ) // set valid value in product VM - AllocatePrefetchLines = 1; // Conservative value - - AllocatePrefetchDistance = allocate_prefetch_distance(); - AllocatePrefetchStyle = allocate_prefetch_style(); - - if( AllocatePrefetchStyle == 2 && is_intel() && - cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core - AllocatePrefetchDistance = 320; - } - assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); - -#ifndef PRODUCT - if (PrintMiscellaneous && Verbose) { - tty->print_cr("Logical CPUs per core: %u", - logical_processors_per_package()); - tty->print_cr("UseSSE=%d",UseSSE); - tty->print("Allocation: "); - if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) { - tty->print_cr("no prefetching"); - } else { - if (UseSSE == 0 && supports_3dnow()) { - tty->print("PREFETCHW"); - } else if (UseSSE >= 1) { - if (AllocatePrefetchInstr == 0) { - tty->print("PREFETCHNTA"); - } else if (AllocatePrefetchInstr == 1) { - tty->print("PREFETCHT0"); - } else if (AllocatePrefetchInstr == 2) { - tty->print("PREFETCHT2"); - } else if (AllocatePrefetchInstr == 3) { - tty->print("PREFETCHW"); - } - } - if (AllocatePrefetchLines > 1) { - tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); - } else { - tty->print_cr(" %d, one line", AllocatePrefetchDistance); - } - } - } -#endif // !PRODUCT -} - -void VM_Version::initialize() { - ResourceMark rm; - // Making this stub must be FIRST use of assembler - - stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); - if (stub_blob == NULL) { - vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); - } - CodeBuffer c(stub_blob->instructions_begin(), - stub_blob->instructions_size()); - VM_Version_StubGenerator g(&c); - getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, - g.generate_getPsrInfo()); - - get_processor_features(); -} diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/vm_version_x86_32.hpp --- a/src/cpu/x86/vm/vm_version_x86_32.hpp Mon Mar 09 13:34:00 2009 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,439 +0,0 @@ -/* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - */ - -class VM_Version: public Abstract_VM_Version { -public: - // cpuid result register layouts. These are all unions of a uint32_t - // (in case anyone wants access to the register as a whole) and a bitfield. - - union StdCpuid1Eax { - uint32_t value; - struct { - uint32_t stepping : 4, - model : 4, - family : 4, - proc_type : 2, - : 2, - ext_model : 4, - ext_family : 8, - : 4; - } bits; - }; - - union StdCpuid1Ebx { // example, unused - uint32_t value; - struct { - uint32_t brand_id : 8, - clflush_size : 8, - threads_per_cpu : 8, - apic_id : 8; - } bits; - }; - - union StdCpuid1Ecx { - uint32_t value; - struct { - uint32_t sse3 : 1, - : 2, - monitor : 1, - : 1, - vmx : 1, - : 1, - est : 1, - : 1, - ssse3 : 1, - cid : 1, - : 2, - cmpxchg16: 1, - : 4, - dca : 1, - sse4_1 : 1, - sse4_2 : 1, - : 11; - } bits; - }; - - union StdCpuid1Edx { - uint32_t value; - struct { - uint32_t : 4, - tsc : 1, - : 3, - cmpxchg8 : 1, - : 6, - cmov : 1, - : 7, - mmx : 1, - fxsr : 1, - sse : 1, - sse2 : 1, - : 1, - ht : 1, - : 3; - } bits; - }; - - union DcpCpuid4Eax { - uint32_t value; - struct { - uint32_t cache_type : 5, - : 21, - cores_per_cpu : 6; - } bits; - }; - - union DcpCpuid4Ebx { - uint32_t value; - struct { - uint32_t L1_line_size : 12, - partitions : 10, - associativity : 10; - } bits; - }; - - union ExtCpuid1Ecx { - uint32_t value; - struct { - uint32_t LahfSahf : 1, - CmpLegacy : 1, - : 4, - abm : 1, - sse4a : 1, - misalignsse : 1, - prefetchw : 1, - : 22; - } bits; - }; - - union ExtCpuid1Edx { - uint32_t value; - struct { - uint32_t : 22, - mmx_amd : 1, - mmx : 1, - fxsr : 1, - : 4, - long_mode : 1, - tdnow2 : 1, - tdnow : 1; - } bits; - }; - - union ExtCpuid5Ex { - uint32_t value; - struct { - uint32_t L1_line_size : 8, - L1_tag_lines : 8, - L1_assoc : 8, - L1_size : 8; - } bits; - }; - - union ExtCpuid8Ecx { - uint32_t value; - struct { - uint32_t cores_per_cpu : 8, - : 24; - } bits; - }; - -protected: - static int _cpu; - static int _model; - static int _stepping; - static int _cpuFeatures; // features returned by the "cpuid" instruction - // 0 if this instruction is not available - static const char* _features_str; - - enum { - CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) - CPU_CMOV = (1 << 1), - CPU_FXSR = (1 << 2), - CPU_HT = (1 << 3), - CPU_MMX = (1 << 4), - CPU_3DNOW= (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX) - CPU_SSE = (1 << 6), - CPU_SSE2 = (1 << 7), - CPU_SSE3 = (1 << 8), // sse3 comes from cpuid 1 (ECX) - CPU_SSSE3= (1 << 9), - CPU_SSE4A= (1 <<10), - CPU_SSE4_1 = (1 << 11), - CPU_SSE4_2 = (1 << 12) - } cpuFeatureFlags; - - // cpuid information block. All info derived from executing cpuid with - // various function numbers is stored here. Intel and AMD info is - // merged in this block: accessor methods disentangle it. - // - // The info block is laid out in subblocks of 4 dwords corresponding to - // rax, rbx, rcx and rdx, whether or not they contain anything useful. - struct CpuidInfo { - // cpuid function 0 - uint32_t std_max_function; - uint32_t std_vendor_name_0; - uint32_t std_vendor_name_1; - uint32_t std_vendor_name_2; - - // cpuid function 1 - StdCpuid1Eax std_cpuid1_rax; - StdCpuid1Ebx std_cpuid1_rbx; - StdCpuid1Ecx std_cpuid1_rcx; - StdCpuid1Edx std_cpuid1_rdx; - - // cpuid function 4 (deterministic cache parameters) - DcpCpuid4Eax dcp_cpuid4_rax; - DcpCpuid4Ebx dcp_cpuid4_rbx; - uint32_t dcp_cpuid4_rcx; // unused currently - uint32_t dcp_cpuid4_rdx; // unused currently - - // cpuid function 0x80000000 // example, unused - uint32_t ext_max_function; - uint32_t ext_vendor_name_0; - uint32_t ext_vendor_name_1; - uint32_t ext_vendor_name_2; - - // cpuid function 0x80000001 - uint32_t ext_cpuid1_rax; // reserved - uint32_t ext_cpuid1_rbx; // reserved - ExtCpuid1Ecx ext_cpuid1_rcx; - ExtCpuid1Edx ext_cpuid1_rdx; - - // cpuid functions 0x80000002 thru 0x80000004: example, unused - uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; - uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; - uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; - - // cpuid function 0x80000005 //AMD L1, Intel reserved - uint32_t ext_cpuid5_rax; // unused currently - uint32_t ext_cpuid5_rbx; // reserved - ExtCpuid5Ex ext_cpuid5_rcx; // L1 data cache info (AMD) - ExtCpuid5Ex ext_cpuid5_rdx; // L1 instruction cache info (AMD) - - // cpuid function 0x80000008 - uint32_t ext_cpuid8_rax; // unused currently - uint32_t ext_cpuid8_rbx; // reserved - ExtCpuid8Ecx ext_cpuid8_rcx; - uint32_t ext_cpuid8_rdx; // reserved - }; - - // The actual cpuid info block - static CpuidInfo _cpuid_info; - - // Extractors and predicates - static uint32_t extended_cpu_family() { - uint32_t result = _cpuid_info.std_cpuid1_rax.bits.family; - result += _cpuid_info.std_cpuid1_rax.bits.ext_family; - return result; - } - static uint32_t extended_cpu_model() { - uint32_t result = _cpuid_info.std_cpuid1_rax.bits.model; - result |= _cpuid_info.std_cpuid1_rax.bits.ext_model << 4; - return result; - } - static uint32_t cpu_stepping() { - uint32_t result = _cpuid_info.std_cpuid1_rax.bits.stepping; - return result; - } - static uint logical_processor_count() { - uint result = threads_per_core(); - return result; - } - static uint32_t feature_flags() { - uint32_t result = 0; - if (_cpuid_info.std_cpuid1_rdx.bits.cmpxchg8 != 0) - result |= CPU_CX8; - if (_cpuid_info.std_cpuid1_rdx.bits.cmov != 0) - result |= CPU_CMOV; - if (_cpuid_info.std_cpuid1_rdx.bits.fxsr != 0 || is_amd() && - _cpuid_info.ext_cpuid1_rdx.bits.fxsr != 0) - result |= CPU_FXSR; - // HT flag is set for multi-core processors also. - if (threads_per_core() > 1) - result |= CPU_HT; - if (_cpuid_info.std_cpuid1_rdx.bits.mmx != 0 || is_amd() && - _cpuid_info.ext_cpuid1_rdx.bits.mmx != 0) - result |= CPU_MMX; - if (is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow != 0) - result |= CPU_3DNOW; - if (_cpuid_info.std_cpuid1_rdx.bits.sse != 0) - result |= CPU_SSE; - if (_cpuid_info.std_cpuid1_rdx.bits.sse2 != 0) - result |= CPU_SSE2; - if (_cpuid_info.std_cpuid1_rcx.bits.sse3 != 0) - result |= CPU_SSE3; - if (_cpuid_info.std_cpuid1_rcx.bits.ssse3 != 0) - result |= CPU_SSSE3; - if (is_amd() && _cpuid_info.ext_cpuid1_rcx.bits.sse4a != 0) - result |= CPU_SSE4A; - if (_cpuid_info.std_cpuid1_rcx.bits.sse4_1 != 0) - result |= CPU_SSE4_1; - if (_cpuid_info.std_cpuid1_rcx.bits.sse4_2 != 0) - result |= CPU_SSE4_2; - return result; - } - - static void get_processor_features(); - -public: - // Offsets for cpuid asm stub - static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } - static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_rax); } - static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_rax); } - static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_rax); } - static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_rax); } - static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_rax); } - - // Initialization - static void initialize(); - - // Asserts - static void assert_is_initialized() { - assert(_cpuid_info.std_cpuid1_rax.bits.family != 0, "VM_Version not initialized"); - } - - // - // Processor family: - // 3 - 386 - // 4 - 486 - // 5 - Pentium - // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, - // Pentium M, Core Solo, Core Duo, Core2 Duo - // family 6 model: 9, 13, 14, 15 - // 0x0f - Pentium 4, Opteron - // - // Note: The cpu family should be used to select between - // instruction sequences which are valid on all Intel - // processors. Use the feature test functions below to - // determine whether a particular instruction is supported. - // - static int cpu_family() { return _cpu;} - static bool is_P6() { return cpu_family() >= 6; } - - static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' - static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' - - static uint cores_per_cpu() { - uint result = 1; - if (is_intel()) { - result = (_cpuid_info.dcp_cpuid4_rax.bits.cores_per_cpu + 1); - } else if (is_amd()) { - result = (_cpuid_info.ext_cpuid8_rcx.bits.cores_per_cpu + 1); - } - return result; - } - - static uint threads_per_core() { - uint result = 1; - if (_cpuid_info.std_cpuid1_rdx.bits.ht != 0) { - result = _cpuid_info.std_cpuid1_rbx.bits.threads_per_cpu / - cores_per_cpu(); - } - return result; - } - - static intx L1_data_cache_line_size() { - intx result = 0; - if (is_intel()) { - result = (_cpuid_info.dcp_cpuid4_rbx.bits.L1_line_size + 1); - } else if (is_amd()) { - result = _cpuid_info.ext_cpuid5_rcx.bits.L1_line_size; - } - if (result < 32) // not defined ? - result = 32; // 32 bytes by default on x86 - return result; - } - - // - // Feature identification - // - static bool supports_cpuid() { return _cpuFeatures != 0; } - static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } - static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } - static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } - static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } - static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } - static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } - static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } - static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } - static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } - static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } - static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } - // - // AMD features - // - static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } - static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.mmx_amd != 0; } - static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_rdx.bits.tdnow2 != 0; } - static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } - - static bool supports_compare_and_exchange() { return true; } - - static const char* cpu_features() { return _features_str; } - - static intx allocate_prefetch_distance() { - // This method should be called before allocate_prefetch_style(). - // - // Hardware prefetching (distance/size in bytes): - // Pentium 3 - 64 / 32 - // Pentium 4 - 256 / 128 - // Athlon - 64 / 32 ???? - // Opteron - 128 / 64 only when 2 sequential cache lines accessed - // Core - 128 / 64 - // - // Software prefetching (distance in bytes / instruction with best score): - // Pentium 3 - 128 / prefetchnta - // Pentium 4 - 512 / prefetchnta - // Athlon - 128 / prefetchnta - // Opteron - 256 / prefetchnta - // Core - 256 / prefetchnta - // It will be used only when AllocatePrefetchStyle > 0 - - intx count = AllocatePrefetchDistance; - if (count < 0) { // default ? - if (is_amd()) { // AMD - if (supports_sse2()) - count = 256; // Opteron - else - count = 128; // Athlon - } else { // Intel - if (supports_sse2()) - if (cpu_family() == 6) { - count = 256; // Pentium M, Core, Core2 - } else { - count = 512; // Pentium 4 - } - else - count = 128; // Pentium 3 (and all other old CPUs) - } - } - return count; - } - static intx allocate_prefetch_style() { - assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); - // Return 0 if AllocatePrefetchDistance was not defined or - // prefetch instruction is not supported. - return (AllocatePrefetchDistance > 0 && - (supports_3dnow() || supports_sse())) ? AllocatePrefetchStyle : 0; - } -}; diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/vm_version_x86_64.cpp --- a/src/cpu/x86/vm/vm_version_x86_64.cpp Mon Mar 09 13:34:00 2009 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,419 +0,0 @@ -/* - * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - */ - -# include "incls/_precompiled.incl" -# include "incls/_vm_version_x86_64.cpp.incl" - -int VM_Version::_cpu; -int VM_Version::_model; -int VM_Version::_stepping; -int VM_Version::_cpuFeatures; -const char* VM_Version::_features_str = ""; -VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; - -static BufferBlob* stub_blob; -static const int stub_size = 300; - -extern "C" { - typedef void (*getPsrInfo_stub_t)(void*); -} -static getPsrInfo_stub_t getPsrInfo_stub = NULL; - - -class VM_Version_StubGenerator: public StubCodeGenerator { - public: - - VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} - - address generate_getPsrInfo() { - - Label std_cpuid1, ext_cpuid1, ext_cpuid5, done; - - StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); -# define __ _masm-> - - address start = __ pc(); - - // - // void getPsrInfo(VM_Version::CpuidInfo* cpuid_info); - // - // rcx and rdx are first and second argument registers on windows - - __ push(rbp); - __ mov(rbp, c_rarg0); // cpuid_info address - __ push(rbx); - __ push(rsi); - - // - // we have a chip which supports the "cpuid" instruction - // - __ xorl(rax, rax); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - __ cmpl(rax, 3); // Is cpuid(0x4) supported? - __ jccb(Assembler::belowEqual, std_cpuid1); - - // - // cpuid(0x4) Deterministic cache params - // - __ movl(rax, 4); - __ xorl(rcx, rcx); // L1 cache - __ cpuid(); - __ push(rax); - __ andl(rax, 0x1f); // Determine if valid cache parameters used - __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache - __ pop(rax); - __ jccb(Assembler::equal, std_cpuid1); - - __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Standard cpuid(0x1) - // - __ bind(std_cpuid1); - __ movl(rax, 1); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - __ movl(rax, 0x80000000); - __ cpuid(); - __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? - __ jcc(Assembler::belowEqual, done); - __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? - __ jccb(Assembler::belowEqual, ext_cpuid1); - __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? - __ jccb(Assembler::belowEqual, ext_cpuid5); - // - // Extended cpuid(0x80000008) - // - __ movl(rax, 0x80000008); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Extended cpuid(0x80000005) - // - __ bind(ext_cpuid5); - __ movl(rax, 0x80000005); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // Extended cpuid(0x80000001) - // - __ bind(ext_cpuid1); - __ movl(rax, 0x80000001); - __ cpuid(); - __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); - __ movl(Address(rsi, 0), rax); - __ movl(Address(rsi, 4), rbx); - __ movl(Address(rsi, 8), rcx); - __ movl(Address(rsi,12), rdx); - - // - // return - // - __ bind(done); - __ pop(rsi); - __ pop(rbx); - __ pop(rbp); - __ ret(0); - -# undef __ - - return start; - }; -}; - - -void VM_Version::get_processor_features() { - - _logical_processors_per_package = 1; - // Get raw processor info - getPsrInfo_stub(&_cpuid_info); - assert_is_initialized(); - _cpu = extended_cpu_family(); - _model = extended_cpu_model(); - _stepping = cpu_stepping(); - _cpuFeatures = feature_flags(); - // Logical processors are only available on P4s and above, - // and only if hyperthreading is available. - _logical_processors_per_package = logical_processor_count(); - _supports_cx8 = supports_cmpxchg8(); - // OS should support SSE for x64 and hardware should support at least SSE2. - if (!VM_Version::supports_sse2()) { - vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); - } - if (UseSSE < 4) { - _cpuFeatures &= ~CPU_SSE4_1; - _cpuFeatures &= ~CPU_SSE4_2; - } - if (UseSSE < 3) { - _cpuFeatures &= ~CPU_SSE3; - _cpuFeatures &= ~CPU_SSSE3; - _cpuFeatures &= ~CPU_SSE4A; - } - if (UseSSE < 2) - _cpuFeatures &= ~CPU_SSE2; - if (UseSSE < 1) - _cpuFeatures &= ~CPU_SSE; - - if (logical_processors_per_package() == 1) { - // HT processor could be installed on a system which doesn't support HT. - _cpuFeatures &= ~CPU_HT; - } - - char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", - cores_per_cpu(), threads_per_core(), - cpu_family(), _model, _stepping, - (supports_cmov() ? ", cmov" : ""), - (supports_cmpxchg8() ? ", cx8" : ""), - (supports_fxsr() ? ", fxsr" : ""), - (supports_mmx() ? ", mmx" : ""), - (supports_sse() ? ", sse" : ""), - (supports_sse2() ? ", sse2" : ""), - (supports_sse3() ? ", sse3" : ""), - (supports_ssse3()? ", ssse3": ""), - (supports_sse4_1() ? ", sse4.1" : ""), - (supports_sse4_2() ? ", sse4.2" : ""), - (supports_mmx_ext() ? ", mmxext" : ""), - (supports_3dnow() ? ", 3dnow" : ""), - (supports_3dnow2() ? ", 3dnowext" : ""), - (supports_sse4a() ? ", sse4a": ""), - (supports_ht() ? ", ht": "")); - _features_str = strdup(buf); - - // UseSSE is set to the smaller of what hardware supports and what - // the command line requires. I.e., you cannot set UseSSE to 2 on - // older Pentiums which do not support it. - if( UseSSE > 4 ) UseSSE=4; - if( UseSSE < 0 ) UseSSE=0; - if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support - UseSSE = MIN2((intx)3,UseSSE); - if( !supports_sse3() ) // Drop to 2 if no SSE3 support - UseSSE = MIN2((intx)2,UseSSE); - if( !supports_sse2() ) // Drop to 1 if no SSE2 support - UseSSE = MIN2((intx)1,UseSSE); - if( !supports_sse () ) // Drop to 0 if no SSE support - UseSSE = 0; - - // On new cpus instructions which update whole XMM register should be used - // to prevent partial register stall due to dependencies on high half. - // - // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) - // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) - // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). - // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). - - if( is_amd() ) { // AMD cpus specific settings - if( FLAG_IS_DEFAULT(UseAddressNop) ) { - // Use it on all AMD cpus starting from Opteron (don't need - // a cpu check since only Opteron and new cpus support 64-bits mode). - UseAddressNop = true; - } - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { - if( supports_sse4a() ) { - UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron - } else { - UseXmmLoadAndClearUpper = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { - if( supports_sse4a() ) { - UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' - } else { - UseXmmRegToRegMoveAll = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmI2F) ) { - if( supports_sse4a() ) { - UseXmmI2F = true; - } else { - UseXmmI2F = false; - } - } - if( FLAG_IS_DEFAULT(UseXmmI2D) ) { - if( supports_sse4a() ) { - UseXmmI2D = true; - } else { - UseXmmI2D = false; - } - } - } - - if( is_intel() ) { // Intel cpus specific settings - if( FLAG_IS_DEFAULT(UseStoreImmI16) ) { - UseStoreImmI16 = false; // don't use it on Intel cpus - } - if( FLAG_IS_DEFAULT(UseAddressNop) ) { - // Use it on all Intel cpus starting from PentiumPro - // (don't need a cpu check since only new cpus support 64-bits mode). - UseAddressNop = true; - } - if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) { - UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus - } - if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) { - if( supports_sse3() ) { - UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus - } else { - UseXmmRegToRegMoveAll = false; - } - } - if( cpu_family() == 6 && supports_sse3() ) { // New Intel cpus -#ifdef COMPILER2 - if( FLAG_IS_DEFAULT(MaxLoopPad) ) { - // For new Intel cpus do the next optimization: - // don't align the beginning of a loop if there are enough instructions - // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) - // in current fetch line (OptoLoopAlignment) or the padding - // is big (> MaxLoopPad). - // Set MaxLoopPad to 11 for new Intel cpus to reduce number of - // generated NOP instructions. 11 is the largest size of one - // address NOP instruction '0F 1F' (see Assembler::nop(i)). - MaxLoopPad = 11; - } -#endif // COMPILER2 - if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) { - UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus - } - if( supports_sse4_2() && supports_ht() ) { // Newest Intel cpus - if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) { - UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus - } - } - } - } - - assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); - assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); - - // set valid Prefetch instruction - if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0; - if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3; - if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0; - - if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0; - if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3; - if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0; - - // Allocation prefetch settings - intx cache_line_size = L1_data_cache_line_size(); - if( cache_line_size > AllocatePrefetchStepSize ) - AllocatePrefetchStepSize = cache_line_size; - if( FLAG_IS_DEFAULT(AllocatePrefetchLines) ) - AllocatePrefetchLines = 3; // Optimistic value - assert(AllocatePrefetchLines > 0, "invalid value"); - if( AllocatePrefetchLines < 1 ) // set valid value in product VM - AllocatePrefetchLines = 1; // Conservative value - - AllocatePrefetchDistance = allocate_prefetch_distance(); - AllocatePrefetchStyle = allocate_prefetch_style(); - - if( AllocatePrefetchStyle == 2 && is_intel() && - cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core - AllocatePrefetchDistance = 384; - } - assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value"); - - // Prefetch settings - PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); - PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes(); - PrefetchFieldsAhead = prefetch_fields_ahead(); - -#ifndef PRODUCT - if (PrintMiscellaneous && Verbose) { - tty->print_cr("Logical CPUs per core: %u", - logical_processors_per_package()); - tty->print_cr("UseSSE=%d",UseSSE); - tty->print("Allocation: "); - if (AllocatePrefetchStyle <= 0) { - tty->print_cr("no prefetching"); - } else { - if (AllocatePrefetchInstr == 0) { - tty->print("PREFETCHNTA"); - } else if (AllocatePrefetchInstr == 1) { - tty->print("PREFETCHT0"); - } else if (AllocatePrefetchInstr == 2) { - tty->print("PREFETCHT2"); - } else if (AllocatePrefetchInstr == 3) { - tty->print("PREFETCHW"); - } - if (AllocatePrefetchLines > 1) { - tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); - } else { - tty->print_cr(" %d, one line", AllocatePrefetchDistance); - } - } - if (PrefetchCopyIntervalInBytes > 0) { - tty->print_cr("PrefetchCopyIntervalInBytes %d", PrefetchCopyIntervalInBytes); - } - if (PrefetchScanIntervalInBytes > 0) { - tty->print_cr("PrefetchScanIntervalInBytes %d", PrefetchScanIntervalInBytes); - } - if (PrefetchFieldsAhead > 0) { - tty->print_cr("PrefetchFieldsAhead %d", PrefetchFieldsAhead); - } - } -#endif // !PRODUCT -} - -void VM_Version::initialize() { - ResourceMark rm; - // Making this stub must be FIRST use of assembler - - stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); - if (stub_blob == NULL) { - vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); - } - CodeBuffer c(stub_blob->instructions_begin(), - stub_blob->instructions_size()); - VM_Version_StubGenerator g(&c); - getPsrInfo_stub = CAST_TO_FN_PTR(getPsrInfo_stub_t, - g.generate_getPsrInfo()); - - get_processor_features(); -} diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/vm_version_x86_64.hpp --- a/src/cpu/x86/vm/vm_version_x86_64.hpp Mon Mar 09 13:34:00 2009 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,449 +0,0 @@ -/* - * Copyright 2003-2008 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - */ - -class VM_Version : public Abstract_VM_Version { -public: - // cpuid result register layouts. These are all unions of a uint32_t - // (in case anyone wants access to the register as a whole) and a bitfield. - - union StdCpuid1Eax { - uint32_t value; - struct { - uint32_t stepping : 4, - model : 4, - family : 4, - proc_type : 2, - : 2, - ext_model : 4, - ext_family : 8, - : 4; - } bits; - }; - - union StdCpuid1Ebx { // example, unused - uint32_t value; - struct { - uint32_t brand_id : 8, - clflush_size : 8, - threads_per_cpu : 8, - apic_id : 8; - } bits; - }; - - union StdCpuid1Ecx { - uint32_t value; - struct { - uint32_t sse3 : 1, - : 2, - monitor : 1, - : 1, - vmx : 1, - : 1, - est : 1, - : 1, - ssse3 : 1, - cid : 1, - : 2, - cmpxchg16: 1, - : 4, - dca : 1, - sse4_1 : 1, - sse4_2 : 1, - : 11; - } bits; - }; - - union StdCpuid1Edx { - uint32_t value; - struct { - uint32_t : 4, - tsc : 1, - : 3, - cmpxchg8 : 1, - : 6, - cmov : 1, - : 7, - mmx : 1, - fxsr : 1, - sse : 1, - sse2 : 1, - : 1, - ht : 1, - : 3; - } bits; - }; - - union DcpCpuid4Eax { - uint32_t value; - struct { - uint32_t cache_type : 5, - : 21, - cores_per_cpu : 6; - } bits; - }; - - union DcpCpuid4Ebx { - uint32_t value; - struct { - uint32_t L1_line_size : 12, - partitions : 10, - associativity : 10; - } bits; - }; - - union ExtCpuid1Edx { - uint32_t value; - struct { - uint32_t : 22, - mmx_amd : 1, - mmx : 1, - fxsr : 1, - : 4, - long_mode : 1, - tdnow2 : 1, - tdnow : 1; - } bits; - }; - - union ExtCpuid1Ecx { - uint32_t value; - struct { - uint32_t LahfSahf : 1, - CmpLegacy : 1, - : 4, - abm : 1, - sse4a : 1, - misalignsse : 1, - prefetchw : 1, - : 22; - } bits; - }; - - union ExtCpuid5Ex { - uint32_t value; - struct { - uint32_t L1_line_size : 8, - L1_tag_lines : 8, - L1_assoc : 8, - L1_size : 8; - } bits; - }; - - union ExtCpuid8Ecx { - uint32_t value; - struct { - uint32_t cores_per_cpu : 8, - : 24; - } bits; - }; - -protected: - static int _cpu; - static int _model; - static int _stepping; - static int _cpuFeatures; // features returned by the "cpuid" instruction - // 0 if this instruction is not available - static const char* _features_str; - - enum { - CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) - CPU_CMOV = (1 << 1), - CPU_FXSR = (1 << 2), - CPU_HT = (1 << 3), - CPU_MMX = (1 << 4), - CPU_3DNOW= (1 << 5), - CPU_SSE = (1 << 6), - CPU_SSE2 = (1 << 7), - CPU_SSE3 = (1 << 8), - CPU_SSSE3= (1 << 9), - CPU_SSE4A= (1 <<10), - CPU_SSE4_1 = (1 << 11), - CPU_SSE4_2 = (1 << 12) - } cpuFeatureFlags; - - // cpuid information block. All info derived from executing cpuid with - // various function numbers is stored here. Intel and AMD info is - // merged in this block: accessor methods disentangle it. - // - // The info block is laid out in subblocks of 4 dwords corresponding to - // eax, ebx, ecx and edx, whether or not they contain anything useful. - struct CpuidInfo { - // cpuid function 0 - uint32_t std_max_function; - uint32_t std_vendor_name_0; - uint32_t std_vendor_name_1; - uint32_t std_vendor_name_2; - - // cpuid function 1 - StdCpuid1Eax std_cpuid1_eax; - StdCpuid1Ebx std_cpuid1_ebx; - StdCpuid1Ecx std_cpuid1_ecx; - StdCpuid1Edx std_cpuid1_edx; - - // cpuid function 4 (deterministic cache parameters) - DcpCpuid4Eax dcp_cpuid4_eax; - DcpCpuid4Ebx dcp_cpuid4_ebx; - uint32_t dcp_cpuid4_ecx; // unused currently - uint32_t dcp_cpuid4_edx; // unused currently - - // cpuid function 0x80000000 // example, unused - uint32_t ext_max_function; - uint32_t ext_vendor_name_0; - uint32_t ext_vendor_name_1; - uint32_t ext_vendor_name_2; - - // cpuid function 0x80000001 - uint32_t ext_cpuid1_eax; // reserved - uint32_t ext_cpuid1_ebx; // reserved - ExtCpuid1Ecx ext_cpuid1_ecx; - ExtCpuid1Edx ext_cpuid1_edx; - - // cpuid functions 0x80000002 thru 0x80000004: example, unused - uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3; - uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7; - uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11; - - // cpuid function 0x80000005 //AMD L1, Intel reserved - uint32_t ext_cpuid5_eax; // unused currently - uint32_t ext_cpuid5_ebx; // reserved - ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) - ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) - - // cpuid function 0x80000008 - uint32_t ext_cpuid8_eax; // unused currently - uint32_t ext_cpuid8_ebx; // reserved - ExtCpuid8Ecx ext_cpuid8_ecx; - uint32_t ext_cpuid8_edx; // reserved - }; - - // The actual cpuid info block - static CpuidInfo _cpuid_info; - - // Extractors and predicates - static uint32_t extended_cpu_family() { - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.family; - result += _cpuid_info.std_cpuid1_eax.bits.ext_family; - return result; - } - static uint32_t extended_cpu_model() { - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; - result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; - return result; - } - static uint32_t cpu_stepping() { - uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; - return result; - } - static uint logical_processor_count() { - uint result = threads_per_core(); - return result; - } - static uint32_t feature_flags() { - uint32_t result = 0; - if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) - result |= CPU_CX8; - if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) - result |= CPU_CMOV; - if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || is_amd() && - _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0) - result |= CPU_FXSR; - // HT flag is set for multi-core processors also. - if (threads_per_core() > 1) - result |= CPU_HT; - if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || is_amd() && - _cpuid_info.ext_cpuid1_edx.bits.mmx != 0) - result |= CPU_MMX; - if (is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) - result |= CPU_3DNOW; - if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) - result |= CPU_SSE; - if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) - result |= CPU_SSE2; - if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) - result |= CPU_SSE3; - if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) - result |= CPU_SSSE3; - if (is_amd() && _cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) - result |= CPU_SSE4A; - if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) - result |= CPU_SSE4_1; - if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) - result |= CPU_SSE4_2; - return result; - } - - static void get_processor_features(); - -public: - // Offsets for cpuid asm stub - static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } - static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } - static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } - static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } - static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } - static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } - - // Initialization - static void initialize(); - - // Asserts - static void assert_is_initialized() { - assert(_cpuid_info.std_cpuid1_eax.bits.family != 0, "VM_Version not initialized"); - } - - // - // Processor family: - // 3 - 386 - // 4 - 486 - // 5 - Pentium - // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon, - // Pentium M, Core Solo, Core Duo, Core2 Duo - // family 6 model: 9, 13, 14, 15 - // 0x0f - Pentium 4, Opteron - // - // Note: The cpu family should be used to select between - // instruction sequences which are valid on all Intel - // processors. Use the feature test functions below to - // determine whether a particular instruction is supported. - // - static int cpu_family() { return _cpu;} - static bool is_P6() { return cpu_family() >= 6; } - - static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' - static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' - - static uint cores_per_cpu() { - uint result = 1; - if (is_intel()) { - result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); - } else if (is_amd()) { - result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); - } - return result; - } - - static uint threads_per_core() { - uint result = 1; - if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { - result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / - cores_per_cpu(); - } - return result; - } - - static intx L1_data_cache_line_size() { - intx result = 0; - if (is_intel()) { - result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); - } else if (is_amd()) { - result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; - } - if (result < 32) // not defined ? - result = 32; // 32 bytes by default for other x64 - return result; - } - - // - // Feature identification - // - static bool supports_cpuid() { return _cpuFeatures != 0; } - static bool supports_cmpxchg8() { return (_cpuFeatures & CPU_CX8) != 0; } - static bool supports_cmov() { return (_cpuFeatures & CPU_CMOV) != 0; } - static bool supports_fxsr() { return (_cpuFeatures & CPU_FXSR) != 0; } - static bool supports_ht() { return (_cpuFeatures & CPU_HT) != 0; } - static bool supports_mmx() { return (_cpuFeatures & CPU_MMX) != 0; } - static bool supports_sse() { return (_cpuFeatures & CPU_SSE) != 0; } - static bool supports_sse2() { return (_cpuFeatures & CPU_SSE2) != 0; } - static bool supports_sse3() { return (_cpuFeatures & CPU_SSE3) != 0; } - static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } - static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } - static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } - // - // AMD features - // - static bool supports_3dnow() { return (_cpuFeatures & CPU_3DNOW) != 0; } - static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } - static bool supports_3dnow2() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; } - static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } - - static bool supports_compare_and_exchange() { return true; } - - static const char* cpu_features() { return _features_str; } - - static intx allocate_prefetch_distance() { - // This method should be called before allocate_prefetch_style(). - // - // Hardware prefetching (distance/size in bytes): - // Pentium 4 - 256 / 128 - // Opteron - 128 / 64 only when 2 sequential cache lines accessed - // Core - 128 / 64 - // - // Software prefetching (distance in bytes / instruction with best score): - // Pentium 4 - 512 / prefetchnta - // Opteron - 256 / prefetchnta - // Core - 256 / prefetchnta - // It will be used only when AllocatePrefetchStyle > 0 - - intx count = AllocatePrefetchDistance; - if (count < 0) { // default ? - if (is_amd()) { // AMD - count = 256; // Opteron - } else { // Intel - if (cpu_family() == 6) { - count = 256;// Pentium M, Core, Core2 - } else { - count = 512;// Pentium 4 - } - } - } - return count; - } - static intx allocate_prefetch_style() { - assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive"); - // Return 0 if AllocatePrefetchDistance was not defined. - return AllocatePrefetchDistance > 0 ? AllocatePrefetchStyle : 0; - } - - // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from - // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. - // Tested intervals from 128 to 2048 in increments of 64 == one cache line. - // 256 bytes (4 dcache lines) was the nearest runner-up to 576. - - // gc copy/scan is disabled if prefetchw isn't supported, because - // Prefetch::write emits an inlined prefetchw on Linux. - // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. - // The used prefetcht0 instruction works for both amd64 and em64t. - static intx prefetch_copy_interval_in_bytes() { - intx interval = PrefetchCopyIntervalInBytes; - return interval >= 0 ? interval : 576; - } - static intx prefetch_scan_interval_in_bytes() { - intx interval = PrefetchScanIntervalInBytes; - return interval >= 0 ? interval : 576; - } - static intx prefetch_fields_ahead() { - intx count = PrefetchFieldsAhead; - return count >= 0 ? count : 1; - } -}; diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/vtableStubs_x86_32.cpp --- a/src/cpu/x86/vm/vtableStubs_x86_32.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/vtableStubs_x86_32.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -34,10 +34,16 @@ extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); #endif -// used by compiler only; may use only caller saved registers rax, rbx, rcx. -// rdx holds first int arg, rsi, rdi, rbp are callee-save & must be preserved. -// Leave receiver in rcx; required behavior when +OptoArgsInRegisters -// is modifed to put first oop in rcx. +// These stubs are used by the compiler only. +// Argument registers, which must be preserved: +// rcx - receiver (always first argument) +// rdx - second argument (if any) +// Other registers that might be usable: +// rax - inline cache register (is interface for itable stub) +// rbx - method (used when calling out to interpreter) +// Available now, but may become callee-save at some point: +// rsi, rdi +// Note that rax and rdx are also used for return values. // VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { const int i486_code_length = VtableStub::pd_code_size_limit(true); @@ -94,16 +100,25 @@ __ jmp( Address(method, methodOopDesc::from_compiled_offset())); masm->flush(); + + if (PrintMiscellaneous && (WizardMode || Verbose)) { + tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d", + vtable_index, s->entry_point(), + (int)(s->code_end() - s->entry_point()), + (int)(s->code_end() - __ pc())); + } + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + s->set_exception_points(npe_addr, ame_addr); return s; } -VtableStub* VtableStubs::create_itable_stub(int vtable_index) { +VtableStub* VtableStubs::create_itable_stub(int itable_index) { // Note well: pd_code_size_limit is the absolute minimum we can get away with. If you // add code here, bump the code stub size returned by pd_code_size_limit! const int i486_code_length = VtableStub::pd_code_size_limit(false); - VtableStub* s = new(i486_code_length) VtableStub(false, vtable_index); + VtableStub* s = new(i486_code_length) VtableStub(false, itable_index); ResourceMark rm; CodeBuffer cb(s->entry_point(), i486_code_length); MacroAssembler* masm = new MacroAssembler(&cb); @@ -123,50 +138,19 @@ // get receiver klass (also an implicit null-check) address npe_addr = __ pc(); - __ movptr(rbx, Address(rcx, oopDesc::klass_offset_in_bytes())); - - __ mov(rsi, rbx); // Save klass in free register - // Most registers are in use, so save a few - __ push(rdx); - // compute itable entry offset (in words) - const int base = instanceKlass::vtable_start_offset() * wordSize; - assert(vtableEntry::size() * wordSize == 4, "adjust the scaling in the code below"); - __ movl(rdx, Address(rbx, instanceKlass::vtable_length_offset() * wordSize)); // Get length of vtable - __ lea(rbx, Address(rbx, rdx, Address::times_ptr, base)); - if (HeapWordsPerLong > 1) { - // Round up to align_object_offset boundary - __ round_to(rbx, BytesPerLong); - } - - Label hit, next, entry, throw_icce; - - __ jmpb(entry); + __ movptr(rsi, Address(rcx, oopDesc::klass_offset_in_bytes())); - __ bind(next); - __ addptr(rbx, itableOffsetEntry::size() * wordSize); - - __ bind(entry); - - // If the entry is NULL then we've reached the end of the table - // without finding the expected interface, so throw an exception - __ movptr(rdx, Address(rbx, itableOffsetEntry::interface_offset_in_bytes())); - __ testptr(rdx, rdx); - __ jcc(Assembler::zero, throw_icce); - __ cmpptr(rax, rdx); - __ jcc(Assembler::notEqual, next); - - // We found a hit, move offset into rbx, - __ movl(rdx, Address(rbx, itableOffsetEntry::offset_offset_in_bytes())); - - // Compute itableMethodEntry. - const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) + itableMethodEntry::method_offset_in_bytes(); + // Most registers are in use; we'll use rax, rbx, rsi, rdi + // (If we need to make rsi, rdi callee-save, do a push/pop here.) + const Register method = rbx; + Label throw_icce; // Get methodOop and entrypoint for compiler - const Register method = rbx; - __ movptr(method, Address(rsi, rdx, Address::times_1, method_offset)); - - // Restore saved register, before possible trap. - __ pop(rdx); + __ lookup_interface_method(// inputs: rec. class, interface, itable index + rsi, rax, itable_index, + // outputs: method, scan temp. reg + method, rdi, + throw_icce); // method (rbx): methodOop // rcx: receiver @@ -187,12 +171,15 @@ __ jmp(Address(method, methodOopDesc::from_compiled_offset())); __ bind(throw_icce); - // Restore saved register - __ pop(rdx); __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); - masm->flush(); + if (PrintMiscellaneous && (WizardMode || Verbose)) { + tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d", + itable_index, s->entry_point(), + (int)(s->code_end() - s->entry_point()), + (int)(s->code_end() - __ pc())); + } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); s->set_exception_points(npe_addr, ame_addr); @@ -207,7 +194,7 @@ return (DebugVtables ? 210 : 16) + (CountCompiledCalls ? 6 : 0); } else { // Itable stub size - return (DebugVtables ? 144 : 64) + (CountCompiledCalls ? 6 : 0); + return (DebugVtables ? 256 : 66) + (CountCompiledCalls ? 6 : 0); } } diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/vtableStubs_x86_64.cpp --- a/src/cpu/x86/vm/vtableStubs_x86_64.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/vtableStubs_x86_64.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -98,17 +98,26 @@ __ jmp( Address(rbx, methodOopDesc::from_compiled_offset())); __ flush(); + + if (PrintMiscellaneous && (WizardMode || Verbose)) { + tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d", + vtable_index, s->entry_point(), + (int)(s->code_end() - s->entry_point()), + (int)(s->code_end() - __ pc())); + } + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + s->set_exception_points(npe_addr, ame_addr); return s; } -VtableStub* VtableStubs::create_itable_stub(int vtable_index) { +VtableStub* VtableStubs::create_itable_stub(int itable_index) { // Note well: pd_code_size_limit is the absolute minimum we can get // away with. If you add code here, bump the code stub size // returned by pd_code_size_limit! const int amd64_code_length = VtableStub::pd_code_size_limit(false); - VtableStub* s = new(amd64_code_length) VtableStub(false, vtable_index); + VtableStub* s = new(amd64_code_length) VtableStub(false, itable_index); ResourceMark rm; CodeBuffer cb(s->entry_point(), amd64_code_length); MacroAssembler* masm = new MacroAssembler(&cb); @@ -131,68 +140,28 @@ // get receiver klass (also an implicit null-check) address npe_addr = __ pc(); - __ load_klass(rbx, j_rarg0); + // Most registers are in use; we'll use rax, rbx, r10, r11 + // (various calling sequences use r[cd]x, r[sd]i, r[89]; stay away from them) + __ load_klass(r10, j_rarg0); // If we take a trap while this arg is on the stack we will not // be able to walk the stack properly. This is not an issue except // when there are mistakes in this assembly code that could generate // a spurious fault. Ask me how I know... - __ push(j_rarg1); // Most registers are in use, so save one - - // compute itable entry offset (in words) - const int base = instanceKlass::vtable_start_offset() * wordSize; - assert(vtableEntry::size() * wordSize == 8, - "adjust the scaling in the code below"); - // Get length of vtable - __ movl(j_rarg1, - Address(rbx, instanceKlass::vtable_length_offset() * wordSize)); - __ lea(rbx, Address(rbx, j_rarg1, Address::times_8, base)); - - if (HeapWordsPerLong > 1) { - // Round up to align_object_offset boundary - __ round_to(rbx, BytesPerLong); - } - Label hit, next, entry, throw_icce; - - __ jmpb(entry); - - __ bind(next); - __ addptr(rbx, itableOffsetEntry::size() * wordSize); - - __ bind(entry); - - // If the entry is NULL then we've reached the end of the table - // without finding the expected interface, so throw an exception - __ movptr(j_rarg1, Address(rbx, itableOffsetEntry::interface_offset_in_bytes())); - __ testptr(j_rarg1, j_rarg1); - __ jcc(Assembler::zero, throw_icce); - __ cmpptr(rax, j_rarg1); - __ jccb(Assembler::notEqual, next); - - // We found a hit, move offset into j_rarg1 - __ movl(j_rarg1, Address(rbx, itableOffsetEntry::offset_offset_in_bytes())); - - // Compute itableMethodEntry - const int method_offset = - (itableMethodEntry::size() * wordSize * vtable_index) + - itableMethodEntry::method_offset_in_bytes(); + const Register method = rbx; + Label throw_icce; // Get methodOop and entrypoint for compiler - - // Get klass pointer again - __ load_klass(rax, j_rarg0); - - const Register method = rbx; - __ movptr(method, Address(rax, j_rarg1, Address::times_1, method_offset)); - - // Restore saved register, before possible trap. - __ pop(j_rarg1); + __ lookup_interface_method(// inputs: rec. class, interface, itable index + r10, rax, itable_index, + // outputs: method, scan temp. reg + method, r11, + throw_icce); // method (rbx): methodOop // j_rarg0: receiver - #ifdef ASSERT if (DebugVtables) { Label L2; @@ -211,12 +180,16 @@ __ jmp(Address(method, methodOopDesc::from_compiled_offset())); __ bind(throw_icce); - // Restore saved register - __ pop(j_rarg1); __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); __ flush(); + if (PrintMiscellaneous && (WizardMode || Verbose)) { + tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d", + itable_index, s->entry_point(), + (int)(s->code_end() - s->entry_point()), + (int)(s->code_end() - __ pc())); + } guarantee(__ pc() <= s->code_end(), "overflowed buffer"); s->set_exception_points(npe_addr, ame_addr); @@ -230,7 +203,7 @@ (UseCompressedOops ? 16 : 0); // 1 leaq can be 3 bytes + 1 long } else { // Itable stub size - return (DebugVtables ? 636 : 72) + (CountCompiledCalls ? 13 : 0) + + return (DebugVtables ? 512 : 72) + (CountCompiledCalls ? 13 : 0) + (UseCompressedOops ? 32 : 0); // 2 leaqs } } diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/x86_32.ad Thu Mar 12 18:16:36 2009 -0700 @@ -130,7 +130,7 @@ // allocation. Highest priority is first. A useful heuristic is to // give registers a low priority when they are required by machine // instructions, like EAX and EDX. Registers which are used as -// pairs must fall on an even boundry (witness the FPR#L's in this list). +// pairs must fall on an even boundary (witness the FPR#L's in this list). // For the Intel integer registers, the equivalent Long pairs are // EDX:EAX, EBX:ECX, and EDI:EBP. alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP, @@ -3126,14 +3126,12 @@ enc_class movq_ld(regXD dst, memory mem) %{ MacroAssembler _masm(&cbuf); - Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); - __ movq(as_XMMRegister($dst$$reg), madr); + __ movq($dst$$XMMRegister, $mem$$Address); %} enc_class movq_st(memory mem, regXD src) %{ MacroAssembler _masm(&cbuf); - Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); - __ movq(madr, as_XMMRegister($src$$reg)); + __ movq($mem$$Address, $src$$XMMRegister); %} enc_class pshufd_8x8(regX dst, regX src) %{ @@ -3751,8 +3749,8 @@ masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL); // Load first characters - masm.load_unsigned_word(rcx, Address(rbx, 0)); - masm.load_unsigned_word(rdi, Address(rax, 0)); + masm.load_unsigned_short(rcx, Address(rbx, 0)); + masm.load_unsigned_short(rdi, Address(rax, 0)); // Compare first characters masm.subl(rcx, rdi); @@ -3782,8 +3780,8 @@ // Compare the rest of the characters masm.bind(WHILE_HEAD_LABEL); - masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0)); - masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0)); + masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0)); + masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0)); masm.subl(rcx, rdi); masm.jcc(Assembler::notZero, POP_LABEL); masm.incrementl(rsi); @@ -3840,8 +3838,8 @@ masm.jcc(Assembler::zero, COMPARE_LOOP_HDR); // Compare 2-byte "tail" at end of arrays - masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); - masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); + masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); + masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); masm.cmpl(tmp1Reg, tmp2Reg); masm.jcc(Assembler::notEqual, FALSE_LABEL); masm.testl(resultReg, resultReg); @@ -5857,7 +5855,7 @@ //----------OPERAND CLASSES---------------------------------------------------- // Operand Classes are groups of operands that are used as to simplify -// instruction definitions by not requiring the AD writer to specify seperate +// instruction definitions by not requiring the AD writer to specify separate // instructions for every form of operand when the instruction accepts // multiple operand types with the same basic encoding and format. The classic // case of this is memory operands. @@ -6396,21 +6394,94 @@ match(Set dst (LoadB mem)); ins_cost(125); - format %{ "MOVSX8 $dst,$mem" %} - opcode(0xBE, 0x0F); - ins_encode( OpcS, OpcP, RegMem(dst,mem)); - ins_pipe( ialu_reg_mem ); -%} - -// Load Byte (8bit UNsigned) -instruct loadUB(xRegI dst, memory mem, immI_255 bytemask) %{ - match(Set dst (AndI (LoadB mem) bytemask)); + format %{ "MOVSX8 $dst,$mem\t# byte" %} + + ins_encode %{ + __ movsbl($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Byte (8bit signed) into Long Register +instruct loadB2L(eRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadB mem))); + + ins_cost(375); + format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" + "MOV $dst.hi,$dst.lo\n\t" + "SAR $dst.hi,7" %} + + ins_encode %{ + __ movsbl($dst$$Register, $mem$$Address); + __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. + __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Byte (8bit UNsigned) +instruct loadUB(xRegI dst, memory mem) %{ + match(Set dst (LoadUB mem)); ins_cost(125); - format %{ "MOVZX8 $dst,$mem" %} - opcode(0xB6, 0x0F); - ins_encode( OpcS, OpcP, RegMem(dst,mem)); - ins_pipe( ialu_reg_mem ); + format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} + + ins_encode %{ + __ movzbl($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Byte (8 bit UNsigned) into Long Register +instruct loadUB2L(eRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadUB mem))); + + ins_cost(250); + format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" + "XOR $dst.hi,$dst.hi" %} + + ins_encode %{ + __ movzbl($dst$$Register, $mem$$Address); + __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Short (16bit signed) +instruct loadS(eRegI dst, memory mem) %{ + match(Set dst (LoadS mem)); + + ins_cost(125); + format %{ "MOVSX $dst,$mem\t# short" %} + + ins_encode %{ + __ movswl($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Short (16bit signed) into Long Register +instruct loadS2L(eRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadS mem))); + + ins_cost(375); + format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" + "MOV $dst.hi,$dst.lo\n\t" + "SAR $dst.hi,15" %} + + ins_encode %{ + __ movswl($dst$$Register, $mem$$Address); + __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. + __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. + %} + + ins_pipe(ialu_reg_mem); %} // Load Unsigned Short/Char (16bit unsigned) @@ -6418,10 +6489,30 @@ match(Set dst (LoadUS mem)); ins_cost(125); - format %{ "MOVZX $dst,$mem" %} - opcode(0xB7, 0x0F); - ins_encode( OpcS, OpcP, RegMem(dst,mem)); - ins_pipe( ialu_reg_mem ); + format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} + + ins_encode %{ + __ movzwl($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Short/Char (16 bit UNsigned) into Long Register +instruct loadUS2L(eRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadUS mem))); + + ins_cost(250); + format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" + "XOR $dst.hi,$dst.hi" %} + + ins_encode %{ + __ movzwl($dst$$Register, $mem$$Address); + __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); + %} + + ins_pipe(ialu_reg_mem); %} // Load Integer @@ -6429,10 +6520,47 @@ match(Set dst (LoadI mem)); ins_cost(125); - format %{ "MOV $dst,$mem" %} - opcode(0x8B); - ins_encode( OpcP, RegMem(dst,mem)); - ins_pipe( ialu_reg_mem ); + format %{ "MOV $dst,$mem\t# int" %} + + ins_encode %{ + __ movl($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Integer into Long Register +instruct loadI2L(eRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadI mem))); + + ins_cost(375); + format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" + "MOV $dst.hi,$dst.lo\n\t" + "SAR $dst.hi,31" %} + + ins_encode %{ + __ movl($dst$$Register, $mem$$Address); + __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. + __ sarl(HIGH_FROM_LOW($dst$$Register), 31); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Integer into Long Register +instruct loadUI2L(eRegL dst, memory mem) %{ + match(Set dst (LoadUI2L mem)); + + ins_cost(250); + format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" + "XOR $dst.hi,$dst.hi" %} + + ins_encode %{ + __ movl($dst$$Register, $mem$$Address); + __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); + %} + + ins_pipe(ialu_reg_mem); %} // Load Long. Cannot clobber address while loading, so restrict address @@ -6442,11 +6570,17 @@ match(Set dst (LoadL mem)); ins_cost(250); - format %{ "MOV $dst.lo,$mem\n\t" + format %{ "MOV $dst.lo,$mem\t# long\n\t" "MOV $dst.hi,$mem+4" %} - opcode(0x8B, 0x8B); - ins_encode( OpcP, RegMem(dst,mem), OpcS, RegMem_Hi(dst,mem)); - ins_pipe( ialu_reg_long_mem ); + + ins_encode %{ + Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false); + Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false); + __ movl($dst$$Register, Amemlo); + __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); + %} + + ins_pipe(ialu_reg_long_mem); %} // Volatile Load Long. Must be atomic, so do 64-bit FILD @@ -6521,17 +6655,6 @@ ins_pipe( ialu_reg_mem ); %} -// Load Short (16bit signed) -instruct loadS(eRegI dst, memory mem) %{ - match(Set dst (LoadS mem)); - - ins_cost(125); - format %{ "MOVSX $dst,$mem" %} - opcode(0xBF, 0x0F); - ins_encode( OpcS, OpcP, RegMem(dst,mem)); - ins_pipe( ialu_reg_mem ); -%} - // Load Double instruct loadD(regD dst, memory mem) %{ predicate(UseSSE<=1); @@ -7957,7 +8080,7 @@ __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); if( os::is_MP() ) __ lock(); - __ cmpxchg8(Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp)); + __ cmpxchg8($mem$$Address); __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); %} ins_pipe( pipe_cmpxchg ); @@ -11467,6 +11590,7 @@ instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{ match(Set dst (ConvI2L src)); effect(KILL cr); + ins_cost(375); format %{ "MOV $dst.lo,$src\n\t" "MOV $dst.hi,$src\n\t" "SAR $dst.hi,31" %} @@ -11478,6 +11602,7 @@ instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{ match(Set dst (AndL (ConvI2L src) mask) ); effect( KILL flags ); + ins_cost(250); format %{ "MOV $dst.lo,$src\n\t" "XOR $dst.hi,$dst.hi" %} opcode(0x33); // XOR @@ -11489,6 +11614,7 @@ instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ match(Set dst (AndL src mask) ); effect( KILL flags ); + ins_cost(250); format %{ "MOV $dst.lo,$src.lo\n\t" "XOR $dst.hi,$dst.hi\n\t" %} opcode(0x33); // XOR @@ -13220,7 +13346,7 @@ // These must follow all instruction definitions as they use the names // defined in the instructions definitions. // -// peepmatch ( root_instr_name [preceeding_instruction]* ); +// peepmatch ( root_instr_name [preceding_instruction]* ); // // peepconstraint %{ // (instruction_number.operand_name relational_op instruction_number.operand_name diff -r ce2272390558 -r 7bb995fbd3c0 src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Mon Mar 09 13:34:00 2009 -0700 +++ b/src/cpu/x86/vm/x86_64.ad Thu Mar 12 18:16:36 2009 -0700 @@ -3462,14 +3462,12 @@ enc_class movq_ld(regD dst, memory mem) %{ MacroAssembler _masm(&cbuf); - Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); - __ movq(as_XMMRegister($dst$$reg), madr); + __ movq($dst$$XMMRegister, $mem$$Address); %} enc_class movq_st(memory mem, regD src) %{ MacroAssembler _masm(&cbuf); - Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); - __ movq(madr, as_XMMRegister($src$$reg)); + __ movq($mem$$Address, $src$$XMMRegister); %} enc_class pshufd_8x8(regF dst, regF src) %{ @@ -3765,8 +3763,8 @@ masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL); // Load first characters - masm.load_unsigned_word(rcx, Address(rbx, 0)); - masm.load_unsigned_word(rdi, Address(rax, 0)); + masm.load_unsigned_short(rcx, Address(rbx, 0)); + masm.load_unsigned_short(rdi, Address(rax, 0)); // Compare first characters masm.subl(rcx, rdi); @@ -3796,8 +3794,8 @@ // Compare the rest of the characters masm.bind(WHILE_HEAD_LABEL); - masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0)); - masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0)); + masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0)); + masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0)); masm.subl(rcx, rdi); masm.jcc(Assembler::notZero, POP_LABEL); masm.increment(rsi); @@ -3854,8 +3852,8 @@ masm.jcc(Assembler::zero, COMPARE_LOOP_HDR); // Compare 2-byte "tail" at end of arrays - masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); - masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); + masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); + masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); masm.cmpl(tmp1Reg, tmp2Reg); masm.jcc(Assembler::notEqual, FALSE_LABEL); masm.testl(resultReg, resultReg); @@ -5483,7 +5481,7 @@ //----------OPERAND CLASSES---------------------------------------------------- // Operand Classes are groups of operands that are used as to simplify -// instruction definitions by not requiring the AD writer to specify seperate +// instruction definitions by not requiring the AD writer to specify separate // instructions for every form of operand when the instruction accepts // multiple operand types with the same basic encoding and format. The classic // case of this is memory operands. @@ -6031,70 +6029,88 @@ ins_cost(125); format %{ "movsbl $dst, $mem\t# byte" %} - opcode(0x0F, 0xBE); - ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem)); + + ins_encode %{ + __ movsbl($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); %} -// Load Byte (8 bit signed) into long -// instruct loadB2L(rRegL dst, memory mem) -// %{ -// match(Set dst (ConvI2L (LoadB mem))); - -// ins_cost(125); -// format %{ "movsbq $dst, $mem\t# byte -> long" %} -// opcode(0x0F, 0xBE); -// ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem)); -// ins_pipe(ialu_reg_mem); -// %} - -// Load Byte (8 bit UNsigned) -instruct loadUB(rRegI dst, memory mem, immI_255 bytemask) -%{ - match(Set dst (AndI (LoadB mem) bytemask)); +// Load Byte (8 bit signed) into Long Register +instruct loadB2L(rRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadB mem))); + + ins_cost(125); + format %{ "movsbq $dst, $mem\t# byte -> long" %} + + ins_encode %{ + __ movsbq($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Byte (8 bit UNsigned) +instruct loadUB(rRegI dst, memory mem) +%{ + match(Set dst (LoadUB mem)); ins_cost(125); format %{ "movzbl $dst, $mem\t# ubyte" %} - opcode(0x0F, 0xB6); - ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem)); + + ins_encode %{ + __ movzbl($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); %} -// Load Byte (8 bit UNsigned) into long -// instruct loadUB2L(rRegL dst, memory mem, immI_255 bytemask) -// %{ -// match(Set dst (ConvI2L (AndI (LoadB mem) bytemask))); - -// ins_cost(125); -// format %{ "movzbl $dst, $mem\t# ubyte -> long" %} -// opcode(0x0F, 0xB6); -// ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem)); -// ins_pipe(ialu_reg_mem); -// %} +// Load Unsigned Byte (8 bit UNsigned) into Long Register +instruct loadUB2L(rRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadUB mem))); + + ins_cost(125); + format %{ "movzbq $dst, $mem\t# ubyte -> long" %} + + ins_encode %{ + __ movzbq($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} // Load Short (16 bit signed) instruct loadS(rRegI dst, memory mem) %{ match(Set dst (LoadS mem)); - ins_cost(125); // XXX + ins_cost(125); format %{ "movswl $dst, $mem\t# short" %} - opcode(0x0F, 0xBF); - ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem)); + + ins_encode %{ + __ movswl($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); %} -// Load Short (16 bit signed) into long -// instruct loadS2L(rRegL dst, memory mem) -// %{ -// match(Set dst (ConvI2L (LoadS mem))); - -// ins_cost(125); // XXX -// format %{ "movswq $dst, $mem\t# short -> long" %} -// opcode(0x0F, 0xBF); -// ins_encode(REX_reg_mem_wide(dst, mem), OpcP, OpcS, reg_mem(dst, mem)); -// ins_pipe(ialu_reg_mem); -// %} +// Load Short (16 bit signed) into Long Register +instruct loadS2L(rRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadS mem))); + + ins_cost(125); + format %{ "movswq $dst, $mem\t# short -> long" %} + + ins_encode %{ + __ movswq($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} // Load Unsigned Short/Char (16 bit UNsigned) instruct loadUS(rRegI dst, memory mem) @@ -6103,32 +6119,71 @@ ins_cost(125); format %{ "movzwl $dst, $mem\t# ushort/char" %} - opcode(0x0F, 0xB7); - ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem)); + + ins_encode %{ + __ movzwl($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); %} -// Load Unsigned Short/Char (16 bit UNsigned) into long -// instruct loadUS2L(rRegL dst, memory mem) -// %{ -// match(Set dst (ConvI2L (LoadUS mem))); - -// ins_cost(125); -// format %{ "movzwl $dst, $mem\t# ushort/char -> long" %} -// opcode(0x0F, 0xB7); -// ins_encode(REX_reg_mem(dst, mem), OpcP, OpcS, reg_mem(dst, mem)); -// ins_pipe(ialu_reg_mem); -// %} +// Load Unsigned Short/Char (16 bit UNsigned) into Long Register +instruct loadUS2L(rRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadUS mem))); + + ins_cost(125); + format %{ "movzwq $dst, $mem\t# ushort/char -> long" %} + + ins_encode %{ + __ movzwq($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} // Load Integer instruct loadI(rRegI dst, memory mem) %{ match(Set dst (LoadI mem)); - ins_cost(125); // XXX + ins_cost(125); format %{ "movl $dst, $mem\t# int" %} - opcode(0x8B); - ins_encode(REX_reg_mem(dst, mem), OpcP, reg_mem(dst, mem)); + + ins_encode %{ + __ movl($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Integer into Long Register +instruct loadI2L(rRegL dst, memory mem) +%{ + match(Set dst (ConvI2L (LoadI mem))); + + ins_cost(125); + format %{ "movslq $dst, $mem\t# int -> long" %} + + ins_encode %{ + __ movslq($dst$$Register, $mem$$Address); + %} + + ins_pipe(ialu_reg_mem); +%} + +// Load Unsigned Integer into Long Register +instruct loadUI2L(rRegL dst, memory mem) +%{ + match(Set dst (LoadUI2L mem)); + + ins_cost(125); + format %{ "movl $dst, $mem\t# uint -> long" %} + + ins_encode %{ + __ movl($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); %} @@ -6137,10 +6192,13 @@ %{ match(Set dst (LoadL mem)); - ins_cost(125); // XXX + ins_cost(125); format %{ "movq $dst, $mem\t# long" %} - opcode(0x8B); - ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem)); + + ins_encode %{ + __ movq($dst$$Register, $mem$$Address); + %} + ins_pipe(ialu_reg_mem); // XXX %} @@ -8363,7 +8421,7 @@ //----------- DivL-By-Constant-Expansions-------------------------------------- // DivI cases are handled by the compiler -// Magic constant, reciprical of 10 +// Magic constant, reciprocal of 10 instruct loadConL_0x6666666666666667(rRegL dst) %{ effect(DEF dst); @@ -10804,16 +10862,6 @@ // ins_pipe(ialu_reg_reg); // %} -instruct convI2L_reg_mem(rRegL dst, memory src) -%{ - match(Set dst (ConvI2L (LoadI src))); - - format %{ "movslq $dst, $src\t# i2l" %} - opcode(0x63); // needs REX.W - ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst,src)); - ins_pipe(ialu_reg_mem); -%} - // Zero-extend convert int to long instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask) %{ @@ -12082,7 +12130,7 @@ // These must follow all instruction definitions as they use the names // defined in the instructions definitions. // -// peepmatch ( root_instr_name [precerding_instruction]* ); +// peepmatch ( root_instr_name [preceding_instruction]* ); // // peepconstraint %{ // (instruction_number.operand_name relational_op instruction_number.operand_name diff -r ce2272390558 -r 7bb995fbd3c0 src/os/linux/launcher/java.c --- a/src/os/linux/launcher/java.c Mon Mar 09 13:34:00 2009 -0700 +++ b/src/os/linux/launcher/java.c Thu Mar 12 18:16:36 2009 -0700 @@ -419,7 +419,7 @@ goto leave; } mainClass = LoadClass(env, classname); - if(mainClass == NULL) { /* exception occured */ + if(mainClass == NULL) { /* exception occurred */ ReportExceptionDescription(env); message = "Could not find the main class. Program will exit."; goto leave; @@ -441,7 +441,7 @@ goto leave; } mainClass = LoadClass(env, classname); - if(mainClass == NULL) { /* exception occured */ + if(mainClass == NULL) { /* exception occurred */ ReportExceptionDescription(env); message = "Could not find the main class. Program will exit."; goto leave; diff -r ce2272390558 -r 7bb995fbd3c0 src/os/linux/launcher/java_md.h --- a/src/os/linux/launcher/java_md.h Mon Mar 09 13:34:00 2009 -0700 +++ b/src/os/linux/launcher/java_md.h Thu Mar 12 18:16:36 2009 -0700 @@ -47,7 +47,7 @@ #ifdef JAVA_ARGS /* * ApplicationHome is prepended to each of these entries; the resulting - * strings are concatenated (seperated by PATH_SEPARATOR) and used as the + * strings are concatenated (separated by PATH_SEPARATOR) and used as the * value of -cp option to the launcher. */ #ifndef APP_CLASSPATH diff -r ce2272390558 -r 7bb995fbd3c0 src/os/linux/vm/perfMemory_linux.cpp --- a/src/os/linux/vm/perfMemory_linux.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/os/linux/vm/perfMemory_linux.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -192,7 +192,7 @@ // check if the given path is considered a secure directory for // the backing store files. Returns true if the directory exists // and is considered a secure location. Returns false if the path -// is a symbolic link or if an error occured. +// is a symbolic link or if an error occurred. // static bool is_directory_secure(const char* path) { struct stat statbuf; diff -r ce2272390558 -r 7bb995fbd3c0 src/os/solaris/launcher/java.c --- a/src/os/solaris/launcher/java.c Mon Mar 09 13:34:00 2009 -0700 +++ b/src/os/solaris/launcher/java.c Thu Mar 12 18:16:36 2009 -0700 @@ -419,7 +419,7 @@ goto leave; } mainClass = LoadClass(env, classname); - if(mainClass == NULL) { /* exception occured */ + if(mainClass == NULL) { /* exception occurred */ ReportExceptionDescription(env); message = "Could not find the main class. Program will exit."; goto leave; @@ -441,7 +441,7 @@ goto leave; } mainClass = LoadClass(env, classname); - if(mainClass == NULL) { /* exception occured */ + if(mainClass == NULL) { /* exception occurred */ ReportExceptionDescription(env); message = "Could not find the main class. Program will exit."; goto leave; diff -r ce2272390558 -r 7bb995fbd3c0 src/os/solaris/launcher/java_md.h --- a/src/os/solaris/launcher/java_md.h Mon Mar 09 13:34:00 2009 -0700 +++ b/src/os/solaris/launcher/java_md.h Thu Mar 12 18:16:36 2009 -0700 @@ -47,7 +47,7 @@ #ifdef JAVA_ARGS /* * ApplicationHome is prepended to each of these entries; the resulting - * strings are concatenated (seperated by PATH_SEPARATOR) and used as the + * strings are concatenated (separated by PATH_SEPARATOR) and used as the * value of -cp option to the launcher. */ #ifndef APP_CLASSPATH diff -r ce2272390558 -r 7bb995fbd3c0 src/os/solaris/vm/perfMemory_solaris.cpp --- a/src/os/solaris/vm/perfMemory_solaris.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/os/solaris/vm/perfMemory_solaris.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -194,7 +194,7 @@ // check if the given path is considered a secure directory for // the backing store files. Returns true if the directory exists // and is considered a secure location. Returns false if the path -// is a symbolic link or if an error occured. +// is a symbolic link or if an error occurred. // static bool is_directory_secure(const char* path) { struct stat statbuf; diff -r ce2272390558 -r 7bb995fbd3c0 src/os/windows/vm/perfMemory_windows.cpp --- a/src/os/windows/vm/perfMemory_windows.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/os/windows/vm/perfMemory_windows.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -195,7 +195,7 @@ // check if the given path is considered a secure directory for // the backing store files. Returns true if the directory exists // and is considered a secure location. Returns false if the path -// is a symbolic link or if an error occured. +// is a symbolic link or if an error occurred. // static bool is_directory_secure(const char* path) { @@ -994,7 +994,7 @@ return false; } - // if running on windows 2000 or later, set the automatic inheritence + // if running on windows 2000 or later, set the automatic inheritance // control flags. SetSecurityDescriptorControlFnPtr _SetSecurityDescriptorControl; _SetSecurityDescriptorControl = (SetSecurityDescriptorControlFnPtr) @@ -1002,7 +1002,7 @@ "SetSecurityDescriptorControl"); if (_SetSecurityDescriptorControl != NULL) { - // We do not want to further propogate inherited DACLs, so making them + // We do not want to further propagate inherited DACLs, so making them // protected prevents that. if (!_SetSecurityDescriptorControl(pSD, SE_DACL_PROTECTED, SE_DACL_PROTECTED)) { diff -r ce2272390558 -r 7bb995fbd3c0 src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp --- a/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -532,7 +532,7 @@ if (oldAct.sa_sigaction != signalHandler) { void* sighand = oldAct.sa_sigaction ? CAST_FROM_FN_PTR(void*, oldAct.sa_sigaction) : CAST_FROM_FN_PTR(void*, oldAct.sa_handler); - warning("Unexpected Signal %d occured under user-defined signal handler " INTPTR_FORMAT, sig, (intptr_t)sighand); + warning("Unexpected Signal %d occurred under user-defined signal handler " INTPTR_FORMAT, sig, (intptr_t)sighand); } } diff -r ce2272390558 -r 7bb995fbd3c0 src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp --- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -299,14 +299,18 @@ } +#endif // AMD64 + bool os::supports_sse() { +#ifdef AMD64 + return true; +#else if (sse_status == SSE_UNKNOWN) check_for_sse_support(); return sse_status == SSE_SUPPORTED; +#endif // AMD64 } -#endif // AMD64 - bool os::is_allocatable(size_t bytes) { #ifdef AMD64 return true; @@ -690,7 +694,7 @@ if (oldAct.sa_sigaction != signalHandler) { void* sighand = oldAct.sa_sigaction ? CAST_FROM_FN_PTR(void*, oldAct.sa_sigaction) : CAST_FROM_FN_PTR(void*, oldAct.sa_handler); - warning("Unexpected Signal %d occured under user-defined signal handler %#lx", sig, (long)sighand); + warning("Unexpected Signal %d occurred under user-defined signal handler %#lx", sig, (long)sighand); } } diff -r ce2272390558 -r 7bb995fbd3c0 src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp --- a/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -1,5 +1,5 @@ /* - * Copyright 1999-2004 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,8 +41,9 @@ static void fence_bootstrap (); static void setup_fpu(); +#endif // AMD64 + static bool supports_sse(); -#endif // AMD64 static bool is_allocatable(size_t bytes); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/Makefile Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,75 @@ +# +# Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, +# CA 95054 USA or visit www.sun.com if you need additional information or +# have any questions. +# +# +PKGLIST = \ +com.sun.hotspot.tools.compiler +#END PKGLIST + +FILELIST = com/sun/hotspot/tools/compiler/*.java + +ifneq "x$(ALT_BOOTDIR)" "x" + BOOTDIR := $(ALT_BOOTDIR) +endif + +ifeq "x$(BOOTDIR)" "x" + JDK_HOME := $(shell dirname $(shell which java))/.. +else + JDK_HOME := $(BOOTDIR) +endif + +isUnix := $(shell test -r c:/; echo $$?) + +ifeq "$(isUnix)" "1" + CPS := : +else + CPS := ";" +endif + +SRC_DIR = src +BUILD_DIR = build +OUTPUT_DIR = $(BUILD_DIR)/classes + +# gnumake 3.78.1 does not accept the *s, +# so use the shell to expand them +ALLFILES := $(patsubst %,$(SRC_DIR)/%,$(FILELIST)) +ALLFILES := $(shell /bin/ls $(ALLFILES)) + +JAVAC = $(JDK_HOME)/bin/javac +JAR = $(JDK_HOME)/bin/jar + +# Tagging it on because there's no reason not to run it +all: logc.jar + +logc.jar: filelist manifest.mf + @mkdir -p $(OUTPUT_DIR) + $(JAVAC) -source 1.5 -deprecation -sourcepath $(SRC_DIR) -d $(OUTPUT_DIR) @filelist + $(JAR) cvfm logc.jar manifest.mf -C $(OUTPUT_DIR) com + +.PHONY: filelist +filelist: $(ALLFILES) + @rm -f $@ + @echo $(ALLFILES) > $@ + +clean:: + rm -rf filelist logc.jar + rm -rf $(BUILD_DIR) diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/README Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,18 @@ +This is a very rough tool for parsing -XX:+LogCompilation output. +It's main purpose is to recreate output similar to +-XX:+PrintCompilation -XX:+PrintInlining output from a debug JVM. It +requires a 1.5 JDK to build and simply typing make should build it. + +It produces a jar file, logc.jar, that can be run on the +hotspot.log from LogCompilation output like this: + + java -jar logc.jar hotspot.log + +This will produce something like the normal PrintCompilation output. +Adding the -i option with also report inlining like PrintInlining. + +More information about the LogCompilation output can be found at + +http://wikis.sun.com/display/HotSpotInternals/LogCompilation+overview +http://wikis.sun.com/display/HotSpotInternals/PrintCompilation +http://wikis.sun.com/display/HotSpotInternals/LogCompilation+tool diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/manifest.mf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/manifest.mf Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,1 @@ +Main-Class: com.sun.hotspot.tools.compiler.LogCompilation diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/BasicLogEvent.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/BasicLogEvent.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,75 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +package com.sun.hotspot.tools.compiler; + +import java.io.PrintStream; + +/** + * + * @author never + */ +public abstract class BasicLogEvent implements LogEvent { + + protected final String id; + protected final double start; + protected double end; + protected Compilation compilation; + + BasicLogEvent(double start, String id) { + this.start = start; + this.end = start; + this.id = id; + } + + public double getStart() { + return start; + } + + public double getEnd() { + return end; + } + + public void setEnd(double end) { + this.end = end; + } + + public double getElapsedTime() { + return ((int) ((getEnd() - getStart()) * 1000)) / 1000.0; + } + + public String getId() { + return id; + } + + public Compilation getCompilation() { + return compilation; + } + + public void setCompilation(Compilation compilation) { + this.compilation = compilation; + } + + abstract public void print(PrintStream stream); +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/CallSite.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/CallSite.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,183 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +package com.sun.hotspot.tools.compiler; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +public class CallSite { + + private int bci; + private Method method; + private int count; + private String receiver; + private int receiver_count; + private String reason; + private List calls; + + CallSite() { + } + + CallSite(int bci, Method m) { + this.bci = bci; + this.method = m; + } + + void add(CallSite site) { + if (getCalls() == null) { + setCalls(new ArrayList()); + } + getCalls().add(site); + } + + CallSite last() { + return last(-1); + } + + CallSite last(int fromEnd) { + return getCalls().get(getCalls().size() + fromEnd); + } + + public String toString() { + StringBuilder sb = new StringBuilder(); + if (getReason() == null) { + sb.append(" @ " + getBci() + " " + getMethod()); + } else { + sb.append("- @ " + getBci() + " " + getMethod() + " " + getReason()); + } + sb.append("\n"); + if (getCalls() != null) { + for (CallSite site : getCalls()) { + sb.append(site); + sb.append("\n"); + } + } + return sb.toString(); + } + + public void print(PrintStream stream) { + print(stream, 0); + } + + void emit(PrintStream stream, int indent) { + for (int i = 0; i < indent; i++) { + stream.print(' '); + } + } + private static boolean compat = true; + + public void print(PrintStream stream, int indent) { + emit(stream, indent); + String m = getMethod().getHolder().replace('/', '.') + "::" + getMethod().getName(); + if (getReason() == null) { + stream.println(" @ " + getBci() + " " + m + " (" + getMethod().getBytes() + " bytes)"); + + } else { + if (isCompat()) { + stream.println(" @ " + getBci() + " " + m + " " + getReason()); + } else { + stream.println("- @ " + getBci() + " " + m + + " (" + getMethod().getBytes() + " bytes) " + getReason()); + } + } + if (getReceiver() != null) { + emit(stream, indent + 3); + // stream.println("type profile " + method.holder + " -> " + receiver + " (" + + // receiver_count + "/" + count + "," + (receiver_count * 100 / count) + "%)"); + stream.println("type profile " + getMethod().getHolder() + " -> " + getReceiver() + " (" + + (getReceiverCount() * 100 / getCount()) + "%)"); + } + if (getCalls() != null) { + for (CallSite site : getCalls()) { + site.print(stream, indent + 2); + } + } + } + + public int getBci() { + return bci; + } + + public void setBci(int bci) { + this.bci = bci; + } + + public Method getMethod() { + return method; + } + + public void setMethod(Method method) { + this.method = method; + } + + public int getCount() { + return count; + } + + public void setCount(int count) { + this.count = count; + } + + public String getReceiver() { + return receiver; + } + + public void setReceiver(String receiver) { + this.receiver = receiver; + } + + public int getReceiverCount() { + return receiver_count; + } + + public void setReceiver_count(int receiver_count) { + this.receiver_count = receiver_count; + } + + public String getReason() { + return reason; + } + + public void setReason(String reason) { + this.reason = reason; + } + + public List getCalls() { + return calls; + } + + public void setCalls(List calls) { + this.calls = calls; + } + + public static boolean isCompat() { + return compat; + } + + public static void setCompat(boolean aCompat) { + compat = aCompat; + } +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Compilation.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Compilation.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,236 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +package com.sun.hotspot.tools.compiler; + +import java.io.PrintStream; +import java.util.ArrayList; + +public class Compilation implements LogEvent { + + private int id; + private boolean osr; + private Method method; + private CallSite call = new CallSite(); + private int osrBci; + private String icount; + private String bcount; + private String special; + private double start; + private double end; + private int attempts; + private NMethod nmethod; + private ArrayList phases = new ArrayList(4); + private String failureReason; + + Compilation(int id) { + this.id = id; + } + + Phase getPhase(String s) { + for (Phase p : getPhases()) { + if (p.getName().equals(s)) { + return p; + } + } + return null; + } + + double getRegallocTime() { + return getPhase("regalloc").getElapsedTime(); + } + + public double getStart() { + return start; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getId()); + sb.append(" "); + sb.append(getMethod()); + sb.append(" "); + sb.append(getIcount()); + sb.append("+"); + sb.append(getBcount()); + sb.append("\n"); + for (CallSite site : getCall().getCalls()) { + sb.append(site); + sb.append("\n"); + } + return sb.toString(); + } + + public void printShort(PrintStream stream) { + if (getMethod() == null) { + stream.println(getSpecial()); + } else { + int bc = isOsr() ? getOsr_bci() : -1; + stream.print(getId() + getMethod().decodeFlags(bc) + getMethod().format(bc)); + } + } + + public void print(PrintStream stream) { + print(stream, 0, false); + } + + public void print(PrintStream stream, boolean printInlining) { + print(stream, 0, printInlining); + } + + public void print(PrintStream stream, int indent, boolean printInlining) { + if (getMethod() == null) { + stream.println(getSpecial()); + } else { + int bc = isOsr() ? getOsr_bci() : -1; + stream.print(getId() + getMethod().decodeFlags(bc) + getMethod().format(bc)); + stream.println(); + if (getFailureReason() != null) { + stream.println("COMPILE FAILED " + getFailureReason()); + } + if (printInlining && call.getCalls() != null) { + for (CallSite site : call.getCalls()) { + site.print(stream, indent + 2); + } + } + } + } + + public int getId() { + return id; + } + + public void setId(int id) { + this.id = id; + } + + public boolean isOsr() { + return osr; + } + + public void setOsr(boolean osr) { + this.osr = osr; + } + + public int getOsr_bci() { + return osrBci; + } + + public void setOsr_bci(int osrBci) { + this.osrBci = osrBci; + } + + public String getIcount() { + return icount; + } + + public void setICount(String icount) { + this.icount = icount; + } + + public String getBcount() { + return bcount; + } + + public void setBCount(String bcount) { + this.bcount = bcount; + } + + public String getSpecial() { + return special; + } + + public void setSpecial(String special) { + this.special = special; + } + + public void setStart(double start) { + this.start = start; + } + + public double getEnd() { + return end; + } + + public void setEnd(double end) { + this.end = end; + } + + public int getAttempts() { + return attempts; + } + + public void setAttempts(int attempts) { + this.attempts = attempts; + } + + public NMethod getNMethod() { + return nmethod; + } + + public void setNMethod(NMethod NMethod) { + this.nmethod = NMethod; + } + + public ArrayList getPhases() { + return phases; + } + + public void setPhases(ArrayList phases) { + this.setPhases(phases); + } + + public String getFailureReason() { + return failureReason; + } + + public void setFailureReason(String failureReason) { + this.failureReason = failureReason; + } + + public Method getMethod() { + return method; + } + + public void setMethod(Method method) { + this.method = method; + } + + public CallSite getCall() { + return call; + } + + public void setCall(CallSite call) { + this.call = call; + } + + public double getElapsedTime() { + return end - start; + } + + public Compilation getCompilation() { + return this; + } +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Constants.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Constants.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,46 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +package com.sun.hotspot.tools.compiler; + +interface Constants { + static final int JVM_ACC_PUBLIC = 0x0001; /* visible to everyone */ + static final int JVM_ACC_PRIVATE = 0x0002; /* visible only to the defining class */ + static final int JVM_ACC_PROTECTED = 0x0004; /* visible to subclasses */ + static final int JVM_ACC_STATIC = 0x0008; /* instance variable is static */ + static final int JVM_ACC_FINAL = 0x0010; /* no further subclassing, overriding */ + static final int JVM_ACC_SYNCHRONIZED = 0x0020; /* wrap method call in monitor lock */ + static final int JVM_ACC_SUPER = 0x0020; /* funky handling of invokespecial */ + static final int JVM_ACC_VOLATILE = 0x0040; /* can not cache in registers */ + static final int JVM_ACC_BRIDGE = 0x0040; /* bridge method generated by compiler */ + static final int JVM_ACC_TRANSIENT = 0x0080; /* not persistent */ + static final int JVM_ACC_VARARGS = 0x0080; /* method declared with variable number of args */ + static final int JVM_ACC_NATIVE = 0x0100; /* implemented in C */ + static final int JVM_ACC_INTERFACE = 0x0200; /* class is an interface */ + static final int JVM_ACC_ABSTRACT = 0x0400; /* no definition provided */ + static final int JVM_ACC_STRICT = 0x0800; /* strict floating point */ + static final int JVM_ACC_SYNTHETIC = 0x1000; /* compiler-generated class, method or field */ + static final int JVM_ACC_ANNOTATION = 0x2000; /* annotation type */ + static final int JVM_ACC_ENUM = 0x4000; /* field is declared as element of enum */ +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCleanupReader.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCleanupReader.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,212 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +package com.sun.hotspot.tools.compiler; + +import java.io.*; +import java.util.regex.*; + +/** + * This class is a filter class to deal with malformed XML that used + * to be produced by the JVM when generating LogCompilation. In 1.6 + * and later releases it shouldn't be required. + * @author never + */ + +class LogCleanupReader extends Reader { + private Reader reader; + + private char[] buffer = new char[4096]; + + private int bufferCount; + + private int bufferOffset; + + private char[] line = new char[1024]; + + private int index; + + private int length; + + private char[] one = new char[1]; + + LogCleanupReader(Reader r) { + reader = r; + } + + static final private Matcher pattern = Pattern.compile(".+ compile_id='[0-9]+'.*( compile_id='[0-9]+)").matcher(""); + static final private Matcher pattern2 = Pattern.compile("' (C[12]) compile_id=").matcher(""); + static final private Matcher pattern3 = Pattern.compile("'(destroy_vm)/").matcher(""); + + private void fill() throws IOException { + rawFill(); + if (length != -1) { + boolean changed = false; + String s = new String(line, 0, length); + String orig = s; + + pattern2.reset(s); + if (pattern2.find()) { + s = s.substring(0, pattern2.start(1)) + s.substring(pattern2.end(1) + 1); + changed = true; + } + + pattern.reset(s); + if (pattern.lookingAt()) { + s = s.substring(0, pattern.start(1)) + s.substring(pattern.end(1) + 1); + changed = true; + } + + pattern3.reset(s); + if (pattern3.find()) { + s = s.substring(0, pattern3.start(1)) + s.substring(pattern3.end(1)); + changed = true; + } + + if (changed) { + s.getChars(0, s.length(), line, 0); + length = s.length(); + } + } + } + + private void rawFill() throws IOException { + if (bufferCount == -1) { + length = -1; + return; + } + + int i = 0; + boolean fillNonEOL = true; + outer: + while (true) { + if (fillNonEOL) { + int p; + for (p = bufferOffset; p < bufferCount; p++) { + char c = buffer[p]; + if (c == '\r' || c == '\n') { + bufferOffset = p; + fillNonEOL = false; + continue outer; + } + if (i >= line.length) { + // copy and enlarge the line array + char[] newLine = new char[line.length * 2]; + System.arraycopy(line, 0, newLine, 0, line.length); + line = newLine; + } + line[i++] = c; + } + bufferOffset = p; + } else { + int p; + for (p = bufferOffset; p < bufferCount; p++) { + char c = buffer[p]; + if (c != '\r' && c != '\n') { + bufferOffset = p; + length = i; + index = 0; + return; + } + line[i++] = c; + } + bufferOffset = p; + } + if (bufferCount == -1) { + if (i == 0) { + length = -1; + } else { + length = i; + } + index = 0; + return; + } + if (bufferOffset != bufferCount) { + System.out.println(bufferOffset); + System.out.println(bufferCount); + throw new InternalError("how did we get here"); + } + // load more data and try again. + bufferCount = reader.read(buffer, 0, buffer.length); + bufferOffset = 0; + } + } + + public int read() throws java.io.IOException { + read(one, 0, 1); + return one[0]; + } + + public int read(char[] buffer) throws java.io.IOException { + return read(buffer, 0, buffer.length); + } + + public int read(char[] b, int off, int len) throws java.io.IOException { + if (length == -1) { + return -1; + } + + if (index == length) { + fill(); + if (length == -1) { + return -1; + } + } + int n = Math.min(length - index, Math.min(b.length - off, len)); + // System.out.printf("%d %d %d %d %d\n", index, length, off, len, n); + System.arraycopy(line, index, b, off, n); + index += n; + return n; + } + + public long skip(long n) throws java.io.IOException { + long result = n; + while (n-- > 0) read(); + return result; + } + + public boolean ready() throws java.io.IOException { + return reader.ready() || (line != null && length > 0); + } + + public boolean markSupported() { + return false; + } + + public void mark(int unused) throws java.io.IOException { + throw new UnsupportedOperationException("mark not supported"); + } + + public void reset() throws java.io.IOException { + reader.reset(); + line = null; + index = 0; + } + + public void close() throws java.io.IOException { + reader.close(); + line = null; + index = 0; + } +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCompilation.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCompilation.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,177 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +/** + * The main command line driver of a parser for LogCompilation output. + * @author never + */ + +package com.sun.hotspot.tools.compiler; + +import java.io.PrintStream; +import java.util.*; +import org.xml.sax.*; +import org.xml.sax.helpers.*; + +public class LogCompilation extends DefaultHandler implements ErrorHandler, Constants { + + public static void usage(int exitcode) { + System.out.println("Usage: LogCompilation [ -v ] [ -c ] [ -s ] [ -e | -N ] file1 ..."); + System.out.println(" -c: clean up malformed 1.5 xml"); + System.out.println(" -i: print inlining decisions"); + System.out.println(" -S: print compilation statistics"); + System.out.println(" -s: sort events by start time"); + System.out.println(" -e: sort events by elapsed time"); + System.out.println(" -N: sort events by name and start"); + System.exit(exitcode); + } + + public static void main(String[] args) throws Exception { + Comparator defaultSort = LogParser.sortByStart; + boolean statistics = false; + boolean printInlining = false; + boolean cleanup = false; + int index = 0; + + while (args.length > index) { + if (args[index].equals("-e")) { + defaultSort = LogParser.sortByElapsed; + index++; + } else if (args[index].equals("-n")) { + defaultSort = LogParser.sortByNameAndStart; + index++; + } else if (args[index].equals("-s")) { + defaultSort = LogParser.sortByStart; + index++; + } else if (args[index].equals("-c")) { + cleanup = true; + index++; + } else if (args[index].equals("-S")) { + statistics = true; + index++; + } else if (args[index].equals("-h")) { + usage(0); + } else if (args[index].equals("-i")) { + printInlining = true; + index++; + } else { + break; + } + } + + if (index >= args.length) { + usage(1); + } + + while (index < args.length) { + ArrayList events = LogParser.parse(args[index], cleanup); + + if (statistics) { + printStatistics(events, System.out); + } else { + Collections.sort(events, defaultSort); + for (LogEvent c : events) { + if (printInlining && c instanceof Compilation) { + Compilation comp = (Compilation)c; + comp.print(System.out, true); + } else { + c.print(System.out); + } + } + } + index++; + } + } + + public static void printStatistics(ArrayList events, PrintStream out) { + long cacheSize = 0; + long maxCacheSize = 0; + int nmethodsCreated = 0; + int nmethodsLive = 0; + int[] attempts = new int[32]; + double regallocTime = 0; + int maxattempts = 0; + + LinkedHashMap phaseTime = new LinkedHashMap(7); + LinkedHashMap phaseNodes = new LinkedHashMap(7); + double elapsed = 0; + + for (LogEvent e : events) { + if (e instanceof Compilation) { + Compilation c = (Compilation) e; + c.printShort(out); + out.printf(" %6.4f\n", c.getElapsedTime()); + attempts[c.getAttempts()]++; + maxattempts = Math.max(maxattempts,c.getAttempts()); + elapsed += c.getElapsedTime(); + for (Phase phase : c.getPhases()) { + out.printf("\t%s %6.4f\n", phase.getName(), phase.getElapsedTime()); + Double v = phaseTime.get(phase.getName()); + if (v == null) { + v = Double.valueOf(0.0); + } + phaseTime.put(phase.getName(), Double.valueOf(v.doubleValue() + phase.getElapsedTime())); + + Integer v2 = phaseNodes.get(phase.getName()); + if (v2 == null) { + v2 = Integer.valueOf(0); + } + phaseNodes.put(phase.getName(), Integer.valueOf(v2.intValue() + phase.getNodes())); + } + } else if (e instanceof MakeNotEntrantEvent) { + MakeNotEntrantEvent mne = (MakeNotEntrantEvent) e; + NMethod nm = mne.getNMethod(); + if (mne.isZombie()) { + if (nm == null) { + System.err.println(mne.getId()); + } + cacheSize -= nm.getSize(); + nmethodsLive--; + } + } else if (e instanceof NMethod) { + nmethodsLive++; + nmethodsCreated++; + NMethod nm = (NMethod) e; + cacheSize += nm.getSize(); + maxCacheSize = Math.max(cacheSize, maxCacheSize); + } + } + out.printf("NMethods: %d created %d live %d bytes (%d peak) in the code cache\n", + nmethodsCreated, nmethodsLive, cacheSize, maxCacheSize); + out.println("Phase times:"); + for (String name : phaseTime.keySet()) { + Double v = phaseTime.get(name); + Integer v2 = phaseNodes.get(name); + out.printf("%20s %6.4f %d\n", name, v.doubleValue(), v2.intValue()); + } + out.printf("%20s %6.4f\n", "total", elapsed); + + if (maxattempts > 0) { + out.println("Distribution of regalloc passes:"); + for (int i = 0; i <= maxattempts; i++) { + out.printf("%2d %8d\n", i, attempts[i]); + } + } + } +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogEvent.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogEvent.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,38 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +package com.sun.hotspot.tools.compiler; + +import java.io.PrintStream; +import java.util.*; + +public interface LogEvent { + public double getStart(); + + public double getElapsedTime(); + + public Compilation getCompilation(); + + public void print(PrintStream stream); +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,430 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +/** + * A SAX based parser of LogCompilation output from HotSpot. It takes a complete + * @author never + */ + +package com.sun.hotspot.tools.compiler; + +import java.io.FileReader; +import java.io.Reader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Stack; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import org.xml.sax.Attributes; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.helpers.DefaultHandler; + +public class LogParser extends DefaultHandler implements ErrorHandler, Constants { + + static final HashMap typeMap; + static { + typeMap = new HashMap(); + typeMap.put("[I", "int[]"); + typeMap.put("[C", "char[]"); + typeMap.put("[Z", "boolean[]"); + typeMap.put("[L", "Object[]"); + typeMap.put("[B", "byte[]"); + } + + static Comparator sortByStart = new Comparator() { + + public int compare(LogEvent a, LogEvent b) { + double difference = (a.getStart() - b.getStart()); + if (difference < 0) { + return -1; + } + if (difference > 0) { + return 1; + } + return 0; + } + + @Override + public boolean equals(Object other) { + return false; + } + + @Override + public int hashCode() { + return 7; + } + }; + static Comparator sortByNameAndStart = new Comparator() { + + public int compare(LogEvent a, LogEvent b) { + Compilation c1 = a.getCompilation(); + Compilation c2 = b.getCompilation(); + if (c1 != null && c2 != null) { + int result = c1.getMethod().toString().compareTo(c2.getMethod().toString()); + if (result != 0) { + return result; + } + } + double difference = (a.getStart() - b.getStart()); + if (difference < 0) { + return -1; + } + if (difference > 0) { + return 1; + } + return 0; + } + + public boolean equals(Object other) { + return false; + } + + @Override + public int hashCode() { + return 7; + } + }; + static Comparator sortByElapsed = new Comparator() { + + public int compare(LogEvent a, LogEvent b) { + double difference = (a.getElapsedTime() - b.getElapsedTime()); + if (difference < 0) { + return -1; + } + if (difference > 0) { + return 1; + } + return 0; + } + + @Override + public boolean equals(Object other) { + return false; + } + + @Override + public int hashCode() { + return 7; + } + }; + + private ArrayList events = new ArrayList(); + + private HashMap types = new HashMap(); + private HashMap methods = new HashMap(); + private LinkedHashMap nmethods = new LinkedHashMap(); + private HashMap compiles = new HashMap(); + private String failureReason; + private int bci; + private Stack scopes = new Stack(); + private Compilation compile; + private CallSite site; + private Stack phaseStack = new Stack(); + private UncommonTrapEvent currentTrap; + + long parseLong(String l) { + try { + return Long.decode(l).longValue(); + } catch (NumberFormatException nfe) { + int split = l.length() - 8; + String s1 = "0x" + l.substring(split); + String s2 = l.substring(0, split); + long v1 = Long.decode(s1).longValue() & 0xffffffffL; + long v2 = (Long.decode(s2).longValue() & 0xffffffffL) << 32; + if (!l.equals("0x" + Long.toHexString(v1 + v2))) { + System.out.println(l); + System.out.println(s1); + System.out.println(s2); + System.out.println(v1); + System.out.println(v2); + System.out.println(Long.toHexString(v1 + v2)); + throw new InternalError("bad conversion"); + } + return v1 + v2; + } + } + + public static ArrayList parse(String file, boolean cleanup) throws Exception { + return parse(new FileReader(file), cleanup); + } + + public static ArrayList parse(Reader reader, boolean cleanup) throws Exception { + // Create the XML input factory + SAXParserFactory factory = SAXParserFactory.newInstance(); + + // Create the XML LogEvent reader + SAXParser p = factory.newSAXParser(); + + if (cleanup) { + // some versions of the log have slightly malformed XML, so clean it + // up before passing it to SAX + reader = new LogCleanupReader(reader); + } + + LogParser log = new LogParser(); + p.parse(new InputSource(reader), log); + + // Associate compilations with their NMethods + for (NMethod nm : log.nmethods.values()) { + Compilation c = log.compiles.get(nm.getId()); + nm.setCompilation(c); + // Native wrappers for methods don't have a compilation + if (c != null) { + c.setNMethod(nm); + } + } + + // Initially we want the LogEvent log sorted by timestamp + Collections.sort(log.events, sortByStart); + + return log.events; + } + + String search(Attributes attr, String name) { + return search(attr, name, null); + } + + String search(Attributes attr, String name, String defaultValue) { + String result = attr.getValue(name); + if (result != null) { + return result; + } + if (defaultValue != null) { + return defaultValue; + } + for (int i = 0; i < attr.getLength(); i++) { + System.out.println(attr.getQName(i) + " " + attr.getValue(attr.getQName(i))); + } + throw new InternalError("can't find " + name); + } + int indent = 0; + String compile_id; + + String type(String id) { + String result = types.get(id); + if (result == null) { + throw new InternalError(id); + } + String remapped = typeMap.get(result); + if (remapped != null) { + return remapped; + } + return result; + } + + void type(String id, String name) { + assert type(id) == null; + types.put(id, name); + } + + Method method(String id) { + Method result = methods.get(id); + if (result == null) { + throw new InternalError(id); + } + return result; + } + + public String makeId(Attributes atts) { + String id = atts.getValue("compile_id"); + String kind = atts.getValue("kind"); + if (kind != null && kind.equals("osr")) { + id += "%"; + } + return id; + } + + @Override + public void startElement(String uri, + String localName, + String qname, + Attributes atts) { + if (qname.equals("phase")) { + Phase p = new Phase(search(atts, "name"), + Double.parseDouble(search(atts, "stamp")), + Integer.parseInt(search(atts, "nodes"))); + phaseStack.push(p); + } else if (qname.equals("phase_done")) { + Phase p = phaseStack.pop(); + p.setEndNodes(Integer.parseInt(search(atts, "nodes"))); + p.setEnd(Double.parseDouble(search(atts, "stamp"))); + compile.getPhases().add(p); + } else if (qname.equals("task")) { + compile = new Compilation(Integer.parseInt(search(atts, "compile_id", "-1"))); + compile.setStart(Double.parseDouble(search(atts, "stamp"))); + compile.setICount(search(atts, "count", "0")); + compile.setBCount(search(atts, "backedge_count", "0")); + + String method = atts.getValue("method"); + int space = method.indexOf(' '); + method = method.substring(0, space) + "::" + + method.substring(space + 1, method.indexOf(' ', space + 1) + 1); + String compiler = atts.getValue("compiler"); + if (compiler == null) { + compiler = ""; + } + String kind = atts.getValue("compile_kind"); + if (kind == null) { + kind = "normal"; + } + if (kind.equals("osr")) { + compile.setOsr(true); + compile.setOsr_bci(Integer.parseInt(search(atts, "osr_bci"))); + } else if (kind.equals("c2i")) { + compile.setSpecial("--- adapter " + method); + } else { + compile.setSpecial(compile.getId() + " " + method + " (0 bytes)"); + } + events.add(compile); + compiles.put(makeId(atts), compile); + } else if (qname.equals("type")) { + type(search(atts, "id"), search(atts, "name")); + } else if (qname.equals("bc")) { + bci = Integer.parseInt(search(atts, "bci")); + } else if (qname.equals("klass")) { + type(search(atts, "id"), search(atts, "name")); + } else if (qname.equals("method")) { + String id = search(atts, "id"); + Method m = new Method(); + m.setHolder(type(search(atts, "holder"))); + m.setName(search(atts, "name")); + m.setReturnType(type(search(atts, "return"))); + m.setArguments(search(atts, "arguments", "void")); + m.setBytes(search(atts, "bytes")); + m.setIICount(search(atts, "iicount")); + m.setFlags(search(atts, "flags")); + methods.put(id, m); + } else if (qname.equals("call")) { + site = new CallSite(bci, method(search(atts, "method"))); + site.setCount(Integer.parseInt(search(atts, "count"))); + String receiver = atts.getValue("receiver"); + if (receiver != null) { + site.setReceiver(type(receiver)); + site.setReceiver_count(Integer.parseInt(search(atts, "receiver_count"))); + } + scopes.peek().add(site); + } else if (qname.equals("regalloc")) { + compile.setAttempts(Integer.parseInt(search(atts, "attempts"))); + } else if (qname.equals("inline_fail")) { + scopes.peek().last().setReason(search(atts, "reason")); + } else if (qname.equals("failure")) { + failureReason = search(atts, "reason"); + } else if (qname.equals("task_done")) { + compile.setEnd(Double.parseDouble(search(atts, "stamp"))); + if (Integer.parseInt(search(atts, "success")) == 0) { + compile.setFailureReason(failureReason); + } + } else if (qname.equals("make_not_entrant")) { + String id = makeId(atts); + NMethod nm = nmethods.get(id); + if (nm == null) throw new InternalError(); + LogEvent e = new MakeNotEntrantEvent(Double.parseDouble(search(atts, "stamp")), id, + atts.getValue("zombie") != null, nm); + events.add(e); + } else if (qname.equals("uncommon_trap")) { + String id = atts.getValue("compile_id"); + if (id != null) { + id = makeId(atts); + currentTrap = new UncommonTrapEvent(Double.parseDouble(search(atts, "stamp")), + id, + atts.getValue("reason"), + atts.getValue("action"), + Integer.parseInt(search(atts, "count", "0"))); + events.add(currentTrap); + } else { + // uncommon trap inserted during parsing. + // ignore for now + } + } else if (qname.equals("jvms")) { + // + if (currentTrap != null) { + currentTrap.addJVMS(atts.getValue("method"), Integer.parseInt(atts.getValue("bci"))); + } else { + System.err.println("Missing uncommon_trap for jvms"); + } + } else if (qname.equals("nmethod")) { + String id = makeId(atts); + NMethod nm = new NMethod(Double.parseDouble(search(atts, "stamp")), + id, + parseLong(atts.getValue("address")), + parseLong(atts.getValue("size"))); + nmethods.put(id, nm); + events.add(nm); + } else if (qname.equals("parse")) { + Method m = method(search(atts, "method")); + if (scopes.size() == 0) { + compile.setMethod(m); + scopes.push(compile.getCall()); + } else { + if (site.getMethod() == m) { + scopes.push(site); + } else if (scopes.peek().getCalls().size() > 2 && m == scopes.peek().last(-2).getMethod()) { + scopes.push(scopes.peek().last(-2)); + } else { + System.out.println(site.getMethod()); + System.out.println(m); + throw new InternalError("call site and parse don't match"); + } + } + } + } + + @Override + public void endElement(String uri, + String localName, + String qname) { + if (qname.equals("parse")) { + indent -= 2; + scopes.pop(); + } else if (qname.equals("uncommon_trap")) { + currentTrap = null; + } else if (qname.equals("task")) { + types.clear(); + methods.clear(); + site = null; + } + } + + @Override + public void warning(org.xml.sax.SAXParseException e) { + System.err.println(e.getMessage() + " at line " + e.getLineNumber() + ", column " + e.getColumnNumber()); + e.printStackTrace(); + } + + @Override + public void error(org.xml.sax.SAXParseException e) { + System.err.println(e.getMessage() + " at line " + e.getLineNumber() + ", column " + e.getColumnNumber()); + e.printStackTrace(); + } + + @Override + public void fatalError(org.xml.sax.SAXParseException e) { + System.err.println(e.getMessage() + " at line " + e.getLineNumber() + ", column " + e.getColumnNumber()); + e.printStackTrace(); + } +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/MakeNotEntrantEvent.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/MakeNotEntrantEvent.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,55 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +package com.sun.hotspot.tools.compiler; + +import java.io.PrintStream; + +class MakeNotEntrantEvent extends BasicLogEvent { + private final boolean zombie; + + private NMethod nmethod; + + MakeNotEntrantEvent(double s, String i, boolean z, NMethod nm) { + super(s, i); + zombie = z; + nmethod = nm; + } + + public NMethod getNMethod() { + return nmethod; + } + + public void print(PrintStream stream) { + if (isZombie()) { + stream.printf("%s make_zombie\n", getId()); + } else { + stream.printf("%s make_not_entrant\n", getId()); + } + } + + public boolean isZombie() { + return zombie; + } +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Method.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Method.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,120 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +package com.sun.hotspot.tools.compiler; + +import java.util.Arrays; + +public class Method implements Constants { + + private String holder; + private String name; + private String returnType; + private String arguments; + private String bytes; + private String iicount; + private String flags; + + String decodeFlags(int osr_bci) { + int f = Integer.parseInt(getFlags()); + char[] c = new char[4]; + Arrays.fill(c, ' '); + if (osr_bci >= 0) { + c[0] = '%'; + } + if ((f & JVM_ACC_SYNCHRONIZED) != 0) { + c[1] = 's'; + } + return new String(c); + } + + String format(int osr_bci) { + if (osr_bci >= 0) { + return getHolder().replace('/', '.') + "::" + getName() + " @ " + osr_bci + " (" + getBytes() + " bytes)"; + } else { + return getHolder().replace('/', '.') + "::" + getName() + " (" + getBytes() + " bytes)"; + } + } + + @Override + public String toString() { + return getHolder().replace('/', '.') + "::" + getName() + " (" + getBytes() + " bytes)"; + } + + public String getHolder() { + return holder; + } + + public void setHolder(String holder) { + this.holder = holder; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getReturnType() { + return returnType; + } + + public void setReturnType(String returnType) { + this.returnType = returnType; + } + + public String getArguments() { + return arguments; + } + + public void setArguments(String arguments) { + this.arguments = arguments; + } + + public String getBytes() { + return bytes; + } + + public void setBytes(String bytes) { + this.bytes = bytes; + } + + public String getIICount() { + return iicount; + } + + public void setIICount(String iicount) { + this.iicount = iicount; + } + + public String getFlags() { + return flags; + } + + public void setFlags(String flags) { + this.flags = flags; + } +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/NMethod.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/NMethod.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,60 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +package com.sun.hotspot.tools.compiler; + +import java.io.PrintStream; + +public class NMethod extends BasicLogEvent { + + private long address; + private long size; + + NMethod(double s, String i, long a, long sz) { + super(s, i); + address = a; + size = sz; + } + + public void print(PrintStream out) { + // XXX Currently we do nothing + // throw new InternalError(); + } + + public long getAddress() { + return address; + } + + public void setAddress(long address) { + this.address = address; + } + + public long getSize() { + return size; + } + + public void setSize(long size) { + this.size = size; + } +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Phase.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Phase.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,63 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +package com.sun.hotspot.tools.compiler; + +import java.io.PrintStream; + +public class Phase extends BasicLogEvent { + + private final int startNodes; + private int endNodes; + + Phase(String n, double s, int nodes) { + super(s, n); + startNodes = nodes; + } + + int getNodes() { + return getStartNodes(); + } + + void setEndNodes(int n) { + endNodes = n; + } + + public String getName() { + return getId(); + } + + public int getStartNodes() { + return startNodes; + } + + public int getEndNodes() { + return endNodes; + } + + @Override + public void print(PrintStream stream) { + throw new UnsupportedOperationException("Not supported yet."); + } +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/UncommonTrapEvent.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/UncommonTrapEvent.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,84 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +package com.sun.hotspot.tools.compiler; + +import java.io.PrintStream; + +class UncommonTrapEvent extends BasicLogEvent { + + private final String reason; + private final String action; + private int count; + private String jvms = ""; + + UncommonTrapEvent(double s, String i, String r, String a, int c) { + super(s, i); + reason = r; + action = a; + count = c; + } + + + public void addJVMS(String method, int bci) { + setJvms(getJvms() + " @" + bci + " " + method + "\n"); + } + + public void updateCount(UncommonTrapEvent trap) { + setCount(Math.max(getCount(), trap.getCount())); + } + + public void print(PrintStream stream) { + stream.printf("%s uncommon trap %s %s\n", getId(), getReason(), getAction()); + stream.print(getJvms()); + } + + public String getReason() { + return reason; + } + + public String getAction() { + return action; + } + + public int getCount() { + return count; + } + + public void setCount(int count) { + this.count = count; + } + + public String getJvms() { + return jvms; + } + + public void setJvms(String jvms) { + this.jvms = jvms; + } + + public void setCompilation(Compilation compilation) { + this.compilation = compilation; + } +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/MakeDeps/BuildConfig.java --- a/src/share/tools/MakeDeps/BuildConfig.java Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/tools/MakeDeps/BuildConfig.java Thu Mar 12 18:16:36 2009 -0700 @@ -247,6 +247,7 @@ sysDefines.add("HOTSPOT_BUILD_USER="+System.getProperty("user.name")); sysDefines.add("HOTSPOT_BUILD_TARGET=\\\""+get("Build")+"\\\""); sysDefines.add("_JNI_IMPLEMENTATION_"); + sysDefines.add("HOTSPOT_LIB_ARCH=\\\"i486\\\""); sysDefines.addAll(defines); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/tools/MakeDeps/Database.java --- a/src/share/tools/MakeDeps/Database.java Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/tools/MakeDeps/Database.java Thu Mar 12 18:16:36 2009 -0700 @@ -365,7 +365,7 @@ // HACK ALERT. The compilation of ad_ files is very slow. // We want to start compiling them as early as possible. The compilation - // order on unix is dependant on the order we emit files here. + // order on unix is dependent on the order we emit files here. // By sorting the output before emitting it, we expect // that ad_ will be compiled early. boolean shouldSortObjFiles = true; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/Doc/Syntax.doc --- a/src/share/vm/adlc/Doc/Syntax.doc Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/Doc/Syntax.doc Thu Mar 12 18:16:36 2009 -0700 @@ -88,7 +88,7 @@ // these are used for constraints, etc. alloc_class class1(AX, BX); // form an allocation class of registers - // used by the register allocator for seperate + // used by the register allocator for separate // allocation of target register classes 3. Pipeline Syntax for Scheduling @@ -150,7 +150,7 @@ b. %} (block terminator) c. EOF (file terminator) - 4. Each statement must start on a seperate line + 4. Each statement must start on a separate line 5. Identifiers cannot contain: (){}%;,"/\ diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/adlc.hpp --- a/src/share/vm/adlc/adlc.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/adlc.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -79,6 +79,7 @@ // Macros // Debugging note: Put a breakpoint on "abort". +#undef assert #define assert(cond, msg) { if (!(cond)) { fprintf(stderr, "assert fails %s %d: %s\n", __FILE__, __LINE__, msg); abort(); }} #define max(a, b) (((a)>(b)) ? (a) : (b)) diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/adlparse.cpp --- a/src/share/vm/adlc/adlparse.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/adlparse.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -298,7 +298,7 @@ rule->count_commutative_op(count); if (count > 0) { // Clone match rule and swap commutative operation's operands. - rule->swap_commutative_op(instr_ident, count, match_rules_cnt); + rule->matchrule_swap_commutative_op(instr_ident, count, match_rules_cnt); } } @@ -2586,7 +2586,7 @@ while( _curchar != ')' ) { // Get information on the left instruction and its operand // left-instructions's number - intptr_t left_inst = get_int(); + int left_inst = get_int(); // Left-instruction's operand skipws(); if( _curchar != '.' ) { @@ -2602,7 +2602,7 @@ skipws(); // Get information on the right instruction and its operand - intptr_t right_inst; // Right-instructions's number + int right_inst; // Right-instructions's number if( isdigit(_curchar) ) { right_inst = get_int(); // Right-instruction's operand @@ -3497,22 +3497,24 @@ // (1) // Check if there is a string to pass through to output - char *start = _ptr; // Record start of the next string - while ((_curchar != '$') && ((_curchar != '%') || (*(_ptr+1) != '}')) ) { - // If at the start of a comment, skip past it - if( (_curchar == '/') && ((*(_ptr+1) == '/') || (*(_ptr+1) == '*')) ) { - skipws_no_preproc(); - } else { - // ELSE advance to the next character, or start of the next line - next_char_or_line(); + { + char *start = _ptr; // Record start of the next string + while ((_curchar != '$') && ((_curchar != '%') || (*(_ptr+1) != '}')) ) { + // If at the start of a comment, skip past it + if( (_curchar == '/') && ((*(_ptr+1) == '/') || (*(_ptr+1) == '*')) ) { + skipws_no_preproc(); + } else { + // ELSE advance to the next character, or start of the next line + next_char_or_line(); + } } - } - // If a string was found, terminate it and record in EncClass - if ( start != _ptr ) { - *_ptr = '\0'; // Terminate the string - // Add flag to _strings list indicating we should check _rep_vars - format->_strings.addName(NameList::_signal2); - format->_strings.addName(start); + // If a string was found, terminate it and record in EncClass + if ( start != _ptr ) { + *_ptr = '\0'; // Terminate the string + // Add flag to _strings list indicating we should check _rep_vars + format->_strings.addName(NameList::_signal2); + format->_strings.addName(start); + } } // (2) @@ -3563,10 +3565,10 @@ // copy it and record in FormatRule if ( _curchar == '$' ) { next_char(); // Move past the '$' - char* rep_var = get_ident(); // Nil terminate the variable name - rep_var = strdup(rep_var);// Copy the string + char* next_rep_var = get_ident(); // Nil terminate the variable name + next_rep_var = strdup(next_rep_var);// Copy the string *_ptr = _curchar; // and replace Nil with original character - format->_rep_vars.addName(rep_var); + format->_rep_vars.addName(next_rep_var); // Add flag to _strings list indicating we should check _rep_vars format->_strings.addName(NameList::_signal); } @@ -3714,13 +3716,13 @@ parse_err(SYNERR, "identifier expected at %c\n", _curchar); continue; } // Check that you have a valid operand - const Form *form = instr->_localNames[ident2]; - if (!form) { + const Form *form2 = instr->_localNames[ident2]; + if (!form2) { parse_err(SYNERR, "operand name expected at %s\n", ident2); continue; } - oper = form->is_operand(); - if (oper == NULL && !form->is_opclass()) { + oper = form2->is_operand(); + if (oper == NULL && !form2->is_opclass()) { parse_err(SYNERR, "operand name expected at %s\n", ident2); continue; } // Add operand to list @@ -4271,7 +4273,7 @@ int result; // Storage for integer result if( _curline == NULL ) // Return NULL at EOF. - return NULL; + return 0; skipws(); // Skip whitespace before identifier start = end = _ptr; // Start points at first character @@ -4553,7 +4555,7 @@ //---------------------------ensure_start_of_line------------------------------ // A preprocessor directive has been encountered. Be sure it has fallen at -// the begining of a line, or else report an error. +// the beginning of a line, or else report an error. void ADLParser::ensure_start_of_line(void) { if (_curchar == '\n') { next_line(); return; } assert( _ptr >= _curline && _ptr < _curline+strlen(_curline), diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/archDesc.cpp --- a/src/share/vm/adlc/archDesc.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/archDesc.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -436,10 +436,12 @@ if ((oper->_matrule) && (oper->_matrule->_lChild == NULL) && (oper->_matrule->_rChild == NULL)) { - const Form *form = _globalNames[oper->_matrule->_opType]; - if ((form) && form->is_operand() && - (form->ideal_only() == false)) { - add_chain_rule_entry(oper->_matrule->_opType, oper->cost(), oper->_ident); + { + const Form *form = _globalNames[oper->_matrule->_opType]; + if ((form) && form->is_operand() && + (form->ideal_only() == false)) { + add_chain_rule_entry(oper->_matrule->_opType, oper->cost(), oper->_ident); + } } // Check for additional chain rules if (oper->_matrule->_next) { @@ -1015,12 +1017,12 @@ int idealIndex = 0; for (idealIndex = 1; idealIndex < _last_machine_leaf; ++idealIndex) { const char *idealName = NodeClassNames[idealIndex]; - _idealIndex.Insert((void*)idealName, (void*)idealIndex); + _idealIndex.Insert((void*) idealName, (void*) (intptr_t) idealIndex); } for ( idealIndex = _last_machine_leaf+1; idealIndex < _last_opcode; ++idealIndex) { const char *idealName = NodeClassNames[idealIndex]; - _idealIndex.Insert((void*)idealName, (void*)idealIndex); + _idealIndex.Insert((void*) idealName, (void*) (intptr_t) idealIndex); } } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/dfa.cpp --- a/src/share/vm/adlc/dfa.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/dfa.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -870,7 +870,7 @@ } // Print out the dictionary contents as key-value pairs -static void dumpekey(const void* key) { fprintf(stdout, "%s", key); } +static void dumpekey(const void* key) { fprintf(stdout, "%s", (char*) key); } static void dumpexpr(const void* expr) { fflush(stdout); ((Expr*)expr)->print(); } void ExprDict::dump() { @@ -1020,7 +1020,7 @@ } // Print out the dictionary contents as key-value pairs -static void print_key (const void* key) { fprintf(stdout, "%s", key); } +static void print_key (const void* key) { fprintf(stdout, "%s", (char*) key); } static void print_production(const void* production) { fflush(stdout); ((Production*)production)->print(); } void ProductionState::print() { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/dict2.cpp --- a/src/share/vm/adlc/dict2.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/dict2.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -275,7 +275,7 @@ // Convert string to hash key. This algorithm implements a universal hash // function with the multipliers frozen (ok, so it's not universal). The // multipliers (and allowable characters) are all odd, so the resultant sum -// is odd - guarenteed not divisible by any power of two, so the hash tables +// is odd - guaranteed not divisible by any power of two, so the hash tables // can be any power of two with good results. Also, I choose multipliers // that have only 2 bits set (the low is always set to be odd) so // multiplication requires only shifts and adds. Characters are required to @@ -296,7 +296,7 @@ } //------------------------------hashptr-------------------------------------- -// Slimey cheap hash function; no guarenteed performance. Better than the +// Slimey cheap hash function; no guaranteed performance. Better than the // default for pointers, especially on MS-DOS machines. int hashptr(const void *key) { #ifdef __TURBOC__ @@ -306,7 +306,7 @@ #endif } -// Slimey cheap hash function; no guarenteed performance. +// Slimey cheap hash function; no guaranteed performance. int hashkey(const void *key) { return (int)((intptr_t)key); } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/dict2.hpp --- a/src/share/vm/adlc/dict2.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/dict2.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -89,10 +89,10 @@ // Hashing functions int hashstr(const void *s); // Nice string hash -// Slimey cheap hash function; no guarenteed performance. Better than the +// Slimey cheap hash function; no guaranteed performance. Better than the // default for pointers, especially on MS-DOS machines. int hashptr(const void *key); -// Slimey cheap hash function; no guarenteed performance. +// Slimey cheap hash function; no guaranteed performance. int hashkey(const void *key); // Key comparators diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/filebuff.cpp --- a/src/share/vm/adlc/filebuff.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/filebuff.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,8 @@ // FILEBUFF.CPP - Routines for handling a parser file buffer #include "adlc.hpp" +using namespace std; + //------------------------------FileBuff--------------------------------------- // Create a new parsing buffer FileBuff::FileBuff( BufferedFile *fptr, ArchDesc& archDesc) : _fp(fptr), _AD(archDesc) { @@ -48,10 +50,10 @@ file_error(SEMERR, 0, "Buffer allocation failed\n"); exit(1); // Exit on allocation failure } - *_bigbuf = '\n'; // Lead with a sentinal newline - _buf = _bigbuf+1; // Skip sentinal + *_bigbuf = '\n'; // Lead with a sentinel newline + _buf = _bigbuf+1; // Skip sentinel _bufmax = _buf; // Buffer is empty - _bufeol = _bigbuf; // _bufeol points at sentinal + _bufeol = _bigbuf; // _bufeol points at sentinel _filepos = -1; // filepos is in sync with _bufeol _bufoff = _offset = 0L; // Offset at file start @@ -60,8 +62,8 @@ file_error(SEMERR, 0, "File read error, no input read\n"); exit(1); // Exit on read error } - *_bufmax = '\n'; // End with a sentinal new-line - *(_bufmax+1) = '\0'; // Then end with a sentinal NULL + *_bufmax = '\n'; // End with a sentinel new-line + *(_bufmax+1) = '\0'; // Then end with a sentinel NULL } //------------------------------~FileBuff-------------------------------------- @@ -79,7 +81,7 @@ _linenum++; retval = ++_bufeol; // return character following end of previous line - if (*retval == '\0') return NULL; // Check for EOF sentinal + if (*retval == '\0') return NULL; // Check for EOF sentinel // Search for newline character which must end each line for(_filepos++; *_bufeol != '\n'; _bufeol++) _filepos++; // keep filepos in sync with _bufeol @@ -217,7 +219,7 @@ off = expandtab(os,off,*s++,'-','-'); if( i == len ) os << '^'; // Mark end of region os << '\n'; // End of marked line - return 0L; // All done + return 0; // All done } //------------------------------print------------------------------------------ diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/filebuff.hpp --- a/src/share/vm/adlc/filebuff.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/filebuff.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -26,6 +26,7 @@ #include using namespace std; + // STRUCTURE FOR HANDLING INPUT AND OUTPUT FILES typedef struct { const char *_name; @@ -36,7 +37,7 @@ //------------------------------FileBuff-------------------------------------- // This class defines a nicely behaved buffer of text. Entire file of text -// is read into buffer at creation, with sentinals at start and end. +// is read into buffer at creation, with sentinels at start and end. class FileBuff { friend class FileBuffRegion; private: @@ -45,8 +46,8 @@ long _bufoff; // Start of buffer file offset char *_buf; // The buffer itself. - char *_bigbuf; // The buffer plus sentinals; actual heap area - char *_bufmax; // A pointer to the buffer end sentinal + char *_bigbuf; // The buffer plus sentinels; actual heap area + char *_bufmax; // A pointer to the buffer end sentinel char *_bufeol; // A pointer to the last complete line end int _err; // Error flag for file seek/read operations @@ -72,7 +73,7 @@ // This converts a pointer into the buffer to a file offset. It only works // when the pointer is valid (i.e. just obtained from getline()). - int getoff(const char *s) { return _bufoff+(int)(s-_buf); } + long getoff(const char* s) { return _bufoff + (s - _buf); } }; //------------------------------FileBuffRegion--------------------------------- @@ -95,8 +96,6 @@ FileBuffRegion *copy(); // Deep copy FileBuffRegion *merge(FileBuffRegion*); // Merge 2 regions; delete input -// void print(std::ostream&); -// friend std::ostream& operator<< (std::ostream&, FileBuffRegion&); void print(ostream&); friend ostream& operator<< (ostream&, FileBuffRegion&); }; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/forms.cpp --- a/src/share/vm/adlc/forms.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/forms.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -70,6 +70,7 @@ else return (_iter <_cur-1 ? _names[++_iter] : NULL); } const char *NameList::current() { return (_iter < _cur ? _names[_iter] : NULL); } +const char *NameList::peek(int skip) { return (_iter + skip < _cur ? _names[_iter + skip] : NULL); } // Return 'true' if current entry is signal bool NameList::current_is_signal() { @@ -248,11 +249,13 @@ // True if 'opType', an ideal name, loads or stores. Form::DataType Form::is_load_from_memory(const char *opType) const { if( strcmp(opType,"LoadB")==0 ) return Form::idealB; + if( strcmp(opType,"LoadUB")==0 ) return Form::idealB; if( strcmp(opType,"LoadUS")==0 ) return Form::idealC; if( strcmp(opType,"LoadD")==0 ) return Form::idealD; if( strcmp(opType,"LoadD_unaligned")==0 ) return Form::idealD; if( strcmp(opType,"LoadF")==0 ) return Form::idealF; if( strcmp(opType,"LoadI")==0 ) return Form::idealI; + if( strcmp(opType,"LoadUI2L")==0 ) return Form::idealI; if( strcmp(opType,"LoadKlass")==0 ) return Form::idealP; if( strcmp(opType,"LoadNKlass")==0 ) return Form::idealN; if( strcmp(opType,"LoadL")==0 ) return Form::idealL; @@ -370,7 +373,7 @@ } // Print out the dictionary contents as key-value pairs -static void dumpkey (const void* key) { fprintf(stdout, "%s", key); } +static void dumpkey (const void* key) { fprintf(stdout, "%s", (char*) key); } static void dumpform(const void* form) { fflush(stdout); ((Form*)form)->dump(); } void FormDict::dump() { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/forms.hpp --- a/src/share/vm/adlc/forms.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/forms.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -124,7 +124,7 @@ public: // Public Data Form *_next; // Next pointer for form lists - long _linenum; // Line number for debugging + int _linenum; // Line number for debugging // Dynamic type check for common forms. virtual OpClassForm *is_opclass() const; @@ -342,6 +342,7 @@ void reset(); // Reset iteration const char *iter(); // after reset(), first element : else next const char *current(); // return current element in iteration. + const char *peek(int skip = 1); // returns element + skip in iteration if there is one bool current_is_signal(); // Return 'true' if current entry is signal bool is_signal(const char *entry); // Return true if entry is a signal diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/formsopt.cpp --- a/src/share/vm/adlc/formsopt.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/formsopt.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1,5 +1,5 @@ /* - * Copyright 1998-2006 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1998-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -273,13 +273,13 @@ for(_regDefs.reset(); (name = _regDefs.iter()) != NULL;) { int rnum = ((RegDef*)_regDef[name])->register_num(); if( (rnum >> 5) == wordnum ) - word |= (1L<<(rnum&31)); + word |= (1 << (rnum & 31)); } if( stack_also ) { // Now also collect stack bits for( int i = 0; i < 32; i++ ) if( wordnum*32+i >= RegisterForm::_reg_ctr ) - word |= (1L< _max_position ) _max_position = position; - _parent.addName((char *)parent); - _position.addName((char *)position); + _parent.addName((char*) (intptr_t) parent); + _position.addName((char*) (intptr_t) position); _instrs.addName(name); - _input.addName((char *)input); + _input.addName((char*) (intptr_t) input); } // Access info about instructions in the peep-match rule @@ -603,7 +603,7 @@ return _max_position; } -const char *PeepMatch::instruction_name(intptr_t position) { +const char *PeepMatch::instruction_name(int position) { return _instrs.name(position); } @@ -615,11 +615,11 @@ _input.reset(); } -void PeepMatch::next_instruction( intptr_t &parent, intptr_t &position, const char * &name, intptr_t &input ){ - parent = (intptr_t)_parent.iter(); - position = (intptr_t)_position.iter(); +void PeepMatch::next_instruction(int &parent, int &position, const char* &name, int &input) { + parent = (int) (intptr_t) _parent.iter(); + position = (int) (intptr_t) _position.iter(); name = _instrs.iter(); - input = (intptr_t)_input.iter(); + input = (int) (intptr_t) _input.iter(); } // 'true' if current position in iteration is a placeholder, not matched. @@ -637,15 +637,15 @@ } //------------------------------PeepConstraint--------------------------------- -PeepConstraint::PeepConstraint(intptr_t left_inst, char *left_op, char *relation, - intptr_t right_inst, char *right_op) +PeepConstraint::PeepConstraint(int left_inst, char* left_op, char* relation, + int right_inst, char* right_op) : _left_inst(left_inst), _left_op(left_op), _relation(relation), _right_inst(right_inst), _right_op(right_op), _next(NULL) {} PeepConstraint::~PeepConstraint() { } // Check if constraints use instruction at position -bool PeepConstraint::constrains_instruction(intptr_t position) { +bool PeepConstraint::constrains_instruction(int position) { // Check local instruction constraints if( _left_inst == position ) return true; if( _right_inst == position ) return true; @@ -692,7 +692,7 @@ } void PeepReplace::add_operand( int inst_num, char *inst_operand ) { _instruction.add_signal(); - _operand_inst_num.addName((char*)inst_num); + _operand_inst_num.addName((char*) (intptr_t) inst_num); _operand_op_name.addName(inst_operand); } @@ -702,15 +702,15 @@ _operand_inst_num.reset(); _operand_op_name.reset(); } -void PeepReplace::next_instruction(const char * &inst){ +void PeepReplace::next_instruction(const char* &inst){ inst = _instruction.iter(); - intptr_t inst_num = (intptr_t)_operand_inst_num.iter(); - const char *inst_operand = _operand_op_name.iter(); + int inst_num = (int) (intptr_t) _operand_inst_num.iter(); + const char* inst_operand = _operand_op_name.iter(); } -void PeepReplace::next_operand( intptr_t &inst_num, const char * &inst_operand ) { - const char *inst = _instruction.iter(); - inst_num = (intptr_t)_operand_inst_num.iter(); - inst_operand = _operand_op_name.iter(); +void PeepReplace::next_operand(int &inst_num, const char* &inst_operand) { + const char* inst = _instruction.iter(); + inst_num = (int) (intptr_t) _operand_inst_num.iter(); + inst_operand = _operand_op_name.iter(); } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/formsopt.hpp --- a/src/share/vm/adlc/formsopt.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/formsopt.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -1,5 +1,5 @@ /* - * Copyright 1998-2006 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1998-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -457,10 +457,10 @@ // Access info about instructions in the peep-match rule int max_position(); - const char *instruction_name(intptr_t position); + const char *instruction_name(int position); // Iterate through all info on matched instructions void reset(); - void next_instruction( intptr_t &parent, intptr_t &position, const char * &name, intptr_t &input ); + void next_instruction(int &parent, int &position, const char* &name, int &input); // 'true' if current position in iteration is a placeholder, not matched. bool is_placeholder(); @@ -474,20 +474,20 @@ PeepConstraint *_next; // Additional constraints ANDed together public: - const intptr_t _left_inst; - const char *_left_op; - const char *_relation; - const intptr_t _right_inst; - const char *_right_op; + const int _left_inst; + const char* _left_op; + const char* _relation; + const int _right_inst; + const char* _right_op; public: // Public Methods - PeepConstraint(intptr_t left_inst, char *left_op, char *relation, - intptr_t right_inst, char *right_op); + PeepConstraint(int left_inst, char* left_op, char* relation, + int right_inst, char* right_op); ~PeepConstraint(); // Check if constraints use instruction at position - bool constrains_instruction(intptr_t position); + bool constrains_instruction(int position); // Add another constraint void append(PeepConstraint *next_peep_constraint); @@ -519,7 +519,7 @@ // Access contents of peepreplace void reset(); void next_instruction(const char * &root); - void next_operand( intptr_t &inst_num, const char * &inst_operand ); + void next_operand(int &inst_num, const char * &inst_operand ); // Utilities void dump(); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/formssel.cpp --- a/src/share/vm/adlc/formssel.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/formssel.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -844,8 +844,12 @@ for (_parameters.reset(); (name = _parameters.iter()) != NULL;) { OperandForm *opForm = (OperandForm*)_localNames[name]; - const Form *form = _effects[name]; - Effect *e = form ? form->is_effect() : NULL; + Effect* e = NULL; + { + const Form* form = _effects[name]; + e = form ? form->is_effect() : NULL; + } + if (e != NULL) { has_temp |= e->is(Component::TEMP); @@ -858,19 +862,7 @@ OperandForm* kill = (OperandForm*)_localNames[kill_name]; globalAD->syntax_err(_linenum, "%s: %s %s must be at the end of the argument list\n", _ident, kill->_ident, kill_name); - } else if (e->isa(Component::KILL)) { - kill_name = name; - } - - // TEMPs are real uses and need to be among the first parameters - // listed, otherwise the numbering of operands and inputs gets - // screwy, so enforce this restriction during parse. - if (kill_name != NULL && - e->isa(Component::TEMP) && !e->isa(Component::DEF)) { - OperandForm* kill = (OperandForm*)_localNames[kill_name]; - globalAD->syntax_err(_linenum, "%s: %s %s must follow %s %s in the argument list\n", - _ident, kill->_ident, kill_name, opForm->_ident, name); - } else if (e->isa(Component::KILL)) { + } else if (e->isa(Component::KILL) && !e->isa(Component::USE)) { kill_name = name; } } @@ -1122,7 +1114,7 @@ const char *op_name = NULL; const char *reg_type = NULL; FormDict &globals = AD.globalNames(); - cisc_spill_operand = _matrule->cisc_spill_match(globals, AD.get_registers(), instr->_matrule, op_name, reg_type); + cisc_spill_operand = _matrule->matchrule_cisc_spill_match(globals, AD.get_registers(), instr->_matrule, op_name, reg_type); if( (cisc_spill_operand != Not_cisc_spillable) && (op_name != NULL) && equivalent_predicates(this, instr) ) { cisc_spill_operand = operand_position(op_name, Component::USE); int def_oper = operand_position(op_name, Component::DEF); @@ -1217,13 +1209,17 @@ // Seach through operands to determine parameters unique positions. void InstructForm::set_unique_opnds() { uint* uniq_idx = NULL; - uint nopnds = num_opnds(); + int nopnds = num_opnds(); uint num_uniq = nopnds; - uint i; + int i; + _uniq_idx_length = 0; if ( nopnds > 0 ) { - // Allocate index array with reserve. - uniq_idx = (uint*) malloc(sizeof(uint)*(nopnds + 2)); - for( i = 0; i < nopnds+2; i++ ) { + // Allocate index array. Worst case we're mapping from each + // component back to an index and any DEF always goes at 0 so the + // length of the array has to be the number of components + 1. + _uniq_idx_length = _components.count() + 1; + uniq_idx = (uint*) malloc(sizeof(uint)*(_uniq_idx_length)); + for( i = 0; i < _uniq_idx_length; i++ ) { uniq_idx[i] = i; } } @@ -1238,8 +1234,8 @@ _parameters.reset(); while( (name = _parameters.iter()) != NULL ) { count = 0; - uint position = 0; - uint uniq_position = 0; + int position = 0; + int uniq_position = 0; _components.reset(); Component *comp = NULL; if( sets_result() ) { @@ -1255,6 +1251,7 @@ } if( strcmp(name, comp->_name)==0 ) { if( ++count > 1 ) { + assert(position < _uniq_idx_length, "out of bounds"); uniq_idx[position] = uniq_position; has_dupl_use = true; } else { @@ -1284,7 +1281,7 @@ _num_uniq = num_uniq; } -// Generate index values needed for determing the operand position +// Generate index values needed for determining the operand position void InstructForm::index_temps(FILE *fp, FormDict &globals, const char *prefix, const char *receiver) { uint idx = 0; // position of operand in match rule int cur_num_opnds = num_opnds(); @@ -2200,8 +2197,8 @@ // Return zero-based position in component list, only counting constants; // Return -1 if not in list. int OperandForm::constant_position(FormDict &globals, const Component *last) { - // Iterate through components and count constants preceeding 'constant' - uint position = 0; + // Iterate through components and count constants preceding 'constant' + int position = 0; Component *comp; _components.reset(); while( (comp = _components.iter()) != NULL && (comp != last) ) { @@ -2238,7 +2235,7 @@ // Return zero-based position in component list, only counting constants; // Return -1 if not in list. int OperandForm::register_position(FormDict &globals, const char *reg_name) { - // Iterate through components and count registers preceeding 'last' + // Iterate through components and count registers preceding 'last' uint position = 0; Component *comp; _components.reset(); @@ -2304,7 +2301,7 @@ if ( op->is_base_constant(globals) == Form::idealP ) { // Find the constant's index: _c0, _c1, _c2, ... , _cN uint idx = op->constant_position( globals, rep_var); - fprintf(fp," virtual bool disp_is_oop() const {", _ident); + fprintf(fp," virtual bool disp_is_oop() const {"); fprintf(fp, " return _c%d->isa_oop_ptr();", idx); fprintf(fp, " }\n"); } @@ -3042,9 +3039,9 @@ // Recursive call collecting info on top-level operands, not transitive. // Implementation does not modify state of internal structures. -void MatchNode::append_components(FormDict &locals, ComponentList &components, - bool deflag) const { - int usedef = deflag ? Component::DEF : Component::USE; +void MatchNode::append_components(FormDict& locals, ComponentList& components, + bool def_flag) const { + int usedef = def_flag ? Component::DEF : Component::USE; FormDict &globals = _AD.globalNames(); assert (_name != NULL, "MatchNode::build_components encountered empty node\n"); @@ -3062,10 +3059,10 @@ return; } // Promote results of "Set" to DEF - bool def_flag = (!strcmp(_opType, "Set")) ? true : false; - if (_lChild) _lChild->append_components(locals, components, def_flag); - def_flag = false; // only applies to component immediately following 'Set' - if (_rChild) _rChild->append_components(locals, components, def_flag); + bool tmpdef_flag = (!strcmp(_opType, "Set")) ? true : false; + if (_lChild) _lChild->append_components(locals, components, tmpdef_flag); + tmpdef_flag = false; // only applies to component immediately following 'Set' + if (_rChild) _rChild->append_components(locals, components, tmpdef_flag); } // Find the n'th base-operand in the match node, @@ -3313,8 +3310,8 @@ static const char *needs_ideal_memory_list[] = { "StoreI","StoreL","StoreP","StoreN","StoreD","StoreF" , "StoreB","StoreC","Store" ,"StoreFP", - "LoadI" ,"LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" , - "LoadB" ,"LoadUS" ,"LoadS" ,"Load" , + "LoadI", "LoadUI2L", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" , + "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" , "Store4I","Store2I","Store2L","Store2D","Store4F","Store2F","Store16B", "Store8B","Store4B","Store8C","Store4C","Store2C", "Load4I" ,"Load2I" ,"Load2L" ,"Load2D" ,"Load4F" ,"Load2F" ,"Load16B" , @@ -3411,9 +3408,9 @@ return (form1 == form2); } -//-------------------------cisc_spill_match------------------------------------ +//-------------------------cisc_spill_match_node------------------------------- // Recursively check two MatchRules for legal conversion via cisc-spilling -int MatchNode::cisc_spill_match(FormDict &globals, RegisterForm *registers, MatchNode *mRule2, const char * &operand, const char * ®_type) { +int MatchNode::cisc_spill_match(FormDict& globals, RegisterForm* registers, MatchNode* mRule2, const char* &operand, const char* ®_type) { int cisc_spillable = Maybe_cisc_spillable; int left_spillable = Maybe_cisc_spillable; int right_spillable = Maybe_cisc_spillable; @@ -3434,10 +3431,16 @@ const InstructForm *form2_inst = form2 ? form2->is_instruction() : NULL; const char *name_left = mRule2->_lChild ? mRule2->_lChild->_opType : NULL; const char *name_right = mRule2->_rChild ? mRule2->_rChild->_opType : NULL; + DataType data_type = Form::none; + if (form->is_operand()) { + // Make sure the loadX matches the type of the reg + data_type = form->ideal_to_Reg_type(form->is_operand()->ideal_type(globals)); + } // Detect reg vs (loadX memory) if( form->is_cisc_reg(globals) && form2_inst - && (is_load_from_memory(mRule2->_opType) != Form::none) // reg vs. (load memory) + && data_type != Form::none + && (is_load_from_memory(mRule2->_opType) == data_type) // reg vs. (load memory) && (name_left != NULL) // NOT (load) && (name_right == NULL) ) { // NOT (load memory foo) const Form *form2_left = name_left ? globals[name_left] : NULL; @@ -3487,13 +3490,13 @@ return cisc_spillable; } -//---------------------------cisc_spill_match---------------------------------- +//---------------------------cisc_spill_match_rule------------------------------ // Recursively check two MatchRules for legal conversion via cisc-spilling // This method handles the root of Match tree, // general recursive checks done in MatchNode -int MatchRule::cisc_spill_match(FormDict &globals, RegisterForm *registers, - MatchRule *mRule2, const char * &operand, - const char * ®_type) { +int MatchRule::matchrule_cisc_spill_match(FormDict& globals, RegisterForm* registers, + MatchRule* mRule2, const char* &operand, + const char* ®_type) { int cisc_spillable = Maybe_cisc_spillable; int left_spillable = Maybe_cisc_spillable; int right_spillable = Maybe_cisc_spillable; @@ -3531,7 +3534,7 @@ //----------------------------- equivalent ------------------------------------ // Recursively check to see if two match rules are equivalent. // This rule handles the root. -bool MatchRule::equivalent(FormDict &globals, MatchRule *mRule2) { +bool MatchRule::equivalent(FormDict &globals, MatchNode *mRule2) { // Check that each sets a result if (sets_result() != mRule2->sets_result()) { return false; @@ -3647,7 +3650,7 @@ //-------------------------- swap_commutative_op ------------------------------ // Recursively swap specified commutative operation with subtree operands. -void MatchRule::swap_commutative_op(const char* instr_ident, int count, int& match_rules_cnt) { +void MatchRule::matchrule_swap_commutative_op(const char* instr_ident, int count, int& match_rules_cnt) { assert(match_rules_cnt < 100," too many match rule clones"); // Clone MatchRule* clone = new MatchRule(_AD, this); @@ -3660,8 +3663,8 @@ clone->_next = this->_next; this-> _next = clone; if( (--count) > 0 ) { - this-> swap_commutative_op(instr_ident, count, match_rules_cnt); - clone->swap_commutative_op(instr_ident, count, match_rules_cnt); + this-> matchrule_swap_commutative_op(instr_ident, count, match_rules_cnt); + clone->matchrule_swap_commutative_op(instr_ident, count, match_rules_cnt); } } @@ -3693,7 +3696,7 @@ // Recursive call collecting info on top-level operands, not transitive. // Implementation does not modify state of internal structures. -void MatchRule::append_components(FormDict &locals, ComponentList &components) const { +void MatchRule::append_components(FormDict& locals, ComponentList& components, bool def_flag) const { assert (_name != NULL, "MatchNode::build_components encountered empty node\n"); MatchNode::append_components(locals, components, diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/formssel.hpp --- a/src/share/vm/adlc/formssel.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/formssel.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -1,5 +1,5 @@ /* - * Copyright 1998-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1998-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -101,6 +101,7 @@ const char *_ins_pipe; // Instruction Scheduline description class uint *_uniq_idx; // Indexes of unique operands + int _uniq_idx_length; // Length of _uniq_idx array uint _num_uniq; // Number of unique operands ComponentList _components; // List of Components matches MachNode's // operand structure @@ -257,11 +258,13 @@ void set_unique_opnds(); uint num_unique_opnds() { return _num_uniq; } uint unique_opnds_idx(int idx) { - if( _uniq_idx != NULL && idx > 0 ) + if( _uniq_idx != NULL && idx > 0 ) { + assert(idx < _uniq_idx_length, "out of bounds"); return _uniq_idx[idx]; - else + } else { return idx; - } + } + } // Operands which are only KILLs aren't part of the input array and // require special handling in some cases. Their position in this @@ -274,7 +277,7 @@ // // Generate the format call for the replacement variable void rep_var_format(FILE *fp, const char *rep_var); - // Generate index values needed for determing the operand position + // Generate index values needed for determining the operand position void index_temps (FILE *fp, FormDict &globals, const char *prefix = "", const char *receiver = ""); // --------------------------- @@ -341,7 +344,7 @@ // --------------------------- Code Block // Add code - void add_code(const char *string_preceeding_replacement_var); + void add_code(const char *string_preceding_replacement_var); // Add a replacement variable or one of its subfields // Subfields are stored with a leading '$' void add_rep_var(char *replacement_var); @@ -917,8 +920,8 @@ // return 1 if found and position is incremented by operand offset in rule bool find_name(const char *str, int &position) const; bool find_type(const char *str, int &position) const; - void append_components(FormDict &locals, ComponentList &components, - bool def_flag) const; + virtual void append_components(FormDict& locals, ComponentList& components, + bool def_flag = false) const; bool base_operand(uint &position, FormDict &globals, const char * &result, const char * &name, const char * &opType) const; @@ -944,12 +947,12 @@ const char *reduce_left (FormDict &globals) const; // Recursive version of check in MatchRule - int cisc_spill_match(FormDict &globals, RegisterForm *registers, - MatchNode *mRule2, const char * &operand, - const char * ®_type); + int cisc_spill_match(FormDict& globals, RegisterForm* registers, + MatchNode* mRule2, const char* &operand, + const char* ®_type); int cisc_spill_merge(int left_result, int right_result); - bool equivalent(FormDict &globals, MatchNode *mNode2); + virtual bool equivalent(FormDict& globals, MatchNode* mNode2); void count_commutative_op(int& count); void swap_commutative_op(bool atroot, int count); @@ -976,7 +979,7 @@ MatchRule(ArchDesc &ad, MatchNode* mroot, int depth, char* construct, int numleaves); ~MatchRule(); - void append_components(FormDict &locals, ComponentList &components) const; + virtual void append_components(FormDict& locals, ComponentList& components, bool def_flag = false) const; // Recursive call on all operands' match rules in my match rule. bool base_operand(uint &position, FormDict &globals, const char * &result, const char * &name, @@ -1003,14 +1006,14 @@ Form::DataType is_ideal_store() const;// node matches ideal 'StoreXNode' // Check if 'mRule2' is a cisc-spill variant of this MatchRule - int cisc_spill_match(FormDict &globals, RegisterForm *registers, - MatchRule *mRule2, const char * &operand, - const char * ®_type); + int matchrule_cisc_spill_match(FormDict &globals, RegisterForm* registers, + MatchRule* mRule2, const char* &operand, + const char* ®_type); // Check if 'mRule2' is equivalent to this MatchRule - bool equivalent(FormDict &globals, MatchRule *mRule2); + virtual bool equivalent(FormDict& globals, MatchNode* mRule2); - void swap_commutative_op(const char* instr_ident, int count, int& match_rules_cnt); + void matchrule_swap_commutative_op(const char* instr_ident, int count, int& match_rules_cnt); void dump(); void output(FILE *fp); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/main.cpp --- a/src/share/vm/adlc/main.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/main.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1,5 +1,5 @@ /* - * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -349,7 +349,7 @@ } else { if (_ADL_file._name) printf("%s --> ", _ADL_file._name); - printf("%s, %s, %s, %s, %s, %s, %s, %s, %s", + printf("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s", _CPP_file._name, _CPP_CLONE_file._name, _CPP_EXPAND_file._name, @@ -358,7 +358,8 @@ _CPP_MISC_file._name, _CPP_PEEPHOLE_file._name, _CPP_PIPELINE_file._name, - _HPP_file._name, _DFA_file._name); + _HPP_file._name, + _DFA_file._name); } printf("\n"); } @@ -431,7 +432,7 @@ legal_end = fbuf.get_line(); } *legal_text = legal_start; - return (legal_end - legal_start); + return (int) (legal_end - legal_start); } // VS2005 has its own definition, identical to this one. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/output_c.cpp --- a/src/share/vm/adlc/output_c.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/output_c.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1,5 +1,5 @@ /* - * Copyright 1998-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1998-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -225,11 +225,11 @@ pipeclass->_parameters.reset(); while ( (paramname = pipeclass->_parameters.iter()) != NULL ) { - const PipeClassOperandForm *pipeopnd = + const PipeClassOperandForm *tmppipeopnd = (const PipeClassOperandForm *)pipeclass->_localUsage[paramname]; - if (pipeopnd) - templen += 10 + (int)strlen(pipeopnd->_stage); + if (tmppipeopnd) + templen += 10 + (int)strlen(tmppipeopnd->_stage); else templen += 19; @@ -253,10 +253,10 @@ pipeclass->_parameters.reset(); while ( (paramname = pipeclass->_parameters.iter()) != NULL ) { - const PipeClassOperandForm *pipeopnd = + const PipeClassOperandForm *tmppipeopnd = (const PipeClassOperandForm *)pipeclass->_localUsage[paramname]; templen += sprintf(&operand_stages[templen], " stage_%s%c\n", - pipeopnd ? pipeopnd->_stage : "undefined", + tmppipeopnd ? tmppipeopnd->_stage : "undefined", (++i < paramcount ? ',' : ' ') ); } @@ -1042,10 +1042,10 @@ // Scan the peepmatch and output a test for each instruction static void check_peepmatch_instruction_tree(FILE *fp, PeepMatch *pmatch, PeepConstraint *pconstraint) { - intptr_t parent = -1; - intptr_t inst_position = 0; - const char *inst_name = NULL; - intptr_t input = 0; + int parent = -1; + int inst_position = 0; + const char* inst_name = NULL; + int input = 0; fprintf(fp, " // Check instruction sub-tree\n"); pmatch->reset(); for( pmatch->next_instruction( parent, inst_position, inst_name, input ); @@ -1055,14 +1055,14 @@ if( ! pmatch->is_placeholder() ) { // Define temporaries 'inst#', based on parent and parent's input index if( parent != -1 ) { // root was initialized - fprintf(fp, " inst%ld = inst%ld->in(%ld);\n", + fprintf(fp, " inst%d = inst%d->in(%d);\n", inst_position, parent, input); } // When not the root // Test we have the correct instruction by comparing the rule if( parent != -1 ) { - fprintf(fp, " matches = matches && ( inst%ld->rule() == %s_rule );", + fprintf(fp, " matches = matches && ( inst%d->rule() == %s_rule );", inst_position, inst_name); } } else { @@ -1073,20 +1073,20 @@ } } -static void print_block_index(FILE *fp, intptr_t inst_position) { +static void print_block_index(FILE *fp, int inst_position) { assert( inst_position >= 0, "Instruction number less than zero"); fprintf(fp, "block_index"); if( inst_position != 0 ) { - fprintf(fp, " - %ld", inst_position); + fprintf(fp, " - %d", inst_position); } } // Scan the peepmatch and output a test for each instruction static void check_peepmatch_instruction_sequence(FILE *fp, PeepMatch *pmatch, PeepConstraint *pconstraint) { - intptr_t parent = -1; - intptr_t inst_position = 0; - const char *inst_name = NULL; - intptr_t input = 0; + int parent = -1; + int inst_position = 0; + const char* inst_name = NULL; + int input = 0; fprintf(fp, " // Check instruction sub-tree\n"); pmatch->reset(); for( pmatch->next_instruction( parent, inst_position, inst_name, input ); @@ -1101,14 +1101,14 @@ print_block_index(fp, inst_position); fprintf(fp, " > 0 ) {\n Node *n = block->_nodes.at("); print_block_index(fp, inst_position); - fprintf(fp, ");\n inst%ld = (n->is_Mach()) ? ", inst_position); + fprintf(fp, ");\n inst%d = (n->is_Mach()) ? ", inst_position); fprintf(fp, "n->as_Mach() : NULL;\n }\n"); } // When not the root // Test we have the correct instruction by comparing the rule. if( parent != -1 ) { - fprintf(fp, " matches = matches && (inst%ld != NULL) && (inst%ld->rule() == %s_rule);\n", + fprintf(fp, " matches = matches && (inst%d != NULL) && (inst%d->rule() == %s_rule);\n", inst_position, inst_position, inst_name); } } else { @@ -1121,10 +1121,10 @@ // Build mapping for register indices, num_edges to input static void build_instruction_index_mapping( FILE *fp, FormDict &globals, PeepMatch *pmatch ) { - intptr_t parent = -1; - intptr_t inst_position = 0; - const char *inst_name = NULL; - intptr_t input = 0; + int parent = -1; + int inst_position = 0; + const char* inst_name = NULL; + int input = 0; fprintf(fp, " // Build map to register info\n"); pmatch->reset(); for( pmatch->next_instruction( parent, inst_position, inst_name, input ); @@ -1136,9 +1136,9 @@ InstructForm *inst = globals[inst_name]->is_instruction(); if( inst != NULL ) { char inst_prefix[] = "instXXXX_"; - sprintf(inst_prefix, "inst%ld_", inst_position); + sprintf(inst_prefix, "inst%d_", inst_position); char receiver[] = "instXXXX->"; - sprintf(receiver, "inst%ld->", inst_position); + sprintf(receiver, "inst%d->", inst_position); inst->index_temps( fp, globals, inst_prefix, receiver ); } } @@ -1168,7 +1168,7 @@ } // LEFT - intptr_t left_index = pconstraint->_left_inst; + int left_index = pconstraint->_left_inst; const char *left_op = pconstraint->_left_op; // Access info on the instructions whose operands are compared InstructForm *inst_left = globals[pmatch->instruction_name(left_index)]->is_instruction(); @@ -1191,7 +1191,7 @@ // RIGHT int right_op_index = -1; - intptr_t right_index = pconstraint->_right_inst; + int right_index = pconstraint->_right_inst; const char *right_op = pconstraint->_right_op; if( right_index != -1 ) { // Match operand // Access info on the instructions whose operands are compared @@ -1351,7 +1351,7 @@ assert( root_form != NULL, "Replacement instruction was not previously defined"); fprintf(fp, " %sNode *root = new (C) %sNode();\n", root_inst, root_inst); - intptr_t inst_num; + int inst_num; const char *op_name; int opnds_index = 0; // define result operand // Then install the use-operands for the new sub-tree @@ -1362,7 +1362,6 @@ InstructForm *inst_form; inst_form = globals[pmatch->instruction_name(inst_num)]->is_instruction(); assert( inst_form, "Parser should guaranty this is an instruction"); - int op_base = inst_form->oper_input_base(globals); int inst_op_num = inst_form->operand_position(op_name, Component::USE); if( inst_op_num == NameList::Not_in_list ) inst_op_num = inst_form->operand_position(op_name, Component::USE_DEF); @@ -1379,32 +1378,32 @@ // Add unmatched edges from root of match tree int op_base = root_form->oper_input_base(globals); for( int unmatched_edge = 1; unmatched_edge < op_base; ++unmatched_edge ) { - fprintf(fp, " root->add_req(inst%ld->in(%d)); // unmatched ideal edge\n", + fprintf(fp, " root->add_req(inst%d->in(%d)); // unmatched ideal edge\n", inst_num, unmatched_edge); } // If new instruction captures bottom type if( root_form->captures_bottom_type() ) { // Get bottom type from instruction whose result we are replacing - fprintf(fp, " root->_bottom_type = inst%ld->bottom_type();\n", inst_num); + fprintf(fp, " root->_bottom_type = inst%d->bottom_type();\n", inst_num); } // Define result register and result operand - fprintf(fp, " ra_->add_reference(root, inst%ld);\n", inst_num); - fprintf(fp, " ra_->set_oop (root, ra_->is_oop(inst%ld));\n", inst_num); - fprintf(fp, " ra_->set_pair(root->_idx, ra_->get_reg_second(inst%ld), ra_->get_reg_first(inst%ld));\n", inst_num, inst_num); - fprintf(fp, " root->_opnds[0] = inst%ld->_opnds[0]->clone(C); // result\n", inst_num); + fprintf(fp, " ra_->add_reference(root, inst%d);\n", inst_num); + fprintf(fp, " ra_->set_oop (root, ra_->is_oop(inst%d));\n", inst_num); + fprintf(fp, " ra_->set_pair(root->_idx, ra_->get_reg_second(inst%d), ra_->get_reg_first(inst%d));\n", inst_num, inst_num); + fprintf(fp, " root->_opnds[0] = inst%d->_opnds[0]->clone(C); // result\n", inst_num); fprintf(fp, " // ----- Done with initial setup -----\n"); } else { if( (op_form == NULL) || (op_form->is_base_constant(globals) == Form::none) ) { // Do not have ideal edges for constants after matching - fprintf(fp, " for( unsigned x%d = inst%ld_idx%d; x%d < inst%ld_idx%d; x%d++ )\n", + fprintf(fp, " for( unsigned x%d = inst%d_idx%d; x%d < inst%d_idx%d; x%d++ )\n", inst_op_num, inst_num, inst_op_num, inst_op_num, inst_num, inst_op_num+1, inst_op_num ); - fprintf(fp, " root->add_req( inst%ld->in(x%d) );\n", + fprintf(fp, " root->add_req( inst%d->in(x%d) );\n", inst_num, inst_op_num ); } else { fprintf(fp, " // no ideal edge for constants after matching\n"); } - fprintf(fp, " root->_opnds[%d] = inst%ld->_opnds[%d]->clone(C);\n", + fprintf(fp, " root->_opnds[%d] = inst%d->_opnds[%d]->clone(C);\n", opnds_index, inst_num, inst_op_num ); } ++opnds_index; @@ -1443,7 +1442,7 @@ } for( int i = 0; i <= max_position; ++i ) { if( i == 0 ) { - fprintf(fp, " MachNode *inst0 = this;\n", i); + fprintf(fp, " MachNode *inst0 = this;\n"); } else { fprintf(fp, " MachNode *inst%d = NULL;\n", i); } @@ -1743,7 +1742,7 @@ } // delete the rest of edges fprintf(fp," for(int i = idx%d - 1; i >= (int)idx%d; i--) {\n", cur_num_opnds, new_num_opnds); - fprintf(fp," del_req(i);\n", i); + fprintf(fp," del_req(i);\n"); fprintf(fp," }\n"); fprintf(fp," _num_opnds = %d;\n", new_num_opnds); } @@ -1817,7 +1816,7 @@ fprintf(fp,"\n"); if( node->expands() ) { - fprintf(fp," return result;\n",cnt-1); + fprintf(fp," return result;\n"); } else { fprintf(fp," return this;\n"); } @@ -2140,8 +2139,59 @@ // A subfield variable, '$$' prefix emit_field( rep_var ); } else { - // A replacement variable, '$' prefix - emit_rep_var( rep_var ); + if (_strings_to_emit.peek() != NULL && + strcmp(_strings_to_emit.peek(), "$Address") == 0) { + fprintf(_fp, "Address::make_raw("); + + emit_rep_var( rep_var ); + fprintf(_fp,"->base(ra_,this,idx%d), ", _operand_idx); + + _reg_status = LITERAL_ACCESSED; + emit_rep_var( rep_var ); + fprintf(_fp,"->index(ra_,this,idx%d), ", _operand_idx); + + _reg_status = LITERAL_ACCESSED; + emit_rep_var( rep_var ); + fprintf(_fp,"->scale(), "); + + _reg_status = LITERAL_ACCESSED; + emit_rep_var( rep_var ); + Form::DataType stack_type = _operand ? _operand->is_user_name_for_sReg() : Form::none; + if( _operand && _operand_idx==0 && stack_type != Form::none ) { + fprintf(_fp,"->disp(ra_,this,0), "); + } else { + fprintf(_fp,"->disp(ra_,this,idx%d), ", _operand_idx); + } + + _reg_status = LITERAL_ACCESSED; + emit_rep_var( rep_var ); + fprintf(_fp,"->disp_is_oop())"); + + // skip trailing $Address + _strings_to_emit.iter(); + } else { + // A replacement variable, '$' prefix + const char* next = _strings_to_emit.peek(); + const char* next2 = _strings_to_emit.peek(2); + if (next != NULL && next2 != NULL && strcmp(next2, "$Register") == 0 && + (strcmp(next, "$base") == 0 || strcmp(next, "$index") == 0)) { + // handle $rev_var$$base$$Register and $rev_var$$index$$Register by + // producing as_Register(opnd_array(#)->base(ra_,this,idx1)). + fprintf(_fp, "as_Register("); + // emit the operand reference + emit_rep_var( rep_var ); + rep_var = _strings_to_emit.iter(); + assert(strcmp(rep_var, "$base") == 0 || strcmp(rep_var, "$index") == 0, "bad pattern"); + // handle base or index + emit_field(rep_var); + rep_var = _strings_to_emit.iter(); + assert(strcmp(rep_var, "$Register") == 0, "bad pattern"); + // close up the parens + fprintf(_fp, ")"); + } else { + emit_rep_var( rep_var ); + } + } } // end replacement and/or subfield } } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/adlc/output_h.cpp --- a/src/share/vm/adlc/output_h.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/adlc/output_h.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1,5 +1,5 @@ /* - * Copyright 1998-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1998-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -574,7 +574,7 @@ // Generate the user-defined portion of the format if( inst._format ) { // If there are replacement variables, - // Generate index values needed for determing the operand position + // Generate index values needed for determining the operand position if( inst._format->_rep_vars.count() ) inst.index_temps(fp, globals); @@ -1832,7 +1832,7 @@ break; case Form::idealP: case Form::idealN: - fprintf(fp," return opnd_array(1)->type();\n",result); + fprintf(fp," return opnd_array(1)->type();\n"); break; case Form::idealD: fprintf(fp," return TypeD::make(opnd_array(1)->constantD());\n"); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/asm/assembler.cpp --- a/src/share/vm/asm/assembler.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/asm/assembler.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -31,7 +31,7 @@ // The AbstractAssembler is generating code into a CodeBuffer. To make code generation faster, // the assembler keeps a copy of the code buffers boundaries & modifies them when // emitting bytes rather than using the code buffers accessor functions all the time. -// The code buffer is updated via set_code_end(...) after emiting a whole instruction. +// The code buffer is updated via set_code_end(...) after emitting a whole instruction. AbstractAssembler::AbstractAssembler(CodeBuffer* code) { if (code == NULL) return; @@ -239,6 +239,78 @@ } } +struct DelayedConstant { + typedef void (*value_fn_t)(); + BasicType type; + intptr_t value; + value_fn_t value_fn; + // This limit of 20 is generous for initial uses. + // The limit needs to be large enough to store the field offsets + // into classes which do not have statically fixed layouts. + // (Initial use is for method handle object offsets.) + // Look for uses of "delayed_value" in the source code + // and make sure this number is generous enough to handle all of them. + enum { DC_LIMIT = 20 }; + static DelayedConstant delayed_constants[DC_LIMIT]; + static DelayedConstant* add(BasicType type, value_fn_t value_fn); + bool match(BasicType t, value_fn_t cfn) { + return type == t && value_fn == cfn; + } + static void update_all(); +}; + +DelayedConstant DelayedConstant::delayed_constants[DC_LIMIT]; +// Default C structure initialization rules have the following effect here: +// = { { (BasicType)0, (intptr_t)NULL }, ... }; + +DelayedConstant* DelayedConstant::add(BasicType type, + DelayedConstant::value_fn_t cfn) { + for (int i = 0; i < DC_LIMIT; i++) { + DelayedConstant* dcon = &delayed_constants[i]; + if (dcon->match(type, cfn)) + return dcon; + if (dcon->value_fn == NULL) { + // (cmpxchg not because this is multi-threaded but because I'm paranoid) + if (Atomic::cmpxchg_ptr(CAST_FROM_FN_PTR(void*, cfn), &dcon->value_fn, NULL) == NULL) { + dcon->type = type; + return dcon; + } + } + } + // If this assert is hit (in pre-integration testing!) then re-evaluate + // the comment on the definition of DC_LIMIT. + guarantee(false, "too many delayed constants"); + return NULL; +} + +void DelayedConstant::update_all() { + for (int i = 0; i < DC_LIMIT; i++) { + DelayedConstant* dcon = &delayed_constants[i]; + if (dcon->value_fn != NULL && dcon->value == 0) { + typedef int (*int_fn_t)(); + typedef address (*address_fn_t)(); + switch (dcon->type) { + case T_INT: dcon->value = (intptr_t) ((int_fn_t) dcon->value_fn)(); break; + case T_ADDRESS: dcon->value = (intptr_t) ((address_fn_t)dcon->value_fn)(); break; + } + } + } +} + +intptr_t* AbstractAssembler::delayed_value_addr(int(*value_fn)()) { + DelayedConstant* dcon = DelayedConstant::add(T_INT, (DelayedConstant::value_fn_t) value_fn); + return &dcon->value; +} +intptr_t* AbstractAssembler::delayed_value_addr(address(*value_fn)()) { + DelayedConstant* dcon = DelayedConstant::add(T_ADDRESS, (DelayedConstant::value_fn_t) value_fn); + return &dcon->value; +} +void AbstractAssembler::update_delayed_values() { + DelayedConstant::update_all(); +} + + + void AbstractAssembler::block_comment(const char* comment) { if (sect() == CodeBuffer::SECT_INSTS) { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/asm/assembler.hpp --- a/src/share/vm/asm/assembler.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/asm/assembler.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -22,7 +22,7 @@ * */ -// This file contains platform-independant assembler declarations. +// This file contains platform-independent assembler declarations. class CodeBuffer; class MacroAssembler; @@ -140,6 +140,28 @@ } }; +// A union type for code which has to assemble both constant and +// non-constant operands, when the distinction cannot be made +// statically. +class RegisterConstant VALUE_OBJ_CLASS_SPEC { + private: + Register _r; + intptr_t _c; + + public: + RegisterConstant(): _r(noreg), _c(0) {} + RegisterConstant(Register r): _r(r), _c(0) {} + RegisterConstant(intptr_t c): _r(noreg), _c(c) {} + + Register as_register() const { assert(is_register(),""); return _r; } + intptr_t as_constant() const { assert(is_constant(),""); return _c; } + + Register register_or_noreg() const { return _r; } + intptr_t constant_or_zero() const { return _c; } + + bool is_register() const { return _r != noreg; } + bool is_constant() const { return _r == noreg; } +}; // The Abstract Assembler: Pure assembler doing NO optimizations on the // instruction level; i.e., what you write is what you get. @@ -280,6 +302,26 @@ inline address address_constant(Label& L); inline address address_table_constant(GrowableArray label); + // Bootstrapping aid to cope with delayed determination of constants. + // Returns a static address which will eventually contain the constant. + // The value zero (NULL) stands instead of a constant which is still uncomputed. + // Thus, the eventual value of the constant must not be zero. + // This is fine, since this is designed for embedding object field + // offsets in code which must be generated before the object class is loaded. + // Field offsets are never zero, since an object's header (mark word) + // is located at offset zero. + RegisterConstant delayed_value(int(*value_fn)(), Register tmp, int offset = 0) { + return delayed_value(delayed_value_addr(value_fn), tmp, offset); + } + RegisterConstant delayed_value(address(*value_fn)(), Register tmp, int offset = 0) { + return delayed_value(delayed_value_addr(value_fn), tmp, offset); + } + virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr, Register tmp, int offset) = 0; + // Last overloading is platform-dependent; look in assembler_.cpp. + static intptr_t* delayed_value_addr(int(*constant_fn)()); + static intptr_t* delayed_value_addr(address(*constant_fn)()); + static void update_delayed_values(); + // Bang stack to trigger StackOverflowError at a safe location // implementation delegates to machine-specific bang_stack_with_offset void generate_stack_overflow_check( int frame_size_in_bytes ); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/ci/ciTypeFlow.cpp --- a/src/share/vm/ci/ciTypeFlow.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/ci/ciTypeFlow.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -541,7 +541,7 @@ // is report a value that will meet correctly with any downstream // reference types on paths that will truly be executed. This null type // meets with any reference type to yield that same reference type. - // (The compiler will generate an unconditonal exception here.) + // (The compiler will generate an unconditional exception here.) push(null_type()); return; } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/classfile/symbolTable.cpp --- a/src/share/vm/classfile/symbolTable.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/classfile/symbolTable.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -156,7 +156,7 @@ symbolOop test = lookup(index, (char*)name, len, hashValue); if (test != NULL) { - // A race occured and another thread introduced the symbol, this one + // A race occurred and another thread introduced the symbol, this one // will be dropped and collected. return test; } @@ -193,7 +193,7 @@ int index = hash_to_index(hashValues[i]); symbolOop test = lookup(index, names[i], lengths[i], hashValues[i]); if (test != NULL) { - // A race occured and another thread introduced the symbol, this one + // A race occurred and another thread introduced the symbol, this one // will be dropped and collected. Use test instead. cp->symbol_at_put(cp_indices[i], test); } else { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/code/nmethod.cpp --- a/src/share/vm/code/nmethod.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/code/nmethod.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -380,7 +380,7 @@ void nmethod::add_handler_for_exception_and_pc(Handle exception, address pc, address handler) { // There are potential race conditions during exception cache updates, so we // must own the ExceptionCache_lock before doing ANY modifications. Because - // we dont lock during reads, it is possible to have several threads attempt + // we don't lock during reads, it is possible to have several threads attempt // to update the cache with the same data. We need to check for already inserted // copies of the current data before adding it. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/code/nmethod.hpp --- a/src/share/vm/code/nmethod.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/code/nmethod.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -167,7 +167,7 @@ nmFlags flags; // various flags to keep track of nmethod state bool _markedForDeoptimization; // Used for stack deoptimization enum { alive = 0, - not_entrant = 1, // uncommon trap has happend but activations may still exist + not_entrant = 1, // uncommon trap has happened but activations may still exist zombie = 2, unloaded = 3 }; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/concurrentMarkSweep/cmsAdaptiveSizePolicy.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsAdaptiveSizePolicy.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsAdaptiveSizePolicy.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -393,7 +393,7 @@ // Restarts the concurrent phases timer. void concurrent_phases_resume(); - // Time begining and end of the marking phase for + // Time beginning and end of the marking phase for // a synchronous MS collection. A MS collection // that finishes in the foreground can have started // in the background. These methods capture the diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/concurrentMarkSweep/cmsGCAdaptivePolicyCounters.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsGCAdaptivePolicyCounters.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsGCAdaptivePolicyCounters.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -69,7 +69,7 @@ // end of the sweep of the tenured generation. PerfVariable* _avg_cms_free_counter; // Average of the free space in the tenured generation at the - // start of the sweep of the tenured genertion. + // start of the sweep of the tenured generation. PerfVariable* _avg_cms_free_at_sweep_counter; // Average of the free space in the tenured generation at the // after any resizing of the tenured generation at the end diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -4178,7 +4178,7 @@ // and is deferred for now; see CR# TBF. 07252005YSR. XXX assert(!CMSAbortSemantics || tsk.aborted(), "Inconsistency"); // If _restart_addr is non-NULL, a marking stack overflow - // occured; we need to do a fresh marking iteration from the + // occurred; we need to do a fresh marking iteration from the // indicated restart address. if (_foregroundGCIsActive && asynch) { // We may be running into repeated stack overflows, having @@ -4221,7 +4221,7 @@ // should be incremental with periodic yields. _markBitMap.iterate(&markFromRootsClosure); // If _restart_addr is non-NULL, a marking stack overflow - // occured; we need to do a fresh iteration from the + // occurred; we need to do a fresh iteration from the // indicated restart address. while (_restart_addr != NULL) { if (_foregroundGCIsActive && asynch) { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -133,14 +133,12 @@ _co_tracker.update(false); if (G1SmoothConcRefine) { - start_vtime_sec = os::elapsedVTime(); prev_buffer_num = curr_buffer_num; - _sts.leave(); os::sleep(Thread::current(), (jlong) _interval_ms, false); _sts.join(); + start_vtime_sec = os::elapsedVTime(); } - n_logs++; } // Make sure we harvest the PYA, if any. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/concurrentMark.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -420,6 +420,10 @@ _has_overflown(false), _concurrent(false), + _has_aborted(false), + _restart_for_overflow(false), + _concurrent_marking_in_progress(false), + _should_gray_objects(false), // _verbose_level set below diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp --- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -107,7 +107,7 @@ if (PrintGC) { gclog_or_tty->date_stamp(PrintGCDateStamps); gclog_or_tty->stamp(PrintGCTimeStamps); - tty->print_cr("[GC concurrent-mark-start]"); + gclog_or_tty->print_cr("[GC concurrent-mark-start]"); } if (!g1_policy->in_young_gc_mode()) { @@ -320,8 +320,6 @@ set_in_progress(); clear_started(); if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-starting"); - - return; } // Note: this method, although exported by the ConcurrentMarkSweepThread, diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -78,8 +78,8 @@ void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, int max_completed_queue, - Mutex* lock) { - PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue); + Mutex* lock, PtrQueueSet* fl_owner) { + PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner); set_buffer_size(DCQBarrierQueueBufferSize); set_process_completed_threshold(DCQBarrierProcessCompletedThreshold); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -88,7 +88,7 @@ void initialize(Monitor* cbl_mon, Mutex* fl_lock, int max_completed_queue = 0, - Mutex* lock = NULL); + Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL); // The number of parallel ids that can be claimed to allow collector or // mutator threads to do card-processing work. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -136,6 +136,14 @@ int calls() { return _calls; } }; +class RedirtyLoggedCardTableEntryFastClosure : public CardTableEntryClosure { +public: + bool do_card_ptr(jbyte* card_ptr, int worker_i) { + *card_ptr = CardTableModRefBS::dirty_card_val(); + return true; + } +}; + YoungList::YoungList(G1CollectedHeap* g1h) : _g1h(g1h), _head(NULL), _scan_only_head(NULL), _scan_only_tail(NULL), _curr_scan_only(NULL), @@ -812,6 +820,40 @@ } }; +class RebuildRSOutOfRegionClosure: public HeapRegionClosure { + G1CollectedHeap* _g1h; + UpdateRSOopClosure _cl; + int _worker_i; +public: + RebuildRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) : + _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i), + _worker_i(worker_i), + _g1h(g1) + { } + bool doHeapRegion(HeapRegion* r) { + if (!r->continuesHumongous()) { + _cl.set_from(r); + r->oop_iterate(&_cl); + } + return false; + } +}; + +class ParRebuildRSTask: public AbstractGangTask { + G1CollectedHeap* _g1; +public: + ParRebuildRSTask(G1CollectedHeap* g1) + : AbstractGangTask("ParRebuildRSTask"), + _g1(g1) + { } + + void work(int i) { + RebuildRSOutOfRegionClosure rebuild_rs(_g1, i); + _g1->heap_region_par_iterate_chunked(&rebuild_rs, i, + HeapRegion::RebuildRSClaimValue); + } +}; + void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs, size_t word_size) { ResourceMark rm; @@ -918,24 +960,35 @@ reset_gc_time_stamp(); // Since everything potentially moved, we will clear all remembered - // sets, and clear all cards. Later we will also cards in the used - // portion of the heap after the resizing (which could be a shrinking.) - // We will also reset the GC time stamps of the regions. + // sets, and clear all cards. Later we will rebuild remebered + // sets. We will also reset the GC time stamps of the regions. PostMCRemSetClearClosure rs_clear(mr_bs()); heap_region_iterate(&rs_clear); // Resize the heap if necessary. resize_if_necessary_after_full_collection(full ? 0 : word_size); - // Since everything potentially moved, we will clear all remembered - // sets, but also dirty all cards corresponding to used regions. - PostMCRemSetInvalidateClosure rs_invalidate(mr_bs()); - heap_region_iterate(&rs_invalidate); if (_cg1r->use_cache()) { _cg1r->clear_and_record_card_counts(); _cg1r->clear_hot_cache(); } + // Rebuild remembered sets of all regions. + if (ParallelGCThreads > 0) { + ParRebuildRSTask rebuild_rs_task(this); + assert(check_heap_region_claim_values( + HeapRegion::InitialClaimValue), "sanity check"); + set_par_threads(workers()->total_workers()); + workers()->run_task(&rebuild_rs_task); + set_par_threads(0); + assert(check_heap_region_claim_values( + HeapRegion::RebuildRSClaimValue), "sanity check"); + reset_heap_region_claim_values(); + } else { + RebuildRSOutOfRegionClosure rebuild_rs(this); + heap_region_iterate(&rebuild_rs); + } + if (PrintGC) { print_size_transition(gclog_or_tty, g1h_prev_used, used(), capacity()); } @@ -961,7 +1014,8 @@ // dirtied, so this should abandon those logs, and set "do_traversal" // to true. concurrent_g1_refine()->set_pya_restart(); - + assert(!G1DeferredRSUpdate + || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any"); assert(regions_accounted_for(), "Region leakage!"); } @@ -1466,6 +1520,13 @@ G1DirtyCardQueueMax, Shared_DirtyCardQ_lock); } + if (G1DeferredRSUpdate) { + dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, + DirtyCardQ_FL_lock, + 0, + Shared_DirtyCardQ_lock, + &JavaThread::dirty_card_queue_set()); + } // In case we're keeping closure specialization stats, initialize those // counts and that mechanism. SpecializationStats::clear(); @@ -2316,7 +2377,6 @@ void G1CollectedHeap::checkConcurrentMark() { VerifyMarkedObjsClosure verifycl(this); - doConcurrentMark(); // MutexLockerEx x(getMarkBitMapLock(), // Mutex::_no_safepoint_check_flag); object_iterate(&verifycl); @@ -2493,7 +2553,7 @@ guarantee(_in_cset_fast_test == NULL, "invariant"); guarantee(_in_cset_fast_test_base == NULL, "invariant"); - _in_cset_fast_test_length = n_regions(); + _in_cset_fast_test_length = max_regions(); _in_cset_fast_test_base = NEW_C_HEAP_ARRAY(bool, _in_cset_fast_test_length); memset(_in_cset_fast_test_base, false, @@ -2513,7 +2573,7 @@ } save_marks(); - // We must do this before any possible evacuation that should propogate + // We must do this before any possible evacuation that should propagate // marks, including evacuation of popular objects in a popular pause. if (mark_in_progress()) { double start_time_sec = os::elapsedTime(); @@ -2626,9 +2686,8 @@ #endif // SCAN_ONLY_VERBOSE double end_time_sec = os::elapsedTime(); - if (!evacuation_failed()) { - g1_policy()->record_pause_time((end_time_sec - start_time_sec)*1000.0); - } + double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS; + g1_policy()->record_pause_time_ms(pause_time_ms); GCOverheadReporter::recordSTWEnd(end_time_sec); g1_policy()->record_collection_pause_end(popular_region != NULL, abandoned); @@ -2919,27 +2978,51 @@ } }; -class RecreateRSetEntriesClosure: public OopClosure { +class UpdateRSetImmediate : public OopsInHeapRegionClosure { private: G1CollectedHeap* _g1; G1RemSet* _g1_rem_set; - HeapRegion* _from; public: - RecreateRSetEntriesClosure(G1CollectedHeap* g1, HeapRegion* from) : - _g1(g1), _g1_rem_set(g1->g1_rem_set()), _from(from) - {} + UpdateRSetImmediate(G1CollectedHeap* g1) : + _g1(g1), _g1_rem_set(g1->g1_rem_set()) {} void do_oop(narrowOop* p) { guarantee(false, "NYI"); } void do_oop(oop* p) { assert(_from->is_in_reserved(p), "paranoia"); - if (*p != NULL) { - _g1_rem_set->write_ref(_from, p); + if (*p != NULL && !_from->is_survivor()) { + _g1_rem_set->par_write_ref(_from, p, 0); } } }; +class UpdateRSetDeferred : public OopsInHeapRegionClosure { +private: + G1CollectedHeap* _g1; + DirtyCardQueue *_dcq; + CardTableModRefBS* _ct_bs; + +public: + UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) : + _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {} + + void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + void do_oop(oop* p) { + assert(_from->is_in_reserved(p), "paranoia"); + if (!_from->is_in_reserved(*p) && !_from->is_survivor()) { + size_t card_index = _ct_bs->index_for(p); + if (_ct_bs->mark_card_deferred(card_index)) { + _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index)); + } + } + } +}; + + + class RemoveSelfPointerClosure: public ObjectClosure { private: G1CollectedHeap* _g1; @@ -2947,11 +3030,11 @@ HeapRegion* _hr; size_t _prev_marked_bytes; size_t _next_marked_bytes; + OopsInHeapRegionClosure *_cl; public: - RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr) : - _g1(g1), _cm(_g1->concurrent_mark()), _hr(hr), - _prev_marked_bytes(0), _next_marked_bytes(0) - {} + RemoveSelfPointerClosure(G1CollectedHeap* g1, OopsInHeapRegionClosure* cl) : + _g1(g1), _cm(_g1->concurrent_mark()), _prev_marked_bytes(0), + _next_marked_bytes(0), _cl(cl) {} size_t prev_marked_bytes() { return _prev_marked_bytes; } size_t next_marked_bytes() { return _next_marked_bytes; } @@ -2989,8 +3072,7 @@ // that, if evacuation fails, we might have remembered set // entries missing given that we skipped cards on the // collection set. So, we'll recreate such entries now. - RecreateRSetEntriesClosure cl(_g1, _hr); - obj->oop_iterate(&cl); + obj->oop_iterate(_cl); assert(_cm->isPrevMarked(obj), "Should be marked!"); } else { // The object has been either evacuated or is dead. Fill it with a @@ -3003,14 +3085,23 @@ }; void G1CollectedHeap::remove_self_forwarding_pointers() { + UpdateRSetImmediate immediate_update(_g1h); + DirtyCardQueue dcq(&_g1h->dirty_card_queue_set()); + UpdateRSetDeferred deferred_update(_g1h, &dcq); + OopsInHeapRegionClosure *cl; + if (G1DeferredRSUpdate) { + cl = &deferred_update; + } else { + cl = &immediate_update; + } HeapRegion* cur = g1_policy()->collection_set(); - while (cur != NULL) { assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!"); + RemoveSelfPointerClosure rspc(_g1h, cl); if (cur->evacuation_failed()) { - RemoveSelfPointerClosure rspc(_g1h, cur); assert(cur->in_collection_set(), "bad CS"); + cl->set_region(cur); cur->object_iterate(&rspc); // A number of manipulations to make the TAMS be the current top, @@ -3519,6 +3610,9 @@ protected: G1CollectedHeap* _g1h; RefToScanQueue* _refs; + DirtyCardQueue _dcq; + CardTableModRefBS* _ct_bs; + G1RemSet* _g1_rem; typedef GrowableArray OverflowQueue; OverflowQueue* _overflowed_refs; @@ -3560,10 +3654,32 @@ void add_to_undo_waste(size_t waste) { _undo_waste += waste; } + DirtyCardQueue& dirty_card_queue() { return _dcq; } + CardTableModRefBS* ctbs() { return _ct_bs; } + + void immediate_rs_update(HeapRegion* from, oop* p, int tid) { + _g1_rem->par_write_ref(from, p, tid); + } + + void deferred_rs_update(HeapRegion* from, oop* p, int tid) { + // If the new value of the field points to the same region or + // is the to-space, we don't need to include it in the Rset updates. + if (!from->is_in_reserved(*p) && !from->is_survivor()) { + size_t card_index = ctbs()->index_for(p); + // If the card hasn't been added to the buffer, do it. + if (ctbs()->mark_card_deferred(card_index)) { + dirty_card_queue().enqueue((jbyte*)ctbs()->byte_for_index(card_index)); + } + } + } + public: G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num) : _g1h(g1h), _refs(g1h->task_queue(queue_num)), + _dcq(&g1h->dirty_card_queue_set()), + _ct_bs((CardTableModRefBS*)_g1h->barrier_set()), + _g1_rem(g1h->g1_rem_set()), _hash_seed(17), _queue_num(queue_num), _term_attempts(0), _age_table(false), @@ -3641,6 +3757,14 @@ int refs_to_scan() { return refs()->size(); } int overflowed_refs_to_scan() { return overflowed_refs()->length(); } + void update_rs(HeapRegion* from, oop* p, int tid) { + if (G1DeferredRSUpdate) { + deferred_rs_update(from, p, tid); + } else { + immediate_rs_update(from, p, tid); + } + } + HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) { HeapWord* obj = NULL; @@ -3809,7 +3933,6 @@ } }; - G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()), _par_scan_state(par_scan_state) { } @@ -3835,7 +3958,7 @@ assert(obj == *p, "the value of *p should not have changed"); _par_scan_state->push_on_queue(p); } else { - _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } } } @@ -3973,13 +4096,13 @@ } // When scanning the RS, we only care about objs in CS. if (barrier == G1BarrierRS) { - _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } } // When scanning moved objs, must look at all oops. if (barrier == G1BarrierEvac && obj != NULL) { - _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num()); + _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num()); } if (do_gen_barrier && obj != NULL) { @@ -4128,6 +4251,7 @@ G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss); G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss); G1ParScanHeapRSClosure only_scan_heap_rs_cl(_g1h, &pss); + G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss); G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss); G1ParScanAndMarkHeapRSClosure scan_mark_heap_rs_cl(_g1h, &pss); @@ -4383,7 +4507,6 @@ g1_rem_set()->prepare_for_oops_into_collection_set_do(); concurrent_g1_refine()->set_use_cache(false); int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); - set_par_threads(n_workers); G1ParTask g1_par_task(this, n_workers, _task_queues); @@ -4391,8 +4514,9 @@ change_strong_roots_parity(); // In preparation for parallel strong roots. rem_set()->prepare_for_younger_refs_iterate(true); + + assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty"); double start_par = os::elapsedTime(); - if (ParallelGCThreads > 0) { // The individual threads will set their evac-failure closures. workers()->run_task(&g1_par_task); @@ -4412,8 +4536,8 @@ G1KeepAliveClosure keep_alive(this); JNIHandles::weak_oops_do(&is_alive, &keep_alive); } - g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + concurrent_g1_refine()->set_use_cache(true); finalize_for_evac_failure(); @@ -4424,7 +4548,6 @@ if (evacuation_failed()) { remove_self_forwarding_pointers(); - if (PrintGCDetails) { gclog_or_tty->print(" (evacuation failed)"); } else if (PrintGC) { @@ -4432,6 +4555,14 @@ } } + if (G1DeferredRSUpdate) { + RedirtyLoggedCardTableEntryFastClosure redirty; + dirty_card_queue_set().set_closure(&redirty); + dirty_card_queue_set().apply_closure_to_all_completed_buffers(); + JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set()); + assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); + } + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -457,6 +457,10 @@ // And it's mod ref barrier set, used to track updates for the above. ModRefBarrierSet* _mr_bs; + // A set of cards that cover the objects for which the Rsets should be updated + // concurrently after the collection. + DirtyCardQueueSet _dirty_card_queue_set; + // The Heap Region Rem Set Iterator. HeapRegionRemSetIterator** _rem_set_iterator; @@ -666,6 +670,9 @@ RefToScanQueue *task_queue(int i); + // A set of cards where updates happened during the GC + DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; } + // Create a G1CollectedHeap with the specified policy. // Must call the initialize method afterwards. // May not return if something goes wrong. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1014,7 +1014,7 @@ _all_full_gc_times_ms->add(full_gc_time_ms); - update_recent_gc_times(end_sec, full_gc_time_sec); + update_recent_gc_times(end_sec, full_gc_time_ms); _g1->clear_full_collection(); @@ -1475,6 +1475,7 @@ size_t cur_used_bytes = _g1->used(); assert(cur_used_bytes == _g1->recalculate_used(), "It should!"); bool last_pause_included_initial_mark = false; + bool update_stats = !abandoned && !_g1->evacuation_failed(); #ifndef PRODUCT if (G1YoungSurvRateVerbose) { @@ -1535,7 +1536,7 @@ _n_pauses++; - if (!abandoned) { + if (update_stats) { _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms); _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms); _recent_evac_times_ms->add(evac_ms); @@ -1636,7 +1637,7 @@ double termination_time = avg_value(_par_last_termination_times_ms); double parallel_other_time; - if (!abandoned) { + if (update_stats) { MainBodySummary* body_summary = summary->main_body_summary(); guarantee(body_summary != NULL, "should not be null!"); @@ -1852,7 +1853,7 @@ // - if (!popular && !abandoned) { + if (!popular && update_stats) { double pause_time_ms = elapsed_ms; size_t diff = 0; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -966,7 +966,7 @@ record_termination_time(0, ms); } - void record_pause_time(double ms) { + void record_pause_time_ms(double ms) { _last_pause_time_ms = ms; } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -105,33 +105,6 @@ _g1->heap_region_iterate(&rc); } -class UpdateRSOopClosure: public OopClosure { - HeapRegion* _from; - HRInto_G1RemSet* _rs; - int _worker_i; -public: - UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) : - _from(NULL), _rs(rs), _worker_i(worker_i) { - guarantee(_rs != NULL, "Requires an HRIntoG1RemSet"); - } - - void set_from(HeapRegion* from) { - assert(from != NULL, "from region must be non-NULL"); - _from = from; - } - - virtual void do_oop(narrowOop* p) { - guarantee(false, "NYI"); - } - virtual void do_oop(oop* p) { - assert(_from != NULL, "from region must be non-NULL"); - _rs->par_write_ref(_from, p, _worker_i); - } - // Override: this closure is idempotent. - // bool idempotent() { return true; } - bool apply_to_weak_ref_discovered_field() { return true; } -}; - class UpdateRSOutOfRegionClosure: public HeapRegionClosure { G1CollectedHeap* _g1h; ModRefBarrierSet* _mr_bs; @@ -177,11 +150,19 @@ _cards_scanned(NULL), _total_cards_scanned(0) { _seq_task = new SubTasksDone(NumSeqTasks); - _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, ParallelGCThreads); + guarantee(n_workers() > 0, "There should be some workers"); + _new_refs = NEW_C_HEAP_ARRAY(GrowableArray*, n_workers()); + for (uint i = 0; i < n_workers(); i++) { + _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true); + } } HRInto_G1RemSet::~HRInto_G1RemSet() { delete _seq_task; + for (uint i = 0; i < n_workers(); i++) { + delete _new_refs[i]; + } + FREE_C_HEAP_ARRAY(GrowableArray*, _new_refs); } void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) { @@ -281,8 +262,9 @@ if (!_ct_bs->is_card_claimed(card_index) && !_ct_bs->is_card_dirty(card_index)) { assert(_ct_bs->is_card_clean(card_index) || - _ct_bs->is_card_claimed(card_index), - "Card is either dirty, clean, or claimed"); + _ct_bs->is_card_claimed(card_index) || + _ct_bs->is_card_deferred(card_index), + "Card is either clean, claimed or deferred"); if (_ct_bs->claim_card(card_index)) scanCard(card_index, card_region); } @@ -338,14 +320,12 @@ _g1p->record_scan_rs_start_time(worker_i, rs_time_start * 1000.0); _g1p->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); - if (ParallelGCThreads > 0) { - // In this case, we called scanNewRefsRS and recorded the corresponding - // time. - double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); - if (scan_new_refs_time_ms > 0.0) { - closure_app_time_ms += scan_new_refs_time_ms; - } + + double scan_new_refs_time_ms = _g1p->get_scan_new_refs_time(worker_i); + if (scan_new_refs_time_ms > 0.0) { + closure_app_time_ms += scan_new_refs_time_ms; } + _g1p->record_obj_copy_time(worker_i, closure_app_time_ms); } @@ -469,8 +449,8 @@ double scan_new_refs_start_sec = os::elapsedTime(); G1CollectedHeap* g1h = G1CollectedHeap::heap(); CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set()); - while (_new_refs[worker_i]->is_nonempty()) { - oop* p = _new_refs[worker_i]->pop(); + for (int i = 0; i < _new_refs[worker_i]->length(); i++) { + oop* p = _new_refs[worker_i]->at(i); oop obj = *p; // *p was in the collection set when p was pushed on "_new_refs", but // another thread may have processed this location from an RS, so it @@ -480,10 +460,6 @@ HeapRegion* r = g1h->heap_region_containing(p); DEBUG_ONLY(HeapRegion* to = g1h->heap_region_containing(obj)); - assert(ParallelGCThreads > 1 - || to->rem_set()->contains_reference(p), - "Invariant: pushed after being added." - "(Not reliable in parallel code.)"); oc->set_region(r); // If "p" has already been processed concurrently, this is // idempotent. @@ -538,8 +514,8 @@ } } else { assert(worker_i == 0, "invariant"); - updateRS(0); + scanNewRefsRS(oc, 0); scanRS(oc, 0); } } @@ -559,11 +535,7 @@ assert(!_par_traversal_in_progress, "Invariant between iterations."); if (ParallelGCThreads > 0) { set_par_traversal(true); - int n_workers = _g1->workers()->total_workers(); - _seq_task->set_par_threads(n_workers); - for (uint i = 0; i < ParallelGCThreads; i++) - _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray(8192,true); - + _seq_task->set_par_threads((int)n_workers()); if (cg1r->do_traversal()) { updateRS(0); // Have to do this again after updaters @@ -587,6 +559,53 @@ } }; +class UpdateRSetOopsIntoCSImmediate : public OopClosure { + G1CollectedHeap* _g1; +public: + UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { } + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + HeapRegion* to = _g1->heap_region_containing(*p); + if (to->in_collection_set()) { + if (to->rem_set()->add_reference(p, 0)) { + _g1->schedule_popular_region_evac(to); + } + } + } +}; + +class UpdateRSetOopsIntoCSDeferred : public OopClosure { + G1CollectedHeap* _g1; + CardTableModRefBS* _ct_bs; + DirtyCardQueue* _dcq; +public: + UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) : + _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { } + virtual void do_oop(narrowOop* p) { + guarantee(false, "NYI"); + } + virtual void do_oop(oop* p) { + oop obj = *p; + if (_g1->obj_in_cs(obj)) { + size_t card_index = _ct_bs->index_for(p); + if (_ct_bs->mark_card_deferred(card_index)) { + _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index)); + } + } + } +}; + +void HRInto_G1RemSet::new_refs_iterate(OopClosure* cl) { + for (size_t i = 0; i < n_workers(); i++) { + for (int j = 0; j < _new_refs[i]->length(); j++) { + oop* p = _new_refs[i]->at(j); + cl->do_oop(p); + } + } +} + void HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do() { guarantee( _cards_scanned != NULL, "invariant" ); _total_cards_scanned = 0; @@ -609,11 +628,25 @@ if (cg1r->do_traversal()) { cg1r->cg1rThread()->set_do_traversal(false); } - for (uint i = 0; i < ParallelGCThreads; i++) { - delete _new_refs[i]; - } set_par_traversal(false); } + + if (_g1->evacuation_failed()) { + // Restore remembered sets for the regions pointing into + // the collection set. + if (G1DeferredRSUpdate) { + DirtyCardQueue dcq(&_g1->dirty_card_queue_set()); + UpdateRSetOopsIntoCSDeferred deferred_update(_g1, &dcq); + new_refs_iterate(&deferred_update); + } else { + UpdateRSetOopsIntoCSImmediate immediate_update(_g1); + new_refs_iterate(&immediate_update); + } + } + for (uint i = 0; i < n_workers(); i++) { + _new_refs[i]->clear(); + } + assert(!_par_traversal_in_progress, "Invariant between iterations."); } @@ -683,7 +716,8 @@ bool doHeapRegion(HeapRegion* r) { if (!r->in_collection_set() && !r->continuesHumongous() && - !r->is_young()) { + !r->is_young() && + !r->is_survivor()) { _update_rs_oop_cl.set_from(r); UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl); @@ -820,7 +854,7 @@ // before all the cards on the region are dirtied. This is unlikely, // and it doesn't happen often, but it can happen. So, the extra // check below filters out those cards. - if (r->is_young()) { + if (r->is_young() || r->is_survivor()) { return; } // While we are processing RSet buffers during the collection, we diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/g1RemSet.hpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -155,6 +155,7 @@ bool _par_traversal_in_progress; void set_par_traversal(bool b); GrowableArray** _new_refs; + void new_refs_iterate(OopClosure* cl); public: // This is called to reset dual hash tables after the gc pause @@ -214,3 +215,27 @@ int n() { return _n; }; HeapWord* start_first() { return _start_first; } }; + +class UpdateRSOopClosure: public OopClosure { + HeapRegion* _from; + HRInto_G1RemSet* _rs; + int _worker_i; +public: + UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) : + _from(NULL), _rs(rs), _worker_i(worker_i) { + guarantee(_rs != NULL, "Requires an HRIntoG1RemSet"); + } + + void set_from(HeapRegion* from) { + assert(from != NULL, "from region must be non-NULL"); + _from = from; + } + + virtual void do_oop(narrowOop* p); + virtual void do_oop(oop* p); + + // Override: this closure is idempotent. + // bool idempotent() { return true; } + bool apply_to_weak_ref_discovered_field() { return true; } +}; + diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -31,24 +31,7 @@ } inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) { - oop obj = *p; - assert(from != NULL && from->is_in_reserved(p), - "p is not in a from"); - HeapRegion* to = _g1->heap_region_containing(obj); - if (from != to && to != NULL) { - if (!to->popular() && !from->is_survivor()) { -#if G1_REM_SET_LOGGING - gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" - " for region [" PTR_FORMAT ", " PTR_FORMAT ")", - p, obj, - to->bottom(), to->end()); -#endif - assert(to->rem_set() != NULL, "Need per-region 'into' remsets."); - if (to->rem_set()->add_reference(p)) { - _g1->schedule_popular_region_evac(to); - } - } - } + par_write_ref(from, p, 0); } inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) { @@ -82,7 +65,22 @@ HeapRegion* to = _g1->heap_region_containing(obj); // The test below could be optimized by applying a bit op to to and from. if (to != NULL && from != NULL && from != to) { - if (!to->popular() && !from->is_survivor()) { + bool update_delayed = false; + // There is a tricky infinite loop if we keep pushing + // self forwarding pointers onto our _new_refs list. + // The _par_traversal_in_progress flag is true during the collection pause, + // false during the evacuation failure handing. + if (_par_traversal_in_progress && + to->in_collection_set() && !self_forwarded(obj)) { + _new_refs[tid]->push(p); + // Deferred updates to the Cset are either discarded (in the normal case), + // or processed (if an evacuation failure occurs) at the end + // of the collection. + // See HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do(). + update_delayed = true; + } + + if (!to->popular() && !update_delayed) { #if G1_REM_SET_LOGGING gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" " for region [" PTR_FORMAT ", " PTR_FORMAT ")", @@ -94,11 +92,14 @@ _g1->schedule_popular_region_evac(to); } } - // There is a tricky infinite loop if we keep pushing - // self forwarding pointers onto our _new_refs list. - if (_par_traversal_in_progress && - to->in_collection_set() && !self_forwarded(obj)) { - _new_refs[tid]->push(p); - } } } + +inline void UpdateRSOopClosure::do_oop(narrowOop* p) { + guarantee(false, "NYI"); +} + +inline void UpdateRSOopClosure::do_oop(oop* p) { + assert(_from != NULL, "from region must be non-NULL"); + _rs->par_write_ref(_from, p, _worker_i); +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -172,6 +172,9 @@ develop(bool, G1RSBarrierUseQueue, true, \ "If true, use queueing RS barrier") \ \ + develop(bool, G1DeferredRSUpdate, true, \ + "If true, use deferred RS updates") \ + \ develop(bool, G1RSLogCheckCardTable, false, \ "If true, verify that no dirty cards remain after RS log " \ "processing.") \ diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/heapRegion.hpp --- a/src/share/vm/gc_implementation/g1/heapRegion.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -318,7 +318,8 @@ FinalCountClaimValue = 1, NoteEndClaimValue = 2, ScrubRemSetClaimValue = 3, - ParVerifyClaimValue = 4 + ParVerifyClaimValue = 4, + RebuildRSClaimValue = 5 }; // Concurrent refinement requires contiguous heap regions (in which TLABs diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/ptrQueue.cpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -91,15 +91,17 @@ _n_completed_buffers(0), _process_completed_threshold(0), _process_completed(false), _buf_free_list(NULL), _buf_free_list_sz(0) -{} +{ + _fl_owner = this; +} void** PtrQueueSet::allocate_buffer() { assert(_sz > 0, "Didn't set a buffer size."); - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); - if (_buf_free_list != NULL) { - void** res = _buf_free_list; - _buf_free_list = (void**)_buf_free_list[0]; - _buf_free_list_sz--; + MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); + if (_fl_owner->_buf_free_list != NULL) { + void** res = _fl_owner->_buf_free_list; + _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0]; + _fl_owner->_buf_free_list_sz--; // Just override the next pointer with NULL, just in case we scan this part // of the buffer. res[0] = NULL; @@ -111,10 +113,10 @@ void PtrQueueSet::deallocate_buffer(void** buf) { assert(_sz > 0, "Didn't set a buffer size."); - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); - buf[0] = (void*)_buf_free_list; - _buf_free_list = buf; - _buf_free_list_sz++; + MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); + buf[0] = (void*)_fl_owner->_buf_free_list; + _fl_owner->_buf_free_list = buf; + _fl_owner->_buf_free_list_sz++; } void PtrQueueSet::reduce_free_list() { @@ -207,3 +209,58 @@ void PtrQueueSet::set_process_completed_threshold(size_t sz) { _process_completed_threshold = sz; } + +// Merge lists of buffers. Notify waiting threads if the length of the list +// exceeds threshold. The source queue is emptied as a result. The queues +// must share the monitor. +void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { + assert(_cbl_mon == src->_cbl_mon, "Should share the same lock"); + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_completed_buffers_tail == NULL) { + assert(_completed_buffers_head == NULL, "Well-formedness"); + _completed_buffers_head = src->_completed_buffers_head; + _completed_buffers_tail = src->_completed_buffers_tail; + } else { + assert(_completed_buffers_head != NULL, "Well formedness"); + if (src->_completed_buffers_head != NULL) { + _completed_buffers_tail->next = src->_completed_buffers_head; + _completed_buffers_tail = src->_completed_buffers_tail; + } + } + _n_completed_buffers += src->_n_completed_buffers; + + src->_n_completed_buffers = 0; + src->_completed_buffers_head = NULL; + src->_completed_buffers_tail = NULL; + + assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL || + _completed_buffers_head != NULL && _completed_buffers_tail != NULL, + "Sanity"); + + if (!_process_completed && + _n_completed_buffers >= _process_completed_threshold) { + _process_completed = true; + if (_notify_when_complete) + _cbl_mon->notify_all(); + } +} + +// Merge free lists of the two queues. The free list of the source +// queue is emptied as a result. The queues must share the same +// mutex that guards free lists. +void PtrQueueSet::merge_freelists(PtrQueueSet* src) { + assert(_fl_lock == src->_fl_lock, "Should share the same lock"); + MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); + if (_buf_free_list != NULL) { + void **p = _buf_free_list; + while (*p != NULL) { + p = (void**)*p; + } + *p = src->_buf_free_list; + } else { + _buf_free_list = src->_buf_free_list; + } + _buf_free_list_sz += src->_buf_free_list_sz; + src->_buf_free_list = NULL; + src->_buf_free_list_sz = 0; +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/ptrQueue.hpp --- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -155,6 +155,9 @@ Mutex* _fl_lock; void** _buf_free_list; size_t _buf_free_list_sz; + // Queue set can share a freelist. The _fl_owner variable + // specifies the owner. It is set to "this" by default. + PtrQueueSet* _fl_owner; // The size of all buffers in the set. size_t _sz; @@ -188,10 +191,13 @@ // Because of init-order concerns, we can't pass these as constructor // arguments. void initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue = 0) { + int max_completed_queue = 0, + PtrQueueSet *fl_owner = NULL) { _max_completed_queue = max_completed_queue; assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?"); - _cbl_mon = cbl_mon; _fl_lock = fl_lock; + _cbl_mon = cbl_mon; + _fl_lock = fl_lock; + _fl_owner = (fl_owner != NULL) ? fl_owner : this; } // Return an empty oop array of size _sz (required to be non-zero). @@ -228,4 +234,7 @@ void reduce_free_list(); size_t completed_buffers_num() { return _n_completed_buffers; } + + void merge_bufferlists(PtrQueueSet* src); + void merge_freelists(PtrQueueSet* src); }; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/sparsePRT.cpp --- a/src/share/vm/gc_implementation/g1/sparsePRT.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -504,6 +504,7 @@ // Make sure that the current and next tables agree. (Another mechanism // takes care of deleting now-unused tables.) _cur = _next; + set_expanded(false); } void SparsePRT::expand() { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/g1/sparsePRT.hpp --- a/src/share/vm/gc_implementation/g1/sparsePRT.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/sparsePRT.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -274,7 +274,7 @@ // Clean up all tables on the expanded list. Called single threaded. static void cleanup_all(); - RSHashTable* next() const { return _next; } + RSHashTable* cur() const { return _cur; } void init_iterator(SparsePRTIter* sprt_iter); @@ -300,7 +300,7 @@ {} void init(const SparsePRT* sprt) { - RSHashTableIter::init(sprt->next()); + RSHashTableIter::init(sprt->cur()); } bool has_next(size_t& card_index) { return RSHashTableIter::has_next(card_index); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -78,7 +78,7 @@ } // Card marks are not precise. The current system can leave us with - // a mismash of precise marks and begining of object marks. This means + // a mismash of precise marks and beginning of object marks. This means // we test for missing precise marks first. If any are found, we don't // fail unless the object head is also unmarked. virtual void do_object(oop obj) { @@ -258,7 +258,7 @@ if (!start_array->object_starts_in_range(slice_start, slice_end)) { continue; } - // Update our begining addr + // Update our beginning addr HeapWord* first_object = start_array->object_start(slice_start); debug_only(oop* first_object_within_slice = (oop*) first_object;) if (first_object < slice_start) { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/parallelScavenge/objectStartArray.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/objectStartArray.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -127,7 +127,7 @@ // Optimized for finding the first object that crosses into // a given block. The blocks contain the offset of the last // object in that block. Scroll backwards by one, and the first - // object hit should be at the begining of the block + // object hit should be at the beginning of the block HeapWord* object_start(HeapWord* addr) const { assert(_covered_region.contains(addr), "Must be in covered region"); jbyte* block = block_for_addr(addr); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/parallelScavenge/prefetchQueue.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/prefetchQueue.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/prefetchQueue.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -26,7 +26,7 @@ // PrefetchQueue is a FIFO queue of variable length (currently 8). // // We need to examine the performance penalty of variable lengths. -// We may also want to split this into cpu dependant bits. +// We may also want to split this into cpu dependent bits. // const int PREFETCH_QUEUE_SIZE = 8; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -125,6 +125,8 @@ perm_gen->verify_object_start_array(); } + heap->pre_full_gc_dump(); + // Filled in below to track the state of the young gen after the collection. bool eden_empty; bool survivors_empty; @@ -363,6 +365,8 @@ Universe::print_heap_after_gc(); } + heap->post_full_gc_dump(); + #ifdef TRACESPINNING ParallelTaskTerminator::print_termination_counts(); #endif diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1982,6 +1982,8 @@ heap->record_gen_tops_before_GC(); } + heap->pre_full_gc_dump(); + _print_phases = PrintGCDetails && PrintParallelOldGCPhaseTimes; // Make sure data structures are sane, make the heap parsable, and do other @@ -2204,6 +2206,8 @@ gc_task_manager()->print_task_time_stamps(); } + heap->post_full_gc_dump(); + #ifdef TRACESPINNING ParallelTaskTerminator::print_termination_counts(); #endif diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp --- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -74,7 +74,7 @@ for (int i = 0; i < lgrp_spaces()->length(); i++) { LGRPSpace *ls = lgrp_spaces()->at(i); MutableSpace *s = ls->space(); - if (s->top() < top()) { // For all spaces preceeding the one containing top() + if (s->top() < top()) { // For all spaces preceding the one containing top() if (s->free_in_words() > 0) { size_t area_touched_words = pointer_delta(s->end(), s->top()); CollectedHeap::fill_with_object(s->top(), area_touched_words); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/shared/vmGCOperations.cpp --- a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -121,7 +121,7 @@ // make the heap parsable (no need to retire TLABs) ch->ensure_parsability(false); } - HeapInspection::heap_inspection(_out); + HeapInspection::heap_inspection(_out, _need_prologue /* need_prologue */); } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_implementation/shared/vmGCOperations.hpp --- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -112,13 +112,16 @@ private: outputStream* _out; bool _full_gc; + bool _need_prologue; public: - VM_GC_HeapInspection(outputStream* out, bool request_full_gc) : + VM_GC_HeapInspection(outputStream* out, bool request_full_gc, + bool need_prologue) : VM_GC_Operation(0 /* total collections, dummy, ignored */, 0 /* total full collections, dummy, ignored */, request_full_gc) { _out = out; _full_gc = request_full_gc; + _need_prologue = need_prologue; } ~VM_GC_HeapInspection() {} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_interface/collectedHeap.cpp --- a/src/share/vm/gc_interface/collectedHeap.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -294,3 +294,29 @@ ThreadLocalAllocBuffer::resize_all_tlabs(); } } + +void CollectedHeap::pre_full_gc_dump() { + if (HeapDumpBeforeFullGC) { + TraceTime tt("Heap Dump: ", PrintGCDetails, false, gclog_or_tty); + // We are doing a "major" collection and a heap dump before + // major collection has been requested. + HeapDumper::dump_heap(); + } + if (PrintClassHistogramBeforeFullGC) { + TraceTime tt("Class Histogram: ", PrintGCDetails, true, gclog_or_tty); + VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! prologue */); + inspector.doit(); + } +} + +void CollectedHeap::post_full_gc_dump() { + if (HeapDumpAfterFullGC) { + TraceTime tt("Heap Dump", PrintGCDetails, false, gclog_or_tty); + HeapDumper::dump_heap(); + } + if (PrintClassHistogramAfterFullGC) { + TraceTime tt("Class Histogram", PrintGCDetails, true, gclog_or_tty); + VM_GC_HeapInspection inspector(gclog_or_tty, false /* ! full gc */, false /* ! prologue */); + inspector.doit(); + } +} diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/gc_interface/collectedHeap.hpp --- a/src/share/vm/gc_interface/collectedHeap.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/gc_interface/collectedHeap.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -514,6 +514,10 @@ // Perform any cleanup actions necessary before allowing a verification. virtual void prepare_for_verify() = 0; + // Generate any dumps preceding or following a full gc + void pre_full_gc_dump(); + void post_full_gc_dump(); + virtual void print() const = 0; virtual void print_on(outputStream* st) const = 0; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/includeDB_core --- a/src/share/vm/includeDB_core Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/includeDB_core Thu Mar 12 18:16:36 2009 -0700 @@ -176,7 +176,7 @@ arguments.cpp oop.inline.hpp arguments.cpp os_.inline.hpp arguments.cpp universe.inline.hpp -arguments.cpp vm_version_.hpp +arguments.cpp vm_version_.hpp arguments.hpp java.hpp arguments.hpp perfData.hpp @@ -241,7 +241,7 @@ assembler.hpp register_.hpp assembler.hpp relocInfo.hpp assembler.hpp top.hpp -assembler.hpp vm_version_.hpp +assembler.hpp vm_version_.hpp assembler.inline.hpp assembler.hpp assembler.inline.hpp codeBuffer.hpp @@ -280,7 +280,7 @@ atomic_.inline.hpp atomic.hpp atomic_.inline.hpp os.hpp -atomic_.inline.hpp vm_version_.hpp +atomic_.inline.hpp vm_version_.hpp // attachListener is jck optional, put cpp deps in includeDB_features @@ -474,6 +474,7 @@ cardTableModRefBS.cpp mutexLocker.hpp cardTableModRefBS.cpp sharedHeap.hpp cardTableModRefBS.cpp space.hpp +cardTableModRefBS.cpp space.inline.hpp cardTableModRefBS.cpp universe.hpp cardTableModRefBS.cpp virtualspace.hpp @@ -2094,6 +2095,7 @@ interp_masm_.cpp interpreterRuntime.hpp interp_masm_.cpp interpreter.hpp interp_masm_.cpp jvmtiExport.hpp +interp_masm_.cpp jvmtiRedefineClassesTrace.hpp interp_masm_.cpp jvmtiThreadState.hpp interp_masm_.cpp markOop.hpp interp_masm_.cpp methodDataOop.hpp @@ -2176,7 +2178,7 @@ interpreterRuntime.cpp threadCritical.hpp interpreterRuntime.cpp universe.inline.hpp interpreterRuntime.cpp vmSymbols.hpp -interpreterRuntime.cpp vm_version_.hpp +interpreterRuntime.cpp vm_version_.hpp interpreterRuntime.hpp bytecode.hpp interpreterRuntime.hpp frame.inline.hpp @@ -2279,7 +2281,7 @@ java.cpp universe.hpp java.cpp vmError.hpp java.cpp vm_operations.hpp -java.cpp vm_version_.hpp +java.cpp vm_version_.hpp java.cpp vtune.hpp java.hpp os.hpp @@ -3485,7 +3487,7 @@ register_.cpp register_.hpp register_.hpp register.hpp -register_.hpp vm_version_.hpp +register_.hpp vm_version_.hpp registerMap.hpp globalDefinitions.hpp registerMap.hpp register_.hpp @@ -3670,6 +3672,7 @@ sharedRuntime.cpp interpreter.hpp sharedRuntime.cpp javaCalls.hpp sharedRuntime.cpp jvmtiExport.hpp +sharedRuntime.cpp jvmtiRedefineClassesTrace.hpp sharedRuntime.cpp nativeInst_.hpp sharedRuntime.cpp nativeLookup.hpp sharedRuntime.cpp oop.inline.hpp @@ -3699,6 +3702,7 @@ sharedRuntime_.cpp debugInfoRec.hpp sharedRuntime_.cpp icBuffer.hpp sharedRuntime_.cpp interpreter.hpp +sharedRuntime_.cpp jvmtiRedefineClassesTrace.hpp sharedRuntime_.cpp sharedRuntime.hpp sharedRuntime_.cpp vframeArray.hpp sharedRuntime_.cpp vmreg_.inline.hpp @@ -3835,7 +3839,7 @@ statSampler.cpp statSampler.hpp statSampler.cpp systemDictionary.hpp statSampler.cpp vmSymbols.hpp -statSampler.cpp vm_version_.hpp +statSampler.cpp vm_version_.hpp statSampler.hpp perfData.hpp statSampler.hpp task.hpp @@ -4579,22 +4583,22 @@ vm_version.cpp arguments.hpp vm_version.cpp oop.inline.hpp vm_version.cpp universe.hpp -vm_version.cpp vm_version_.hpp +vm_version.cpp vm_version_.hpp vm_version.hpp allocation.hpp vm_version.hpp ostream.hpp -vm_version_.cpp assembler_.inline.hpp -vm_version_.cpp java.hpp -vm_version_.cpp os_.inline.hpp -vm_version_.cpp resourceArea.hpp -vm_version_.cpp stubCodeGenerator.hpp -vm_version_.cpp vm_version_.hpp - -vm_version_.hpp globals_extension.hpp -vm_version_.hpp vm_version.hpp - -vm_version_.cpp vm_version_.hpp +vm_version_.cpp assembler_.inline.hpp +vm_version_.cpp java.hpp +vm_version_.cpp os_.inline.hpp +vm_version_.cpp resourceArea.hpp +vm_version_.cpp stubCodeGenerator.hpp +vm_version_.cpp vm_version_.hpp + +vm_version_.hpp globals_extension.hpp +vm_version_.hpp vm_version.hpp + +vm_version_.cpp vm_version_.hpp vmreg.cpp assembler.hpp vmreg.cpp vmreg.hpp diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/includeDB_gc --- a/src/share/vm/includeDB_gc Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/includeDB_gc Thu Mar 12 18:16:36 2009 -0700 @@ -26,10 +26,12 @@ collectedHeap.cpp collectedHeap.hpp collectedHeap.cpp collectedHeap.inline.hpp +collectedHeap.cpp heapDumper.hpp collectedHeap.cpp init.hpp collectedHeap.cpp oop.inline.hpp collectedHeap.cpp systemDictionary.hpp collectedHeap.cpp thread_.inline.hpp +collectedHeap.cpp vmGCOperations.hpp collectedHeap.hpp allocation.hpp collectedHeap.hpp barrierSet.hpp diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/interpreter/abstractInterpreter.hpp --- a/src/share/vm/interpreter/abstractInterpreter.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/interpreter/abstractInterpreter.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -22,7 +22,7 @@ * */ -// This file contains the platform-independant parts +// This file contains the platform-independent parts // of the abstract interpreter and the abstract interpreter generator. // Organization of the interpreter(s). There exists two different interpreters in hotpot diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/interpreter/bytecodeInterpreter.cpp --- a/src/share/vm/interpreter/bytecodeInterpreter.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -2642,7 +2642,7 @@ // two interpreted frames). We need to save the current arguments in C heap so that // the deoptimized frame when it restarts can copy the arguments to its expression // stack and re-execute the call. We also have to notify deoptimization that this - // has occured and to pick the preerved args copy them to the deoptimized frame's + // has occurred and to pick the preserved args copy them to the deoptimized frame's // java expression stack. Yuck. // THREAD->popframe_preserve_args(in_ByteSize(METHOD->size_of_parameters() * wordSize), diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/interpreter/bytecodeInterpreter.inline.hpp --- a/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -22,7 +22,7 @@ * */ -// This file holds platform-independant bodies of inline functions for the C++ based interpreter +// This file holds platform-independent bodies of inline functions for the C++ based interpreter #ifdef CC_INTERP diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/interpreter/cppInterpreter.hpp --- a/src/share/vm/interpreter/cppInterpreter.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/interpreter/cppInterpreter.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -24,7 +24,7 @@ #ifdef CC_INTERP -// This file contains the platform-independant parts +// This file contains the platform-independent parts // of the c++ interpreter class CppInterpreter: public AbstractInterpreter { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/interpreter/cppInterpreterGenerator.hpp --- a/src/share/vm/interpreter/cppInterpreterGenerator.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/interpreter/cppInterpreterGenerator.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -22,7 +22,7 @@ * */ -// This file contains the platform-independant parts +// This file contains the platform-independent parts // of the template interpreter generator. #ifdef CC_INTERP diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/interpreter/interpreter.hpp --- a/src/share/vm/interpreter/interpreter.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/interpreter/interpreter.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -22,7 +22,7 @@ * */ -// This file contains the platform-independant parts +// This file contains the platform-independent parts // of the interpreter and the interpreter generator. //------------------------------------------------------------------------------------------------------------------------ diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/interpreter/interpreterGenerator.hpp --- a/src/share/vm/interpreter/interpreterGenerator.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/interpreter/interpreterGenerator.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -22,7 +22,7 @@ * */ -// This file contains the platform-independant parts +// This file contains the platform-independent parts // of the interpreter generator. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/interpreter/templateInterpreter.hpp --- a/src/share/vm/interpreter/templateInterpreter.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/interpreter/templateInterpreter.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -22,7 +22,7 @@ * */ -// This file contains the platform-independant parts +// This file contains the platform-independent parts // of the template interpreter and the template interpreter generator. #ifndef CC_INTERP diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/interpreter/templateInterpreterGenerator.hpp --- a/src/share/vm/interpreter/templateInterpreterGenerator.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/interpreter/templateInterpreterGenerator.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -22,7 +22,7 @@ * */ -// This file contains the platform-independant parts +// This file contains the platform-independent parts // of the template interpreter generator. #ifndef CC_INTERP diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/libadt/dict.cpp --- a/src/share/vm/libadt/dict.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/libadt/dict.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -306,7 +306,7 @@ // Convert string to hash key. This algorithm implements a universal hash // function with the multipliers frozen (ok, so it's not universal). The // multipliers (and allowable characters) are all odd, so the resultant sum -// is odd - guarenteed not divisible by any power of two, so the hash tables +// is odd - guaranteed not divisible by any power of two, so the hash tables // can be any power of two with good results. Also, I choose multipliers // that have only 2 bits set (the low is always set to be odd) so // multiplication requires only shifts and adds. Characters are required to @@ -326,7 +326,7 @@ } //------------------------------hashptr-------------------------------------- -// Slimey cheap hash function; no guarenteed performance. Better than the +// Slimey cheap hash function; no guaranteed performance. Better than the // default for pointers, especially on MS-DOS machines. int hashptr(const void *key) { #ifdef __TURBOC__ @@ -336,7 +336,7 @@ #endif } -// Slimey cheap hash function; no guarenteed performance. +// Slimey cheap hash function; no guaranteed performance. int hashkey(const void *key) { return (intptr_t)key; } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/libadt/dict.hpp --- a/src/share/vm/libadt/dict.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/libadt/dict.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -86,10 +86,10 @@ // Hashing functions int hashstr(const void *s); // Nice string hash -// Slimey cheap hash function; no guarenteed performance. Better than the +// Slimey cheap hash function; no guaranteed performance. Better than the // default for pointers, especially on MS-DOS machines. int hashptr(const void *key); -// Slimey cheap hash function; no guarenteed performance. +// Slimey cheap hash function; no guaranteed performance. int hashkey(const void *key); // Key comparators diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/memory/cardTableModRefBS.cpp --- a/src/share/vm/memory/cardTableModRefBS.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/memory/cardTableModRefBS.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -217,15 +217,28 @@ (HeapWord*) align_size_up((uintptr_t)new_end, _page_size); assert(new_end_aligned >= (HeapWord*) new_end, "align up, but less"); + // Check the other regions (excludes "ind") to ensure that + // the new_end_aligned does not intrude onto the committed + // space of another region. int ri = 0; for (ri = 0; ri < _cur_covered_regions; ri++) { if (ri != ind) { if (_committed[ri].contains(new_end_aligned)) { - assert((new_end_aligned >= _committed[ri].start()) && - (_committed[ri].start() > _committed[ind].start()), + // The prior check included in the assert + // (new_end_aligned >= _committed[ri].start()) + // is redundant with the "contains" test. + // Any region containing the new end + // should start at or beyond the region found (ind) + // for the new end (committed regions are not expected to + // be proper subsets of other committed regions). + assert(_committed[ri].start() >= _committed[ind].start(), "New end of committed region is inconsistent"); new_end_aligned = _committed[ri].start(); - assert(new_end_aligned > _committed[ind].start(), + // new_end_aligned can be equal to the start of its + // committed region (i.e., of "ind") if a second + // region following "ind" also start at the same location + // as "ind". + assert(new_end_aligned >= _committed[ind].start(), "New end of committed region is before start"); debug_only(collided = true;) // Should only collide with 1 region @@ -343,18 +356,62 @@ inline_write_ref_field(field, newVal); } +/* + Claimed and deferred bits are used together in G1 during the evacuation + pause. These bits can have the following state transitions: + 1. The claimed bit can be put over any other card state. Except that + the "dirty -> dirty and claimed" transition is checked for in + G1 code and is not used. + 2. Deferred bit can be set only if the previous state of the card + was either clean or claimed. mark_card_deferred() is wait-free. + We do not care if the operation is be successful because if + it does not it will only result in duplicate entry in the update + buffer because of the "cache-miss". So it's not worth spinning. + */ + bool CardTableModRefBS::claim_card(size_t card_index) { jbyte val = _byte_map[card_index]; - if (val != claimed_card_val()) { - jbyte res = Atomic::cmpxchg((jbyte) claimed_card_val(), &_byte_map[card_index], val); - if (res == val) + assert(val != dirty_card_val(), "Shouldn't claim a dirty card"); + while (val == clean_card_val() || + (val & (clean_card_mask_val() | claimed_card_val())) != claimed_card_val()) { + jbyte new_val = val; + if (val == clean_card_val()) { + new_val = (jbyte)claimed_card_val(); + } else { + new_val = val | (jbyte)claimed_card_val(); + } + jbyte res = Atomic::cmpxchg(new_val, &_byte_map[card_index], val); + if (res == val) { return true; - else return false; + } + val = res; } return false; } +bool CardTableModRefBS::mark_card_deferred(size_t card_index) { + jbyte val = _byte_map[card_index]; + // It's already processed + if ((val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val()) { + return false; + } + // Cached bit can be installed either on a clean card or on a claimed card. + jbyte new_val = val; + if (val == clean_card_val()) { + new_val = (jbyte)deferred_card_val(); + } else { + if (val & claimed_card_val()) { + new_val = val | (jbyte)deferred_card_val(); + } + } + if (new_val != val) { + Atomic::cmpxchg(new_val, &_byte_map[card_index], val); + } + return true; +} + + void CardTableModRefBS::non_clean_card_iterate(Space* sp, MemRegion mr, DirtyCardToOopClosure* dcto_cl, diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/memory/cardTableModRefBS.hpp --- a/src/share/vm/memory/cardTableModRefBS.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/memory/cardTableModRefBS.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -52,11 +52,15 @@ enum CardValues { clean_card = -1, + // The mask contains zeros in places for all other values. + clean_card_mask = clean_card - 31, + dirty_card = 0, precleaned_card = 1, - claimed_card = 3, - last_card = 4, - CT_MR_BS_last_reserved = 10 + claimed_card = 2, + deferred_card = 4, + last_card = 8, + CT_MR_BS_last_reserved = 16 }; // dirty and precleaned are equivalent wrt younger_refs_iter. @@ -254,9 +258,11 @@ }; static int clean_card_val() { return clean_card; } + static int clean_card_mask_val() { return clean_card_mask; } static int dirty_card_val() { return dirty_card; } static int claimed_card_val() { return claimed_card; } static int precleaned_card_val() { return precleaned_card; } + static int deferred_card_val() { return deferred_card; } // For RTTI simulation. bool is_a(BarrierSet::Name bsn) { @@ -329,7 +335,8 @@ } bool is_card_claimed(size_t card_index) { - return _byte_map[card_index] == claimed_card_val(); + jbyte val = _byte_map[card_index]; + return (val & (clean_card_mask_val() | claimed_card_val())) == claimed_card_val(); } bool claim_card(size_t card_index); @@ -338,6 +345,13 @@ return _byte_map[card_index] == clean_card_val(); } + bool is_card_deferred(size_t card_index) { + jbyte val = _byte_map[card_index]; + return (val & (clean_card_mask_val() | deferred_card_val())) == deferred_card_val(); + } + + bool mark_card_deferred(size_t card_index); + // Card marking array base (adjusted for heap low boundary) // This would be the 0th element of _byte_map, if the heap started at 0x0. // But since the heap starts at some higher address, this points to somewhere @@ -434,6 +448,10 @@ return byte_for(p) - _byte_map; } + const jbyte* byte_for_index(const size_t card_index) const { + return _byte_map + card_index; + } + void verify(); void verify_guard(); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/memory/filemap.cpp --- a/src/share/vm/memory/filemap.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/memory/filemap.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -35,14 +35,14 @@ extern address JVM_FunctionAtStart(); extern address JVM_FunctionAtEnd(); -// Complain and stop. All error conditions occuring during the writing of +// Complain and stop. All error conditions occurring during the writing of // an archive file should stop the process. Unrecoverable errors during // the reading of the archive file should stop the process. static void fail(const char *msg, va_list ap) { // This occurs very early during initialization: tty is not initialized. jio_fprintf(defaultStream::error_stream(), - "An error has occured while processing the" + "An error has occurred while processing the" " shared archive file.\n"); jio_vfprintf(defaultStream::error_stream(), msg, ap); jio_fprintf(defaultStream::error_stream(), "\n"); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/memory/genCollectedHeap.cpp --- a/src/share/vm/memory/genCollectedHeap.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/memory/genCollectedHeap.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -456,6 +456,9 @@ int max_level_collected = starting_level; for (int i = starting_level; i <= max_level; i++) { if (_gens[i]->should_collect(full, size, is_tlab)) { + if (i == n_gens() - 1) { // a major collection is to happen + pre_full_gc_dump(); // do any pre full gc dumps + } // Timer for individual generations. Last argument is false: no CR TraceTime t1(_gens[i]->short_name(), PrintGCDetails, false, gclog_or_tty); TraceCollectorStats tcs(_gens[i]->counters()); @@ -573,6 +576,10 @@ // a whole heap collection. complete = complete || (max_level_collected == n_gens() - 1); + if (complete) { // We did a "major" collection + post_full_gc_dump(); // do any post full gc dumps + } + if (PrintGCDetails) { print_heap_change(gch_prev_used); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/memory/heapInspection.cpp --- a/src/share/vm/memory/heapInspection.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/memory/heapInspection.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -233,7 +233,7 @@ size_t missed_count() { return _missed_count; } }; -void HeapInspection::heap_inspection(outputStream* st) { +void HeapInspection::heap_inspection(outputStream* st, bool need_prologue) { ResourceMark rm; HeapWord* ref; @@ -244,7 +244,9 @@ case CollectedHeap::GenCollectedHeap: { is_shared_heap = true; SharedHeap* sh = (SharedHeap*)heap; - sh->gc_prologue(false /* !full */); // get any necessary locks, etc. + if (need_prologue) { + sh->gc_prologue(false /* !full */); // get any necessary locks, etc. + } ref = sh->perm_gen()->used_region().start(); break; } @@ -290,7 +292,7 @@ } st->flush(); - if (is_shared_heap) { + if (need_prologue && is_shared_heap) { SharedHeap* sh = (SharedHeap*)heap; sh->gc_epilogue(false /* !full */); // release all acquired locks, etc. } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/memory/heapInspection.hpp --- a/src/share/vm/memory/heapInspection.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/memory/heapInspection.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -127,6 +127,6 @@ class HeapInspection : public AllStatic { public: - static void heap_inspection(outputStream* st) KERNEL_RETURN; + static void heap_inspection(outputStream* st, bool need_prologue) KERNEL_RETURN; static void find_instances_at_safepoint(klassOop k, GrowableArray* result) KERNEL_RETURN; }; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/memory/permGen.hpp --- a/src/share/vm/memory/permGen.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/memory/permGen.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -36,7 +36,7 @@ friend class VMStructs; protected: size_t _capacity_expansion_limit; // maximum expansion allowed without a - // full gc occuring + // full gc occurring HeapWord* mem_allocate_in_gen(size_t size, Generation* gen); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/oops/generateOopMap.cpp --- a/src/share/vm/oops/generateOopMap.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/oops/generateOopMap.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -2003,7 +2003,7 @@ // ============ Main Entry Point =========== // GenerateOopMap::GenerateOopMap(methodHandle method) { - // We have to initialize all variables here, that can be queried direcly + // We have to initialize all variables here, that can be queried directly _method = method; _max_locals=0; _init_vars = NULL; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/oops/generateOopMap.hpp --- a/src/share/vm/oops/generateOopMap.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/oops/generateOopMap.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -292,7 +292,7 @@ int _max_stack; // Cached value of max. stack depth int _max_monitors; // Cached value of max. monitor stack depth int _has_exceptions; // True, if exceptions exist for method - bool _got_error; // True, if an error occured during interpretation. + bool _got_error; // True, if an error occurred during interpretation. Handle _exception; // Exception if got_error is true. bool _did_rewriting; // was bytecodes rewritten bool _did_relocation; // was relocation neccessary @@ -422,7 +422,7 @@ void add_to_ref_init_set (int localNo); // Conflicts rewrite logic - bool _conflict; // True, if a conflict occured during interpretation + bool _conflict; // True, if a conflict occurred during interpretation int _nof_refval_conflicts; // No. of conflicts that require rewrites int * _new_var_map; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/oops/instanceKlass.cpp --- a/src/share/vm/oops/instanceKlass.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/oops/instanceKlass.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1917,7 +1917,7 @@ / itableOffsetEntry::size(); for (int cnt = 0 ; ; cnt ++, ioe ++) { - // If the interface isn't implemented by the reciever class, + // If the interface isn't implemented by the receiver class, // the VM should throw IncompatibleClassChangeError. if (cnt >= nof_interfaces) { THROW_OOP_0(vmSymbols::java_lang_IncompatibleClassChangeError()); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/oops/klass.cpp --- a/src/share/vm/oops/klass.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/oops/klass.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -71,7 +71,7 @@ return r; // Return the 1 concrete class } -// Find LCA in class heirarchy +// Find LCA in class hierarchy Klass *Klass::LCA( Klass *k2 ) { Klass *k1 = this; while( 1 ) { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/oops/klass.hpp --- a/src/share/vm/oops/klass.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/oops/klass.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -471,7 +471,7 @@ } bool search_secondary_supers(klassOop k) const; - // Find LCA in class heirarchy + // Find LCA in class hierarchy Klass *LCA( Klass *k ); // Check whether reflection/jni/jvm code is allowed to instantiate this class; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/oops/klassVtable.cpp --- a/src/share/vm/oops/klassVtable.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/oops/klassVtable.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -992,6 +992,10 @@ methodOop new_method = new_methods[j]; itableMethodEntry* ime = method_entry(0); + // The itable can describe more than one interface and the same + // method signature can be specified by more than one interface. + // This means we have to do an exhaustive search to find all the + // old_method references. for (int i = 0; i < _size_method_table; i++) { if (ime->method() == old_method) { ime->initialize(new_method); @@ -1008,7 +1012,6 @@ new_method->name()->as_C_string(), new_method->signature()->as_C_string())); } - break; } ime++; } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/oops/methodOop.hpp --- a/src/share/vm/oops/methodOop.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/oops/methodOop.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -296,7 +296,7 @@ void set_compiled_invocation_count(int count) { _compiled_invocation_count = count; } #endif // not PRODUCT - // Clear (non-shared space) pointers which could not be relevent + // Clear (non-shared space) pointers which could not be relevant // if this (shared) method were mapped into another JVM. void remove_unshareable_info(); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/block.cpp --- a/src/share/vm/opto/block.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/block.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -181,7 +181,7 @@ } //------------------------------has_uncommon_code------------------------------ -// Return true if the block's code implies that it is not likely to be +// Return true if the block's code implies that it is likely to be // executed infrequently. Check to see if the block ends in a Halt or // a low probability call. bool Block::has_uncommon_code() const { @@ -909,6 +909,10 @@ !(n->jvms() != NULL && n->jvms()->is_monitor_use(k)) ) { assert( b->find_node(def) < j, "uses must follow definitions" ); } + if( def->is_SafePointScalarObject() ) { + assert(_bbs[def->_idx] == b, "SafePointScalarObject Node should be at the same block as its SafePoint node"); + assert(_bbs[def->_idx] == _bbs[def->in(0)->_idx], "SafePointScalarObject Node should be at the same block as its control edge"); + } } } } @@ -1307,7 +1311,7 @@ } } else if (e->state() == CFGEdge::open) { // Append traces, even without a fall-thru connection. - // But leave root entry at the begining of the block list. + // But leave root entry at the beginning of the block list. if (targ_trace != trace(_cfg._broot)) { e->set_state(CFGEdge::connected); src_trace->append(targ_trace); @@ -1430,7 +1434,7 @@ } // Backbranch to the top of a trace - // Scroll foward through the trace from the targ_block. If we find + // Scroll forward through the trace from the targ_block. If we find // a loop head before another loop top, use the the loop head alignment. for (Block *b = targ_block; b != NULL; b = next(b)) { if (b->has_loop_alignment()) { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/block.hpp --- a/src/share/vm/opto/block.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/block.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -347,6 +347,8 @@ // Helper function to insert a node into a block void schedule_node_into_block( Node *n, Block *b ); + void replace_block_proj_ctrl( Node *n ); + // Set the basic block for pinned Nodes void schedule_pinned_nodes( VectorSet &visited ); @@ -607,7 +609,7 @@ Block * next(Block *b) const { return _next_list[b->_pre_order]; } void set_next(Block *b, Block *n) const { _next_list[b->_pre_order] = n; } - // Return the block that preceeds "b" in the trace. + // Return the block that precedes "b" in the trace. Block * prev(Block *b) const { return _prev_list[b->_pre_order]; } void set_prev(Block *b, Block *p) const { _prev_list[b->_pre_order] = p; } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/buildOopMap.cpp --- a/src/share/vm/opto/buildOopMap.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/buildOopMap.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -55,7 +55,7 @@ // breadth-first approach but it was worse (showed O(n^2) in the // pick-next-block code). // -// The relevent data is kept in a struct of arrays (it could just as well be +// The relevant data is kept in a struct of arrays (it could just as well be // an array of structs, but the struct-of-arrays is generally a little more // efficient). The arrays are indexed by register number (including // stack-slots as registers) and so is bounded by 200 to 300 elements in diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/callnode.cpp --- a/src/share/vm/opto/callnode.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/callnode.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -975,6 +975,7 @@ } bool SafePointScalarObjectNode::pinned() const { return true; } +bool SafePointScalarObjectNode::depends_only_on_test() const { return false; } uint SafePointScalarObjectNode::ideal_reg() const { return 0; // No matching to machine instruction diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/callnode.hpp --- a/src/share/vm/opto/callnode.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/callnode.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -437,6 +437,10 @@ // of the SafePoint node for which it was generated. virtual bool pinned() const; // { return true; } + // SafePointScalarObject depends on the SafePoint node + // for which it was generated. + virtual bool depends_only_on_test() const; // { return false; } + virtual uint size_of() const { return sizeof(*this); } // Assumes that "this" is an argument to a safepoint node "s", and that diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/cfgnode.cpp --- a/src/share/vm/opto/cfgnode.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/cfgnode.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1350,7 +1350,7 @@ } // Register the new node but do not transform it. Cannot transform until the - // entire Region/Phi conglerate has been hacked as a single huge transform. + // entire Region/Phi conglomerate has been hacked as a single huge transform. igvn->register_new_node_with_optimizer( newn ); // Now I can point to the new node. n->add_req(newn); @@ -1381,7 +1381,7 @@ Node *val = phi->in(i); // Constant to split for uint hit = 0; // Number of times it occurs - for( ; i < phi->req(); i++ ){ // Count occurances of constant + for( ; i < phi->req(); i++ ){ // Count occurrences of constant Node *n = phi->in(i); if( !n ) return NULL; if( phase->type(n) == Type::TOP ) return NULL; @@ -1423,7 +1423,7 @@ //============================================================================= //------------------------------simple_data_loop_check------------------------- -// Try to determing if the phi node in a simple safe/unsafe data loop. +// Try to determining if the phi node in a simple safe/unsafe data loop. // Returns: // enum LoopSafety { Safe = 0, Unsafe, UnsafeLoop }; // Safe - safe case when the phi and it's inputs reference only safe data @@ -1687,7 +1687,7 @@ progress = phase->C->top(); break; } - // If tranformed to a MergeMem, get the desired slice + // If transformed to a MergeMem, get the desired slice // Otherwise the returned node represents memory for every slice Node *new_mem = (m->is_MergeMem()) ? m->as_MergeMem()->memory_at(alias_idx) : m; @@ -1962,7 +1962,7 @@ f[CatchProjNode::fall_through_index] = Type::TOP; } else if( call->req() > TypeFunc::Parms ) { const Type *arg0 = phase->type( call->in(TypeFunc::Parms) ); - // Check for null reciever to virtual or interface calls + // Check for null receiver to virtual or interface calls if( call->is_CallDynamicJava() && arg0->higher_equal(TypePtr::NULL_PTR) ) { f[CatchProjNode::fall_through_index] = Type::TOP; @@ -1995,7 +1995,7 @@ // also remove any exception table entry. Thus we must know the call // feeding the Catch will not really throw an exception. This is ok for // the main fall-thru control (happens when we know a call can never throw - // an exception) or for "rethrow", because a further optimnization will + // an exception) or for "rethrow", because a further optimization will // yank the rethrow (happens when we inline a function that can throw an // exception and the caller has no handler). Not legal, e.g., for passing // a NULL receiver to a v-call, or passing bad types to a slow-check-cast. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/chaitin.cpp --- a/src/share/vm/opto/chaitin.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/chaitin.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1246,7 +1246,7 @@ // If the live range is not bound, then we actually had some choices // to make. In this case, the mask has more bits in it than the colors - // choosen. Restrict the mask to just what was picked. + // chosen. Restrict the mask to just what was picked. if( lrg->num_regs() == 1 ) { // Size 1 live range lrg->Clear(); // Clear the mask lrg->Insert(reg); // Set regmask to match selected reg diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/chaitin.hpp --- a/src/share/vm/opto/chaitin.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/chaitin.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -327,7 +327,7 @@ // True if lidx is used before any real register is def'd in the block bool prompt_use( Block *b, uint lidx ); Node *get_spillcopy_wide( Node *def, Node *use, uint uidx ); - // Insert the spill at chosen location. Skip over any interveneing Proj's or + // Insert the spill at chosen location. Skip over any intervening Proj's or // Phis. Skip over a CatchNode and projs, inserting in the fall-through block // instead. Update high-pressure indices. Create a new live range. void insert_proj( Block *b, uint i, Node *spill, uint maxlrg ); @@ -431,7 +431,7 @@ void Simplify(); // Select colors by re-inserting edges into the IFG. - // Return TRUE if any spills occured. + // Return TRUE if any spills occurred. uint Select( ); // Helper function for select which allows biased coloring OptoReg::Name choose_color( LRG &lrg, int chunk ); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/classes.hpp --- a/src/share/vm/opto/classes.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/classes.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -129,11 +129,13 @@ macro(LShiftI) macro(LShiftL) macro(LoadB) +macro(LoadUB) macro(LoadUS) macro(LoadD) macro(LoadD_unaligned) macro(LoadF) macro(LoadI) +macro(LoadUI2L) macro(LoadKlass) macro(LoadNKlass) macro(LoadL) diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/coalesce.cpp --- a/src/share/vm/opto/coalesce.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/coalesce.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -123,7 +123,7 @@ } //------------------------------clone_projs------------------------------------ -// After cloning some rematierialized instruction, clone any MachProj's that +// After cloning some rematerialized instruction, clone any MachProj's that // follow it. Example: Intel zero is XOR, kills flags. Sparc FP constants // use G3 as an address temp. int PhaseChaitin::clone_projs( Block *b, uint idx, Node *con, Node *copy, uint &maxlrg ) { @@ -694,8 +694,8 @@ } // End of if not infinite-stack neighbor } // End of if actually inserted } // End of if live range overlaps - } // End of else collect intereferences for 1 node - } // End of while forever, scan back for intereferences + } // End of else collect interferences for 1 node + } // End of while forever, scan back for interferences return reg_degree; } @@ -786,7 +786,7 @@ if( rm_size == 0 ) return false; // Another early bail-out test is when we are double-coalescing and the - // 2 copies are seperated by some control flow. + // 2 copies are separated by some control flow. if( dst_copy != src_copy ) { Block *src_b = _phc._cfg._bbs[src_copy->_idx]; Block *b2 = b; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/compile.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -337,7 +337,7 @@ tty->print_cr("*********************************************************"); } if (env()->break_at_compile()) { - // Open the debugger when compiing this method. + // Open the debugger when compiling this method. tty->print("### Breaking when compiling: "); method()->print_short_name(); tty->cr(); @@ -1191,8 +1191,8 @@ default: ShouldNotReachHere(); } break; - case 2: // No collasping at level 2; keep all splits - case 3: // No collasping at level 3; keep all splits + case 2: // No collapsing at level 2; keep all splits + case 3: // No collapsing at level 3; keep all splits break; default: Unimplemented(); @@ -2005,8 +2005,10 @@ case Op_StoreP: case Op_StoreN: case Op_LoadB: + case Op_LoadUB: case Op_LoadUS: case Op_LoadI: + case Op_LoadUI2L: case Op_LoadKlass: case Op_LoadNKlass: case Op_LoadL: @@ -2102,7 +2104,7 @@ // [base_reg + offset] // NullCheck base_reg // - // Pin the new DecodeN node to non-null path on these patforms (Sparc) + // Pin the new DecodeN node to non-null path on these platform (Sparc) // to keep the information to which NULL check the new DecodeN node // corresponds to use it as value in implicit_null_check(). // diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/connode.cpp --- a/src/share/vm/opto/connode.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/connode.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -71,7 +71,7 @@ to figure out which test post-dominates. The real problem is that it doesn't matter which one you pick. After you pick up, the dominating-test elider in IGVN can remove the test and allow you to hoist up to the dominating test on -the choosen oop bypassing the test on the not-choosen oop. Seen in testing. +the chosen oop bypassing the test on the not-chosen oop. Seen in testing. Oops. (3) Leave the CastPP's in. This makes the graph more accurate in some sense; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/divnode.cpp --- a/src/share/vm/opto/divnode.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/divnode.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -35,7 +35,7 @@ // by constant into a multiply/shift/add series. Return false if calculations // fail. // -// Borrowed almost verbatum from Hacker's Delight by Henry S. Warren, Jr. with +// Borrowed almost verbatim from Hacker's Delight by Henry S. Warren, Jr. with // minor type name and parameter changes. static bool magic_int_divide_constants(jint d, jint &M, jint &s) { int32_t p; @@ -202,7 +202,7 @@ // by constant into a multiply/shift/add series. Return false if calculations // fail. // -// Borrowed almost verbatum from Hacker's Delight by Henry S. Warren, Jr. with +// Borrowed almost verbatim from Hacker's Delight by Henry S. Warren, Jr. with // minor type name and parameter changes. Adjusted to 64 bit word width. static bool magic_long_divide_constants(jlong d, jlong &M, jint &s) { int64_t p; @@ -1069,7 +1069,7 @@ int log2_con = -1; - // If this is a power of two, they maybe we can mask it + // If this is a power of two, then maybe we can mask it if( is_power_of_2_long(pos_con) ) { log2_con = log2_long(pos_con); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/domgraph.cpp --- a/src/share/vm/opto/domgraph.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/domgraph.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -183,7 +183,7 @@ if (pre_order == 1) t->_parent = NULL; // first block doesn't have parent else { - // Save parent (currernt top block on stack) in DFS + // Save parent (current top block on stack) in DFS t->_parent = &_tarjan[_stack_top->block->_pre_order]; } // Now put this block on stack diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/escape.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -515,7 +515,7 @@ // cause the failure in add_offset() with narrow oops since TypeOopPtr() // constructor verifies correctness of the offset. // - // It could happend on subclass's branch (from the type profiling + // It could happened on subclass's branch (from the type profiling // inlining) which was not eliminated during parsing since the exactness // of the allocation type was not propagated to the subclass type check. // @@ -703,7 +703,7 @@ while (prev != result) { prev = result; if (result == start_mem) - break; // hit one of our sentinals + break; // hit one of our sentinels if (result->is_Mem()) { const Type *at = phase->type(result->in(MemNode::Address)); if (at != Type::TOP) { @@ -720,7 +720,7 @@ if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) { Node *proj_in = result->in(0); if (proj_in->is_Allocate() && proj_in->_idx == (uint)tinst->instance_id()) { - break; // hit one of our sentinals + break; // hit one of our sentinels } else if (proj_in->is_Call()) { CallNode *call = proj_in->as_Call(); if (!call->may_modify(tinst, phase)) { @@ -756,6 +756,16 @@ } else { break; } + } else if (result->Opcode() == Op_SCMemProj) { + assert(result->in(0)->is_LoadStore(), "sanity"); + const Type *at = phase->type(result->in(0)->in(MemNode::Address)); + if (at != Type::TOP) { + assert (at->isa_ptr() != NULL, "pointer type required."); + int idx = C->get_alias_index(at->is_ptr()); + assert(idx != alias_idx, "Object is not scalar replaceable if a LoadStore node access its field"); + break; + } + result = result->in(0)->in(MemNode::Memory); } } if (result->is_Phi()) { @@ -794,7 +804,7 @@ // Phase 2: Process MemNode's from memnode_worklist. compute new address type and // search the Memory chain for a store with the appropriate type // address type. If a Phi is found, create a new version with -// the approriate memory slices from each of the Phi inputs. +// the appropriate memory slices from each of the Phi inputs. // For stores, process the users as follows: // MemNode: push on memnode_worklist // MergeMem: push on mergemem_worklist @@ -1548,7 +1558,7 @@ has_non_escaping_obj = true; // Non GlobalEscape Node* n = ptn->_node; if (n->is_Allocate() && ptn->_scalar_replaceable ) { - // Push scalar replaceable alocations on alloc_worklist + // Push scalar replaceable allocations on alloc_worklist // for processing in split_unique_types(). alloc_worklist.append(n); } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/gcm.cpp --- a/src/share/vm/opto/gcm.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/gcm.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -57,6 +57,37 @@ } } +//----------------------------replace_block_proj_ctrl------------------------- +// Nodes that have is_block_proj() nodes as their control need to use +// the appropriate Region for their actual block as their control since +// the projection will be in a predecessor block. +void PhaseCFG::replace_block_proj_ctrl( Node *n ) { + const Node *in0 = n->in(0); + assert(in0 != NULL, "Only control-dependent"); + const Node *p = in0->is_block_proj(); + if (p != NULL && p != n) { // Control from a block projection? + assert(!n->pinned() || n->is_SafePointScalarObject(), "only SafePointScalarObject pinned node is expected here"); + // Find trailing Region + Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block + uint j = 0; + if (pb->_num_succs != 1) { // More then 1 successor? + // Search for successor + uint max = pb->_nodes.size(); + assert( max > 1, "" ); + uint start = max - pb->_num_succs; + // Find which output path belongs to projection + for (j = start; j < max; j++) { + if( pb->_nodes[j] == in0 ) + break; + } + assert( j < max, "must find" ); + // Change control to match head of successor basic block + j -= start; + } + n->set_req(0, pb->_succs[j]->head()); + } +} + //------------------------------schedule_pinned_nodes-------------------------- // Set the basic block for Nodes pinned into blocks @@ -68,8 +99,10 @@ Node *n = spstack.pop(); if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited if( n->pinned() && !_bbs.lookup(n->_idx) ) { // Pinned? Nail it down! + assert( n->in(0), "pinned Node must have Control" ); + // Before setting block replace block_proj control edge + replace_block_proj_ctrl(n); Node *input = n->in(0); - assert( input, "pinned Node must have Control" ); while( !input->is_block_start() ) input = input->in(0); Block *b = _bbs[input->_idx]; // Basic block of controlling input @@ -158,34 +191,12 @@ uint i = nstack_top_i; if (i == 0) { - // Special control input processing. - // While I am here, go ahead and look for Nodes which are taking control - // from a is_block_proj Node. After I inserted RegionNodes to make proper - // blocks, the control at a is_block_proj more properly comes from the - // Region being controlled by the block_proj Node. + // Fixup some control. Constants without control get attached + // to root and nodes that use is_block_proj() nodes should be attached + // to the region that starts their block. const Node *in0 = n->in(0); if (in0 != NULL) { // Control-dependent? - const Node *p = in0->is_block_proj(); - if (p != NULL && p != n) { // Control from a block projection? - // Find trailing Region - Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block - uint j = 0; - if (pb->_num_succs != 1) { // More then 1 successor? - // Search for successor - uint max = pb->_nodes.size(); - assert( max > 1, "" ); - uint start = max - pb->_num_succs; - // Find which output path belongs to projection - for (j = start; j < max; j++) { - if( pb->_nodes[j] == in0 ) - break; - } - assert( j < max, "must find" ); - // Change control to match head of successor basic block - j -= start; - } - n->set_req(0, pb->_succs[j]->head()); - } + replace_block_proj_ctrl(n); } else { // n->in(0) == NULL if (n->req() == 1) { // This guy is a constant with NO inputs? n->set_req(0, _root); @@ -226,6 +237,8 @@ if (!n->pinned()) { // Set earliest legal block. _bbs.map(n->_idx, find_deepest_input(n, _bbs)); + } else { + assert(_bbs[n->_idx] == _bbs[n->in(0)->_idx], "Pinned Node should be at the same block as its control edge"); } if (nstack.is_empty()) { @@ -593,7 +606,7 @@ if (pred_block != early) { // If any predecessor of the Phi matches the load's "early block", // we do not need a precedence edge between the Phi and 'load' - // since the load will be forced into a block preceeding the Phi. + // since the load will be forced into a block preceding the Phi. pred_block->set_raise_LCA_mark(load_index); assert(!LCA_orig->dominates(pred_block) || early->dominates(pred_block), "early is high enough"); @@ -1386,7 +1399,7 @@ #ifdef ASSERT for (uint i = 0; i < _num_blocks; i++ ) { Block *b = _blocks[i]; - assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requiers meaningful block frequency"); + assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency"); } #endif @@ -1639,7 +1652,7 @@ // successor blocks. assert(_num_succs == 2, "expecting 2 successors of a null check"); // If either successor has only one predecessor, then the - // probabiltity estimate can be derived using the + // probability estimate can be derived using the // relative frequency of the successor and this block. if (_succs[i]->num_preds() == 2) { return _succs[i]->_freq / _freq; @@ -1841,7 +1854,7 @@ } //------------------------------update_succ_freq------------------------------- -// Update the appropriate frequency associated with block 'b', a succesor of +// Update the appropriate frequency associated with block 'b', a successor of // a block in this loop. void CFGLoop::update_succ_freq(Block* b, float freq) { if (b->_loop == this) { @@ -1888,7 +1901,8 @@ for (int i = 0; i < _members.length(); i++) { CFGElement* s = _members.at(i); float block_freq = s->_freq * loop_freq; - if (block_freq < MIN_BLOCK_FREQUENCY) block_freq = MIN_BLOCK_FREQUENCY; + if (g_isnan(block_freq) || block_freq < MIN_BLOCK_FREQUENCY) + block_freq = MIN_BLOCK_FREQUENCY; s->_freq = block_freq; } CFGLoop* ch = _child; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/graphKit.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1148,7 +1148,7 @@ Node *tst = _gvn.transform( btst ); //----------- - // if peephole optimizations occured, a prior test existed. + // if peephole optimizations occurred, a prior test existed. // If a prior test existed, maybe it dominates as we can avoid this test. if (tst != btst && type == T_OBJECT) { // At this point we want to scan up the CFG to see if we can @@ -1196,7 +1196,7 @@ // Consider using 'Reason_class_check' instead? // To cause an implicit null check, we set the not-null probability - // to the maximum (PROB_MAX). For an explicit check the probablity + // to the maximum (PROB_MAX). For an explicit check the probability // is set to a smaller value. if (null_control != NULL || too_many_traps(reason)) { // probability is less likely diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/ifg.cpp --- a/src/share/vm/opto/ifg.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/ifg.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -292,7 +292,7 @@ //------------------------------interfere_with_live---------------------------- // Interfere this register with everything currently live. Use the RegMasks // to trim the set of possible interferences. Return a count of register-only -// inteferences as an estimate of register pressure. +// interferences as an estimate of register pressure. void PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) { uint retval = 0; // Interfere with everything live. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/ifnode.cpp --- a/src/share/vm/opto/ifnode.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/ifnode.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -81,7 +81,7 @@ uint i4; for( i4 = 1; i4 < phi->req(); i4++ ) { con1 = phi->in(i4); - if( !con1 ) return NULL; // Do not optimize partially collaped merges + if( !con1 ) return NULL; // Do not optimize partially collapsed merges if( con1->is_Con() ) break; // Found a constant // Also allow null-vs-not-null checks const TypePtr *tp = igvn->type(con1)->isa_ptr(); @@ -204,7 +204,7 @@ // T F T F T F // ..s.. ..t .. ..s.. ..t.. ..s.. ..t.. // - // Split the paths coming into the merge point into 2 seperate groups of + // Split the paths coming into the merge point into 2 separate groups of // merges. On the left will be all the paths feeding constants into the // Cmp's Phi. On the right will be the remaining paths. The Cmp's Phi // will fold up into a constant; this will let the Cmp fold up as well as @@ -236,7 +236,7 @@ } // Register the new RegionNodes but do not transform them. Cannot - // transform until the entire Region/Phi conglerate has been hacked + // transform until the entire Region/Phi conglomerate has been hacked // as a single huge transform. igvn->register_new_node_with_optimizer( region_c ); igvn->register_new_node_with_optimizer( region_x ); @@ -599,7 +599,7 @@ //------------------------------fold_compares---------------------------- // See if a pair of CmpIs can be converted into a CmpU. In some cases -// the direction of this if is determined by the preciding if so it +// the direction of this if is determined by the preceding if so it // can be eliminate entirely. Given an if testing (CmpI n c) check // for an immediately control dependent if that is testing (CmpI n c2) // and has one projection leading to this if and the other projection @@ -811,7 +811,7 @@ // Try to remove extra range checks. All 'up_one_dom' gives up at merges // so all checks we inspect post-dominate the top-most check we find. // If we are going to fail the current check and we reach the top check - // then we are guarenteed to fail, so just start interpreting there. + // then we are guaranteed to fail, so just start interpreting there. // We 'expand' the top 2 range checks to include all post-dominating // checks. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/library_call.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -992,7 +992,7 @@ Node *argument = pop(); // pop non-receiver first: it was pushed second Node *receiver = pop(); - // don't intrinsify is argument isn't a constant string. + // don't intrinsify if argument isn't a constant string. if (!argument->is_Con()) { return false; } @@ -1267,7 +1267,7 @@ // result = DPow(x,y); // } // if (result != result)? { - // ucommon_trap(); + // uncommon_trap(); // } // return result; @@ -1324,7 +1324,7 @@ // Check if (y isn't int) then go to slow path Node *bol2 = _gvn.transform( new (C, 2) BoolNode( cmpinty, BoolTest::ne ) ); - // Branch eith way + // Branch either way IfNode *if2 = create_and_xform_if(complex_path,bol2, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN); Node *slow_path = opt_iff(r,if2); // Set region path 2 @@ -1715,8 +1715,8 @@ } //----------------------------inline_reverseBytes_int/long------------------- -// inline Int.reverseBytes(int) -// inline Long.reverseByes(long) +// inline Integer.reverseBytes(int) +// inline Long.reverseBytes(long) bool LibraryCallKit::inline_reverseBytes(vmIntrinsics::ID id) { assert(id == vmIntrinsics::_reverseBytes_i || id == vmIntrinsics::_reverseBytes_l, "not reverse Bytes"); if (id == vmIntrinsics::_reverseBytes_i && !Matcher::has_match_rule(Op_ReverseBytesI)) return false; @@ -1915,7 +1915,7 @@ // addition to memory membars when is_volatile. This is a little // too strong, but avoids the need to insert per-alias-type // volatile membars (for stores; compare Parse::do_put_xxx), which - // we cannot do effctively here because we probably only have a + // we cannot do effectively here because we probably only have a // rough approximation of type. need_mem_bar = true; // For Stores, place a memory ordering barrier now. @@ -2099,7 +2099,7 @@ // overly confusing. (This is a true fact! I originally combined // them, but even I was confused by it!) As much code/comments as // possible are retained from inline_unsafe_access though to make - // the correspondances clearer. - dl + // the correspondences clearer. - dl if (callee()->is_static()) return false; // caller must have the capability! @@ -2166,7 +2166,7 @@ int alias_idx = C->get_alias_index(adr_type); // Memory-model-wise, a CAS acts like a little synchronized block, - // so needs barriers on each side. These don't't translate into + // so needs barriers on each side. These don't translate into // actual barriers on most machines, but we still need rest of // compiler to respect ordering. @@ -3208,7 +3208,7 @@ Node *hash_shift = _gvn.intcon(markOopDesc::hash_shift); Node *hshifted_header= _gvn.transform( new (C, 3) URShiftXNode(header, hash_shift) ); // This hack lets the hash bits live anywhere in the mark object now, as long - // as the shift drops the relevent bits into the low 32 bits. Note that + // as the shift drops the relevant bits into the low 32 bits. Note that // Java spec says that HashCode is an int so there's no point in capturing // an 'X'-sized hashcode (32 in 32-bit build or 64 in 64-bit build). hshifted_header = ConvX2I(hshifted_header); @@ -3255,7 +3255,7 @@ } //---------------------------inline_native_getClass---------------------------- -// Build special case code for calls to hashCode on an object. +// Build special case code for calls to getClass on an object. bool LibraryCallKit::inline_native_getClass() { Node* obj = null_check_receiver(callee()); if (stopped()) return true; @@ -4594,7 +4594,7 @@ } // The memory edges above are precise in order to model effects around - // array copyies accurately to allow value numbering of field loads around + // array copies accurately to allow value numbering of field loads around // arraycopy. Such field loads, both before and after, are common in Java // collections and similar classes involving header/array data structures. // diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/live.cpp --- a/src/share/vm/opto/live.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/live.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -39,7 +39,7 @@ // Leftover bits become the new live-in for the predecessor block, and the pred // block is put on the worklist. // The locally live-in stuff is computed once and added to predecessor -// live-out sets. This seperate compilation is done in the outer loop below. +// live-out sets. This separate compilation is done in the outer loop below. PhaseLive::PhaseLive( const PhaseCFG &cfg, LRG_List &names, Arena *arena ) : Phase(LIVE), _cfg(cfg), _names(names), _arena(arena), _live(0) { } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/locknode.cpp --- a/src/share/vm/opto/locknode.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/locknode.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -121,7 +121,7 @@ kill_dead_locals(); pop(); // Pop oop to unlock - // Because monitors are guarenteed paired (else we bail out), we know + // Because monitors are guaranteed paired (else we bail out), we know // the matching Lock for this Unlock. Hence we know there is no need // for a null check on Unlock. shared_unlock(map()->peek_monitor_box(), map()->peek_monitor_obj()); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/loopTransform.cpp --- a/src/share/vm/opto/loopTransform.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/loopTransform.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -119,7 +119,7 @@ //---------------------is_invariant_addition----------------------------- // Return nonzero index of invariant operand for an Add or Sub -// of (nonconstant) invariant and variant values. Helper for reassoicate_invariants. +// of (nonconstant) invariant and variant values. Helper for reassociate_invariants. int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) { int op = n->Opcode(); if (op == Op_AddI || op == Op_SubI) { @@ -520,7 +520,7 @@ //------------------------------policy_align----------------------------------- // Return TRUE or FALSE if the loop should be cache-line aligned. Gather the // expression that does the alignment. Note that only one array base can be -// aligned in a loop (unless the VM guarentees mutual alignment). Note that +// aligned in a loop (unless the VM guarantees mutual alignment). Note that // if we vectorize short memory ops into longer memory ops, we may want to // increase alignment. bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/loopUnswitch.cpp --- a/src/share/vm/opto/loopUnswitch.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/loopUnswitch.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -131,7 +131,7 @@ ProjNode* proj_false = invar_iff->proj_out(0)->as_Proj(); - // Hoist invariant casts out of each loop to the appropiate + // Hoist invariant casts out of each loop to the appropriate // control projection. Node_List worklist; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/loopnode.cpp --- a/src/share/vm/opto/loopnode.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/loopnode.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -274,7 +274,7 @@ // // Canonicalize the condition on the test. If we can exactly determine // the trip-counter exit value, then set limit to that value and use - // a '!=' test. Otherwise use conditon '<' for count-up loops and + // a '!=' test. Otherwise use condition '<' for count-up loops and // '>' for count-down loops. If the condition is inverted and we will // be rolling through MININT to MAXINT, then bail out. @@ -290,7 +290,7 @@ // If compare points to incr, we are ok. Otherwise the compare // can directly point to the phi; in this case adjust the compare so that - // it points to the incr by adusting the limit. + // it points to the incr by adjusting the limit. if( cmp->in(1) == phi || cmp->in(2) == phi ) limit = gvn->transform(new (C, 3) AddINode(limit,stride)); @@ -471,7 +471,7 @@ lazy_replace( x, l ); set_idom(l, init_control, dom_depth(x)); - // Check for immediately preceeding SafePoint and remove + // Check for immediately preceding SafePoint and remove Node *sfpt2 = le->in(0); if( sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2)) lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control)); @@ -1506,7 +1506,7 @@ // Build Dominators for elision of NULL checks & loop finding. // Since nodes do not have a slot for immediate dominator, make - // a persistant side array for that info indexed on node->_idx. + // a persistent side array for that info indexed on node->_idx. _idom_size = C->unique(); _idom = NEW_RESOURCE_ARRAY( Node*, _idom_size ); _dom_depth = NEW_RESOURCE_ARRAY( uint, _idom_size ); @@ -1529,7 +1529,7 @@ // Given dominators, try to find inner loops with calls that must // always be executed (call dominates loop tail). These loops do - // not need a seperate safepoint. + // not need a separate safepoint. Node_List cisstack(a); _ltree_root->check_safepts(visited, cisstack); @@ -2332,7 +2332,7 @@ if (done) { // All of n's inputs have been processed, complete post-processing. - // Compute earilest point this Node can go. + // Compute earliest point this Node can go. // CFG, Phi, pinned nodes already know their controlling input. if (!has_node(n)) { // Record earliest legal location @@ -2672,9 +2672,9 @@ pinned = false; } if( pinned ) { - IdealLoopTree *choosen_loop = get_loop(n->is_CFG() ? n : get_ctrl(n)); - if( !choosen_loop->_child ) // Inner loop? - choosen_loop->_body.push(n); // Collect inner loops + IdealLoopTree *chosen_loop = get_loop(n->is_CFG() ? n : get_ctrl(n)); + if( !chosen_loop->_child ) // Inner loop? + chosen_loop->_body.push(n); // Collect inner loops return; } } else { // No slot zero @@ -2746,9 +2746,9 @@ set_ctrl(n, least); // Collect inner loop bodies - IdealLoopTree *choosen_loop = get_loop(least); - if( !choosen_loop->_child ) // Inner loop? - choosen_loop->_body.push(n);// Collect inner loops + IdealLoopTree *chosen_loop = get_loop(least); + if( !chosen_loop->_child ) // Inner loop? + chosen_loop->_body.push(n);// Collect inner loops } #ifndef PRODUCT diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/loopnode.hpp --- a/src/share/vm/opto/loopnode.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/loopnode.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -390,7 +390,7 @@ // Return TRUE or FALSE if the loop should be cache-line aligned. // Gather the expression that does the alignment. Note that only - // one array base can be aligned in a loop (unless the VM guarentees + // one array base can be aligned in a loop (unless the VM guarantees // mutual alignment). Note that if we vectorize short memory ops // into longer memory ops, we may want to increase alignment. bool policy_align( PhaseIdealLoop *phase ) const; @@ -403,7 +403,7 @@ // Reassociate invariant add and subtract expressions. Node* reassociate_add_sub(Node* n1, PhaseIdealLoop *phase); // Return nonzero index of invariant operand if invariant and variant - // are combined with an Add or Sub. Helper for reassoicate_invariants. + // are combined with an Add or Sub. Helper for reassociate_invariants. int is_invariant_addition(Node* n, PhaseIdealLoop *phase); // Return true if n is invariant diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/loopopts.cpp --- a/src/share/vm/opto/loopopts.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/loopopts.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -97,7 +97,7 @@ // (Note: This tweaking with igvn only works because x is a new node.) _igvn.set_type(x, t); // If x is a TypeNode, capture any more-precise type permanently into Node - // othewise it will be not updated during igvn->transform since + // otherwise it will be not updated during igvn->transform since // igvn->type(x) is set to x->Value() already. x->raise_bottom_type(t); Node *y = x->Identity(&_igvn); @@ -879,7 +879,7 @@ Node *x_ctrl = NULL; if( u->is_Phi() ) { // Replace all uses of normal nodes. Replace Phi uses - // individually, so the seperate Nodes can sink down + // individually, so the separate Nodes can sink down // different paths. uint k = 1; while( u->in(k) != n ) k++; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/machnode.cpp --- a/src/share/vm/opto/machnode.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/machnode.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -136,7 +136,7 @@ // Size of instruction in bytes uint MachNode::size(PhaseRegAlloc *ra_) const { // If a virtual was not defined for this specific instruction, - // Call the helper which finds the size by emiting the bits. + // Call the helper which finds the size by emitting the bits. return MachNode::emit_size(ra_); } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/macro.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -64,6 +64,7 @@ uint old_unique = C->unique(); Node* new_in = old_sosn->clone(jvms_adj, sosn_map); if (old_unique != C->unique()) { + new_in->set_req(0, newcall->in(0)); // reset control edge new_in = transform_later(new_in); // Register new node. } old_in = new_in; @@ -215,7 +216,7 @@ const TypeOopPtr *tinst = phase->C->get_adr_type(alias_idx)->isa_oopptr(); while (true) { if (mem == alloc_mem || mem == start_mem ) { - return mem; // hit one of our sentinals + return mem; // hit one of our sentinels } else if (mem->is_MergeMem()) { mem = mem->as_MergeMem()->memory_at(alias_idx); } else if (mem->is_Proj() && mem->as_Proj()->_con == TypeFunc::Memory) { @@ -250,6 +251,15 @@ assert(adr_idx == Compile::AliasIdxRaw, "address must match or be raw"); } mem = mem->in(MemNode::Memory); + } else if (mem->Opcode() == Op_SCMemProj) { + assert(mem->in(0)->is_LoadStore(), "sanity"); + const TypePtr* atype = mem->in(0)->in(MemNode::Address)->bottom_type()->is_ptr(); + int adr_idx = Compile::current()->get_alias_index(atype); + if (adr_idx == alias_idx) { + assert(false, "Object is not scalar replaceable if a LoadStore node access its field"); + return NULL; + } + mem = mem->in(0)->in(MemNode::Memory); } else { return mem; } @@ -329,8 +339,15 @@ return NULL; } values.at_put(j, val); + } else if (val->Opcode() == Op_SCMemProj) { + assert(val->in(0)->is_LoadStore(), "sanity"); + assert(false, "Object is not scalar replaceable if a LoadStore node access its field"); + return NULL; } else { +#ifdef ASSERT + val->dump(); assert(false, "unknown node on this path"); +#endif return NULL; // unknown node on this path } } @@ -1651,7 +1668,7 @@ if (UseOptoBiasInlining) { /* - * See the full descrition in MacroAssembler::biased_locking_enter(). + * See the full description in MacroAssembler::biased_locking_enter(). * * if( (mark_word & biased_lock_mask) == biased_lock_pattern ) { * // The object is biased. @@ -1887,7 +1904,7 @@ if (UseOptoBiasInlining) { // Check for biased locking unlock case, which is a no-op. - // See the full descrition in MacroAssembler::biased_locking_exit(). + // See the full description in MacroAssembler::biased_locking_exit(). region = new (C, 4) RegionNode(4); // create a Phi for the memory state mem_phi = new (C, 4) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/matcher.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -897,7 +897,7 @@ #ifdef ASSERT _new2old_map.map(m->_idx, n); #endif - mstack.push(m, Post_Visit, n, i); // Don't neet to visit + mstack.push(m, Post_Visit, n, i); // Don't need to visit mstack.push(m->in(0), Visit, m, 0); } else { mstack.push(m, Visit, n, i); @@ -1267,7 +1267,7 @@ } } - // Not forceably cloning. If shared, put it into a register. + // Not forceable cloning. If shared, put it into a register. return shared; } @@ -1542,7 +1542,7 @@ // This is what my child will give me. int opnd_class_instance = s->_rule[op]; // Choose between operand class or not. - // This is what I will recieve. + // This is what I will receive. int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op; // New rule for child. Chase operand classes to get the actual rule. int newrule = s->_rule[catch_op]; @@ -1707,11 +1707,18 @@ void Matcher::find_shared( Node *n ) { // Allocate stack of size C->unique() * 2 to avoid frequent realloc MStack mstack(C->unique() * 2); + // Mark nodes as address_visited if they are inputs to an address expression + VectorSet address_visited(Thread::current()->resource_area()); mstack.push(n, Visit); // Don't need to pre-visit root node while (mstack.is_nonempty()) { n = mstack.node(); // Leave node on stack Node_State nstate = mstack.state(); + uint nop = n->Opcode(); if (nstate == Pre_Visit) { + if (address_visited.test(n->_idx)) { // Visited in address already? + // Flag as visited and shared now. + set_visited(n); + } if (is_visited(n)) { // Visited already? // Node is shared and has no reason to clone. Flag it as shared. // This causes it to match into a register for the sharing. @@ -1726,7 +1733,7 @@ set_visited(n); // Flag as visited now bool mem_op = false; - switch( n->Opcode() ) { // Handle some opcodes special + switch( nop ) { // Handle some opcodes special case Op_Phi: // Treat Phis as shared roots case Op_Parm: case Op_Proj: // All handled specially during matching @@ -1887,34 +1894,51 @@ // to have a single use so force sharing here. set_shared(m->in(AddPNode::Base)->in(1)); } + + // Some inputs for address expression are not put on stack + // to avoid marking them as shared and forcing them into register + // if they are used only in address expressions. + // But they should be marked as shared if there are other uses + // besides address expressions. + Node *off = m->in(AddPNode::Offset); - if( off->is_Con() ) { - set_visited(m); // Flag as visited now + if( off->is_Con() && + // When there are other uses besides address expressions + // put it on stack and mark as shared. + !is_visited(m) ) { + address_visited.test_set(m->_idx); // Flag as address_visited Node *adr = m->in(AddPNode::Address); // Intel, ARM and friends can handle 2 adds in addressing mode if( clone_shift_expressions && adr->is_AddP() && // AtomicAdd is not an addressing expression. // Cheap to find it by looking for screwy base. - !adr->in(AddPNode::Base)->is_top() ) { - set_visited(adr); // Flag as visited now + !adr->in(AddPNode::Base)->is_top() && + // Are there other uses besides address expressions? + !is_visited(adr) ) { + address_visited.set(adr->_idx); // Flag as address_visited Node *shift = adr->in(AddPNode::Offset); // Check for shift by small constant as well if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && - shift->in(2)->get_int() <= 3 ) { - set_visited(shift); // Flag as visited now + shift->in(2)->get_int() <= 3 && + // Are there other uses besides address expressions? + !is_visited(shift) ) { + address_visited.set(shift->_idx); // Flag as address_visited mstack.push(shift->in(2), Visit); + Node *conv = shift->in(1); #ifdef _LP64 // Allow Matcher to match the rule which bypass // ConvI2L operation for an array index on LP64 // if the index value is positive. - if( shift->in(1)->Opcode() == Op_ConvI2L && - shift->in(1)->as_Type()->type()->is_long()->_lo >= 0 ) { - set_visited(shift->in(1)); // Flag as visited now - mstack.push(shift->in(1)->in(1), Pre_Visit); + if( conv->Opcode() == Op_ConvI2L && + conv->as_Type()->type()->is_long()->_lo >= 0 && + // Are there other uses besides address expressions? + !is_visited(conv) ) { + address_visited.set(conv->_idx); // Flag as address_visited + mstack.push(conv->in(1), Pre_Visit); } else #endif - mstack.push(shift->in(1), Pre_Visit); + mstack.push(conv, Pre_Visit); } else { mstack.push(shift, Pre_Visit); } @@ -1942,7 +1966,7 @@ // BoolNode::match_edge always returns a zero. // We reorder the Op_If in a pre-order manner, so we can visit without - // accidently sharing the Cmp (the Bool and the If make 2 users). + // accidentally sharing the Cmp (the Bool and the If make 2 users). n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool } else if (nstate == Post_Visit) { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/memnode.cpp --- a/src/share/vm/opto/memnode.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/memnode.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -100,12 +100,12 @@ while (prev != result) { prev = result; if (result == start_mem) - break; // hit one of our sentinals + break; // hit one of our sentinels // skip over a call which does not affect this memory slice if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) { Node *proj_in = result->in(0); if (proj_in->is_Allocate() && proj_in->_idx == instance_id) { - break; // hit one of our sentinals + break; // hit one of our sentinels } else if (proj_in->is_Call()) { CallNode *call = proj_in->as_Call(); if (!call->may_modify(t_adr, phase)) { @@ -198,7 +198,7 @@ // If not, we can update the input infinitely along a MergeMem cycle // Equivalent code in PhiNode::Ideal Node* m = phase->transform(mmem); - // If tranformed to a MergeMem, get the desired slice + // If transformed to a MergeMem, get the desired slice // Otherwise the returned node represents memory for every slice mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m; // Update input if it is progress over what we have now @@ -778,7 +778,7 @@ adr_type->offset() == arrayOopDesc::length_offset_in_bytes()), "use LoadRangeNode instead"); switch (bt) { - case T_BOOLEAN: + case T_BOOLEAN: return new (C, 3) LoadUBNode(ctl, mem, adr, adr_type, rt->is_int() ); case T_BYTE: return new (C, 3) LoadBNode (ctl, mem, adr, adr_type, rt->is_int() ); case T_INT: return new (C, 3) LoadINode (ctl, mem, adr, adr_type, rt->is_int() ); case T_CHAR: return new (C, 3) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int() ); @@ -970,7 +970,7 @@ } // Search for an existing data phi which was generated before for the same - // instance's field to avoid infinite genertion of phis in a loop. + // instance's field to avoid infinite generation of phis in a loop. Node *region = mem->in(0); if (is_instance_field_load_with_local_phi(region)) { const TypePtr *addr_t = in(MemNode::Address)->bottom_type()->isa_ptr(); @@ -1066,11 +1066,11 @@ break; } } - LoadNode* load = NULL; - if (allocation != NULL && base->in(load_index)->is_Load()) { - load = base->in(load_index)->as_Load(); - } - if (load != NULL && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) { + bool has_load = ( allocation != NULL && + (base->in(load_index)->is_Load() || + base->in(load_index)->is_DecodeN() && + base->in(load_index)->in(1)->is_Load()) ); + if (has_load && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) { // Push the loads from the phi that comes from valueOf up // through it to allow elimination of the loads and the recovery // of the original value. @@ -1106,11 +1106,20 @@ result->set_req(load_index, in2); return result; } - } else if (base->is_Load()) { + } else if (base->is_Load() || + base->is_DecodeN() && base->in(1)->is_Load()) { + if (base->is_DecodeN()) { + // Get LoadN node which loads cached Integer object + base = base->in(1); + } // Eliminate the load of Integer.value for integers from the cache // array by deriving the value from the index into the array. // Capture the offset of the load and then reverse the computation. Node* load_base = base->in(Address)->in(AddPNode::Base); + if (load_base->is_DecodeN()) { + // Get LoadN node which loads IntegerCache.cache field + load_base = load_base->in(1); + } if (load_base != NULL) { Compile::AliasType* atp = phase->C->alias_type(load_base->adr_type()); intptr_t cache_offset; @@ -1245,7 +1254,7 @@ // (This tweaking with igvn only works because x is a new node.) igvn->set_type(x, t); // If x is a TypeNode, capture any more-precise type permanently into Node - // othewise it will be not updated during igvn->transform since + // otherwise it will be not updated during igvn->transform since // igvn->type(x) is set to x->Value() already. x->raise_bottom_type(t); Node *y = x->Identity(igvn); @@ -1607,6 +1616,22 @@ return LoadNode::Ideal(phase, can_reshape); } +//--------------------------LoadUBNode::Ideal------------------------------------- +// +// If the previous store is to the same address as this load, +// and the value stored was larger than a byte, replace this load +// with the value stored truncated to a byte. If no truncation is +// needed, the replacement is done in LoadNode::Identity(). +// +Node* LoadUBNode::Ideal(PhaseGVN* phase, bool can_reshape) { + Node* mem = in(MemNode::Memory); + Node* value = can_see_stored_value(mem, phase); + if (value && !phase->type(value)->higher_equal(_type)) + return new (phase->C, 3) AndINode(value, phase->intcon(0xFF)); + // Identity call will handle the case where truncation is not needed. + return LoadNode::Ideal(phase, can_reshape); +} + //--------------------------LoadUSNode::Ideal------------------------------------- // // If the previous store is to the same address as this load, @@ -2582,7 +2607,7 @@ // capturing of nearby memory operations. // // During macro-expansion, all captured initializations which store -// constant values of 32 bits or smaller are coalesced (if advantagous) +// constant values of 32 bits or smaller are coalesced (if advantageous) // into larger 'tiles' 32 or 64 bits. This allows an object to be // initialized in fewer memory operations. Memory words which are // covered by neither tiles nor non-constant stores are pre-zeroed @@ -3669,7 +3694,7 @@ else if (old_mmem != NULL) { new_mem = old_mmem->memory_at(i); } - // else preceeding memory was not a MergeMem + // else preceding memory was not a MergeMem // replace equivalent phis (unfortunately, they do not GVN together) if (new_mem != NULL && new_mem != new_base && diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/memnode.hpp --- a/src/share/vm/opto/memnode.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/memnode.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -207,6 +207,19 @@ virtual BasicType memory_type() const { return T_BYTE; } }; +//------------------------------LoadUBNode------------------------------------- +// Load a unsigned byte (8bits unsigned) from memory +class LoadUBNode : public LoadNode { +public: + LoadUBNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt* ti = TypeInt::UBYTE ) + : LoadNode(c, mem, adr, at, ti) {} + virtual int Opcode() const; + virtual uint ideal_reg() const { return Op_RegI; } + virtual Node* Ideal(PhaseGVN *phase, bool can_reshape); + virtual int store_Opcode() const { return Op_StoreB; } + virtual BasicType memory_type() const { return T_BYTE; } +}; + //------------------------------LoadUSNode------------------------------------- // Load an unsigned short/char (16bits unsigned) from memory class LoadUSNode : public LoadNode { @@ -232,6 +245,18 @@ virtual BasicType memory_type() const { return T_INT; } }; +//------------------------------LoadUI2LNode----------------------------------- +// Load an unsigned integer into long from memory +class LoadUI2LNode : public LoadNode { +public: + LoadUI2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeLong* t = TypeLong::UINT) + : LoadNode(c, mem, adr, at, t) {} + virtual int Opcode() const; + virtual uint ideal_reg() const { return Op_RegL; } + virtual int store_Opcode() const { return Op_StoreL; } + virtual BasicType memory_type() const { return T_LONG; } +}; + //------------------------------LoadRangeNode---------------------------------- // Load an array length from the array class LoadRangeNode : public LoadINode { @@ -757,10 +782,10 @@ // Model. Monitor-enter and volatile-load act as Aquires: no following ref // can be moved to before them. We insert a MemBar-Acquire after a FastLock or // volatile-load. Monitor-exit and volatile-store act as Release: no -// preceeding ref can be moved to after them. We insert a MemBar-Release +// preceding ref can be moved to after them. We insert a MemBar-Release // before a FastUnlock or volatile-store. All volatiles need to be // serialized, so we follow all volatile-stores with a MemBar-Volatile to -// seperate it from any following volatile-load. +// separate it from any following volatile-load. class MemBarNode: public MultiNode { virtual uint hash() const ; // { return NO_HASH; } virtual uint cmp( const Node &n ) const ; // Always fail, except on self diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/mulnode.cpp --- a/src/share/vm/opto/mulnode.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/mulnode.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -486,20 +486,23 @@ return new (phase->C, 3) AndINode(ldus, phase->intcon(mask&0xFFFF)); } - // Masking sign bits off of a Byte? Let the matcher use an unsigned load - if( lop == Op_LoadB && - (!in(0) && load->in(0)) && - (mask == 0x000000FF) ) { - // Associate this node with the LoadB, so the matcher can see them together. - // If we don't do this, it is common for the LoadB to have one control - // edge, and the store or call containing this AndI to have a different - // control edge. This will cause Label_Root to group the AndI with - // the encoding store or call, so the matcher has no chance to match - // this AndI together with the LoadB. Setting the control edge here - // prevents Label_Root from grouping the AndI with the store or call, - // if it has a control edge that is inconsistent with the LoadB. - set_req(0, load->in(0)); - return this; + // Masking sign bits off of a Byte? Do an unsigned byte load. + if (lop == Op_LoadB && mask == 0x000000FF) { + return new (phase->C, 3) LoadUBNode(load->in(MemNode::Control), + load->in(MemNode::Memory), + load->in(MemNode::Address), + load->adr_type()); + } + + // Masking sign bits off of a Byte plus additional lower bits? Do + // an unsigned byte load plus an and. + if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) { + Node* ldub = new (phase->C, 3) LoadUBNode(load->in(MemNode::Control), + load->in(MemNode::Memory), + load->in(MemNode::Address), + load->adr_type()); + ldub = phase->transform(ldub); + return new (phase->C, 3) AndINode(ldub, phase->intcon(mask)); } // Masking off sign bits? Dont make them! @@ -599,12 +602,21 @@ if( !t2 || !t2->is_con() ) return MulNode::Ideal(phase, can_reshape); const jlong mask = t2->get_con(); - Node *rsh = in(1); - uint rop = rsh->Opcode(); + Node* in1 = in(1); + uint op = in1->Opcode(); + + // Masking sign bits off of an integer? Do an unsigned integer to long load. + if (op == Op_ConvI2L && in1->in(1)->Opcode() == Op_LoadI && mask == 0x00000000FFFFFFFFL) { + Node* load = in1->in(1); + return new (phase->C, 3) LoadUI2LNode(load->in(MemNode::Control), + load->in(MemNode::Memory), + load->in(MemNode::Address), + load->adr_type()); + } // Masking off sign bits? Dont make them! - if( rop == Op_RShiftL ) { - const TypeInt *t12 = phase->type(rsh->in(2))->isa_int(); + if (op == Op_RShiftL) { + const TypeInt *t12 = phase->type(in1->in(2))->isa_int(); if( t12 && t12->is_con() ) { // Shift is by a constant int shift = t12->get_con(); shift &= BitsPerJavaLong - 1; // semantics of Java shifts @@ -613,7 +625,7 @@ // bits survive. NO sign-extension bits survive the maskings. if( (sign_bits_mask & mask) == 0 ) { // Use zero-fill shift instead - Node *zshift = phase->transform(new (phase->C, 3) URShiftLNode(rsh->in(1),rsh->in(2))); + Node *zshift = phase->transform(new (phase->C, 3) URShiftLNode(in1->in(1), in1->in(2))); return new (phase->C, 3) AndLNode( zshift, in(2) ); } } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/node.cpp --- a/src/share/vm/opto/node.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/node.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -968,22 +968,23 @@ // Example: when reshape "(X+3)+4" into "X+7" you must leave the Node for // "X+3" unchanged in case it is shared. // -// If you modify the 'this' pointer's inputs, you must use 'set_req' with -// def-use info. If you are making a new Node (either as the new root or -// some new internal piece) you must NOT use set_req with def-use info. -// You can make a new Node with either 'new' or 'clone'. In either case, -// def-use info is (correctly) not generated. +// If you modify the 'this' pointer's inputs, you should use +// 'set_req'. If you are making a new Node (either as the new root or +// some new internal piece) you may use 'init_req' to set the initial +// value. You can make a new Node with either 'new' or 'clone'. In +// either case, def-use info is correctly maintained. +// // Example: reshape "(X+3)+4" into "X+7": -// set_req(1,in(1)->in(1) /* grab X */, du /* must use DU on 'this' */); -// set_req(2,phase->intcon(7),du); +// set_req(1, in(1)->in(1)); +// set_req(2, phase->intcon(7)); // return this; -// Example: reshape "X*4" into "X<<1" -// return new (C,3) LShiftINode( in(1), phase->intcon(1) ); +// Example: reshape "X*4" into "X<<2" +// return new (C,3) LShiftINode(in(1), phase->intcon(2)); // // You must call 'phase->transform(X)' on any new Nodes X you make, except -// for the returned root node. Example: reshape "X*31" with "(X<<5)-1". +// for the returned root node. Example: reshape "X*31" with "(X<<5)-X". // Node *shift=phase->transform(new(C,3)LShiftINode(in(1),phase->intcon(5))); -// return new (C,3) AddINode(shift, phase->intcon(-1)); +// return new (C,3) AddINode(shift, in(1)); // // When making a Node for a constant use 'phase->makecon' or 'phase->intcon'. // These forms are faster than 'phase->transform(new (C,1) ConNode())' and Do @@ -1679,7 +1680,7 @@ if (visited.member(this)) return; visited.push(this); - // Walk over all input edges, checking for correspondance + // Walk over all input edges, checking for correspondence for( i = 0; i < len(); i++ ) { n = in(i); if (n != NULL && !n->is_top()) { @@ -1723,7 +1724,7 @@ // Contained in new_space or old_space? VectorSet *v = C->node_arena()->contains(n) ? &new_space : &old_space; // Check for visited in the proper space. Numberings are not unique - // across spaces so we need a seperate VectorSet for each space. + // across spaces so we need a separate VectorSet for each space. if( v->test_set(n->_idx) ) return; if (n->is_Con() && n->bottom_type() == Type::TOP) { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/node.hpp --- a/src/share/vm/opto/node.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/node.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -257,7 +257,7 @@ Node **_in; // Array of use-def references to Nodes Node **_out; // Array of def-use references to Nodes - // Input edges are split into two catagories. Required edges are required + // Input edges are split into two categories. Required edges are required // for semantic correctness; order is important and NULLs are allowed. // Precedence edges are used to help determine execution order and are // added, e.g., for scheduling purposes. They are unordered and not @@ -854,7 +854,7 @@ // If the hash function returns the special sentinel value NO_HASH, // the node is guaranteed never to compare equal to any other node. - // If we accidently generate a hash with value NO_HASH the node + // If we accidentally generate a hash with value NO_HASH the node // won't go into the table and we'll lose a little optimization. enum { NO_HASH = 0 }; virtual uint hash() const; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/output.cpp --- a/src/share/vm/opto/output.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/output.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1171,7 +1171,7 @@ cb->flush_bundle(false); // The following logic is duplicated in the code ifdeffed for - // ENABLE_ZAP_DEAD_LOCALS which apppears above in this file. It + // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It // should be factored out. Or maybe dispersed to the nodes? // Special handling for SafePoint/Call Nodes @@ -1275,7 +1275,7 @@ } #ifdef ASSERT - // Check that oop-store preceeds the card-mark + // Check that oop-store precedes the card-mark else if( mach->ideal_Opcode() == Op_StoreCM ) { uint storeCM_idx = j; Node *oop_store = mach->in(mach->_cnt); // First precedence edge @@ -1291,7 +1291,7 @@ #endif else if( !n->is_Proj() ) { - // Remember the begining of the previous instruction, in case + // Remember the beginning of the previous instruction, in case // it's followed by a flag-kill and a null-check. Happens on // Intel all the time, with add-to-memory kind of opcodes. previous_offset = current_offset; @@ -1567,7 +1567,7 @@ compile.set_node_bundling_limit(_node_bundling_limit); - // This one is persistant within the Compile class + // This one is persistent within the Compile class _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max); // Allocate space for fixed-size arrays @@ -1666,7 +1666,7 @@ // Compute the latency of all the instructions. This is fairly simple, // because we already have a legal ordering. Walk over the instructions // from first to last, and compute the latency of the instruction based -// on the latency of the preceeding instruction(s). +// on the latency of the preceding instruction(s). void Scheduling::ComputeLocalLatenciesForward(const Block *bb) { #ifndef PRODUCT if (_cfg->C->trace_opto_output()) @@ -1931,7 +1931,7 @@ uint siz = _available.size(); // Conditional branches can support an instruction that - // is unconditionally executed and not dependant by the + // is unconditionally executed and not dependent by the // branch, OR a conditionally executed instruction if // the branch is taken. In practice, this means that // the first instruction at the branch target is @@ -1947,7 +1947,7 @@ #endif // At least 1 instruction is on the available list - // that is not dependant on the branch + // that is not dependent on the branch for (uint i = 0; i < siz; i++) { Node *d = _available[i]; const Pipeline *avail_pipeline = d->pipeline(); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/parse.hpp --- a/src/share/vm/opto/parse.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/parse.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -78,7 +78,7 @@ }; // See if it is OK to inline. - // The reciever is the inline tree for the caller. + // The receiver is the inline tree for the caller. // // The result is a temperature indication. If it is hot or cold, // inlining is immediate or undesirable. Otherwise, the info block diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/parse1.cpp --- a/src/share/vm/opto/parse1.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/parse1.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -607,7 +607,7 @@ if (control()->is_Region() && !block->is_loop_head() && !has_irreducible && !block->is_handler()) { // In the absence of irreducible loops, the Region and Phis // associated with a merge that doesn't involve a backedge can - // be simplfied now since the RPO parsing order guarantees + // be simplified now since the RPO parsing order guarantees // that any path which was supposed to reach here has already // been parsed or must be dead. Node* c = control(); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/parse2.cpp --- a/src/share/vm/opto/parse2.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/parse2.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -32,7 +32,7 @@ void Parse::array_load(BasicType elem_type) { const Type* elem = Type::TOP; Node* adr = array_addressing(elem_type, 0, &elem); - if (stopped()) return; // guarenteed null or range check + if (stopped()) return; // guaranteed null or range check _sp -= 2; // Pop array and index const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type); Node* ld = make_load(control(), adr, elem, elem_type, adr_type); @@ -43,7 +43,7 @@ //--------------------------------array_store---------------------------------- void Parse::array_store(BasicType elem_type) { Node* adr = array_addressing(elem_type, 1); - if (stopped()) return; // guarenteed null or range check + if (stopped()) return; // guaranteed null or range check Node* val = pop(); _sp -= 2; // Pop array and index const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type); @@ -1541,14 +1541,14 @@ case Bytecodes::_aaload: array_load(T_OBJECT); break; case Bytecodes::_laload: { a = array_addressing(T_LONG, 0); - if (stopped()) return; // guarenteed null or range check + if (stopped()) return; // guaranteed null or range check _sp -= 2; // Pop array and index push_pair( make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS)); break; } case Bytecodes::_daload: { a = array_addressing(T_DOUBLE, 0); - if (stopped()) return; // guarenteed null or range check + if (stopped()) return; // guaranteed null or range check _sp -= 2; // Pop array and index push_pair( make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES)); break; @@ -1560,7 +1560,7 @@ case Bytecodes::_fastore: array_store(T_FLOAT); break; case Bytecodes::_aastore: { d = array_addressing(T_OBJECT, 1); - if (stopped()) return; // guarenteed null or range check + if (stopped()) return; // guaranteed null or range check array_store_check(); c = pop(); // Oop to store b = pop(); // index (already used) @@ -1572,7 +1572,7 @@ } case Bytecodes::_lastore: { a = array_addressing(T_LONG, 2); - if (stopped()) return; // guarenteed null or range check + if (stopped()) return; // guaranteed null or range check c = pop_pair(); _sp -= 2; // Pop array and index store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS); @@ -1580,7 +1580,7 @@ } case Bytecodes::_dastore: { a = array_addressing(T_DOUBLE, 2); - if (stopped()) return; // guarenteed null or range check + if (stopped()) return; // guaranteed null or range check c = pop_pair(); _sp -= 2; // Pop array and index c = dstore_rounding(c); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/phase.cpp --- a/src/share/vm/opto/phase.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/phase.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -73,7 +73,7 @@ //------------------------------Phase------------------------------------------ Phase::Phase( PhaseNumber pnum ) : _pnum(pnum), C( pnum == Compiler ? NULL : Compile::current()) { - // Poll for requests from shutdown mechanism to quiesce comiler (4448539, 4448544). + // Poll for requests from shutdown mechanism to quiesce compiler (4448539, 4448544). // This is an effective place to poll, since the compiler is full of phases. // In particular, every inlining site uses a recursively created Parse phase. CompileBroker::maybe_block(); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/phaseX.cpp --- a/src/share/vm/opto/phaseX.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/phaseX.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -196,7 +196,7 @@ } //------------------------------hash_delete------------------------------------ -// Replace in hash table with sentinal +// Replace in hash table with sentinel bool NodeHash::hash_delete( const Node *n ) { Node *k; uint hash = n->hash(); @@ -207,7 +207,7 @@ uint key = hash & (_max-1); uint stride = key | 0x01; debug_only( uint counter = 0; ); - for( ; /* (k != NULL) && (k != _sentinal) */; ) { + for( ; /* (k != NULL) && (k != _sentinel) */; ) { debug_only( counter++ ); debug_only( _delete_probes++ ); k = _table[key]; // Get hashed value @@ -715,7 +715,7 @@ #ifdef ASSERT //------------------------------dead_loop_check-------------------------------- -// Check for a simple dead loop when a data node references itself direcly +// Check for a simple dead loop when a data node references itself directly // or through an other data node excluding cons and phis. void PhaseGVN::dead_loop_check( Node *n ) { // Phi may reference itself in a loop @@ -1359,7 +1359,7 @@ worklist.push(p); // Propagate change to user } } - // If we changed the reciever type to a call, we need to revisit + // If we changed the receiver type to a call, we need to revisit // the Catch following the call. It's looking for a non-NULL // receiver to know when to enable the regular fall-through path // in addition to the NullPtrException path diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/postaloc.cpp --- a/src/share/vm/opto/postaloc.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/postaloc.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -46,7 +46,7 @@ // be splitting live ranges for callee save registers to such // an extent that in large methods the chains can be very long // (50+). The conservative answer is to return true if we don't - // know as this prevents optimizations from occuring. + // know as this prevents optimizations from occurring. const int limit = 60; int i; @@ -286,7 +286,7 @@ // // n will be replaced with the old value but n might have // kills projections associated with it so remove them now so that - // yank_if_dead will be able to elminate the copy once the uses + // yank_if_dead will be able to eliminate the copy once the uses // have been transferred to the old[value]. for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node* use = n->fast_out(i); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/reg_split.cpp --- a/src/share/vm/opto/reg_split.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/reg_split.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -26,8 +26,8 @@ #include "incls/_reg_split.cpp.incl" //------------------------------Split-------------------------------------- -// Walk the graph in RPO and for each lrg which spills, propogate reaching -// definitions. During propogation, split the live range around regions of +// Walk the graph in RPO and for each lrg which spills, propagate reaching +// definitions. During propagation, split the live range around regions of // High Register Pressure (HRP). If a Def is in a region of Low Register // Pressure (LRP), it will not get spilled until we encounter a region of // HRP between it and one of its uses. We will spill at the transition @@ -88,7 +88,7 @@ } //------------------------------insert_proj------------------------------------ -// Insert the spill at chosen location. Skip over any interveneing Proj's or +// Insert the spill at chosen location. Skip over any intervening Proj's or // Phis. Skip over a CatchNode and projs, inserting in the fall-through block // instead. Update high-pressure indices. Create a new live range. void PhaseChaitin::insert_proj( Block *b, uint i, Node *spill, uint maxlrg ) { @@ -125,7 +125,7 @@ } //------------------------------split_DEF-------------------------------------- -// There are four catagories of Split; UP/DOWN x DEF/USE +// There are four categories of Split; UP/DOWN x DEF/USE // Only three of these really occur as DOWN/USE will always color // Any Split with a DEF cannot CISC-Spill now. Thus we need // two helper routines, one for Split DEFS (insert after instruction), @@ -726,7 +726,7 @@ // ********** Handle Crossing HRP Boundry ********** if( (insidx == b->_ihrp_index) || (insidx == b->_fhrp_index) ) { for( slidx = 0; slidx < spill_cnt; slidx++ ) { - // Check for need to split at HRP boundry - split if UP + // Check for need to split at HRP boundary - split if UP n1 = Reachblock[slidx]; // bail out if no reaching DEF if( n1 == NULL ) continue; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/runtime.cpp --- a/src/share/vm/opto/runtime.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/runtime.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1196,7 +1196,7 @@ // The following does not work because for one thing, the // thread state is wrong; it expects java, but it is native. -// Also, the invarients in a native stub are different and +// Also, the invariants in a native stub are different and // I'm not sure it is safe to have a MachCalRuntimeDirectNode // in there. // So for now, we do not zap in native stubs. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/split_if.cpp --- a/src/share/vm/opto/split_if.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/split_if.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -318,7 +318,7 @@ if( use->is_Phi() ) { // Phi uses in prior block // Grab the first Phi use; there may be many. - // Each will be handled as a seperate iteration of + // Each will be handled as a separate iteration of // the "while( phi->outcnt() )" loop. uint j; for( j = 1; j < use->req(); j++ ) diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/superword.cpp --- a/src/share/vm/opto/superword.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/superword.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -470,7 +470,7 @@ } //------------------------------stmts_can_pack--------------------------- -// Can s1 and s2 be in a pack with s1 immediately preceeding s2 and +// Can s1 and s2 be in a pack with s1 immediately preceding s2 and // s1 aligned at "align" bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) { if (isomorphic(s1, s2)) { @@ -869,7 +869,7 @@ for (uint i = start; i < end; i++) { if (!is_vector_use(p0, i)) { // For now, return false if not scalar promotion case (inputs are the same.) - // Later, implement PackNode and allow differring, non-vector inputs + // Later, implement PackNode and allow differing, non-vector inputs // (maybe just the ones from outside the block.) Node* p0_def = p0->in(i); for (uint j = 1; j < p->size(); j++) { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/superword.hpp --- a/src/share/vm/opto/superword.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/superword.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -308,7 +308,7 @@ void dependence_graph(); // Return a memory slice (node list) in predecessor order starting at "start" void mem_slice_preds(Node* start, Node* stop, GrowableArray &preds); - // Can s1 and s2 be in a pack with s1 immediately preceeding s2 and s1 aligned at "align" + // Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align" bool stmts_can_pack(Node* s1, Node* s2, int align); // Does s exist in a pack at position pos? bool exists_at(Node* s, uint pos); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/type.cpp --- a/src/share/vm/opto/type.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/type.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -226,6 +226,7 @@ TypeInt::CC_LE = TypeInt::make(-1, 0, WidenMin); TypeInt::CC_GE = TypeInt::make( 0, 1, WidenMin); // == TypeInt::BOOL TypeInt::BYTE = TypeInt::make(-128,127, WidenMin); // Bytes + TypeInt::UBYTE = TypeInt::make(0, 255, WidenMin); // Unsigned Bytes TypeInt::CHAR = TypeInt::make(0,65535, WidenMin); // Java chars TypeInt::SHORT = TypeInt::make(-32768,32767, WidenMin); // Java shorts TypeInt::POS = TypeInt::make(0,max_jint, WidenMin); // Non-neg values @@ -1022,6 +1023,7 @@ const TypeInt *TypeInt::CC_LE; // [-1,0] const TypeInt *TypeInt::CC_GE; // [0,1] == BOOL (!) const TypeInt *TypeInt::BYTE; // Bytes, -128 to 127 +const TypeInt *TypeInt::UBYTE; // Unsigned Bytes, 0 to 255 const TypeInt *TypeInt::CHAR; // Java chars, 0-65535 const TypeInt *TypeInt::SHORT; // Java shorts, -32768-32767 const TypeInt *TypeInt::POS; // Positive 32-bit integers or zero @@ -2455,7 +2457,7 @@ // code and dereferenced at the time the nmethod is made. Until that time, // it is not reasonable to do arithmetic with the addresses of oops (we don't // have access to the addresses!). This does not seem to currently happen, - // but this assertion here is to help prevent its occurrance. + // but this assertion here is to help prevent its occurence. tty->print_cr("Found oop constant with non-zero offset"); ShouldNotReachHere(); } @@ -2761,7 +2763,7 @@ // LCA is object_klass, but if we subclass from the top we can do better if( above_centerline(_ptr) ) { // if( _ptr == TopPTR || _ptr == AnyNull ) // If 'this' (InstPtr) is above the centerline and it is Object class - // then we can subclass in the Java class heirarchy. + // then we can subclass in the Java class hierarchy. if (klass()->equals(ciEnv::current()->Object_klass())) { // that is, tp's array type is a subtype of my klass return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, instance_id); @@ -3022,7 +3024,7 @@ //------------------------------xdual------------------------------------------ // Dual: do NOT dual on klasses. This means I do NOT understand the Java -// inheritence mechanism. +// inheritance mechanism. const Type *TypeInstPtr::xdual() const { return new TypeInstPtr( dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance_id() ); } @@ -3176,7 +3178,7 @@ bool chg = false; if (lo < min_lo) { lo = min_lo; chg = true; } if (hi > max_hi) { hi = max_hi; chg = true; } - // Negative length arrays will produce weird intermediate dead fath-path code + // Negative length arrays will produce weird intermediate dead fast-path code if (lo > hi) return TypeInt::ZERO; if (!chg) @@ -3358,7 +3360,7 @@ // LCA is object_klass, but if we subclass from the top we can do better if (above_centerline(tp->ptr())) { // If 'tp' is above the centerline and it is Object class - // then we can subclass in the Java class heirarchy. + // then we can subclass in the Java class hierarchy. if( tp->klass()->equals(ciEnv::current()->Object_klass()) ) { // that is, my array type is a subtype of 'tp' klass return make( ptr, _ary, _klass, _klass_is_exact, offset, instance_id ); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/opto/type.hpp --- a/src/share/vm/opto/type.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/opto/type.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -415,6 +415,7 @@ static const TypeInt *CC_LE; // [-1,0] static const TypeInt *CC_GE; // [0,1] == BOOL (!) static const TypeInt *BYTE; + static const TypeInt *UBYTE; static const TypeInt *CHAR; static const TypeInt *SHORT; static const TypeInt *POS; diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/prims/jvmtiEnv.cpp --- a/src/share/vm/prims/jvmtiEnv.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/prims/jvmtiEnv.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -99,6 +99,9 @@ } // otherwise, create the state state = JvmtiThreadState::state_for(java_thread); + if (state == NULL) { + return JVMTI_ERROR_THREAD_NOT_ALIVE; + } } state->env_thread_state(this)->set_agent_thread_local_storage_data((void*)data); return JVMTI_ERROR_NONE; @@ -1308,6 +1311,9 @@ // retrieve or create JvmtiThreadState. JvmtiThreadState* state = JvmtiThreadState::state_for(java_thread); + if (state == NULL) { + return JVMTI_ERROR_THREAD_NOT_ALIVE; + } uint32_t debug_bits = 0; if (is_thread_fully_suspended(java_thread, true, &debug_bits)) { err = get_frame_count(state, count_ptr); @@ -1329,6 +1335,12 @@ HandleMark hm(current_thread); uint32_t debug_bits = 0; + // retrieve or create the state + JvmtiThreadState* state = JvmtiThreadState::state_for(java_thread); + if (state == NULL) { + return JVMTI_ERROR_THREAD_NOT_ALIVE; + } + // Check if java_thread is fully suspended if (!is_thread_fully_suspended(java_thread, true /* wait for suspend completion */, &debug_bits)) { return JVMTI_ERROR_THREAD_NOT_SUSPENDED; @@ -1399,9 +1411,6 @@ // It's fine to update the thread state here because no JVMTI events // shall be posted for this PopFrame. - // retreive or create the state - JvmtiThreadState* state = JvmtiThreadState::state_for(java_thread); - state->update_for_pop_top_frame(); java_thread->set_popframe_condition(JavaThread::popframe_pending_bit); // Set pending step flag for this popframe and it is cleared when next @@ -1445,6 +1454,11 @@ ResourceMark rm; uint32_t debug_bits = 0; + JvmtiThreadState *state = JvmtiThreadState::state_for(java_thread); + if (state == NULL) { + return JVMTI_ERROR_THREAD_NOT_ALIVE; + } + if (!JvmtiEnv::is_thread_fully_suspended(java_thread, true, &debug_bits)) { return JVMTI_ERROR_THREAD_NOT_SUSPENDED; } @@ -1464,7 +1478,6 @@ assert(vf->frame_pointer() != NULL, "frame pointer mustn't be NULL"); - JvmtiThreadState *state = JvmtiThreadState::state_for(java_thread); int frame_number = state->count_frames() - depth; state->env_thread_state(this)->set_frame_pop(frame_number); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/prims/jvmtiEnvBase.cpp --- a/src/share/vm/prims/jvmtiEnvBase.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/prims/jvmtiEnvBase.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -94,6 +94,35 @@ } +bool +JvmtiEnvBase::is_valid() { + jint value = 0; + + // This object might not be a JvmtiEnvBase so we can't assume + // the _magic field is properly aligned. Get the value in a safe + // way and then check against JVMTI_MAGIC. + + switch (sizeof(_magic)) { + case 2: + value = Bytes::get_native_u2((address)&_magic); + break; + + case 4: + value = Bytes::get_native_u4((address)&_magic); + break; + + case 8: + value = Bytes::get_native_u8((address)&_magic); + break; + + default: + guarantee(false, "_magic field is an unexpected size"); + } + + return value == JVMTI_MAGIC; +} + + JvmtiEnvBase::JvmtiEnvBase() : _env_event_enable() { _env_local_storage = NULL; _tag_map = NULL; @@ -1322,6 +1351,12 @@ HandleMark hm(current_thread); uint32_t debug_bits = 0; + // retrieve or create the state + JvmtiThreadState* state = JvmtiThreadState::state_for(java_thread); + if (state == NULL) { + return JVMTI_ERROR_THREAD_NOT_ALIVE; + } + // Check if java_thread is fully suspended if (!is_thread_fully_suspended(java_thread, true /* wait for suspend completion */, @@ -1329,9 +1364,6 @@ return JVMTI_ERROR_THREAD_NOT_SUSPENDED; } - // retreive or create the state - JvmtiThreadState* state = JvmtiThreadState::state_for(java_thread); - // Check to see if a ForceEarlyReturn was already in progress if (state->is_earlyret_pending()) { // Probably possible for JVMTI clients to trigger this, but the diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/prims/jvmtiEnvBase.hpp --- a/src/share/vm/prims/jvmtiEnvBase.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/prims/jvmtiEnvBase.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -120,7 +120,7 @@ public: - bool is_valid() { return _magic == JVMTI_MAGIC; } + bool is_valid(); bool is_retransformable() { return _is_retransformable; } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/prims/jvmtiEventController.cpp --- a/src/share/vm/prims/jvmtiEventController.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/prims/jvmtiEventController.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -478,6 +478,11 @@ // set external state accordingly. Only thread-filtered events are included. jlong JvmtiEventControllerPrivate::recompute_thread_enabled(JvmtiThreadState *state) { + if (state == NULL) { + // associated JavaThread is exiting + return (jlong)0; + } + jlong was_any_env_enabled = state->thread_event_enable()->_event_enabled.get_bits(); jlong any_env_enabled = 0; @@ -553,6 +558,7 @@ { MutexLocker mu(Threads_lock); //hold the Threads_lock for the iteration for (JavaThread *tp = Threads::first(); tp != NULL; tp = tp->next()) { + // state_for_while_locked() makes tp->is_exiting() check JvmtiThreadState::state_for_while_locked(tp); // create the thread state if missing } }// release Threads_lock diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/prims/jvmtiExport.cpp --- a/src/share/vm/prims/jvmtiExport.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/prims/jvmtiExport.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -1872,6 +1872,9 @@ { // register the stub with the current dynamic code event collector JvmtiThreadState* state = JvmtiThreadState::state_for(JavaThread::current()); + // state can only be NULL if the current thread is exiting which + // should not happen since we're trying to post an event + guarantee(state != NULL, "attempt to register stub via an exiting thread"); JvmtiDynamicCodeEventCollector* collector = state->get_dynamic_code_event_collector(); guarantee(collector != NULL, "attempt to register stub without event collector"); collector->register_stub(name, code_begin, code_end); @@ -2253,6 +2256,9 @@ void JvmtiEventCollector::setup_jvmti_thread_state() { // set this event collector to be the current one. JvmtiThreadState* state = JvmtiThreadState::state_for(JavaThread::current()); + // state can only be NULL if the current thread is exiting which + // should not happen since we're trying to configure for event collection + guarantee(state != NULL, "exiting thread called setup_jvmti_thread_state"); if (is_vm_object_alloc_event()) { _prev = state->get_vm_object_alloc_event_collector(); state->set_vm_object_alloc_event_collector((JvmtiVMObjectAllocEventCollector *)this); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/prims/jvmtiImpl.cpp --- a/src/share/vm/prims/jvmtiImpl.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/prims/jvmtiImpl.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -238,6 +238,35 @@ } +bool +JvmtiRawMonitor::is_valid() { + int value = 0; + + // This object might not be a JvmtiRawMonitor so we can't assume + // the _magic field is properly aligned. Get the value in a safe + // way and then check against JVMTI_RM_MAGIC. + + switch (sizeof(_magic)) { + case 2: + value = Bytes::get_native_u2((address)&_magic); + break; + + case 4: + value = Bytes::get_native_u4((address)&_magic); + break; + + case 8: + value = Bytes::get_native_u8((address)&_magic); + break; + + default: + guarantee(false, "_magic field is an unexpected size"); + } + + return value == JVMTI_RM_MAGIC; +} + + // // class JvmtiBreakpoint // diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/prims/jvmtiImpl.hpp --- a/src/share/vm/prims/jvmtiImpl.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/prims/jvmtiImpl.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -349,7 +349,7 @@ ~JvmtiRawMonitor(); int magic() { return _magic; } const char *get_name() { return _name; } - bool is_valid() { return _magic == JVMTI_RM_MAGIC; } + bool is_valid(); }; // Onload pending raw monitors diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/prims/jvmtiRedefineClasses.cpp --- a/src/share/vm/prims/jvmtiRedefineClasses.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/prims/jvmtiRedefineClasses.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -831,6 +831,9 @@ ResourceMark rm(THREAD); JvmtiThreadState *state = JvmtiThreadState::state_for(JavaThread::current()); + // state can only be NULL if the current thread is exiting which + // should not happen since we're trying to do a RedefineClasses + guarantee(state != NULL, "exiting thread calling load_new_class_versions"); for (int i = 0; i < _class_count; i++) { oop mirror = JNIHandles::resolve_non_null(_class_defs[i].klass); // classes for primitives cannot be redefined @@ -1349,39 +1352,39 @@ // rewrite constant pool references in the methods: if (!rewrite_cp_refs_in_methods(scratch_class, THREAD)) { - // propogate failure back to caller + // propagate failure back to caller return false; } // rewrite constant pool references in the class_annotations: if (!rewrite_cp_refs_in_class_annotations(scratch_class, THREAD)) { - // propogate failure back to caller + // propagate failure back to caller return false; } // rewrite constant pool references in the fields_annotations: if (!rewrite_cp_refs_in_fields_annotations(scratch_class, THREAD)) { - // propogate failure back to caller + // propagate failure back to caller return false; } // rewrite constant pool references in the methods_annotations: if (!rewrite_cp_refs_in_methods_annotations(scratch_class, THREAD)) { - // propogate failure back to caller + // propagate failure back to caller return false; } // rewrite constant pool references in the methods_parameter_annotations: if (!rewrite_cp_refs_in_methods_parameter_annotations(scratch_class, THREAD)) { - // propogate failure back to caller + // propagate failure back to caller return false; } // rewrite constant pool references in the methods_default_annotations: if (!rewrite_cp_refs_in_methods_default_annotations(scratch_class, THREAD)) { - // propogate failure back to caller + // propagate failure back to caller return false; } @@ -1600,7 +1603,7 @@ byte_i_ref, THREAD)) { RC_TRACE_WITH_THREAD(0x02000000, THREAD, ("bad annotation_struct at %d", calc_num_annotations)); - // propogate failure back to caller + // propagate failure back to caller return false; } } @@ -1666,7 +1669,7 @@ byte_i_ref, THREAD)) { RC_TRACE_WITH_THREAD(0x02000000, THREAD, ("bad element_value at %d", calc_num_element_value_pairs)); - // propogate failure back to caller + // propagate failure back to caller return false; } } // end for each component @@ -1815,7 +1818,7 @@ // field. This is a nested annotation. if (!rewrite_cp_refs_in_annotation_struct(annotations_typeArray, byte_i_ref, THREAD)) { - // propogate failure back to caller + // propagate failure back to caller return false; } break; @@ -1842,7 +1845,7 @@ annotations_typeArray, byte_i_ref, THREAD)) { RC_TRACE_WITH_THREAD(0x02000000, THREAD, ("bad nested element_value at %d", calc_num_values)); - // propogate failure back to caller + // propagate failure back to caller return false; } } @@ -1886,7 +1889,7 @@ THREAD)) { RC_TRACE_WITH_THREAD(0x02000000, THREAD, ("bad field_annotations at %d", i)); - // propogate failure back to caller + // propagate failure back to caller return false; } } @@ -1923,7 +1926,7 @@ THREAD)) { RC_TRACE_WITH_THREAD(0x02000000, THREAD, ("bad method_annotations at %d", i)); - // propogate failure back to caller + // propagate failure back to caller return false; } } @@ -1991,7 +1994,7 @@ method_parameter_annotations, byte_i, THREAD)) { RC_TRACE_WITH_THREAD(0x02000000, THREAD, ("bad method_parameter_annotations at %d", calc_num_parameters)); - // propogate failure back to caller + // propagate failure back to caller return false; } } @@ -2041,7 +2044,7 @@ method_default_annotations, byte_i, THREAD)) { RC_TRACE_WITH_THREAD(0x02000000, THREAD, ("bad default element_value at %d", i)); - // propogate failure back to caller + // propagate failure back to caller return false; } } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/prims/jvmtiRedefineClassesTrace.hpp --- a/src/share/vm/prims/jvmtiRedefineClassesTrace.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/prims/jvmtiRedefineClassesTrace.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -49,8 +49,8 @@ // 0x00000400 | 1024 - previous class weak reference mgmt during // add previous ops (GC) // 0x00000800 | 2048 - previous class breakpoint mgmt -// 0x00001000 | 4096 - unused -// 0x00002000 | 8192 - unused +// 0x00001000 | 4096 - detect calls to obsolete methods +// 0x00002000 | 8192 - fail a guarantee() in addition to detection // 0x00004000 | 16384 - unused // 0x00008000 | 32768 - old/new method matching/add/delete // 0x00010000 | 65536 - impl details: CP size info diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/prims/jvmtiThreadState.hpp --- a/src/share/vm/prims/jvmtiThreadState.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/prims/jvmtiThreadState.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -314,17 +314,24 @@ void update_for_pop_top_frame(); // already holding JvmtiThreadState_lock - retrieve or create JvmtiThreadState + // Can return NULL if JavaThread is exiting. inline static JvmtiThreadState *state_for_while_locked(JavaThread *thread) { assert(JvmtiThreadState_lock->is_locked(), "sanity check"); JvmtiThreadState *state = thread->jvmti_thread_state(); if (state == NULL) { + if (thread->is_exiting()) { + // don't add a JvmtiThreadState to a thread that is exiting + return NULL; + } + state = new JvmtiThreadState(thread); } return state; } // retrieve or create JvmtiThreadState + // Can return NULL if JavaThread is exiting. inline static JvmtiThreadState *state_for(JavaThread *thread) { JvmtiThreadState *state = thread->jvmti_thread_state(); if (state == NULL) { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/extendedPC.hpp --- a/src/share/vm/runtime/extendedPC.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/extendedPC.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -23,7 +23,7 @@ */ // An ExtendedPC contains the _pc from a signal handler in a platform -// independant way. +// independent way. class ExtendedPC VALUE_OBJ_CLASS_SPEC { private: diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/fprofiler.cpp --- a/src/share/vm/runtime/fprofiler.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/fprofiler.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -988,7 +988,7 @@ void ThreadProfiler::record_tick_for_running_frame(JavaThread* thread, frame fr) { - // The tick happend in real code -> non VM code + // The tick happened in real code -> non VM code if (fr.is_interpreted_frame()) { interval_data_ref()->inc_interpreted(); record_interpreted_tick(thread, fr, tp_code, FlatProfiler::bytecode_ticks); @@ -1019,7 +1019,7 @@ } void ThreadProfiler::record_tick_for_calling_frame(JavaThread* thread, frame fr) { - // The tick happend in VM code + // The tick happened in VM code interval_data_ref()->inc_native(); if (fr.is_interpreted_frame()) { record_interpreted_tick(thread, fr, tp_native, FlatProfiler::bytecode_ticks_stub); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/frame.cpp --- a/src/share/vm/runtime/frame.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/frame.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -930,7 +930,7 @@ // => process callee's arguments // // Note: The expression stack can be empty if an exception - // occured during method resolution/execution. In all + // occurred during method resolution/execution. In all // cases we empty the expression stack completely be- // fore handling the exception (the exception handling // code in the interpreter calls a blocking runtime diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/frame.inline.hpp --- a/src/share/vm/runtime/frame.inline.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/frame.inline.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -22,7 +22,7 @@ * */ -// This file holds platform-independant bodies of inline functions for frames. +// This file holds platform-independent bodies of inline functions for frames. // Note: The bcx usually contains the bcp; however during GC it contains the bci // (changed by gc_prologue() and gc_epilogue()) to be methodOop position diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/globals.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -662,6 +662,12 @@ product(ccstrlist, OnOutOfMemoryError, "", \ "Run user-defined commands on first java.lang.OutOfMemoryError") \ \ + manageable(bool, HeapDumpBeforeFullGC, false, \ + "Dump heap to file before any major stop-world GC") \ + \ + manageable(bool, HeapDumpAfterFullGC, false, \ + "Dump heap to file after any major stop-world GC") \ + \ manageable(bool, HeapDumpOnOutOfMemoryError, false, \ "Dump heap to file when java.lang.OutOfMemoryError is thrown") \ \ @@ -1971,6 +1977,12 @@ product(bool, PrintHeapAtSIGBREAK, true, \ "Print heap layout in response to SIGBREAK") \ \ + manageable(bool, PrintClassHistogramBeforeFullGC, false, \ + "Print a class histogram before any major stop-world GC") \ + \ + manageable(bool, PrintClassHistogramAfterFullGC, false, \ + "Print a class histogram after any major stop-world GC") \ + \ manageable(bool, PrintClassHistogram, false, \ "Print a histogram of class instances") \ \ diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/mutex.hpp --- a/src/share/vm/runtime/mutex.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/mutex.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -82,7 +82,7 @@ // *in that order*. If their implementations change such that these // assumptions are violated, a whole lot of code will break. -// The default length of monitor name is choosen to be 64 to avoid false sharing. +// The default length of monitor name is chosen to be 64 to avoid false sharing. static const int MONITOR_NAME_LEN = 64; class Monitor : public CHeapObj { diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/orderAccess.hpp --- a/src/share/vm/runtime/orderAccess.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/orderAccess.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -31,7 +31,7 @@ // at runtime. // // In the following, the terms 'previous', 'subsequent', 'before', -// 'after', 'preceeding' and 'succeeding' refer to program order. The +// 'after', 'preceding' and 'succeeding' refer to program order. The // terms 'down' and 'below' refer to forward load or store motion // relative to program order, while 'up' and 'above' refer to backward // motion. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/os.cpp --- a/src/share/vm/runtime/os.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/os.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -207,7 +207,8 @@ VMThread::execute(&op1); Universe::print_heap_at_SIGBREAK(); if (PrintClassHistogram) { - VM_GC_HeapInspection op1(gclog_or_tty, true /* force full GC before heap inspection */); + VM_GC_HeapInspection op1(gclog_or_tty, true /* force full GC before heap inspection */, + true /* need_prologue */); VMThread::execute(&op1); } if (JvmtiExport::should_post_data_dump()) { @@ -943,7 +944,7 @@ assert(StackRedPages > 0 && StackYellowPages > 0,"Sanity check"); address sp = current_stack_pointer(); // Check if we have StackShadowPages above the yellow zone. This parameter - // is dependant on the depth of the maximum VM call stack possible from + // is dependent on the depth of the maximum VM call stack possible from // the handler for stack overflow. 'instanceof' in the stack overflow // handler or a println uses at least 8k stack of VM and native code // respectively. diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/safepoint.cpp --- a/src/share/vm/runtime/safepoint.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/safepoint.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -369,7 +369,7 @@ // Start suspended threads for(JavaThread *current = Threads::first(); current; current = current->next()) { - // A problem occuring on Solaris is when attempting to restart threads + // A problem occurring on Solaris is when attempting to restart threads // the first #cpus - 1 go well, but then the VMThread is preempted when we get // to the next one (since it has been running the longest). We then have // to wait for a cpu to become available before we can continue restarting diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/sharedRuntime.cpp --- a/src/share/vm/runtime/sharedRuntime.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/sharedRuntime.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -377,6 +377,32 @@ throw_and_post_jvmti_exception(thread, h_exception); } +// The interpreter code to call this tracing function is only +// called/generated when TraceRedefineClasses has the right bits +// set. Since obsolete methods are never compiled, we don't have +// to modify the compilers to generate calls to this function. +// +JRT_LEAF(int, SharedRuntime::rc_trace_method_entry( + JavaThread* thread, methodOopDesc* method)) + assert(RC_TRACE_IN_RANGE(0x00001000, 0x00002000), "wrong call"); + + if (method->is_obsolete()) { + // We are calling an obsolete method, but this is not necessarily + // an error. Our method could have been redefined just after we + // fetched the methodOop from the constant pool. + + // RC_TRACE macro has an embedded ResourceMark + RC_TRACE_WITH_THREAD(0x00001000, thread, + ("calling obsolete method '%s'", + method->name_and_sig_as_C_string())); + if (RC_TRACE_ENABLED(0x00002000)) { + // this option is provided to debug calls to obsolete methods + guarantee(false, "faulting at call to an obsolete method."); + } + } + return 0; +JRT_END + // ret_pc points into caller; we are returning caller's exception handler // for given exception address SharedRuntime::compute_compiled_exc_handler(nmethod* nm, address ret_pc, Handle& exception, diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/sharedRuntime.hpp --- a/src/share/vm/runtime/sharedRuntime.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/sharedRuntime.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -166,6 +166,9 @@ static void throw_and_post_jvmti_exception(JavaThread *thread, Handle h_exception); static void throw_and_post_jvmti_exception(JavaThread *thread, symbolOop name, const char *message = NULL); + // RedefineClasses() tracing support for obsolete method entry + static int rc_trace_method_entry(JavaThread* thread, methodOopDesc* m); + // To be used as the entry point for unresolved native methods. static address native_method_throw_unsatisfied_link_error_entry(); diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/signature.hpp --- a/src/share/vm/runtime/signature.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/signature.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -266,7 +266,7 @@ class NativeSignatureIterator: public SignatureIterator { private: methodHandle _method; -// We need seperate JNI and Java offset values because in 64 bit mode, +// We need separate JNI and Java offset values because in 64 bit mode, // the argument offsets are not in sync with the Java stack. // For example a long takes up 1 "C" stack entry but 2 Java stack entries. int _offset; // The java stack offset diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/thread.hpp --- a/src/share/vm/runtime/thread.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/thread.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -1345,6 +1345,13 @@ public: // Thread local information maintained by JVMTI. void set_jvmti_thread_state(JvmtiThreadState *value) { _jvmti_thread_state = value; } + // A JvmtiThreadState is lazily allocated. This jvmti_thread_state() + // getter is used to get this JavaThread's JvmtiThreadState if it has + // one which means NULL can be returned. JvmtiThreadState::state_for() + // is used to get the specified JavaThread's JvmtiThreadState if it has + // one or it allocates a new JvmtiThreadState for the JavaThread and + // returns it. JvmtiThreadState::state_for() will return NULL only if + // the specified JavaThread is exiting. JvmtiThreadState *jvmti_thread_state() const { return _jvmti_thread_state; } static ByteSize jvmti_thread_state_offset() { return byte_offset_of(JavaThread, _jvmti_thread_state); } void set_jvmti_get_loaded_classes_closure(JvmtiGetLoadedClassesClosure* value) { _jvmti_get_loaded_classes_closure = value; } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/runtime/threadCritical.hpp --- a/src/share/vm/runtime/threadCritical.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/runtime/threadCritical.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -29,7 +29,7 @@ // // Due to race conditions during vm exit, some of the os level // synchronization primitives may not be deallocated at exit. It -// is a good plan to implement the platform dependant sections of +// is a good plan to implement the platform dependent sections of // code with resources that are recoverable during process // cleanup by the os. Calling the initialize method before use // is also problematic, it is best to use preinitialized primitives diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/services/attachListener.cpp --- a/src/share/vm/services/attachListener.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/services/attachListener.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -194,7 +194,7 @@ } live_objects_only = strcmp(arg0, "-live") == 0; } - VM_GC_HeapInspection heapop(out, live_objects_only /* request gc */); + VM_GC_HeapInspection heapop(out, live_objects_only /* request full gc */, true /* need_prologue */); VMThread::execute(&heapop); return JNI_OK; } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/services/heapDumper.cpp --- a/src/share/vm/services/heapDumper.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/services/heapDumper.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -347,7 +347,6 @@ INITIAL_CLASS_COUNT = 200 }; - // Supports I/O operations on a dump file class DumpWriter : public StackObj { @@ -1303,7 +1302,9 @@ // The VM operation that performs the heap dump class VM_HeapDumper : public VM_GC_Operation { private: - DumpWriter* _writer; + static VM_HeapDumper* _global_dumper; + static DumpWriter* _global_writer; + DumpWriter* _local_writer; bool _gc_before_heap_dump; bool _is_segmented_dump; jlong _dump_start; @@ -1311,8 +1312,20 @@ ThreadStackTrace** _stack_traces; int _num_threads; - // accessors - DumpWriter* writer() const { return _writer; } + // accessors and setters + static VM_HeapDumper* dumper() { assert(_global_dumper != NULL, "Error"); return _global_dumper; } + static DumpWriter* writer() { assert(_global_writer != NULL, "Error"); return _global_writer; } + void set_global_dumper() { + assert(_global_dumper == NULL, "Error"); + _global_dumper = this; + } + void set_global_writer() { + assert(_global_writer == NULL, "Error"); + _global_writer = _local_writer; + } + void clear_global_dumper() { _global_dumper = NULL; } + void clear_global_writer() { _global_writer = NULL; } + bool is_segmented_dump() const { return _is_segmented_dump; } void set_segmented_dump() { _is_segmented_dump = true; } jlong dump_start() const { return _dump_start; } @@ -1357,7 +1370,7 @@ VM_GC_Operation(0 /* total collections, dummy, ignored */, 0 /* total full collections, dummy, ignored */, gc_before_heap_dump) { - _writer = writer; + _local_writer = writer; _gc_before_heap_dump = gc_before_heap_dump; _is_segmented_dump = false; _dump_start = (jlong)-1; @@ -1381,6 +1394,9 @@ void doit(); }; +VM_HeapDumper* VM_HeapDumper::_global_dumper = NULL; +DumpWriter* VM_HeapDumper::_global_writer = NULL; + bool VM_HeapDumper::skip_operation() const { return false; } @@ -1479,31 +1495,28 @@ void VM_HeapDumper::do_load_class(klassOop k) { static u4 class_serial_num = 0; - VM_HeapDumper* dumper = ((VM_HeapDumper*)VMThread::vm_operation()); - DumpWriter* writer = dumper->writer(); - // len of HPROF_LOAD_CLASS record u4 remaining = 2*oopSize + 2*sizeof(u4); // write a HPROF_LOAD_CLASS for the class and each array class do { - DumperSupport::write_header(writer, HPROF_LOAD_CLASS, remaining); + DumperSupport::write_header(writer(), HPROF_LOAD_CLASS, remaining); // class serial number is just a number - writer->write_u4(++class_serial_num); + writer()->write_u4(++class_serial_num); // class ID Klass* klass = Klass::cast(k); - writer->write_classID(klass); + writer()->write_classID(klass); // add the klassOop and class serial number pair - dumper->add_class_serial_number(klass, class_serial_num); + dumper()->add_class_serial_number(klass, class_serial_num); - writer->write_u4(STACK_TRACE_ID); + writer()->write_u4(STACK_TRACE_ID); // class name ID symbolOop name = klass->name(); - writer->write_objectID(name); + writer()->write_objectID(name); // write a LOAD_CLASS record for the array type (if it exists) k = klass->array_klass_or_null(); @@ -1512,17 +1525,13 @@ // writes a HPROF_GC_CLASS_DUMP record for the given class void VM_HeapDumper::do_class_dump(klassOop k) { - VM_HeapDumper* dumper = ((VM_HeapDumper*)VMThread::vm_operation()); - DumpWriter* writer = dumper->writer(); - DumperSupport::dump_class_and_array_classes(writer, k); + DumperSupport::dump_class_and_array_classes(writer(), k); } // writes a HPROF_GC_CLASS_DUMP records for a given basic type // array (and each multi-dimensional array too) void VM_HeapDumper::do_basic_type_array_class_dump(klassOop k) { - VM_HeapDumper* dumper = ((VM_HeapDumper*)VMThread::vm_operation()); - DumpWriter* writer = dumper->writer(); - DumperSupport::dump_basic_type_array_class(writer, k); + DumperSupport::dump_basic_type_array_class(writer(), k); } // Walk the stack of the given thread. @@ -1658,6 +1667,11 @@ ch->ensure_parsability(false); } + // At this point we should be the only dumper active, so + // the following should be safe. + set_global_dumper(); + set_global_writer(); + // Write the file header - use 1.0.2 for large heaps, otherwise 1.0.1 size_t used = ch->used(); const char* header; @@ -1667,6 +1681,7 @@ } else { header = "JAVA PROFILE 1.0.1"; } + // header is few bytes long - no chance to overflow int writer()->write_raw((void*)header, (int)strlen(header)); writer()->write_u1(0); // terminator @@ -1723,6 +1738,10 @@ // fixes up the length of the dump record. In the case of a segmented // heap then the HPROF_HEAP_DUMP_END record is also written. end_of_dump(); + + // Now we clear the global variables, so that a future dumper might run. + clear_global_dumper(); + clear_global_writer(); } void VM_HeapDumper::dump_stack_traces() { @@ -1790,7 +1809,12 @@ // generate the dump VM_HeapDumper dumper(&writer, _gc_before_heap_dump); - VMThread::execute(&dumper); + if (Thread::current()->is_VM_thread()) { + assert(SafepointSynchronize::is_at_safepoint(), "Expected to be called at a safepoint"); + dumper.doit(); + } else { + VMThread::execute(&dumper); + } // close dump file and record any error that the writer may have encountered writer.close(); @@ -1845,49 +1869,68 @@ } } - -// Called by error reporting +// Called by error reporting by a single Java thread outside of a JVM safepoint, +// or by heap dumping by the VM thread during a (GC) safepoint. Thus, these various +// callers are strictly serialized and guaranteed not to interfere below. For more +// general use, however, this method will need modification to prevent +// inteference when updating the static variables base_path and dump_file_seq below. void HeapDumper::dump_heap() { - static char path[JVM_MAXPATHLEN]; + static char base_path[JVM_MAXPATHLEN] = {'\0'}; + static uint dump_file_seq = 0; + char my_path[JVM_MAXPATHLEN] = {'\0'}; // The dump file defaults to java_pid.hprof in the current working // directory. HeapDumpPath= can be used to specify an alternative // dump file name or a directory where dump file is created. - bool use_default_filename = true; - if (HeapDumpPath == NULL || HeapDumpPath[0] == '\0') { - path[0] = '\0'; // HeapDumpPath= not specified - } else { - assert(strlen(HeapDumpPath) < sizeof(path), "HeapDumpPath too long"); - strcpy(path, HeapDumpPath); - // check if the path is a directory (must exist) - DIR* dir = os::opendir(path); - if (dir == NULL) { - use_default_filename = false; + if (dump_file_seq == 0) { // first time in, we initialize base_path + bool use_default_filename = true; + if (HeapDumpPath == NULL || HeapDumpPath[0] == '\0') { + // HeapDumpPath= not specified } else { - // HeapDumpPath specified a directory. We append a file separator - // (if needed). - os::closedir(dir); - size_t fs_len = strlen(os::file_separator()); - if (strlen(path) >= fs_len) { - char* end = path; - end += (strlen(path) - fs_len); - if (strcmp(end, os::file_separator()) != 0) { - assert(strlen(path) + strlen(os::file_separator()) < sizeof(path), - "HeapDumpPath too long"); - strcat(path, os::file_separator()); + assert(strlen(HeapDumpPath) < sizeof(base_path), "HeapDumpPath too long"); + strcpy(base_path, HeapDumpPath); + // check if the path is a directory (must exist) + DIR* dir = os::opendir(base_path); + if (dir == NULL) { + use_default_filename = false; + } else { + // HeapDumpPath specified a directory. We append a file separator + // (if needed). + os::closedir(dir); + size_t fs_len = strlen(os::file_separator()); + if (strlen(base_path) >= fs_len) { + char* end = base_path; + end += (strlen(base_path) - fs_len); + if (strcmp(end, os::file_separator()) != 0) { + assert(strlen(base_path) + strlen(os::file_separator()) < sizeof(base_path), + "HeapDumpPath too long"); + strcat(base_path, os::file_separator()); + } } } } + // If HeapDumpPath wasn't a file name then we append the default name + if (use_default_filename) { + char fn[32]; + sprintf(fn, "java_pid%d", os::current_process_id()); + assert(strlen(base_path) + strlen(fn) < sizeof(base_path), "HeapDumpPath too long"); + strcat(base_path, fn); + } + assert(strlen(base_path) < sizeof(my_path), "Buffer too small"); + strcpy(my_path, base_path); + } else { + // Append a sequence number id for dumps following the first + char fn[33]; + sprintf(fn, ".%d", dump_file_seq); + assert(strlen(base_path) + strlen(fn) < sizeof(my_path), "HeapDumpPath too long"); + strcpy(my_path, base_path); + strcat(my_path, fn); } - // If HeapDumpPath wasn't a file name then we append the default name - if (use_default_filename) { - char fn[32]; - sprintf(fn, "java_pid%d.hprof", os::current_process_id()); - assert(strlen(path) + strlen(fn) < sizeof(path), "HeapDumpPath too long"); - strcat(path, fn); - } + dump_file_seq++; // increment seq number for next time we dump + assert(strlen(".hprof") + strlen(my_path) < sizeof(my_path), "HeapDumpPath too long"); + strcat(my_path, ".hprof"); HeapDumper dumper(false /* no GC before heap dump */, true /* send to tty */); - dumper.dump(path); + dumper.dump(my_path); } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/services/heapDumper.hpp --- a/src/share/vm/services/heapDumper.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/services/heapDumper.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -53,7 +53,7 @@ public: HeapDumper(bool gc_before_heap_dump) : - _gc_before_heap_dump(gc_before_heap_dump), _error(NULL), _print_to_tty(false) { } + _gc_before_heap_dump(gc_before_heap_dump), _error(NULL), _print_to_tty(false) { } HeapDumper(bool gc_before_heap_dump, bool print_to_tty) : _gc_before_heap_dump(gc_before_heap_dump), _error(NULL), _print_to_tty(print_to_tty) { } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/utilities/globalDefinitions.hpp --- a/src/share/vm/utilities/globalDefinitions.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/utilities/globalDefinitions.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -881,7 +881,7 @@ i++; p *= 2; } // p = 2^(i+1) && x < p (i.e., 2^i <= x < 2^(i+1)) - // (if p = 0 then overflow occured and i = 31) + // (if p = 0 then overflow occurred and i = 31) return i; } @@ -895,7 +895,7 @@ i++; p *= 2; } // p = 2^(i+1) && x < p (i.e., 2^i <= x < 2^(i+1)) - // (if p = 0 then overflow occured and i = 63) + // (if p = 0 then overflow occurred and i = 63) return i; } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/utilities/growableArray.cpp --- a/src/share/vm/utilities/growableArray.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/utilities/growableArray.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -43,11 +43,13 @@ #endif void* GenericGrowableArray::raw_allocate(int elementSize) { + assert(_max >= 0, "integer overflow"); + size_t byte_size = elementSize * (size_t) _max; if (on_stack()) { - return (void*)resource_allocate_bytes(elementSize * _max); + return (void*)resource_allocate_bytes(byte_size); } else if (on_C_heap()) { - return (void*)AllocateHeap(elementSize * _max, "GrET in " __FILE__); + return (void*)AllocateHeap(byte_size, "GrET in " __FILE__); } else { - return _arena->Amalloc(elementSize * _max); + return _arena->Amalloc(byte_size); } } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/utilities/vmError.cpp --- a/src/share/vm/utilities/vmError.cpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/utilities/vmError.cpp Thu Mar 12 18:16:36 2009 -0700 @@ -306,7 +306,7 @@ strncpy(buf, file, buflen); if (len + 10 < buflen) { - sprintf(buf + len, ":" SIZE_FORMAT, _lineno); + sprintf(buf + len, ":%d", _lineno); } st->print(" (%s)", buf); } else { @@ -420,7 +420,7 @@ if (fr.sp()) { st->print(", sp=" PTR_FORMAT, fr.sp()); - st->print(", free space=%dk", + st->print(", free space=%" INTPTR_FORMAT "k", ((intptr_t)fr.sp() - (intptr_t)stack_bottom) >> 10); } diff -r ce2272390558 -r 7bb995fbd3c0 src/share/vm/utilities/vmError.hpp --- a/src/share/vm/utilities/vmError.hpp Mon Mar 09 13:34:00 2009 -0700 +++ b/src/share/vm/utilities/vmError.hpp Thu Mar 12 18:16:36 2009 -0700 @@ -50,7 +50,7 @@ // additional info for VM internal errors const char * _filename; - size_t _lineno; + int _lineno; // used by fatal error handler int _current_step; diff -r ce2272390558 -r 7bb995fbd3c0 test/compiler/6797305/Test6797305.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/6797305/Test6797305.java Thu Mar 12 18:16:36 2009 -0700 @@ -0,0 +1,114 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +/** + * @test + * @bug 6797305 + * @summary Add LoadUB and LoadUI opcode class + * + * @run main/othervm -Xcomp -XX:CompileOnly=Test6797305.loadB,Test6797305.loadB2L,Test6797305.loadUB,Test6797305.loadUBmask,Test6797305.loadUB2L,Test6797305.loadS,Test6797305.loadS2L,Test6797305.loadUS,Test6797305.loadUSmask,Test6797305.loadUS2L,Test6797305.loadI,Test6797305.loadI2L,Test6797305.loadUI2L,Test6797305.loadL Test6797305 + */ + +public class Test6797305 { + static final byte[] ba = new byte[] { -1 }; + static final short[] sa = new short[] { -1 }; + static final int[] ia = new int[] { -1 }; + static final long[] la = new long[] { -1 }; + + public static void main(String[] args) + { + long b = loadB(ba); + if (b != -1) + throw new InternalError("loadB failed: " + b + " != " + -1); + + long b2l = loadB2L(ba); + if (b2l != -1L) + throw new InternalError("loadB2L failed: " + b2l + " != " + -1L); + + int ub = loadUB(ba); + if (ub != 0xFF) + throw new InternalError("loadUB failed: " + ub + " != " + 0xFF); + + int ubmask = loadUBmask(ba); + if (ubmask != 0xFE) + throw new InternalError("loadUBmask failed: " + ubmask + " != " + 0xFE); + + long ub2l = loadUB2L(ba); + if (ub2l != 0xFFL) + throw new InternalError("loadUB2L failed: " + ub2l + " != " + 0xFFL); + + int s = loadS(sa); + if (s != -1) + throw new InternalError("loadS failed: " + s + " != " + -1); + + long s2l = loadS2L(sa); + if (s2l != -1L) + throw new InternalError("loadS2L failed: " + s2l + " != " + -1L); + + int us = loadUS(sa); + if (us != 0xFFFF) + throw new InternalError("loadUS failed: " + us + " != " + 0xFFFF); + + int usmask = loadUSmask(sa); + if (usmask != 0xFFFE) + throw new InternalError("loadUBmask failed: " + ubmask + " != " + 0xFFFE); + + long us2l = loadUS2L(sa); + if (us2l != 0xFFFFL) + throw new InternalError("loadUS2L failed: " + us2l + " != " + 0xFFFFL); + + int i = loadI(ia); + if (i != -1) + throw new InternalError("loadI failed: " + i + " != " + -1); + + long i2l = loadI2L(ia); + if (i2l != -1L) + throw new InternalError("loadI2L failed: " + i2l + " != " + -1L); + + long ui2l = loadUI2L(ia); + if (ui2l != 0xFFFFFFFFL) + throw new InternalError("loadUI2L failed: " + ui2l + " != " + 0xFFFFFFFFL); + + long l = loadL(la); + if (l != -1L) + throw new InternalError("loadL failed: " + l + " != " + -1L); + } + + static int loadB (byte[] ba) { return ba[0]; } + static long loadB2L (byte[] ba) { return ba[0]; } + static int loadUB (byte[] ba) { return ba[0] & 0xFF; } + static int loadUBmask(byte[] ba) { return ba[0] & 0xFE; } + static long loadUB2L (byte[] ba) { return ba[0] & 0xFF; } + + static int loadS (short[] sa) { return sa[0]; } + static long loadS2L (short[] sa) { return sa[0]; } + static int loadUS (short[] sa) { return sa[0] & 0xFFFF; } + static int loadUSmask(short[] sa) { return sa[0] & 0xFFFE; } + static long loadUS2L (short[] sa) { return sa[0] & 0xFFFF; } + + static int loadI (int[] ia) { return ia[0]; } + static long loadI2L (int[] ia) { return ia[0]; } + static long loadUI2L (int[] ia) { return ia[0] & 0xFFFFFFFFL; } + + static long loadL (long[] la) { return la[0]; } +}