# HG changeset patch # User trims # Date 1208903778 25200 # Node ID ad0b851458ff9d1d490ed2d79bb84f75a9fdb753 # Parent 8b0b3490194fb2239c272ec93f2e6b66e2fd358b# Parent 6cc3576e5142a09196ba43f8e11a32b8b78b5d5f Merge diff -r 8b0b3490194f -r ad0b851458ff agent/src/os/linux/ps_core.c --- a/agent/src/os/linux/ps_core.c Wed Apr 09 11:18:58 2008 -0700 +++ b/agent/src/os/linux/ps_core.c Tue Apr 22 15:36:18 2008 -0700 @@ -518,10 +518,10 @@ } static ps_prochandle_ops core_ops = { - release: core_release, - p_pread: core_read_data, - p_pwrite: core_write_data, - get_lwp_regs: core_get_lwp_regs + .release= core_release, + .p_pread= core_read_data, + .p_pwrite= core_write_data, + .get_lwp_regs= core_get_lwp_regs }; // read regs and create thread from NT_PRSTATUS entries from core file diff -r 8b0b3490194f -r ad0b851458ff agent/src/os/linux/ps_proc.c --- a/agent/src/os/linux/ps_proc.c Wed Apr 09 11:18:58 2008 -0700 +++ b/agent/src/os/linux/ps_proc.c Tue Apr 22 15:36:18 2008 -0700 @@ -291,10 +291,10 @@ } static ps_prochandle_ops process_ops = { - release: process_cleanup, - p_pread: process_read_data, - p_pwrite: process_write_data, - get_lwp_regs: process_get_lwp_regs + .release= process_cleanup, + .p_pread= process_read_data, + .p_pwrite= process_write_data, + .get_lwp_regs= process_get_lwp_regs }; // attach to the process. One and only one exposed stuff diff -r 8b0b3490194f -r ad0b851458ff build/linux/Makefile --- a/build/linux/Makefile Wed Apr 09 11:18:58 2008 -0700 +++ b/build/linux/Makefile Tue Apr 22 15:36:18 2008 -0700 @@ -80,6 +80,11 @@ MFLAGS += " LP64=1 " endif +# pass USE_SUNCC further, through MFLAGS +ifdef USE_SUNCC + MFLAGS += " USE_SUNCC=1 " +endif + # The following renders pathnames in generated Makefiles valid on # machines other than the machine containing the build tree. 
# diff -r 8b0b3490194f -r ad0b851458ff build/linux/makefiles/amd64.make --- a/build/linux/makefiles/amd64.make Wed Apr 09 11:18:58 2008 -0700 +++ b/build/linux/makefiles/amd64.make Tue Apr 22 15:36:18 2008 -0700 @@ -35,6 +35,8 @@ CFLAGS += -D_LP64=1 # The serviceability agent relies on frame pointer (%rbp) to walk thread stack -CFLAGS += -fno-omit-frame-pointer +ifndef USE_SUNCC + CFLAGS += -fno-omit-frame-pointer +endif OPT_CFLAGS/compactingPermGenGen.o = -O1 diff -r 8b0b3490194f -r ad0b851458ff build/linux/makefiles/buildtree.make --- a/build/linux/makefiles/buildtree.make Wed Apr 09 11:18:58 2008 -0700 +++ b/build/linux/makefiles/buildtree.make Tue Apr 22 15:36:18 2008 -0700 @@ -63,7 +63,11 @@ # For now, until the compiler is less wobbly: TESTFLAGS = -Xbatch -showversion -PLATFORM_FILE = $(GAMMADIR)/build/$(OS_FAMILY)/platform_$(BUILDARCH) +ifdef USE_SUNCC +PLATFORM_FILE = $(GAMMADIR)/build/$(OS_FAMILY)/platform_$(BUILDARCH).suncc +else +PLATFORM_FILE = $(GAMMADIR)/build/$(OS_FAMILY)/platform_$(BUILDARCH) +endif ifdef FORCE_TIERED ifeq ($(VARIANT),tiered) diff -r 8b0b3490194f -r ad0b851458ff build/linux/makefiles/sparcWorks.make --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/linux/makefiles/sparcWorks.make Tue Apr 22 15:36:18 2008 -0700 @@ -0,0 +1,93 @@ +# +# Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). 
+# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, +# CA 95054 USA or visit www.sun.com if you need additional information or +# have any questions. +# +# + +#------------------------------------------------------------------------ +# CC, CPP & AS + +CPP = CC +CC = cc +AS = $(CC) -c + +ARCHFLAG = $(ARCHFLAG/$(BUILDARCH)) +ARCHFLAG/i486 = -m32 +ARCHFLAG/amd64 = -m64 + +CFLAGS += $(ARCHFLAG) +AOUT_FLAGS += $(ARCHFLAG) +LFLAGS += $(ARCHFLAG) +ASFLAGS += $(ARCHFLAG) + +#------------------------------------------------------------------------ +# Compiler flags + +# position-independent code +PICFLAG = -KPIC + +CFLAGS += $(PICFLAG) +# no more exceptions +CFLAGS += -features=no%except +# Reduce code bloat by reverting back to 5.0 behavior for static initializers +CFLAGS += -features=no%split_init +# allow zero sized arrays +CFLAGS += -features=zla + +# Use C++ Interpreter +ifdef CC_INTERP + CFLAGS += -DCC_INTERP +endif + +# We don't need libCstd.so and librwtools7.so, only libCrun.so +CFLAGS += -library=Crun +LIBS += -lCrun + +CFLAGS += -mt +LFLAGS += -mt + +# Compiler warnings are treated as errors +#WARNINGS_ARE_ERRORS = -errwarn=%all +CFLAGS_WARN/DEFAULT = $(WARNINGS_ARE_ERRORS) +# Special cases +CFLAGS_WARN/BYFILE = $(CFLAGS_WARN/$@)$(CFLAGS_WARN/DEFAULT$(CFLAGS_WARN/$@)) + +# The flags to use for an Optimized build +OPT_CFLAGS+=-xO4 +OPT_CFLAGS/NOOPT=-xO0 + +#------------------------------------------------------------------------ +# Linker flags + +# Use $(MAPFLAG:FILENAME=real_file_name) to specify a map file. 
+MAPFLAG = -Wl,--version-script=FILENAME + +# Use $(SONAMEFLAG:SONAME=soname) to specify the intrinsic name of a shared obj +SONAMEFLAG = -h SONAME + +# Build shared library +SHARED_FLAG = -G + +#------------------------------------------------------------------------ +# Debug flags +DEBUG_CFLAGS += -g +FASTDEBUG_CFLAGS = -g0 + diff -r 8b0b3490194f -r ad0b851458ff build/linux/platform_amd64.suncc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/linux/platform_amd64.suncc Tue Apr 22 15:36:18 2008 -0700 @@ -0,0 +1,17 @@ +os_family = linux + +arch = x86 + +arch_model = x86_64 + +os_arch = linux_x86 + +os_arch_model = linux_x86_64 + +lib_arch = amd64 + +compiler = sparcWorks + +gnu_dis_arch = amd64 + +sysdefs = -DLINUX -DSPARC_WORKS -D_GNU_SOURCE -DAMD64 diff -r 8b0b3490194f -r ad0b851458ff build/linux/platform_i486.suncc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build/linux/platform_i486.suncc Tue Apr 22 15:36:18 2008 -0700 @@ -0,0 +1,17 @@ +os_family = linux + +arch = x86 + +arch_model = x86_32 + +os_arch = linux_x86 + +os_arch_model = linux_x86_32 + +lib_arch = i386 + +compiler = sparcWorks + +gnu_dis_arch = i386 + +sysdefs = -DLINUX -DSPARC_WORKS -D_GNU_SOURCE -DIA32 diff -r 8b0b3490194f -r ad0b851458ff build/solaris/makefiles/amd64.make --- a/build/solaris/makefiles/amd64.make Wed Apr 09 11:18:58 2008 -0700 +++ b/build/solaris/makefiles/amd64.make Tue Apr 22 15:36:18 2008 -0700 @@ -19,7 +19,7 @@ # Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, # CA 95054 USA or visit www.sun.com if you need additional information or # have any questions. 
-# +# # # Must also specify if CPU is little endian @@ -45,6 +45,10 @@ OPT_CFLAGS/generateOptoStub.o = -xO2 OPT_CFLAGS/thread.o = -xO2 +# Work around for 6624782 +OPT_CFLAGS/instanceKlass.o = -Qoption ube -no_a2lf +OPT_CFLAGS/objArrayKlass.o = -Qoption ube -no_a2lf + else ifeq ("${Platform_compiler}", "gcc") @@ -58,6 +62,6 @@ # error _JUNK2_ := $(shell echo >&2 \ "*** ERROR: this compiler is not yet supported by this code base!") - @exit 1 + @exit 1 endif endif diff -r 8b0b3490194f -r ad0b851458ff build/windows/makefiles/compile.make --- a/build/windows/makefiles/compile.make Wed Apr 09 11:18:58 2008 -0700 +++ b/build/windows/makefiles/compile.make Tue Apr 22 15:36:18 2008 -0700 @@ -44,6 +44,10 @@ # /Od Disable all optimizations # # NOTE: Normally following any of the above with a '-' will turn off that flag +# +# 6655385: For VS2003/2005 we now specify /Oy- (disable frame pointer +# omission.) This has little to no effect on performance while vastly +# improving the quality of crash log stack traces involving jvm.dll. # These are always used in all compiles CPP_FLAGS=/nologo /W3 /WX @@ -141,14 +145,14 @@ !endif !if "$(COMPILER_NAME)" == "VS2003" -PRODUCT_OPT_OPTION = /O2 -FASTDEBUG_OPT_OPTION = /O2 +PRODUCT_OPT_OPTION = /O2 /Oy- +FASTDEBUG_OPT_OPTION = /O2 /Oy- DEBUG_OPT_OPTION = /Od !endif !if "$(COMPILER_NAME)" == "VS2005" -PRODUCT_OPT_OPTION = /O2 -FASTDEBUG_OPT_OPTION = /O2 +PRODUCT_OPT_OPTION = /O2 /Oy- +FASTDEBUG_OPT_OPTION = /O2 /Oy- DEBUG_OPT_OPTION = /Od GX_OPTION = /EHsc # This VS2005 compiler has /GS as a default and requires bufferoverflowU.lib @@ -165,8 +169,8 @@ # Compile for space above time. 
!if "$(Variant)" == "kernel" -PRODUCT_OPT_OPTION = /O1 -FASTDEBUG_OPT_OPTION = /O1 +PRODUCT_OPT_OPTION = /O1 /Oy- +FASTDEBUG_OPT_OPTION = /O1 /Oy- DEBUG_OPT_OPTION = /Od !endif diff -r 8b0b3490194f -r ad0b851458ff make/hotspot_version --- a/make/hotspot_version Wed Apr 09 11:18:58 2008 -0700 +++ b/make/hotspot_version Tue Apr 22 15:36:18 2008 -0700 @@ -35,7 +35,7 @@ HS_MAJOR_VER=12 HS_MINOR_VER=0 -HS_BUILD_NUMBER=01 +HS_BUILD_NUMBER=03 JDK_MAJOR_VER=1 JDK_MINOR_VER=7 diff -r 8b0b3490194f -r ad0b851458ff src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -2037,7 +2037,7 @@ int LIR_Assembler::shift_amount(BasicType t) { - int elem_size = type2aelembytes[t]; + int elem_size = type2aelembytes(t); switch (elem_size) { case 1 : return 0; case 2 : return 1; @@ -2360,7 +2360,7 @@ op->tmp2()->as_register(), op->tmp3()->as_register(), arrayOopDesc::header_size(op->type()), - type2aelembytes[op->type()], + type2aelembytes(op->type()), op->klass()->as_register(), *op->stub()->entry()); } diff -r 8b0b3490194f -r ad0b851458ff src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp --- a/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -179,7 +179,7 @@ LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type, bool needs_card_mark) { - int elem_size = type2aelembytes[type]; + int elem_size = type2aelembytes(type); int shift = exact_log2(elem_size); LIR_Opr base_opr; diff -r 8b0b3490194f -r ad0b851458ff src/cpu/sparc/vm/sparc.ad --- a/src/cpu/sparc/vm/sparc.ad Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/sparc/vm/sparc.ad Tue Apr 22 15:36:18 2008 -0700 @@ -6023,7 +6023,7 @@ ins_pipe(ialu_imm); %} -instruct cmovII_U_reg(cmpOp cmp, flagsRegU icc, iRegI dst, iRegI src) %{ +instruct 
cmovII_U_reg(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src) %{ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); ins_cost(150); size(4); @@ -6032,7 +6032,7 @@ ins_pipe(ialu_reg); %} -instruct cmovII_U_imm(cmpOp cmp, flagsRegU icc, iRegI dst, immI11 src) %{ +instruct cmovII_U_imm(cmpOpU cmp, flagsRegU icc, iRegI dst, immI11 src) %{ match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); ins_cost(140); size(4); diff -r 8b0b3490194f -r ad0b851458ff src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -2911,6 +2911,7 @@ // These entry points require SharedInfo::stack0 to be set up in non-core builds StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); + StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false); StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true); StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true); StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); diff -r 8b0b3490194f -r ad0b851458ff src/cpu/sparc/vm/vm_version_sparc.cpp --- a/src/cpu/sparc/vm/vm_version_sparc.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/sparc/vm/vm_version_sparc.cpp Tue Apr 22 15:36:18 2008 -0700 @@ 
-28,6 +28,12 @@ int VM_Version::_features = VM_Version::unknown_m; const char* VM_Version::_features_str = ""; +bool VM_Version::is_niagara1_plus() { + // This is a placeholder until the real test is determined. + return is_niagara1() && + (os::processor_count() > maximum_niagara1_processor_count()); +} + void VM_Version::initialize() { _features = determine_features(); PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes(); @@ -160,3 +166,13 @@ void VM_Version::revert() { _features = saved_features; } + +unsigned int VM_Version::calc_parallel_worker_threads() { + unsigned int result; + if (is_niagara1_plus()) { + result = nof_parallel_worker_threads(5, 16, 8); + } else { + result = nof_parallel_worker_threads(5, 8, 8); + } + return result; +} diff -r 8b0b3490194f -r ad0b851458ff src/cpu/sparc/vm/vm_version_sparc.hpp --- a/src/cpu/sparc/vm/vm_version_sparc.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/sparc/vm/vm_version_sparc.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -64,6 +64,11 @@ static bool is_niagara1(int features) { return (features & niagara1_m) == niagara1_m; } + static int maximum_niagara1_processor_count() { return 32; } + // Returns true if the platform is in the niagara line and + // newer than the niagara1. + static bool is_niagara1_plus(); + public: // Initialization static void initialize(); @@ -129,4 +134,7 @@ // Override the Abstract_VM_Version implementation. static uint page_size_count() { return is_sun4v() ? 4 : 2; } + + // Calculates the number of parallel threads + static unsigned int calc_parallel_worker_threads(); }; diff -r 8b0b3490194f -r ad0b851458ff src/cpu/sparc/vm/vtableStubs_sparc.cpp --- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -175,17 +175,12 @@ // %%%% Could load both offset and interface in one ldx, if they were // in the opposite order. This would save a load. 
__ ld_ptr(L0, base + itableOffsetEntry::interface_offset_in_bytes(), L1); -#ifdef ASSERT - Label ok; - // Check that entry is non-null and an Oop - __ bpr(Assembler::rc_nz, false, Assembler::pt, L1, ok); - __ delayed()->nop(); - __ stop("null entry point found in itable's offset table"); - __ bind(ok); - __ verify_oop(L1); -#endif // ASSERT - __ cmp(G5_interface, L1); + // If the entry is NULL then we've reached the end of the table + // without finding the expected interface, so throw an exception + Label throw_icce; + __ bpr(Assembler::rc_z, false, Assembler::pn, L1, throw_icce); + __ delayed()->cmp(G5_interface, L1); __ brx(Assembler::notEqual, true, Assembler::pn, search); __ delayed()->add(L0, itableOffsetEntry::size() * wordSize, L0); @@ -223,24 +218,30 @@ __ JMP(G3_scratch, 0); __ delayed()->nop(); + __ bind(throw_icce); + Address icce(G3_scratch, StubRoutines::throw_IncompatibleClassChangeError_entry()); + __ jump_to(icce, 0); + __ delayed()->restore(); + masm->flush(); + + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + s->set_exception_points(npe_addr, ame_addr); return s; } int VtableStub::pd_code_size_limit(bool is_vtable_stub) { - if (TraceJumps || DebugVtables || CountCompiledCalls || VerifyOops) return 999; + if (TraceJumps || DebugVtables || CountCompiledCalls || VerifyOops) return 1000; else { const int slop = 2*BytesPerInstWord; // sethi;add (needed for long offsets) if (is_vtable_stub) { const int basic = 5*BytesPerInstWord; // ld;ld;ld,jmp,nop return basic + slop; } else { -#ifdef ASSERT - return 999; -#endif // ASSERT - const int basic = 17*BytesPerInstWord; // save, ld, ld, sll, and, add, add, ld, cmp, br, add, ld, add, ld, ld, jmp, restore + // save, ld, ld, sll, and, add, add, ld, cmp, br, add, ld, add, ld, ld, jmp, restore, sethi, jmpl, restore + const int basic = (20 LP64_ONLY(+ 6)) * BytesPerInstWord; return (basic + slop); } } @@ -252,29 +253,3 @@ const unsigned int icache_line_size = 32; return icache_line_size; } - - 
-//Reconciliation History -// 1.2 97/12/09 17:13:31 vtableStubs_i486.cpp -// 1.4 98/01/21 19:18:37 vtableStubs_i486.cpp -// 1.5 98/02/13 16:33:55 vtableStubs_i486.cpp -// 1.7 98/03/05 17:17:28 vtableStubs_i486.cpp -// 1.9 98/05/18 09:26:17 vtableStubs_i486.cpp -// 1.10 98/05/26 16:28:13 vtableStubs_i486.cpp -// 1.11 98/05/27 08:51:35 vtableStubs_i486.cpp -// 1.12 98/06/15 15:04:12 vtableStubs_i486.cpp -// 1.13 98/07/28 18:44:22 vtableStubs_i486.cpp -// 1.15 98/08/28 11:31:19 vtableStubs_i486.cpp -// 1.16 98/09/02 12:58:31 vtableStubs_i486.cpp -// 1.17 98/09/04 12:15:52 vtableStubs_i486.cpp -// 1.18 98/11/19 11:55:24 vtableStubs_i486.cpp -// 1.19 99/01/12 14:57:56 vtableStubs_i486.cpp -// 1.20 99/01/19 17:42:52 vtableStubs_i486.cpp -// 1.22 99/01/21 10:29:25 vtableStubs_i486.cpp -// 1.30 99/06/02 15:27:39 vtableStubs_i486.cpp -// 1.26 99/06/24 14:25:07 vtableStubs_i486.cpp -// 1.23 99/02/22 14:37:52 vtableStubs_i486.cpp -// 1.28 99/06/29 18:06:17 vtableStubs_i486.cpp -// 1.29 99/07/22 17:03:44 vtableStubs_i486.cpp -// 1.30 99/08/11 09:33:27 vtableStubs_i486.cpp -//End diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/assembler_x86_32.cpp --- a/src/cpu/x86/vm/assembler_x86_32.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_32.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -2672,6 +2672,22 @@ emit_sse_operand(dst, src); } +void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse2(), ""); + + emit_byte(0xF3); + emit_byte(0x0F); + emit_byte(0xE6); + emit_sse_operand(dst, src); +} + +void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse2(), ""); + + emit_byte(0x0F); + emit_byte(0x5B); + emit_sse_operand(dst, src); +} emit_sse_instruction(andps, sse, 0, 0x54, XMMRegister, XMMRegister); emit_sse_instruction(andpd, sse2, 0x66, 0x54, XMMRegister, XMMRegister); diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/assembler_x86_32.hpp --- a/src/cpu/x86/vm/assembler_x86_32.hpp Wed Apr 
09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_32.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -901,6 +901,8 @@ void cvtss2sd(XMMRegister dst, XMMRegister src); void cvtsd2ss(XMMRegister dst, Address src); // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value void cvtsd2ss(XMMRegister dst, XMMRegister src); + void cvtdq2pd(XMMRegister dst, XMMRegister src); + void cvtdq2ps(XMMRegister dst, XMMRegister src); void cvtsi2ss(XMMRegister dst, Address src); // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value void cvtsi2ss(XMMRegister dst, Register src); diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/assembler_x86_64.cpp --- a/src/cpu/x86/vm/assembler_x86_64.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_64.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1304,7 +1304,7 @@ emit_operand(src, dst); } -void Assembler::mov64(Register dst, int64_t imm64) { +void Assembler::mov64(Register dst, intptr_t imm64) { InstructionMark im(this); int encode = prefixq_and_encode(dst->encoding()); emit_byte(0xB8 | encode); @@ -1331,7 +1331,7 @@ emit_operand(dst, src); } -void Assembler::mov64(Address dst, int64_t imm32) { +void Assembler::mov64(Address dst, intptr_t imm32) { assert(is_simm32(imm32), "lost bits"); InstructionMark im(this); prefixq(dst); @@ -3372,6 +3372,21 @@ emit_byte(0xC0 | encode); } +void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { + emit_byte(0xF3); + int encode = prefix_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0xE6); + emit_byte(0xC0 | encode); +} + +void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { + int encode = prefix_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0x5B); + emit_byte(0xC0 | encode); +} + void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { emit_byte(0xF2); int encode = prefix_and_encode(dst->encoding(), src->encoding()); diff -r 8b0b3490194f -r ad0b851458ff 
src/cpu/x86/vm/assembler_x86_64.hpp --- a/src/cpu/x86/vm/assembler_x86_64.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/assembler_x86_64.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -922,6 +922,8 @@ void cvttsd2siq(Register dst, XMMRegister src); // truncates void cvtss2sd(XMMRegister dst, XMMRegister src); void cvtsd2ss(XMMRegister dst, XMMRegister src); + void cvtdq2pd(XMMRegister dst, XMMRegister src); + void cvtdq2ps(XMMRegister dst, XMMRegister src); void pxor(XMMRegister dst, Address src); // Xor Packed Byte Integer Values void pxor(XMMRegister dst, XMMRegister src); // Xor Packed Byte Integer Values diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -546,8 +546,8 @@ // set rsi.edi to the end of the arrays (arrays have same length) // negate the index - __ leal(rsi, Address(rsi, rax, Address::times_2, type2aelembytes[T_CHAR])); - __ leal(rdi, Address(rdi, rax, Address::times_2, type2aelembytes[T_CHAR])); + __ leal(rsi, Address(rsi, rax, Address::times_2, type2aelembytes(T_CHAR))); + __ leal(rdi, Address(rdi, rax, Address::times_2, type2aelembytes(T_CHAR))); __ negl(rax); // compare the strings in a loop @@ -1232,7 +1232,7 @@ NEEDS_CLEANUP; // This could be static? 
Address::ScaleFactor LIR_Assembler::array_element_size(BasicType type) const { - int elem_size = type2aelembytes[type]; + int elem_size = type2aelembytes(type); switch (elem_size) { case 1: return Address::times_1; case 2: return Address::times_2; @@ -2739,7 +2739,7 @@ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point"); - int elem_size = type2aelembytes[basic_type]; + int elem_size = type2aelembytes(basic_type); int shift_amount; Address::ScaleFactor scale; diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/c1_LIRGenerator_x86.cpp --- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -151,7 +151,7 @@ LIR_Address* addr; if (index_opr->is_constant()) { - int elem_size = type2aelembytes[type]; + int elem_size = type2aelembytes(type); addr = new LIR_Address(array_opr, offset_in_bytes + index_opr->as_jint() * elem_size, type); } else { diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/stubGenerator_x86_32.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1416,8 +1416,8 @@ // ======== end loop ======== // It was a real error; we must depend on the caller to finish the job. - // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops. - // Emit GC store barriers for the oops we have copied (r14 + rdx), + // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops. + // Emit GC store barriers for the oops we have copied (length_arg + count), // and report their number to the caller. __ addl(count, length_arg); // transfers = (length - remaining) __ movl(rax, count); // save the value @@ -1430,6 +1430,7 @@ // Come here on success only. 
__ BIND(L_do_card_marks); __ movl(count, length_arg); + __ movl(to, to_arg); // reload gen_write_ref_array_post_barrier(to, count); __ xorl(rax, rax); // return 0 on success @@ -2151,6 +2152,7 @@ // These entry points require SharedInfo::stack0 to be set up in non-core builds // and need to be relocatable, so they each fabricate a RuntimeStub internally. StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); + StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false); StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_ArithmeticException), true); StubRoutines::_throw_NullPointerException_entry = generate_throw_exception("NullPointerException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException), true); StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/stubGenerator_x86_64.cpp --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -2832,6 +2832,13 @@ throw_AbstractMethodError), false); + StubRoutines::_throw_IncompatibleClassChangeError_entry = + generate_throw_exception("IncompatibleClassChangeError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_IncompatibleClassChangeError), + false); + StubRoutines::_throw_ArithmeticException_entry = generate_throw_exception("ArithmeticException throw_exception", 
CAST_FROM_FN_PTR(address, diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/vm_version_x86_32.cpp --- a/src/cpu/x86/vm/vm_version_x86_32.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/vm_version_x86_32.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -321,6 +321,20 @@ UseXmmRegToRegMoveAll = false; } } + if( FLAG_IS_DEFAULT(UseXmmI2F) ) { + if( supports_sse4a() ) { + UseXmmI2F = true; + } else { + UseXmmI2F = false; + } + } + if( FLAG_IS_DEFAULT(UseXmmI2D) ) { + if( supports_sse4a() ) { + UseXmmI2D = true; + } else { + UseXmmI2D = false; + } + } } if( is_intel() ) { // Intel cpus specific settings diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/vm_version_x86_64.cpp --- a/src/cpu/x86/vm/vm_version_x86_64.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/vm_version_x86_64.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -265,6 +265,20 @@ UseXmmRegToRegMoveAll = false; } } + if( FLAG_IS_DEFAULT(UseXmmI2F) ) { + if( supports_sse4a() ) { + UseXmmI2F = true; + } else { + UseXmmI2F = false; + } + } + if( FLAG_IS_DEFAULT(UseXmmI2D) ) { + if( supports_sse4a() ) { + UseXmmI2D = true; + } else { + UseXmmI2D = false; + } + } } if( is_intel() ) { // Intel cpus specific settings diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/vtableStubs_x86_32.cpp --- a/src/cpu/x86/vm/vtableStubs_x86_32.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/vtableStubs_x86_32.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -138,29 +138,21 @@ __ round_to(rbx, BytesPerLong); } - Label hit, next, entry; + Label hit, next, entry, throw_icce; - __ jmp(entry); + __ jmpb(entry); __ bind(next); __ addl(rbx, itableOffsetEntry::size() * wordSize); __ bind(entry); -#ifdef ASSERT - // Check that the entry is non-null - if (DebugVtables) { - Label L; - __ pushl(rbx); - __ movl(rbx, Address(rbx, itableOffsetEntry::interface_offset_in_bytes())); - __ testl(rbx, rbx); - __ jcc(Assembler::notZero, L); - __ stop("null entry point found in itable's offset table"); - __ bind(L); - __ popl(rbx); - } -#endif - __ 
cmpl(rax, Address(rbx, itableOffsetEntry::interface_offset_in_bytes())); + // If the entry is NULL then we've reached the end of the table + // without finding the expected interface, so throw an exception + __ movl(rdx, Address(rbx, itableOffsetEntry::interface_offset_in_bytes())); + __ testl(rdx, rdx); + __ jcc(Assembler::zero, throw_icce); + __ cmpl(rax, rdx); __ jcc(Assembler::notEqual, next); // We found a hit, move offset into rbx, @@ -194,7 +186,15 @@ address ame_addr = __ pc(); __ jmp(Address(method, methodOopDesc::from_compiled_offset())); + __ bind(throw_icce); + // Restore saved register + __ popl(rdx); + __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); + masm->flush(); + + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + s->set_exception_points(npe_addr, ame_addr); return s; } @@ -207,7 +207,7 @@ return (DebugVtables ? 210 : 16) + (CountCompiledCalls ? 6 : 0); } else { // Itable stub size - return (DebugVtables ? 140 : 55) + (CountCompiledCalls ? 6 : 0); + return (DebugVtables ? 144 : 64) + (CountCompiledCalls ? 
6 : 0); } } diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/vtableStubs_x86_64.cpp --- a/src/cpu/x86/vm/vtableStubs_x86_64.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/vtableStubs_x86_64.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -153,7 +153,7 @@ // Round up to align_object_offset boundary __ round_to_q(rbx, BytesPerLong); } - Label hit, next, entry; + Label hit, next, entry, throw_icce; __ jmpb(entry); @@ -162,22 +162,13 @@ __ bind(entry); -#ifdef ASSERT - // Check that the entry is non-null - if (DebugVtables) { - Label L; - __ pushq(rbx); - __ movq(rbx, Address(rbx, itableOffsetEntry::interface_offset_in_bytes())); - __ testq(rbx, rbx); - __ jcc(Assembler::notZero, L); - __ stop("null entry point found in itable's offset table"); - __ bind(L); - __ popq(rbx); - } -#endif - - __ cmpq(rax, Address(rbx, itableOffsetEntry::interface_offset_in_bytes())); - __ jcc(Assembler::notEqual, next); + // If the entry is NULL then we've reached the end of the table + // without finding the expected interface, so throw an exception + __ movq(j_rarg1, Address(rbx, itableOffsetEntry::interface_offset_in_bytes())); + __ testq(j_rarg1, j_rarg1); + __ jcc(Assembler::zero, throw_icce); + __ cmpq(rax, j_rarg1); + __ jccb(Assembler::notEqual, next); // We found a hit, move offset into j_rarg1 __ movl(j_rarg1, Address(rbx, itableOffsetEntry::offset_offset_in_bytes())); @@ -203,23 +194,31 @@ #ifdef ASSERT - if (DebugVtables) { - Label L2; - __ cmpq(method, (int)NULL); - __ jcc(Assembler::equal, L2); - __ cmpq(Address(method, methodOopDesc::from_compiled_offset()), (int)NULL_WORD); - __ jcc(Assembler::notZero, L2); - __ stop("compiler entrypoint is null"); - __ bind(L2); - } + if (DebugVtables) { + Label L2; + __ cmpq(method, (int)NULL); + __ jcc(Assembler::equal, L2); + __ cmpq(Address(method, methodOopDesc::from_compiled_offset()), (int)NULL_WORD); + __ jcc(Assembler::notZero, L2); + __ stop("compiler entrypoint is null"); + __ bind(L2); + } #endif // ASSERT - // rbx: 
methodOop - // j_rarg0: receiver - address ame_addr = __ pc(); - __ jmp(Address(method, methodOopDesc::from_compiled_offset())); + // rbx: methodOop + // j_rarg0: receiver + address ame_addr = __ pc(); + __ jmp(Address(method, methodOopDesc::from_compiled_offset())); + + __ bind(throw_icce); + // Restore saved register + __ popq(j_rarg1); + __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); __ flush(); + + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + s->set_exception_points(npe_addr, ame_addr); return s; } @@ -230,7 +229,7 @@ return (DebugVtables ? 512 : 24) + (CountCompiledCalls ? 13 : 0); } else { // Itable stub size - return (DebugVtables ? 636 : 64) + (CountCompiledCalls ? 13 : 0); + return (DebugVtables ? 636 : 72) + (CountCompiledCalls ? 13 : 0); } } diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/x86_32.ad --- a/src/cpu/x86/vm/x86_32.ad Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/x86_32.ad Tue Apr 22 15:36:18 2008 -0700 @@ -10970,7 +10970,7 @@ %} instruct convI2XD_reg(regXD dst, eRegI src) %{ - predicate( UseSSE>=2 ); + predicate( UseSSE>=2 && !UseXmmI2D ); match(Set dst (ConvI2D src)); format %{ "CVTSI2SD $dst,$src" %} opcode(0xF2, 0x0F, 0x2A); @@ -10987,6 +10987,20 @@ ins_pipe( pipe_slow ); %} +instruct convXI2XD_reg(regXD dst, eRegI src) +%{ + predicate( UseSSE>=2 && UseXmmI2D ); + match(Set dst (ConvI2D src)); + + format %{ "MOVD $dst,$src\n\t" + "CVTDQ2PD $dst,$dst\t# i2d" %} + ins_encode %{ + __ movd($dst$$XMMRegister, $src$$Register); + __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe(pipe_slow); // XXX +%} + instruct convI2D_mem(regD dst, memory mem) %{ predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); match(Set dst (ConvI2D (LoadI mem))); @@ -11062,7 +11076,7 @@ // Convert an int to a float in xmm; no rounding step needed. 
instruct convI2X_reg(regX dst, eRegI src) %{ - predicate(UseSSE>=1); + predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); match(Set dst (ConvI2F src)); format %{ "CVTSI2SS $dst, $src" %} @@ -11071,6 +11085,20 @@ ins_pipe( pipe_slow ); %} + instruct convXI2X_reg(regX dst, eRegI src) +%{ + predicate( UseSSE>=2 && UseXmmI2F ); + match(Set dst (ConvI2F src)); + + format %{ "MOVD $dst,$src\n\t" + "CVTDQ2PS $dst,$dst\t# i2f" %} + ins_encode %{ + __ movd($dst$$XMMRegister, $src$$Register); + __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe(pipe_slow); // XXX +%} + instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{ match(Set dst (ConvI2L src)); effect(KILL cr); diff -r 8b0b3490194f -r ad0b851458ff src/cpu/x86/vm/x86_64.ad --- a/src/cpu/x86/vm/x86_64.ad Wed Apr 09 11:18:58 2008 -0700 +++ b/src/cpu/x86/vm/x86_64.ad Tue Apr 22 15:36:18 2008 -0700 @@ -10098,6 +10098,7 @@ instruct convI2F_reg_reg(regF dst, rRegI src) %{ + predicate(!UseXmmI2F); match(Set dst (ConvI2F src)); format %{ "cvtsi2ssl $dst, $src\t# i2f" %} @@ -10118,6 +10119,7 @@ instruct convI2D_reg_reg(regD dst, rRegI src) %{ + predicate(!UseXmmI2D); match(Set dst (ConvI2D src)); format %{ "cvtsi2sdl $dst, $src\t# i2d" %} @@ -10136,6 +10138,34 @@ ins_pipe(pipe_slow); // XXX %} +instruct convXI2F_reg(regF dst, rRegI src) +%{ + predicate(UseXmmI2F); + match(Set dst (ConvI2F src)); + + format %{ "movdl $dst, $src\n\t" + "cvtdq2psl $dst, $dst\t# i2f" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe(pipe_slow); // XXX +%} + +instruct convXI2D_reg(regD dst, rRegI src) +%{ + predicate(UseXmmI2D); + match(Set dst (ConvI2D src)); + + format %{ "movdl $dst, $src\n\t" + "cvtdq2pdl $dst, $dst\t# i2d" %} + ins_encode %{ + __ movdl($dst$$XMMRegister, $src$$Register); + __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); + %} + ins_pipe(pipe_slow); // XXX +%} + instruct convL2F_reg_reg(regF dst, rRegL src) %{ match(Set 
dst (ConvL2F src)); diff -r 8b0b3490194f -r ad0b851458ff src/os/linux/vm/attachListener_linux.cpp --- a/src/os/linux/vm/attachListener_linux.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/os/linux/vm/attachListener_linux.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -232,7 +232,7 @@ // where is the protocol version (1), is the command // name ("load", "datadump", ...), and is an argument int expected_str_count = 2 + AttachOperation::arg_count_max; - int max_len = (strlen(ver_str) + 1) + (AttachOperation::name_length_max + 1) + + const int max_len = (sizeof(ver_str) + 1) + (AttachOperation::name_length_max + 1) + AttachOperation::arg_count_max*(AttachOperation::arg_length_max + 1); char buf[max_len]; diff -r 8b0b3490194f -r ad0b851458ff src/os/linux/vm/os_linux.cpp --- a/src/os/linux/vm/os_linux.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/os/linux/vm/os_linux.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -116,6 +116,20 @@ return Linux::physical_memory(); } +julong os::allocatable_physical_memory(julong size) { +#ifdef _LP64 + return size; +#else + julong result = MIN2(size, (julong)3800*M); + if (!is_allocatable(result)) { + // See comments under solaris for alignment considerations + julong reasonable_size = (julong)2*G - 2 * os::vm_page_size(); + result = MIN2(size, reasonable_size); + } + return result; +#endif // _LP64 +} + //////////////////////////////////////////////////////////////////////////////// // environment support @@ -1247,19 +1261,13 @@ return (1000 * 1000); } -jlong os::timeofday() { +jlong os::javaTimeMillis() { timeval time; int status = gettimeofday(&time, NULL); assert(status != -1, "linux error"); return jlong(time.tv_sec) * 1000 + jlong(time.tv_usec / 1000); } -// Must return millis since Jan 1 1970 for JVM_CurrentTimeMillis -// _use_global_time is only set if CacheTimeMillis is true -jlong os::javaTimeMillis() { - return (_use_global_time ? 
read_global_time() : timeofday()); -} - #ifndef CLOCK_MONOTONIC #define CLOCK_MONOTONIC (1) #endif @@ -2472,6 +2480,10 @@ return false; } +bool os::can_execute_large_page_memory() { + return false; +} + // Reserve memory at an arbitrary address, only if that area is // available (and not reserved for something else). diff -r 8b0b3490194f -r ad0b851458ff src/os/solaris/vm/os_solaris.cpp --- a/src/os/solaris/vm/os_solaris.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/os/solaris/vm/os_solaris.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1691,19 +1691,14 @@ return (jlong)(nanotime / NANOSECS_PER_MILLISECS); } -jlong os::timeofday() { +// Must return millis since Jan 1 1970 for JVM_CurrentTimeMillis +jlong os::javaTimeMillis() { timeval t; if (gettimeofday( &t, NULL) == -1) - fatal1("timeofday: gettimeofday (%s)", strerror(errno)); + fatal1("os::javaTimeMillis: gettimeofday (%s)", strerror(errno)); return jlong(t.tv_sec) * 1000 + jlong(t.tv_usec) / 1000; } -// Must return millis since Jan 1 1970 for JVM_CurrentTimeMillis -// _use_global_time is only set if CacheTimeMillis is true -jlong os::javaTimeMillis() { - return (_use_global_time ? 
read_global_time() : timeofday()); -} - jlong os::javaTimeNanos() { return (jlong)getTimeNanos(); } @@ -2785,16 +2780,15 @@ return b; } -char* -os::reserve_memory(size_t bytes, char* requested_addr, size_t alignment_hint) { - char* addr = NULL; - int flags; - - flags = MAP_PRIVATE | MAP_NORESERVE; - if (requested_addr != NULL) { - flags |= MAP_FIXED; - addr = requested_addr; - } else if (has_map_align && alignment_hint > (size_t) vm_page_size()) { +char* os::Solaris::anon_mmap(char* requested_addr, size_t bytes, size_t alignment_hint, bool fixed) { + char* addr = requested_addr; + int flags = MAP_PRIVATE | MAP_NORESERVE; + + assert(!(fixed && (alignment_hint > 0)), "alignment hint meaningless with fixed mmap"); + + if (fixed) { + flags |= MAP_FIXED; + } else if (has_map_align && (alignment_hint > (size_t) vm_page_size())) { flags |= MAP_ALIGN; addr = (char*) alignment_hint; } @@ -2802,11 +2796,14 @@ // Map uncommitted pages PROT_NONE so we fail early if we touch an // uncommitted page. Otherwise, the read/write might succeed if we // have enough swap space to back the physical page. - addr = Solaris::mmap_chunk(addr, bytes, flags, PROT_NONE); + return mmap_chunk(addr, bytes, flags, PROT_NONE); +} + +char* os::reserve_memory(size_t bytes, char* requested_addr, size_t alignment_hint) { + char* addr = Solaris::anon_mmap(requested_addr, bytes, alignment_hint, (requested_addr != NULL)); guarantee(requested_addr == NULL || requested_addr == addr, "OS failed to return requested mmap address."); - return addr; } @@ -2832,6 +2829,31 @@ // in one of the methods further up the call chain. See bug 5044738. assert(bytes % os::vm_page_size() == 0, "reserving unexpected size block"); + // Since snv_84, Solaris attempts to honor the address hint - see 5003415. + // Give it a try, if the kernel honors the hint we can return immediately. 
+ char* addr = Solaris::anon_mmap(requested_addr, bytes, 0, false); + volatile int err = errno; + if (addr == requested_addr) { + return addr; + } else if (addr != NULL) { + unmap_memory(addr, bytes); + } + + if (PrintMiscellaneous && Verbose) { + char buf[256]; + buf[0] = '\0'; + if (addr == NULL) { + jio_snprintf(buf, sizeof(buf), ": %s", strerror(err)); + } + warning("attempt_reserve_memory_at: couldn't reserve %d bytes at " + PTR_FORMAT ": reserve_memory_helper returned " PTR_FORMAT + "%s", bytes, requested_addr, addr, buf); + } + + // Address hint method didn't work. Fall back to the old method. + // In theory, once SNV becomes our oldest supported platform, this + // code will no longer be needed. + // // Repeatedly allocate blocks until the block is allocated at the // right spot. Give up after max_tries. int i; @@ -3067,6 +3089,8 @@ if (UseISM) { // ISM disables MPSS to be compatible with old JDK behavior UseMPSS = false; + _page_sizes[0] = _large_page_size; + _page_sizes[1] = vm_page_size(); } UseMPSS = UseMPSS && @@ -3156,6 +3180,10 @@ return UseISM ? false : true; } +bool os::can_execute_large_page_memory() { + return UseISM ? 
false : true; +} + static int os_sleep(jlong millis, bool interruptible) { const jlong limit = INT_MAX; jlong prevtime; diff -r 8b0b3490194f -r ad0b851458ff src/os/solaris/vm/os_solaris.hpp --- a/src/os/solaris/vm/os_solaris.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/os/solaris/vm/os_solaris.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -156,6 +156,7 @@ static int get_dev_zero_fd() { return _dev_zero_fd; } static void set_dev_zero_fd(int fd) { _dev_zero_fd = fd; } static char* mmap_chunk(char *addr, size_t size, int flags, int prot); + static char* anon_mmap(char* requested_addr, size_t bytes, size_t alignment_hint, bool fixed); static bool mpss_sanity_check(bool warn, size_t * page_size); static bool ism_sanity_check (bool warn, size_t * page_size); diff -r 8b0b3490194f -r ad0b851458ff src/os/windows/vm/os_windows.cpp --- a/src/os/windows/vm/os_windows.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/os/windows/vm/os_windows.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -621,7 +621,12 @@ } julong os::allocatable_physical_memory(julong size) { +#ifdef _LP64 + return size; +#else + // Limit to 1400m because of the 2gb address space wall return MIN2(size, (julong)1400*M); +#endif } // VC6 lacks DWORD_PTR @@ -732,20 +737,13 @@ return result; } -jlong os::timeofday() { - FILETIME wt; - GetSystemTimeAsFileTime(&wt); - return windows_to_java_time(wt); -} - - -// Must return millis since Jan 1 1970 for JVM_CurrentTimeMillis -// _use_global_time is only set if CacheTimeMillis is true jlong os::javaTimeMillis() { if (UseFakeTimers) { return fake_time++; } else { - return (_use_global_time ? 
read_global_time() : timeofday()); + FILETIME wt; + GetSystemTimeAsFileTime(&wt); + return windows_to_java_time(wt); } } @@ -2518,9 +2516,13 @@ return false; } +bool os::can_execute_large_page_memory() { + return true; +} + char* os::reserve_memory_special(size_t bytes) { DWORD flag = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES; - char * res = (char *)VirtualAlloc(NULL, bytes, flag, PAGE_READWRITE); + char * res = (char *)VirtualAlloc(NULL, bytes, flag, PAGE_EXECUTE_READWRITE); return res; } diff -r 8b0b3490194f -r ad0b851458ff src/os_cpu/linux_x86/vm/bytes_linux_x86.inline.hpp --- a/src/os_cpu/linux_x86/vm/bytes_linux_x86.inline.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/os_cpu/linux_x86/vm/bytes_linux_x86.inline.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -60,7 +60,18 @@ #ifdef AMD64 inline u8 Bytes::swap_u8(u8 x) { +#ifdef SPARC_WORKS + // workaround for SunStudio12 CR6615391 + __asm__ __volatile__ ( + "bswapq %0" + :"=r" (x) // output : register 0 => x + :"0" (x) // input : x => register 0 + :"0" // clobbered register + ); + return x; +#else return bswap_64(x); +#endif } #else // Helper function for swap_u8 diff -r 8b0b3490194f -r ad0b851458ff src/os_cpu/linux_x86/vm/os_linux_x86.cpp --- a/src/os_cpu/linux_x86/vm/os_linux_x86.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/os_cpu/linux_x86/vm/os_linux_x86.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -62,8 +62,14 @@ #endif // AMD64 address os::current_stack_pointer() { +#ifdef SPARC_WORKS + register void *esp; + __asm__("mov %%"SPELL_REG_SP", %0":"=r"(esp)); + return (address) ((char*)esp + sizeof(long)*2); +#else register void *esp __asm__ (SPELL_REG_SP); return (address) esp; +#endif } char* os::non_memory_address_word() { @@ -139,7 +145,12 @@ } intptr_t* _get_previous_fp() { +#ifdef SPARC_WORKS + register intptr_t **ebp; + __asm__("mov %%"SPELL_REG_FP", %0":"=r"(ebp)); +#else register intptr_t **ebp __asm__ (SPELL_REG_FP); +#endif return (intptr_t*) *ebp; // we want what it points to. 
} @@ -157,23 +168,8 @@ } } - // Utility functions -julong os::allocatable_physical_memory(julong size) { -#ifdef AMD64 - return size; -#else - julong result = MIN2(size, (julong)3800*M); - if (!is_allocatable(result)) { - // See comments under solaris for alignment considerations - julong reasonable_size = (julong)2*G - 2 * os::vm_page_size(); - result = MIN2(size, reasonable_size); - } - return result; -#endif // AMD64 -} - // From IA32 System Programming Guide enum { trap_page_fault = 0xE @@ -575,7 +571,9 @@ #else size_t os::Linux::min_stack_allowed = (48 DEBUG_ONLY(+4))*K; +#ifdef __GNUC__ #define GET_GS() ({int gs; __asm__ volatile("movw %%gs, %w0":"=q"(gs)); gs&0xffff;}) +#endif // Test if pthread library can support variable thread stack size. LinuxThreads // in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads @@ -606,7 +604,11 @@ // return true and skip _thread_safety_check(), so we may not be able to // detect stack-heap collisions. But otherwise it's harmless. 
// +#ifdef __GNUC__ return (GET_GS() != 0); +#else + return false; +#endif } } #endif // AMD64 diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/c1/c1_LIR.cpp --- a/src/share/vm/c1/c1_LIR.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/c1/c1_LIR.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -105,7 +105,7 @@ LIR_Address::Scale LIR_Address::scale(BasicType type) { - int elem_size = type2aelembytes[type]; + int elem_size = type2aelembytes(type); switch (elem_size) { case 1: return LIR_Address::times_1; case 2: return LIR_Address::times_2; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/bcEscapeAnalyzer.cpp --- a/src/share/vm/ci/bcEscapeAnalyzer.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/ci/bcEscapeAnalyzer.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -104,7 +104,7 @@ }; void BCEscapeAnalyzer::set_returned(ArgumentMap vars) { - for (int i = 0; i <= _arg_size; i++) { + for (int i = 0; i < _arg_size; i++) { if (vars.contains(i)) _arg_returned.set_bit(i); } @@ -112,10 +112,9 @@ _return_allocated = _return_allocated && vars.contains_allocated() && !(vars.contains_unknown() || vars.contains_vars()); } - // return true if any element of vars is an argument bool BCEscapeAnalyzer::is_argument(ArgumentMap vars) { - for (int i = 0; i <= _arg_size; i++) { + for (int i = 0; i < _arg_size; i++) { if (vars.contains(i)) return true; } @@ -126,7 +125,7 @@ bool BCEscapeAnalyzer::is_arg_stack(ArgumentMap vars){ if (_conservative) return true; - for (int i = 0; i <= _arg_size; i++) { + for (int i = 0; i < _arg_size; i++) { if (vars.contains(i) && _arg_stack.at(i)) return true; } @@ -134,12 +133,13 @@ } void BCEscapeAnalyzer::clear_bits(ArgumentMap vars, BitMap &bm) { - for (int i = 0; i <= _arg_size; i++) { + for (int i = 0; i < _arg_size; i++) { if (vars.contains(i)) { bm.clear_bit(i); } } } + void BCEscapeAnalyzer::set_method_escape(ArgumentMap vars) { clear_bits(vars, _arg_local); } @@ -155,6 +155,17 @@ clear_bits(vars, _dirty); } +void 
BCEscapeAnalyzer::set_modified(ArgumentMap vars, int offs, int size) { + + for (int i = 0; i < _arg_size; i++) { + if (vars.contains(i)) { + set_arg_modified(i, offs, size); + } + } + if (vars.contains_unknown()) + _unknown_modified = true; +} + bool BCEscapeAnalyzer::is_recursive_call(ciMethod* callee) { for (BCEscapeAnalyzer* scope = this; scope != NULL; scope = scope->_parent) { if (scope->method() == callee) { @@ -164,6 +175,40 @@ return false; } +bool BCEscapeAnalyzer::is_arg_modified(int arg, int offset, int size_in_bytes) { + if (offset == OFFSET_ANY) + return _arg_modified[arg] != 0; + assert(arg >= 0 && arg < _arg_size, "must be an argument."); + bool modified = false; + int l = offset / HeapWordSize; + int h = round_to(offset + size_in_bytes, HeapWordSize) / HeapWordSize; + if (l > ARG_OFFSET_MAX) + l = ARG_OFFSET_MAX; + if (h > ARG_OFFSET_MAX+1) + h = ARG_OFFSET_MAX + 1; + for (int i = l; i < h; i++) { + modified = modified || (_arg_modified[arg] & (1 << i)) != 0; + } + return modified; +} + +void BCEscapeAnalyzer::set_arg_modified(int arg, int offset, int size_in_bytes) { + if (offset == OFFSET_ANY) { + _arg_modified[arg] = (uint) -1; + return; + } + assert(arg >= 0 && arg < _arg_size, "must be an argument."); + int l = offset / HeapWordSize; + int h = round_to(offset + size_in_bytes, HeapWordSize) / HeapWordSize; + if (l > ARG_OFFSET_MAX) + l = ARG_OFFSET_MAX; + if (h > ARG_OFFSET_MAX+1) + h = ARG_OFFSET_MAX + 1; + for (int i = l; i < h; i++) { + _arg_modified[arg] |= (1 << i); + } +} + void BCEscapeAnalyzer::invoke(StateInfo &state, Bytecodes::Code code, ciMethod* target, ciKlass* holder) { int i; @@ -197,6 +242,7 @@ for (i = 0; i < arg_size; i++) { set_method_escape(state.raw_pop()); } + _unknown_modified = true; // assume the worst since we don't analyze the called method return; } @@ -224,6 +270,11 @@ ArgumentMap arg = state.raw_pop(); if (!is_argument(arg)) continue; + for (int j = 0; j < _arg_size; j++) { + if (arg.contains(j)) { + 
_arg_modified[j] |= analyzer._arg_modified[i]; + } + } if (!is_arg_stack(arg)) { // arguments have already been recognized as escaping } else if (analyzer.is_arg_stack(i) && !analyzer.is_arg_returned(i)) { @@ -233,6 +284,7 @@ set_global_escape(arg); } } + _unknown_modified = _unknown_modified || analyzer.has_non_arg_side_affects(); // record dependencies if at least one parameter retained stack-allocatable if (must_record_dependencies) { @@ -250,8 +302,10 @@ ArgumentMap arg = state.raw_pop(); if (!is_argument(arg)) continue; + set_modified(arg, OFFSET_ANY, type2size[T_INT]*HeapWordSize); set_global_escape(arg); } + _unknown_modified = true; // assume the worst since we don't know the called method } } @@ -421,6 +475,7 @@ state.spop(); ArgumentMap arr = state.apop(); set_method_escape(arr); + set_modified(arr, OFFSET_ANY, type2size[T_INT]*HeapWordSize); break; } case Bytecodes::_lastore: @@ -430,6 +485,7 @@ state.spop(); ArgumentMap arr = state.apop(); set_method_escape(arr); + set_modified(arr, OFFSET_ANY, type2size[T_LONG]*HeapWordSize); break; } case Bytecodes::_aastore: @@ -437,6 +493,7 @@ set_global_escape(state.apop()); state.spop(); ArgumentMap arr = state.apop(); + set_modified(arr, OFFSET_ANY, type2size[T_OBJECT]*HeapWordSize); break; } case Bytecodes::_pop: @@ -762,6 +819,7 @@ if (s.cur_bc() != Bytecodes::_putstatic) { ArgumentMap p = state.apop(); set_method_escape(p); + set_modified(p, will_link ? 
field->offset() : OFFSET_ANY, type2size[field_type]*HeapWordSize); } } break; @@ -872,7 +930,7 @@ } void BCEscapeAnalyzer::merge_block_states(StateInfo *blockstates, ciBlock *dest, StateInfo *s_state) { - StateInfo *d_state = blockstates+dest->index(); + StateInfo *d_state = blockstates + dest->index(); int nlocals = _method->max_locals(); // exceptions may cause transfer of control to handlers in the middle of a @@ -916,6 +974,7 @@ } for (int i = 0; i < s_state->_stack_height; i++) { ArgumentMap t; + //extra_vars |= !d_state->_vars[i] & s_state->_vars[i]; t.clear(); t = s_state->_stack[i]; t.set_difference(d_state->_stack[i]); @@ -933,7 +992,7 @@ int datacount = (numblocks + 1) * (stkSize + numLocals); int datasize = datacount * sizeof(ArgumentMap); - StateInfo *blockstates = (StateInfo *) arena->Amalloc(_methodBlocks->num_blocks() * sizeof(StateInfo)); + StateInfo *blockstates = (StateInfo *) arena->Amalloc(numblocks * sizeof(StateInfo)); ArgumentMap *statedata = (ArgumentMap *) arena->Amalloc(datasize); for (int i = 0; i < datacount; i++) ::new ((void*)&statedata[i]) ArgumentMap(); ArgumentMap *dp = statedata; @@ -961,33 +1020,35 @@ ArgumentMap allVars; // all oop arguments to method ciSignature* sig = method()->signature(); int j = 0; + ciBlock* first_blk = _methodBlocks->block_containing(0); + int fb_i = first_blk->index(); if (!method()->is_static()) { // record information for "this" - blockstates[0]._vars[j].set(j); + blockstates[fb_i]._vars[j].set(j); allVars.add(j); j++; } for (int i = 0; i < sig->count(); i++) { ciType* t = sig->type_at(i); if (!t->is_primitive_type()) { - blockstates[0]._vars[j].set(j); + blockstates[fb_i]._vars[j].set(j); allVars.add(j); } j += t->size(); } - blockstates[0]._initialized = true; + blockstates[fb_i]._initialized = true; assert(j == _arg_size, "just checking"); ArgumentMap unknown_map; unknown_map.add_unknown(); - worklist.push(_methodBlocks->block_containing(0)); + worklist.push(first_blk); while(worklist.length() > 0) { 
ciBlock *blk = worklist.pop(); - StateInfo *blkState = blockstates+blk->index(); + StateInfo *blkState = blockstates + blk->index(); if (blk->is_handler() || blk->is_ret_target()) { // for an exception handler or a target of a ret instruction, we assume the worst case, - // that any variable or stack slot could contain any argument + // that any variable could contain any argument for (int i = 0; i < numLocals; i++) { state._vars[i] = allVars; } @@ -997,6 +1058,7 @@ state._stack_height = blkState->_stack_height; } for (int i = 0; i < state._stack_height; i++) { +// ??? should this be unknown_map ??? state._stack[i] = allVars; } } else { @@ -1053,6 +1115,7 @@ vmIntrinsics::ID iid = method()->intrinsic_id(); if (iid == vmIntrinsics::_getClass || + iid == vmIntrinsics::_fillInStackTrace || iid == vmIntrinsics::_hashCode) return iid; else @@ -1060,12 +1123,16 @@ } bool BCEscapeAnalyzer::compute_escape_for_intrinsic(vmIntrinsics::ID iid) { - ArgumentMap empty; - empty.clear(); + ArgumentMap arg; + arg.clear(); switch (iid) { case vmIntrinsics::_getClass: _return_local = false; break; + case vmIntrinsics::_fillInStackTrace: + arg.set(0); // 'this' + set_returned(arg); + break; case vmIntrinsics::_hashCode: // initialized state is correct break; @@ -1109,15 +1176,21 @@ _return_allocated = true; } _allocated_escapes = false; + _unknown_modified = false; } void BCEscapeAnalyzer::clear_escape_info() { ciSignature* sig = method()->signature(); int arg_count = sig->count(); ArgumentMap var; + if (!method()->is_static()) { + arg_count++; // allow for "this" + } for (int i = 0; i < arg_count; i++) { + set_arg_modified(i, OFFSET_ANY, 4); var.clear(); var.set(i); + set_modified(var, OFFSET_ANY, 4); set_global_escape(var); } _arg_local.clear(); @@ -1126,6 +1199,7 @@ _return_local = false; _return_allocated = false; _allocated_escapes = true; + _unknown_modified = true; } @@ -1173,8 +1247,14 @@ initialize(); - // do not scan method if it has no object parameters - if 
(_arg_local.is_empty()) { + // Do not scan method if it has no object parameters and + // does not returns an object (_return_allocated is set in initialize()). + if (_arg_local.is_empty() && !_return_allocated) { + // Clear all info since method's bytecode was not analysed and + // set pessimistic escape information. + clear_escape_info(); + methodData()->set_eflag(methodDataOopDesc::allocated_escapes); + methodData()->set_eflag(methodDataOopDesc::unknown_modified); methodData()->set_eflag(methodDataOopDesc::estimated); return; } @@ -1185,36 +1265,8 @@ success = do_analysis(); } - // dump result of bytecode analysis -#ifndef PRODUCT - if (BCEATraceLevel >= 3) { - tty->print("[EA] estimated escape information for"); - if (iid != vmIntrinsics::_none) - tty->print(" intrinsic"); - method()->print_short_name(); - tty->print_cr(has_dependencies() ? " (not stored)" : ""); - tty->print(" non-escaping args: "); - _arg_local.print_on(tty); - tty->print(" stack-allocatable args: "); - _arg_stack.print_on(tty); - if (_return_local) { - tty->print(" returned args: "); - _arg_returned.print_on(tty); - } else if (is_return_allocated()) { - tty->print_cr(" allocated return values"); - } else { - tty->print_cr(" non-local return values"); - } - tty->cr(); - tty->print(" flags: "); - if (_return_allocated) - tty->print(" return_allocated"); - tty->cr(); - } - -#endif - // don't store interprocedural escape information if it introduces dependencies - // or if method data is empty + // don't store interprocedural escape information if it introduces + // dependencies or if method data is empty // if (!has_dependencies() && !methodData()->is_empty()) { for (i = 0; i < _arg_size; i++) { @@ -1228,10 +1280,20 @@ if (_arg_returned.at(i)) { methodData()->set_arg_returned(i); } + methodData()->set_arg_modified(i, _arg_modified[i]); } if (_return_local) { methodData()->set_eflag(methodDataOopDesc::return_local); } + if (_return_allocated) { + 
methodData()->set_eflag(methodDataOopDesc::return_allocated); + } + if (_allocated_escapes) { + methodData()->set_eflag(methodDataOopDesc::allocated_escapes); + } + if (_unknown_modified) { + methodData()->set_eflag(methodDataOopDesc::unknown_modified); + } methodData()->set_eflag(methodDataOopDesc::estimated); } } @@ -1244,29 +1306,50 @@ _arg_local.at_put(i, methodData()->is_arg_local(i)); _arg_stack.at_put(i, methodData()->is_arg_stack(i)); _arg_returned.at_put(i, methodData()->is_arg_returned(i)); + _arg_modified[i] = methodData()->arg_modified(i); } _return_local = methodData()->eflag_set(methodDataOopDesc::return_local); - - // dump result of loaded escape information -#ifndef PRODUCT - if (BCEATraceLevel >= 4) { - tty->print(" non-escaping args: "); - _arg_local.print_on(tty); - tty->print(" stack-allocatable args: "); - _arg_stack.print_on(tty); - if (_return_local) { - tty->print(" returned args: "); - _arg_returned.print_on(tty); - } else { - tty->print_cr(" non-local return values"); - } - tty->print(" modified args: "); - tty->cr(); - } -#endif + _return_allocated = methodData()->eflag_set(methodDataOopDesc::return_allocated); + _allocated_escapes = methodData()->eflag_set(methodDataOopDesc::allocated_escapes); + _unknown_modified = methodData()->eflag_set(methodDataOopDesc::unknown_modified); } +#ifndef PRODUCT +void BCEscapeAnalyzer::dump() { + tty->print("[EA] estimated escape information for"); + method()->print_short_name(); + tty->print_cr(has_dependencies() ? 
" (not stored)" : ""); + tty->print(" non-escaping args: "); + _arg_local.print_on(tty); + tty->print(" stack-allocatable args: "); + _arg_stack.print_on(tty); + if (_return_local) { + tty->print(" returned args: "); + _arg_returned.print_on(tty); + } else if (is_return_allocated()) { + tty->print_cr(" return allocated value"); + } else { + tty->print_cr(" return non-local value"); + } + tty->print(" modified args: "); + for (int i = 0; i < _arg_size; i++) { + if (_arg_modified[i] == 0) + tty->print(" 0"); + else + tty->print(" 0x%x", _arg_modified[i]); + } + tty->cr(); + tty->print(" flags: "); + if (_return_allocated) + tty->print(" return_allocated"); + if (_allocated_escapes) + tty->print(" allocated_escapes"); + if (_unknown_modified) + tty->print(" unknown_modified"); + tty->cr(); +} +#endif BCEscapeAnalyzer::BCEscapeAnalyzer(ciMethod* method, BCEscapeAnalyzer* parent) : _conservative(method == NULL || !EstimateArgEscape) @@ -1281,6 +1364,7 @@ , _return_local(false) , _return_allocated(false) , _allocated_escapes(false) + , _unknown_modified(false) , _dependencies() , _parent(parent) , _level(parent == NULL ? 
0 : parent->level() + 1) { @@ -1290,6 +1374,8 @@ _arg_returned.clear(); _dirty.clear(); Arena* arena = CURRENT_ENV->arena(); + _arg_modified = (uint *) arena->Amalloc(_arg_size * sizeof(uint)); + Copy::zero_to_bytes(_arg_modified, _arg_size * sizeof(uint)); if (methodData() == NULL) return; @@ -1307,6 +1393,12 @@ compute_escape_info(); methodData()->update_escape_info(); } +#ifndef PRODUCT + if (BCEATraceLevel >= 3) { + // dump escape information + dump(); + } +#endif } } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/bcEscapeAnalyzer.hpp --- a/src/share/vm/ci/bcEscapeAnalyzer.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/ci/bcEscapeAnalyzer.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -46,10 +46,13 @@ BitMap _arg_stack; BitMap _arg_returned; BitMap _dirty; + enum{ ARG_OFFSET_MAX = 31}; + uint *_arg_modified; bool _return_local; + bool _return_allocated; bool _allocated_escapes; - bool _return_allocated; + bool _unknown_modified; ciObjectList _dependencies; @@ -80,6 +83,7 @@ void set_method_escape(ArgumentMap vars); void set_global_escape(ArgumentMap vars); void set_dirty(ArgumentMap vars); + void set_modified(ArgumentMap vars, int offs, int size); bool is_recursive_call(ciMethod* callee); void add_dependence(ciKlass *klass, ciMethod *meth); @@ -140,6 +144,18 @@ return !_conservative && _return_allocated && !_allocated_escapes; } + // Tracking of argument modification + + enum {OFFSET_ANY = -1}; + bool is_arg_modified(int arg, int offset, int size_in_bytes); + void set_arg_modified(int arg, int offset, int size_in_bytes); + bool has_non_arg_side_affects() { return _unknown_modified; } + // Copy dependencies from this analysis into "deps" void copy_dependencies(Dependencies *deps); + +#ifndef PRODUCT + // dump escape information + void dump(); +#endif }; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/ciField.hpp --- a/src/share/vm/ci/ciField.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/ci/ciField.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -102,7 
+102,7 @@ BasicType layout_type() { return type2field[(_type == NULL) ? T_OBJECT : _type->basic_type()]; } // How big is this field in memory? - int size_in_bytes() { return type2aelembytes[layout_type()]; } + int size_in_bytes() { return type2aelembytes(layout_type()); } // What is the offset of this field? int offset() { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/ciInstanceKlass.cpp --- a/src/share/vm/ci/ciInstanceKlass.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/ci/ciInstanceKlass.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -34,7 +34,9 @@ // ciInstanceKlass::ciInstanceKlass // // Loaded instance klass. -ciInstanceKlass::ciInstanceKlass(KlassHandle h_k) : ciKlass(h_k) { +ciInstanceKlass::ciInstanceKlass(KlassHandle h_k) : + ciKlass(h_k), _non_static_fields(NULL) +{ assert(get_Klass()->oop_is_instance(), "wrong type"); instanceKlass* ik = get_instanceKlass(); @@ -335,6 +337,37 @@ return field; } +// ------------------------------------------------------------------ +// ciInstanceKlass::non_static_fields. + +class NonStaticFieldFiller: public FieldClosure { + GrowableArray* _arr; + ciEnv* _curEnv; +public: + NonStaticFieldFiller(ciEnv* curEnv, GrowableArray* arr) : + _curEnv(curEnv), _arr(arr) + {} + void do_field(fieldDescriptor* fd) { + ciField* field = new (_curEnv->arena()) ciField(fd); + _arr->append(field); + } +}; + +GrowableArray* ciInstanceKlass::non_static_fields() { + if (_non_static_fields == NULL) { + VM_ENTRY_MARK; + ciEnv* curEnv = ciEnv::current(); + instanceKlass* ik = get_instanceKlass(); + int max_n_fields = ik->fields()->length()/instanceKlass::next_offset; + + _non_static_fields = + new (curEnv->arena()) GrowableArray(max_n_fields); + NonStaticFieldFiller filler(curEnv, _non_static_fields); + ik->do_nonstatic_fields(&filler); + } + return _non_static_fields; +} + static int sort_field_by_offset(ciField** a, ciField** b) { return (*a)->offset_in_bytes() - (*b)->offset_in_bytes(); // (no worries about 32-bit overflow...) 
diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/ciInstanceKlass.hpp --- a/src/share/vm/ci/ciInstanceKlass.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/ci/ciInstanceKlass.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -46,6 +46,7 @@ bool _has_subklass; ciFlags _flags; jint _nonstatic_field_size; + jint _nonstatic_oop_map_size; // Lazy fields get filled in only upon request. ciInstanceKlass* _super; @@ -58,6 +59,8 @@ ciInstanceKlass* _implementors[implementors_limit]; jint _nof_implementors; + GrowableArray* _non_static_fields; + protected: ciInstanceKlass(KlassHandle h_k); ciInstanceKlass(ciSymbol* name, jobject loader, jobject protection_domain); @@ -129,6 +132,9 @@ jint nonstatic_field_size() { assert(is_loaded(), "must be loaded"); return _nonstatic_field_size; } + jint nonstatic_oop_map_size() { + assert(is_loaded(), "must be loaded"); + return _nonstatic_oop_map_size; } ciInstanceKlass* super(); jint nof_implementors() { assert(is_loaded(), "must be loaded"); @@ -138,6 +144,9 @@ ciInstanceKlass* get_canonical_holder(int offset); ciField* get_field_by_offset(int field_offset, bool is_static); + + GrowableArray* non_static_fields(); + // total number of nonstatic fields (including inherited): int nof_nonstatic_fields() { if (_nonstatic_fields == NULL) diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/ciMethod.cpp --- a/src/share/vm/ci/ciMethod.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/ci/ciMethod.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -146,7 +146,7 @@ memcpy(_code, me->code_base(), code_size()); // Revert any breakpoint bytecodes in ci's copy - if (_is_compilable && me->number_of_breakpoints() > 0) { + if (me->number_of_breakpoints() > 0) { BreakpointInfo* bp = instanceKlass::cast(me->method_holder())->breakpoints(); for (; bp != NULL; bp = bp->next()) { if (bp->match(me)) { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/ciMethodBlocks.cpp --- a/src/share/vm/ci/ciMethodBlocks.cpp Wed Apr 09 11:18:58 2008 -0700 +++ 
b/src/share/vm/ci/ciMethodBlocks.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -67,6 +67,14 @@ break; } } + // Move an exception handler information if needed. + if (former_block->is_handler()) { + int ex_start = former_block->ex_start_bci(); + int ex_end = former_block->ex_limit_bci(); + new_block->set_exception_range(ex_start, ex_end); + // Clear information in former_block. + former_block->clear_exception_handler(); + } return former_block; } @@ -102,7 +110,7 @@ // one and end the old one. assert(cur_block != NULL, "must always have a current block"); ciBlock *new_block = block_containing(bci); - if (new_block == NULL) { + if (new_block == NULL || new_block == cur_block) { // We have not marked this bci as the start of a new block. // Keep interpreting the current_range. _bci_to_block[bci] = cur_block; @@ -254,9 +262,33 @@ for(ciExceptionHandlerStream str(meth); !str.is_done(); str.next()) { ciExceptionHandler* handler = str.handler(); ciBlock *eb = make_block_at(handler->handler_bci()); - eb->set_handler(); + // + // Several exception handlers can have the same handler_bci: + // + // try { + // if (a.foo(b) < 0) { + // return a.error(); + // } + // return CoderResult.UNDERFLOW; + // } finally { + // a.position(b); + // } + // + // The try block above is divided into 2 exception blocks + // separated by 'areturn' bci. + // int ex_start = handler->start(); int ex_end = handler->limit(); + if (eb->is_handler()) { + // Extend old handler exception range to cover additional range. 
+ int old_ex_start = eb->ex_start_bci(); + int old_ex_end = eb->ex_limit_bci(); + if (ex_start > old_ex_start) + ex_start = old_ex_start; + if (ex_end < old_ex_end) + ex_end = old_ex_end; + eb->clear_exception_handler(); // Reset exception information + } eb->set_exception_range(ex_start, ex_end); // ensure a block at the start of exception range and start of following code (void) make_block_at(ex_start); @@ -312,9 +344,10 @@ void ciBlock::set_exception_range(int start_bci, int limit_bci) { assert(limit_bci >= start_bci, "valid range"); - assert(is_handler(), "must be handler"); + assert(!is_handler() && _ex_start_bci == -1 && _ex_limit_bci == -1, "must not be handler"); _ex_start_bci = start_bci; _ex_limit_bci = limit_bci; + set_handler(); } #ifndef PRODUCT diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/ciMethodBlocks.hpp --- a/src/share/vm/ci/ciMethodBlocks.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/ci/ciMethodBlocks.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -110,9 +110,10 @@ void set_does_jsr() { _flags |= DoesJsr; } void clear_does_jsr() { _flags &= ~DoesJsr; } void set_does_ret() { _flags |= DoesRet; } - void clear_does_ret() { _flags |= DoesRet; } + void clear_does_ret() { _flags &= ~DoesRet; } void set_is_ret_target() { _flags |= RetTarget; } void set_has_handler() { _flags |= HasHandler; } + void clear_exception_handler() { _flags &= ~Handler; _ex_start_bci = -1; _ex_limit_bci = -1; } #ifndef PRODUCT ciMethod *method() const { return _method; } void dump(); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/ciMethodData.cpp --- a/src/share/vm/ci/ciMethodData.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/ci/ciMethodData.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -42,6 +42,8 @@ // Set an initial hint. Don't use set_hint_di() because // first_di() may be out of bounds if data_size is 0. 
_hint_di = first_di(); + // Initialize the escape information (to "don't know."); + _eflags = _arg_local = _arg_stack = _arg_returned = 0; } // ------------------------------------------------------------------ @@ -59,6 +61,8 @@ // Set an initial hint. Don't use set_hint_di() because // first_di() may be out of bounds if data_size is 0. _hint_di = first_di(); + // Initialize the escape information (to "don't know."); + _eflags = _arg_local = _arg_stack = _arg_returned = 0; } void ciMethodData::load_data() { @@ -142,6 +146,8 @@ return new ciBranchData(data_layout); case DataLayout::multi_branch_data_tag: return new ciMultiBranchData(data_layout); + case DataLayout::arg_info_data_tag: + return new ciArgInfoData(data_layout); }; } @@ -172,6 +178,9 @@ _saw_free_extra_data = true; // observed an empty slot (common case) return NULL; } + if (dp->tag() == DataLayout::arg_info_data_tag) { + break; // ArgInfoData is at the end of extra data section. + } if (dp->bci() == bci) { assert(dp->tag() == DataLayout::bit_data_tag, "sane"); return new ciBitData(dp); @@ -217,8 +226,14 @@ void ciMethodData::clear_escape_info() { VM_ENTRY_MARK; methodDataOop mdo = get_methodDataOop(); - if (mdo != NULL) + if (mdo != NULL) { mdo->clear_escape_info(); + ArgInfoData *aid = arg_info(); + int arg_count = (aid == NULL) ? 
0 : aid->number_of_args(); + for (int i = 0; i < arg_count; i++) { + set_arg_modified(i, 0); + } + } _eflags = _arg_local = _arg_stack = _arg_returned = 0; } @@ -231,6 +246,10 @@ mdo->set_arg_local(_arg_local); mdo->set_arg_stack(_arg_stack); mdo->set_arg_returned(_arg_returned); + int arg_count = mdo->method()->size_of_parameters(); + for (int i = 0; i < arg_count; i++) { + mdo->set_arg_modified(i, arg_modified(i)); + } } } @@ -262,6 +281,14 @@ set_nth_bit(_arg_returned, i); } +void ciMethodData::set_arg_modified(int arg, uint val) { + ArgInfoData *aid = arg_info(); + if (aid == NULL) + return; + assert(arg >= 0 && arg < aid->number_of_args(), "valid argument number"); + aid->set_arg_modified(arg, val); +} + bool ciMethodData::is_arg_local(int i) const { return is_set_nth_bit(_arg_local, i); } @@ -274,6 +301,14 @@ return is_set_nth_bit(_arg_returned, i); } +uint ciMethodData::arg_modified(int arg) const { + ArgInfoData *aid = arg_info(); + if (aid == NULL) + return 0; + assert(arg >= 0 && arg < aid->number_of_args(), "valid argument number"); + return aid->arg_modified(arg); +} + ByteSize ciMethodData::offset_of_slot(ciProfileData* data, ByteSize slot_offset_in_data) { // Get offset within methodDataOop of the data array ByteSize data_offset = methodDataOopDesc::data_offset(); @@ -287,6 +322,18 @@ return in_ByteSize(offset); } +ciArgInfoData *ciMethodData::arg_info() const { + // Should be last, have to skip all traps. + DataLayout* dp = data_layout_at(data_size()); + DataLayout* end = data_layout_at(data_size() + extra_data_size()); + for (; dp < end; dp = methodDataOopDesc::next_extra(dp)) { + if (dp->tag() == DataLayout::arg_info_data_tag) + return new ciArgInfoData(dp); + } + return NULL; +} + + // Implementation of the print method. 
void ciMethodData::print_impl(outputStream* st) { ciObject::print_impl(st); @@ -305,6 +352,22 @@ st->fill_to(6); data->print_data_on(st); } + st->print_cr("--- Extra data:"); + DataLayout* dp = data_layout_at(data_size()); + DataLayout* end = data_layout_at(data_size() + extra_data_size()); + for (; dp < end; dp = methodDataOopDesc::next_extra(dp)) { + if (dp->tag() == DataLayout::no_tag) continue; + if (dp->tag() == DataLayout::bit_data_tag) { + data = new BitData(dp); + } else { + assert(dp->tag() == DataLayout::arg_info_data_tag, "must be BitData or ArgInfo"); + data = new ciArgInfoData(dp); + dp = end; // ArgInfoData is at the end of extra data section. + } + st->print("%d", dp_to_di(data->dp())); + st->fill_to(6); + data->print_data_on(st); + } } void ciReceiverTypeData::print_receiver_data_on(outputStream* st) { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/ciMethodData.hpp --- a/src/share/vm/ci/ciMethodData.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/ci/ciMethodData.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -30,6 +30,7 @@ class ciBranchData; class ciArrayData; class ciMultiBranchData; +class ciArgInfoData; typedef ProfileData ciProfileData; @@ -121,6 +122,11 @@ ciMultiBranchData(DataLayout* layout) : MultiBranchData(layout) {}; }; +class ciArgInfoData : public ArgInfoData { +public: + ciArgInfoData(DataLayout* layout) : ArgInfoData(layout) {}; +}; + // ciMethodData // // This class represents a methodDataOop in the HotSpot virtual @@ -163,9 +169,9 @@ ciMethodData(); // Accessors - int data_size() { return _data_size; } - int extra_data_size() { return _extra_data_size; } - intptr_t * data() { return _data; } + int data_size() const { return _data_size; } + int extra_data_size() const { return _extra_data_size; } + intptr_t * data() const { return _data; } methodDataOop get_methodDataOop() const { if (handle() == NULL) return NULL; @@ -178,7 +184,7 @@ void print_impl(outputStream* st); - DataLayout* data_layout_at(int data_index) { + DataLayout* 
data_layout_at(int data_index) const { assert(data_index % sizeof(intptr_t) == 0, "unaligned"); return (DataLayout*) (((address)_data) + data_index); } @@ -207,6 +213,8 @@ // What is the index of the first data entry? int first_di() { return 0; } + ciArgInfoData *arg_info() const; + public: bool is_method_data() { return true; } bool is_empty() { return _state == empty_state; } @@ -270,10 +278,12 @@ void set_arg_local(int i); void set_arg_stack(int i); void set_arg_returned(int i); + void set_arg_modified(int arg, uint val); bool is_arg_local(int i) const; bool is_arg_stack(int i) const; bool is_arg_returned(int i) const; + uint arg_modified(int arg) const; // Code generation helper ByteSize offset_of_slot(ciProfileData* data, ByteSize slot_offset_in_data); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/ciObjArray.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/ci/ciObjArray.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -0,0 +1,43 @@ +/* + * Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_ciObjArray.cpp.incl" + +// ciObjArray +// +// This class represents an objArrayOop in the HotSpot virtual +// machine. + +ciObject* ciObjArray::obj_at(int index) { + VM_ENTRY_MARK; + objArrayOop array = get_objArrayOop(); + if (index < 0 || index >= array->length()) return NULL; + oop o = array->obj_at(index); + if (o == NULL) { + return ciNullObject::make(); + } else { + return CURRENT_ENV->get_object(o); + } +} diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/ci/ciObjArray.hpp --- a/src/share/vm/ci/ciObjArray.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/ci/ciObjArray.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -43,4 +43,6 @@ public: // What kind of ciObject is this? bool is_obj_array() { return true; } + + ciObject* obj_at(int index); }; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/classfile/dictionary.cpp --- a/src/share/vm/classfile/dictionary.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/classfile/dictionary.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -155,8 +155,8 @@ for (int i = ik->previous_versions()->length() - 1; i >= 0; i--) { // check the previous versions array for GC'ed weak refs PreviousVersionNode * pv_node = ik->previous_versions()->at(i); - jweak cp_ref = pv_node->prev_constant_pool(); - assert(cp_ref != NULL, "weak cp ref was unexpectedly cleared"); + jobject cp_ref = pv_node->prev_constant_pool(); + assert(cp_ref != NULL, "cp ref was unexpectedly cleared"); if (cp_ref == NULL) { delete pv_node; ik->previous_versions()->remove_at(i); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/classfile/javaClasses.cpp --- a/src/share/vm/classfile/javaClasses.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/classfile/javaClasses.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -143,13 +143,43 @@ jstring js = 
NULL; { JavaThread* thread = (JavaThread*)THREAD; assert(thread->is_Java_thread(), "must be java thread"); + HandleMark hm(thread); ThreadToNativeFromVM ttn(thread); - HandleMark hm(thread); js = (_to_java_string_fn)(thread->jni_environment(), str); } return Handle(THREAD, JNIHandles::resolve(js)); } +// Converts a Java String to a native C string that can be used for +// native OS calls. +char* java_lang_String::as_platform_dependent_str(Handle java_string, TRAPS) { + + typedef char* (*to_platform_string_fn_t)(JNIEnv*, jstring, bool*); + static to_platform_string_fn_t _to_platform_string_fn = NULL; + + if (_to_platform_string_fn == NULL) { + void *lib_handle = os::native_java_library(); + _to_platform_string_fn = CAST_TO_FN_PTR(to_platform_string_fn_t, hpi::dll_lookup(lib_handle, "GetStringPlatformChars")); + if (_to_platform_string_fn == NULL) { + fatal("GetStringPlatformChars missing"); + } + } + + char *native_platform_string; + { JavaThread* thread = (JavaThread*)THREAD; + assert(thread->is_Java_thread(), "must be java thread"); + JNIEnv *env = thread->jni_environment(); + jstring js = (jstring) JNIHandles::make_local(env, java_string()); + bool is_copy; + HandleMark hm(thread); + ThreadToNativeFromVM ttn(thread); + native_platform_string = (_to_platform_string_fn)(env, js, &is_copy); + assert(is_copy == JNI_TRUE, "is_copy value changed"); + JNIHandles::destroy_local(js); + } + return native_platform_string; +} + Handle java_lang_String::char_converter(Handle java_string, jchar from_char, jchar to_char, TRAPS) { oop obj = java_string(); // Typical usage is to convert all '/' to '.' in string. 
diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/classfile/javaClasses.hpp --- a/src/share/vm/classfile/javaClasses.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/classfile/javaClasses.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -96,6 +96,7 @@ // String converters static char* as_utf8_string(oop java_string); static char* as_utf8_string(oop java_string, int start, int len); + static char* as_platform_dependent_str(Handle java_string, TRAPS); static jchar* as_unicode_string(oop java_string, int& length); static bool equals(oop java_string, jchar* chars, int len); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/classfile/systemDictionary.cpp --- a/src/share/vm/classfile/systemDictionary.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/classfile/systemDictionary.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1242,7 +1242,9 @@ oop obj = (oop) result.get_jobject(); if (obj == NULL) { return nk; } - char* new_class_name = java_lang_String::as_utf8_string(obj); + Handle h_obj(THREAD, obj); + char* new_class_name = java_lang_String::as_platform_dependent_str(h_obj, + CHECK_(nk)); // lock the loader // we use this lock because JVMTI does. 
diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/classfile/vmSymbols.cpp --- a/src/share/vm/classfile/vmSymbols.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/classfile/vmSymbols.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -318,6 +318,11 @@ const int neg = JVM_ACC_SYNCHRONIZED; return (flags & (req | neg)) == req; } +inline bool match_F_RNY(jshort flags) { + const int req = JVM_ACC_NATIVE | JVM_ACC_SYNCHRONIZED; + const int neg = JVM_ACC_STATIC; + return (flags & (req | neg)) == req; +} // These are for forming case labels: #define ID3(x, y, z) (( jint)(z) + \ @@ -359,6 +364,7 @@ case F_RN: fname = "native "; break; case F_SN: fname = "native static "; break; case F_S: fname = "static "; break; + case F_RNY:fname = "native synchronized "; break; } const char* kptr = strrchr(kname, '/'); if (kptr != NULL) kname = kptr + 1; @@ -485,7 +491,7 @@ if (PrintMiscellaneous && (WizardMode || Verbose)) { tty->print_cr("*** misidentified method; %s(%d) should be %s(%d):", declared_name, declared_id, actual_name, actual_id); - m->print_short_name(tty); + mh()->print_short_name(tty); tty->cr(); } } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/classfile/vmSymbols.hpp --- a/src/share/vm/classfile/vmSymbols.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/classfile/vmSymbols.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -58,12 +58,17 @@ template(java_lang_ThreadDeath, "java/lang/ThreadDeath") \ template(java_lang_Boolean, "java/lang/Boolean") \ template(java_lang_Character, "java/lang/Character") \ + template(java_lang_Character_CharacterCache, "java/lang/Character$CharacterCache") \ template(java_lang_Float, "java/lang/Float") \ template(java_lang_Double, "java/lang/Double") \ template(java_lang_Byte, "java/lang/Byte") \ + template(java_lang_Byte_Cache, "java/lang/Byte$ByteCache") \ template(java_lang_Short, "java/lang/Short") \ + template(java_lang_Short_ShortCache, "java/lang/Short$ShortCache") \ template(java_lang_Integer, "java/lang/Integer") \ + 
template(java_lang_Integer_IntegerCache, "java/lang/Integer$IntegerCache") \ template(java_lang_Long, "java/lang/Long") \ + template(java_lang_Long_LongCache, "java/lang/Long$LongCache") \ template(java_lang_Shutdown, "java/lang/Shutdown") \ template(java_lang_ref_Reference, "java/lang/ref/Reference") \ template(java_lang_ref_SoftReference, "java/lang/ref/SoftReference") \ @@ -91,10 +96,11 @@ template(java_util_Vector, "java/util/Vector") \ template(java_util_AbstractList, "java/util/AbstractList") \ template(java_util_Hashtable, "java/util/Hashtable") \ + template(java_util_HashMap, "java/util/HashMap") \ template(java_lang_Compiler, "java/lang/Compiler") \ template(sun_misc_Signal, "sun/misc/Signal") \ template(java_lang_AssertionStatusDirectives, "java/lang/AssertionStatusDirectives") \ - template(sun_jkernel_DownloadManager, "sun/jkernel/DownloadManager") \ + template(sun_jkernel_DownloadManager, "sun/jkernel/DownloadManager") \ template(getBootClassPathEntryForClass_name, "getBootClassPathEntryForClass") \ \ /* class file format tags */ \ @@ -274,7 +280,9 @@ template(exclusive_owner_thread_name, "exclusiveOwnerThread") \ template(park_blocker_name, "parkBlocker") \ template(park_event_name, "nativeParkEventPointer") \ + template(cache_field_name, "cache") \ template(value_name, "value") \ + template(frontCacheEnabled_name, "frontCacheEnabled") \ \ /* non-intrinsic name/signature pairs: */ \ template(register_method_name, "register") \ @@ -576,6 +584,8 @@ do_name( attemptUpdate_name, "attemptUpdate") \ do_signature(attemptUpdate_signature, "(JJ)Z") \ \ + do_intrinsic(_fillInStackTrace, java_lang_Throwable, fillInStackTrace_name, void_throwable_signature, F_RNY) \ + \ /* support for sun.misc.Unsafe */ \ do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \ \ @@ -863,7 +873,8 @@ F_R, // !static !synchronized (R="regular") F_S, // static !synchronized F_RN, // !static native !synchronized - F_SN // static native !synchronized + F_SN, // static native !synchronized + 
F_RNY // !static native synchronized }; public: diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/code/debugInfo.cpp --- a/src/share/vm/code/debugInfo.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/code/debugInfo.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -47,7 +47,8 @@ } #endif ObjectValue* result = new ObjectValue(id); - _obj_pool->append(result); + // Cache the object since an object field could reference it. + _obj_pool->push(result); result->read_object(this); return result; } @@ -56,9 +57,9 @@ int id = read_int(); assert(_obj_pool != NULL, "object pool does not exist"); for (int i = _obj_pool->length() - 1; i >= 0; i--) { - ObjectValue* sv = (ObjectValue*) _obj_pool->at(i); - if (sv->id() == id) { - return sv; + ObjectValue* ov = (ObjectValue*) _obj_pool->at(i); + if (ov->id() == id) { + return ov; } } ShouldNotReachHere(); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/code/dependencies.cpp --- a/src/share/vm/code/dependencies.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/code/dependencies.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -882,6 +882,14 @@ // Must not move the class hierarchy during this check: assert_locked_or_safepoint(Compile_lock); + int nof_impls = instanceKlass::cast(context_type)->nof_implementors(); + if (nof_impls > 1) { + // Avoid this case: *I.m > { A.m, C }; B.m > C + // %%% Until this is fixed more systematically, bail out. + // See corresponding comment in find_witness_anywhere. + return context_type; + } + assert(!is_participant(new_type), "only old classes are participants"); if (participants_hide_witnesses) { // If the new type is a subtype of a participant, we are done. 
@@ -1491,9 +1499,12 @@ // fall through: _change_type = Change_new_sub; case Change_new_sub: - _klass = instanceKlass::cast(_klass)->super(); - if (_klass != NULL) { - return true; + // 6598190: brackets workaround Sun Studio C++ compiler bug 6629277 + { + _klass = instanceKlass::cast(_klass)->super(); + if (_klass != NULL) { + return true; + } } // else set up _ti_limit and fall through: _ti_limit = (_ti_base == NULL) ? 0 : _ti_base->length(); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/code/nmethod.cpp --- a/src/share/vm/code/nmethod.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/code/nmethod.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1971,7 +1971,7 @@ if (ctxk != NULL) { Klass* k = Klass::cast(ctxk); if (k->oop_is_instance() && ((instanceKlass*)k)->is_dependent_nmethod(this)) { - tty->print(" [nmethod<=klass]%s", k->external_name()); + tty->print_cr(" [nmethod<=klass]%s", k->external_name()); } } deps.log_dependency(); // put it into the xml log also diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/code/scopeDesc.cpp --- a/src/share/vm/code/scopeDesc.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/code/scopeDesc.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -91,7 +91,9 @@ DebugInfoReadStream* stream = new DebugInfoReadStream(_code, decode_offset, result); int length = stream->read_int(); for (int index = 0; index < length; index++) { - result->push(ScopeValue::read_from(stream)); + // Objects values are pushed to 'result' array during read so that + // object's fields could reference it (OBJECT_ID_CODE). 
+ (void)ScopeValue::read_from(stream); } assert(result->length() == length, "inconsistent debug information"); return result; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/code/vmreg.cpp --- a/src/share/vm/code/vmreg.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/code/vmreg.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -36,16 +36,16 @@ // Register names const char *VMRegImpl::regName[ConcreteRegisterImpl::number_of_registers]; -void VMRegImpl::print() { #ifndef PRODUCT +void VMRegImpl::print_on(outputStream* st) const { if( is_reg() ) { assert( VMRegImpl::regName[value()], "" ); - tty->print("%s",VMRegImpl::regName[value()]); + st->print("%s",VMRegImpl::regName[value()]); } else if (is_stack()) { int stk = value() - stack0->value(); - tty->print("[%d]", stk*4); + st->print("[%d]", stk*4); } else { - tty->print("BAD!"); + st->print("BAD!"); } +} #endif // PRODUCT -} diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/code/vmreg.hpp --- a/src/share/vm/code/vmreg.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/code/vmreg.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -66,9 +66,9 @@ } } static VMReg Bad() { return (VMReg) (intptr_t) BAD; } - bool is_valid() { return ((intptr_t) this) != BAD; } - bool is_stack() { return (intptr_t) this >= (intptr_t) stack0; } - bool is_reg() { return is_valid() && !is_stack(); } + bool is_valid() const { return ((intptr_t) this) != BAD; } + bool is_stack() const { return (intptr_t) this >= (intptr_t) stack0; } + bool is_reg() const { return is_valid() && !is_stack(); } // A concrete register is a value that returns true for is_reg() and is // also a register you could use in the assembler. On machines with @@ -96,7 +96,8 @@ intptr_t value() const {return (intptr_t) this; } - void print(); + void print_on(outputStream* st) const PRODUCT_RETURN; + void print() const { print_on(tty); } // bias a stack slot. 
// Typically used to adjust a virtual frame slots by amounts that are offset by diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/compiler/oopMap.cpp --- a/src/share/vm/compiler/oopMap.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/compiler/oopMap.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -506,27 +506,27 @@ } -void print_register_type(OopMapValue::oop_types x, VMReg optional) { +static void print_register_type(OopMapValue::oop_types x, VMReg optional, outputStream* st) { switch( x ) { case OopMapValue::oop_value: - tty->print("Oop"); + st->print("Oop"); break; case OopMapValue::value_value: - tty->print("Value" ); + st->print("Value" ); break; case OopMapValue::dead_value: - tty->print("Dead" ); + st->print("Dead" ); break; case OopMapValue::callee_saved_value: - tty->print("Callers_" ); - optional->print(); + st->print("Callers_" ); + optional->print_on(st); break; case OopMapValue::derived_oop_value: - tty->print("Derived_oop_" ); - optional->print(); + st->print("Derived_oop_" ); + optional->print_on(st); break; case OopMapValue::stack_obj: - tty->print("Stack"); + st->print("Stack"); break; default: ShouldNotReachHere(); @@ -534,11 +534,11 @@ } -void OopMapValue::print() const { - reg()->print(); - tty->print("="); - print_register_type(type(),content_reg()); - tty->print(" "); +void OopMapValue::print_on(outputStream* st) const { + reg()->print_on(st); + st->print("="); + print_register_type(type(),content_reg(),st); + st->print(" "); } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/compiler/oopMap.hpp --- a/src/share/vm/compiler/oopMap.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/compiler/oopMap.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -129,7 +129,8 @@ return reg()->reg2stack(); } - void print( ) const PRODUCT_RETURN; + void print_on(outputStream* st) const PRODUCT_RETURN; + void print() const { print_on(tty); } }; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp --- 
a/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1071,85 +1071,56 @@ // for each list in the tree. Also print some summary // information. class printTreeCensusClosure : public AscendTreeCensusClosure { + int _print_line; size_t _totalFree; - AllocationStats _totals; - size_t _count; + FreeList _total; public: printTreeCensusClosure() { + _print_line = 0; _totalFree = 0; - _count = 0; - _totals.initialize(); } - AllocationStats* totals() { return &_totals; } - size_t count() { return _count; } - void increment_count_by(size_t v) { _count += v; } + FreeList* total() { return &_total; } size_t totalFree() { return _totalFree; } - void increment_totalFree_by(size_t v) { _totalFree += v; } void do_list(FreeList* fl) { - bool nl = false; // "maybe this is not needed" isNearLargestChunk(fl->head()); - - gclog_or_tty->print("%c %4d\t\t" "%7d\t" "%7d\t" - "%7d\t" "%7d\t" "%7d\t" "%7d\t" - "%7d\t" "%7d\t" "%7d\t" - "%7d\t" "\n", - " n"[nl], fl->size(), fl->bfrSurp(), fl->surplus(), - fl->desired(), fl->prevSweep(), fl->beforeSweep(), fl->count(), - fl->coalBirths(), fl->coalDeaths(), fl->splitBirths(), - fl->splitDeaths()); - - increment_totalFree_by(fl->count() * fl->size()); - increment_count_by(fl->count()); - totals()->set_bfrSurp(totals()->bfrSurp() + fl->bfrSurp()); - totals()->set_surplus(totals()->splitDeaths() + fl->surplus()); - totals()->set_prevSweep(totals()->prevSweep() + fl->prevSweep()); - totals()->set_beforeSweep(totals()->beforeSweep() + fl->beforeSweep()); - totals()->set_coalBirths(totals()->coalBirths() + fl->coalBirths()); - totals()->set_coalDeaths(totals()->coalDeaths() + fl->coalDeaths()); - totals()->set_splitBirths(totals()->splitBirths() + fl->splitBirths()); - totals()->set_splitDeaths(totals()->splitDeaths() + fl->splitDeaths()); + if (++_print_line >= 40) { + 
FreeList::print_labels_on(gclog_or_tty, "size"); + _print_line = 0; + } + fl->print_on(gclog_or_tty); + _totalFree += fl->count() * fl->size() ; + total()->set_count( total()->count() + fl->count() ); + total()->set_bfrSurp( total()->bfrSurp() + fl->bfrSurp() ); + total()->set_surplus( total()->splitDeaths() + fl->surplus() ); + total()->set_desired( total()->desired() + fl->desired() ); + total()->set_prevSweep( total()->prevSweep() + fl->prevSweep() ); + total()->set_beforeSweep(total()->beforeSweep() + fl->beforeSweep()); + total()->set_coalBirths( total()->coalBirths() + fl->coalBirths() ); + total()->set_coalDeaths( total()->coalDeaths() + fl->coalDeaths() ); + total()->set_splitBirths(total()->splitBirths() + fl->splitBirths()); + total()->set_splitDeaths(total()->splitDeaths() + fl->splitDeaths()); } }; void BinaryTreeDictionary::printDictCensus(void) const { gclog_or_tty->print("\nBinaryTree\n"); - gclog_or_tty->print( - "%4s\t\t" "%7s\t" "%7s\t" "%7s\t" "%7s\t" "%7s\t" - "%7s\t" "%7s\t" "%7s\t" "%7s\t" "%7s\t" "\n", - "size", "bfrsurp", "surplus", "desired", "prvSwep", "bfrSwep", - "count", "cBirths", "cDeaths", "sBirths", "sDeaths"); - + FreeList::print_labels_on(gclog_or_tty, "size"); printTreeCensusClosure ptc; ptc.do_tree(root()); - gclog_or_tty->print( - "\t\t" "%7s\t" "%7s\t" "%7s\t" "%7s\t" - "%7s\t" "%7s\t" "%7s\t" "%7s\t" "%7s\t" "\n", - "bfrsurp", "surplus", "prvSwep", "bfrSwep", - "count", "cBirths", "cDeaths", "sBirths", "sDeaths"); + FreeList* total = ptc.total(); + FreeList::print_labels_on(gclog_or_tty, " "); + total->print_on(gclog_or_tty, "TOTAL\t"); gclog_or_tty->print( - "%s\t\t" "%7d\t" "%7d\t" "%7d\t" "%7d\t" - "%7d\t" "%7d\t" "%7d\t" "%7d\t" "%7d\t" "\n", - "totl", - ptc.totals()->bfrSurp(), - ptc.totals()->surplus(), - ptc.totals()->prevSweep(), - ptc.totals()->beforeSweep(), - ptc.count(), - ptc.totals()->coalBirths(), - ptc.totals()->coalDeaths(), - ptc.totals()->splitBirths(), - ptc.totals()->splitDeaths()); - 
gclog_or_tty->print("totalFree(words): %7d growth: %8.5f deficit: %8.5f\n", + "totalFree(words): " SIZE_FORMAT_W(16) + " growth: %8.5f deficit: %8.5f\n", ptc.totalFree(), - (double)(ptc.totals()->splitBirths()+ptc.totals()->coalBirths() - -ptc.totals()->splitDeaths()-ptc.totals()->coalDeaths()) - /(ptc.totals()->prevSweep() != 0 ? - (double)ptc.totals()->prevSweep() : 1.0), - (double)(ptc.totals()->desired() - ptc.count()) - /(ptc.totals()->desired() != 0 ? - (double)ptc.totals()->desired() : 1.0)); + (double)(total->splitBirths() + total->coalBirths() + - total->splitDeaths() - total->coalDeaths()) + /(total->prevSweep() != 0 ? (double)total->prevSweep() : 1.0), + (double)(total->desired() - total->count()) + /(total->desired() != 0 ? (double)total->desired() : 1.0)); } // Verify the following tree invariants: diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1835,7 +1835,7 @@ guarantee(false, "NYI"); } -bool CompactibleFreeListSpace::linearAllocationWouldFail() { +bool CompactibleFreeListSpace::linearAllocationWouldFail() const { return _smallLinearAllocBlock._word_size == 0; } @@ -1906,6 +1906,13 @@ } } +// Support for concurrent collection policy decisions. +bool CompactibleFreeListSpace::should_concurrent_collect() const { + // In the future we might want to add in frgamentation stats -- + // including erosion of the "mountain" into this decision as well. 
+ return !adaptive_freelists() && linearAllocationWouldFail(); +} + // Support for compaction void CompactibleFreeListSpace::prepare_for_compaction(CompactPoint* cp) { @@ -2013,11 +2020,11 @@ } } -void CompactibleFreeListSpace::endSweepFLCensus(int sweepCt) { +void CompactibleFreeListSpace::endSweepFLCensus(size_t sweep_count) { setFLSurplus(); setFLHints(); if (PrintGC && PrintFLSCensus > 0) { - printFLCensus(sweepCt); + printFLCensus(sweep_count); } clearFLCensus(); assert_locked(); @@ -2293,59 +2300,37 @@ } #endif -void CompactibleFreeListSpace::printFLCensus(int sweepCt) const { +void CompactibleFreeListSpace::printFLCensus(size_t sweep_count) const { assert_lock_strong(&_freelistLock); - ssize_t bfrSurp = 0; - ssize_t surplus = 0; - ssize_t desired = 0; - ssize_t prevSweep = 0; - ssize_t beforeSweep = 0; - ssize_t count = 0; - ssize_t coalBirths = 0; - ssize_t coalDeaths = 0; - ssize_t splitBirths = 0; - ssize_t splitDeaths = 0; - gclog_or_tty->print("end sweep# %d\n", sweepCt); - gclog_or_tty->print("%4s\t" "%7s\t" "%7s\t" "%7s\t" "%7s\t" - "%7s\t" "%7s\t" "%7s\t" "%7s\t" "%7s\t" - "%7s\t" "\n", - "size", "bfrsurp", "surplus", "desired", "prvSwep", - "bfrSwep", "count", "cBirths", "cDeaths", "sBirths", - "sDeaths"); - + FreeList total; + gclog_or_tty->print("end sweep# " SIZE_FORMAT "\n", sweep_count); + FreeList::print_labels_on(gclog_or_tty, "size"); size_t totalFree = 0; for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) { const FreeList *fl = &_indexedFreeList[i]; - totalFree += fl->count() * fl->size(); - - gclog_or_tty->print("%4d\t" "%7d\t" "%7d\t" "%7d\t" - "%7d\t" "%7d\t" "%7d\t" "%7d\t" - "%7d\t" "%7d\t" "%7d\t" "\n", - fl->size(), fl->bfrSurp(), fl->surplus(), fl->desired(), - fl->prevSweep(), fl->beforeSweep(), fl->count(), fl->coalBirths(), - fl->coalDeaths(), fl->splitBirths(), fl->splitDeaths()); - bfrSurp += fl->bfrSurp(); - surplus += fl->surplus(); - desired += fl->desired(); - prevSweep += fl->prevSweep(); - beforeSweep 
+= fl->beforeSweep(); - count += fl->count(); - coalBirths += fl->coalBirths(); - coalDeaths += fl->coalDeaths(); - splitBirths += fl->splitBirths(); - splitDeaths += fl->splitDeaths(); + totalFree += fl->count() * fl->size(); + if (i % (40*IndexSetStride) == 0) { + FreeList::print_labels_on(gclog_or_tty, "size"); + } + fl->print_on(gclog_or_tty); + total.set_bfrSurp( total.bfrSurp() + fl->bfrSurp() ); + total.set_surplus( total.surplus() + fl->surplus() ); + total.set_desired( total.desired() + fl->desired() ); + total.set_prevSweep( total.prevSweep() + fl->prevSweep() ); + total.set_beforeSweep(total.beforeSweep() + fl->beforeSweep()); + total.set_count( total.count() + fl->count() ); + total.set_coalBirths( total.coalBirths() + fl->coalBirths() ); + total.set_coalDeaths( total.coalDeaths() + fl->coalDeaths() ); + total.set_splitBirths(total.splitBirths() + fl->splitBirths()); + total.set_splitDeaths(total.splitDeaths() + fl->splitDeaths()); } - gclog_or_tty->print("%4s\t" - "%7d\t" "%7d\t" "%7d\t" "%7d\t" "%7d\t" - "%7d\t" "%7d\t" "%7d\t" "%7d\t" "%7d\t" "\n", - "totl", - bfrSurp, surplus, desired, prevSweep, beforeSweep, - count, coalBirths, coalDeaths, splitBirths, splitDeaths); - gclog_or_tty->print_cr("Total free in indexed lists %d words", totalFree); + total.print_on(gclog_or_tty, "TOTAL"); + gclog_or_tty->print_cr("Total free in indexed lists " + SIZE_FORMAT " words", totalFree); gclog_or_tty->print("growth: %8.5f deficit: %8.5f\n", - (double)(splitBirths+coalBirths-splitDeaths-coalDeaths)/ - (prevSweep != 0 ? (double)prevSweep : 1.0), - (double)(desired - count)/(desired != 0 ? (double)desired : 1.0)); + (double)(total.splitBirths()+total.coalBirths()-total.splitDeaths()-total.coalDeaths())/ + (total.prevSweep() != 0 ? (double)total.prevSweep() : 1.0), + (double)(total.desired() - total.count())/(total.desired() != 0 ? 
(double)total.desired() : 1.0)); _dictionary->printDictCensus(); } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -418,7 +418,7 @@ // chunk exists, return NULL. FreeChunk* find_chunk_at_end(); - bool adaptive_freelists() { return _adaptive_freelists; } + bool adaptive_freelists() const { return _adaptive_freelists; } void set_collector(CMSCollector* collector) { _collector = collector; } @@ -566,7 +566,7 @@ FreeChunk* allocateScratch(size_t size); // returns true if either the small or large linear allocation buffer is empty. - bool linearAllocationWouldFail(); + bool linearAllocationWouldFail() const; // Adjust the chunk for the minimum size. This version is called in // most cases in CompactibleFreeListSpace methods. @@ -585,6 +585,9 @@ void addChunkAndRepairOffsetTable(HeapWord* chunk, size_t size, bool coalesced); + // Support for decisions regarding concurrent collection policy + bool should_concurrent_collect() const; + // Support for compaction void prepare_for_compaction(CompactPoint* cp); void adjust_pointers(); @@ -622,7 +625,7 @@ // coalescing of chunks during the sweep of garbage. // Print the statistics for the free lists. - void printFLCensus(int sweepCt) const; + void printFLCensus(size_t sweep_count) const; // Statistics functions // Initialize census for lists before the sweep. @@ -635,12 +638,11 @@ // Clear the census for each of the free lists. void clearFLCensus(); // Perform functions for the census after the end of the sweep. - void endSweepFLCensus(int sweepCt); + void endSweepFLCensus(size_t sweep_count); // Return true if the count of free chunks is greater // than the desired number of free chunks. 
bool coalOverPopulated(size_t size); - // Record (for each size): // // split-births = #chunks added due to splits in (prev-sweep-end, diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -3121,12 +3121,7 @@ if (GCExpandToAllocateDelayMillis > 0) { os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false); } - size_t adj_word_sz = CompactibleFreeListSpace::adjustObjectSize(word_size); - if (parallel) { - return cmsSpace()->par_allocate(adj_word_sz); - } else { - return cmsSpace()->allocate(adj_word_sz); - } + return have_lock_and_allocate(word_size, tlab); } // YSR: All of this generation expansion/shrinking stuff is an exact copy of @@ -5732,13 +5727,19 @@ // in the perm_gen_verify_bit_map. In order to do that we traverse // all blocks in perm gen and mark all dead objects. if (verifying() && !cms_should_unload_classes()) { - CMSTokenSyncWithLocks ts(true, _permGen->freelistLock(), - bitMapLock()); assert(perm_gen_verify_bit_map()->sizeInBits() != 0, "Should have already been allocated"); MarkDeadObjectsClosure mdo(this, _permGen->cmsSpace(), markBitMap(), perm_gen_verify_bit_map()); - _permGen->cmsSpace()->blk_iterate(&mdo); + if (asynch) { + CMSTokenSyncWithLocks ts(true, _permGen->freelistLock(), + bitMapLock()); + _permGen->cmsSpace()->blk_iterate(&mdo); + } else { + // In the case of synchronous sweep, we already have + // the requisite locks/tokens. 
+ _permGen->cmsSpace()->blk_iterate(&mdo); + } } if (asynch) { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -302,3 +302,29 @@ #endif } #endif + +// Print the "label line" for free list stats. +void FreeList::print_labels_on(outputStream* st, const char* c) { + st->print("%16s\t", c); + st->print("%14s\t" "%14s\t" "%14s\t" "%14s\t" "%14s\t" + "%14s\t" "%14s\t" "%14s\t" "%14s\t" "%14s\t" "\n", + "bfrsurp", "surplus", "desired", "prvSwep", "bfrSwep", + "count", "cBirths", "cDeaths", "sBirths", "sDeaths"); +} + +// Print the AllocationStats for the given free list. If the second argument +// to the call is a non-null string, it is printed in the first column; +// otherwise, if the argument is null (the default), then the size of the +// (free list) block is printed in the first column. 
+void FreeList::print_on(outputStream* st, const char* c) const { + if (c != NULL) { + st->print("%16s", c); + } else { + st->print(SIZE_FORMAT_W(16), size()); + } + st->print("\t" + SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" + SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\n", + bfrSurp(), surplus(), desired(), prevSweep(), beforeSweep(), + count(), coalBirths(), coalDeaths(), splitBirths(), splitDeaths()); +} diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp --- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -38,6 +38,7 @@ class FreeList VALUE_OBJ_CLASS_SPEC { friend class CompactibleFreeListSpace; + friend class printTreeCensusClosure; FreeChunk* _head; // List of free chunks FreeChunk* _tail; // Tail of list of free chunks size_t _size; // Size in Heap words of each chunks @@ -63,10 +64,11 @@ protected: void init_statistics(); void set_count(ssize_t v) { _count = v;} - void increment_count() { _count++; } + void increment_count() { _count++; } void decrement_count() { _count--; - assert(_count >= 0, "Count should not be negative"); } + assert(_count >= 0, "Count should not be negative"); + } public: // Constructor @@ -159,6 +161,10 @@ ssize_t desired() const { return _allocation_stats.desired(); } + void set_desired(ssize_t v) { + assert_proper_lock_protection(); + _allocation_stats.set_desired(v); + } void compute_desired(float inter_sweep_current, float inter_sweep_estimate) { assert_proper_lock_protection(); @@ -298,4 +304,8 @@ // Verify that the chunk is in the list. // found. Return NULL if "fc" is not found. 
bool verifyChunkInFreeLists(FreeChunk* fc) const; + + // Printing support + static void print_labels_on(outputStream* st, const char* c); + void print_on(outputStream* st, const char* c = NULL) const; }; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/includeDB_gc_shared --- a/src/share/vm/gc_implementation/includeDB_gc_shared Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_shared Tue Apr 22 15:36:18 2008 -0700 @@ -19,15 +19,22 @@ // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, // CA 95054 USA or visit www.sun.com if you need additional information or // have any questions. -// +// // // NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps! -gcAdaptivePolicyCounters.hpp adaptiveSizePolicy.hpp -gcAdaptivePolicyCounters.hpp gcPolicyCounters.hpp +allocationStats.cpp allocationStats.hpp +allocationStats.cpp ostream.hpp -gcAdaptivePolicyCounters.cpp resourceArea.hpp +allocationStats.hpp allocation.hpp +allocationStats.hpp gcUtil.hpp +allocationStats.hpp globalDefinitions.hpp + +gcAdaptivePolicyCounters.hpp adaptiveSizePolicy.hpp +gcAdaptivePolicyCounters.hpp gcPolicyCounters.hpp + +gcAdaptivePolicyCounters.cpp resourceArea.hpp gcAdaptivePolicyCounters.cpp gcAdaptivePolicyCounters.hpp gSpaceCounters.cpp generation.hpp @@ -44,7 +51,7 @@ isGCActiveMark.hpp parallelScavengeHeap.hpp -markSweep.inline.hpp psParallelCompact.hpp +markSweep.inline.hpp psParallelCompact.hpp mutableNUMASpace.cpp mutableNUMASpace.hpp mutableNUMASpace.cpp sharedHeap.hpp diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/parNew/asParNewGeneration.cpp --- a/src/share/vm/gc_implementation/parNew/asParNewGeneration.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/parNew/asParNewGeneration.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -74,8 +74,8 @@ #ifdef SHRINKS_AT_END_OF_EDEN size_t delta_in_survivor = 0; ParallelScavengeHeap* heap = 
(ParallelScavengeHeap*)Universe::heap(); - const size_t space_alignment = heap->intra_generation_alignment(); - const size_t gen_alignment = heap->generation_alignment(); + const size_t space_alignment = heap->intra_heap_alignment(); + const size_t gen_alignment = heap->object_heap_alignment(); MutableSpace* space_shrinking = NULL; if (from_space()->end() > to_space()->end()) { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/parNew/parNewGeneration.cpp --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -785,6 +785,9 @@ swap_spaces(); // Make life simpler for CMS || rescan; see 6483690. from()->set_next_compaction_space(to()); gch->set_incremental_collection_will_fail(); + + // Reset the PromotionFailureALot counters. + NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();) } // set new iteration safe limit for the survivor spaces from()->set_concurrent_iteration_safe_limit(from()->top()); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/parallelScavenge/asPSYoungGen.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/asPSYoungGen.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/asPSYoungGen.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -86,7 +86,7 @@ if (eden_space()->is_empty()) { // Respect the minimum size for eden and for the young gen as a whole. ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); - const size_t eden_alignment = heap->intra_generation_alignment(); + const size_t eden_alignment = heap->intra_heap_alignment(); const size_t gen_alignment = heap->young_gen_alignment(); assert(eden_space()->capacity_in_bytes() >= eden_alignment, @@ -124,7 +124,7 @@ // to_space can be. 
size_t ASPSYoungGen::available_to_live() { ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); - const size_t alignment = heap->intra_generation_alignment(); + const size_t alignment = heap->intra_heap_alignment(); // Include any space that is committed but is not in eden. size_t available = pointer_delta(eden_space()->bottom(), @@ -275,7 +275,7 @@ assert(eden_start < from_start, "Cannot push into from_space"); ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); - const size_t alignment = heap->intra_generation_alignment(); + const size_t alignment = heap->intra_heap_alignment(); // Check whether from space is below to space if (from_start < to_start) { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/parallelScavenge/generationSizer.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/generationSizer.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/generationSizer.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -39,10 +39,10 @@ // If the user hasn't explicitly set the number of worker // threads, set the count. - if (ParallelGCThreads == 0) { - assert(UseParallelGC, "Setting ParallelGCThreads without UseParallelGC"); - ParallelGCThreads = os::active_processor_count(); - } + assert(UseSerialGC || + !FLAG_IS_DEFAULT(ParallelGCThreads) || + (ParallelGCThreads > 0), + "ParallelGCThreads should be set before flag initialization"); // The survivor ratio's are calculated "raw", unlike the // default gc, which adds 2 to the ratio value. We need to diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -41,7 +41,7 @@ const size_t rs_align = page_sz == (size_t) os::vm_page_size() ? 
0 : MAX2(page_sz, granularity); - ReservedSpace rs(bytes, rs_align, false); + ReservedSpace rs(bytes, rs_align, rs_align > 0); os::trace_page_sizes("par bitmap", raw_bytes, raw_bytes, page_sz, rs.base(), rs.size()); _virtual_space = new PSVirtualSpace(rs, page_sz); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -173,7 +173,7 @@ new PSAdaptiveSizePolicy(eden_capacity, initial_promo_size, young_gen()->to_space()->capacity_in_bytes(), - intra_generation_alignment(), + intra_heap_alignment(), max_gc_pause_sec, max_gc_minor_pause_sec, GCTimeRatio diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp --- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -58,9 +58,9 @@ public: ParallelScavengeHeap() : CollectedHeap() { - set_alignment(_perm_gen_alignment, intra_generation_alignment()); - set_alignment(_young_gen_alignment, intra_generation_alignment()); - set_alignment(_old_gen_alignment, intra_generation_alignment()); + set_alignment(_perm_gen_alignment, intra_heap_alignment()); + set_alignment(_young_gen_alignment, intra_heap_alignment()); + set_alignment(_old_gen_alignment, intra_heap_alignment()); } // For use by VM operations @@ -92,14 +92,14 @@ void post_initialize(); void update_counters(); - // The alignment used for the various generations. 
size_t perm_gen_alignment() const { return _perm_gen_alignment; } size_t young_gen_alignment() const { return _young_gen_alignment; } size_t old_gen_alignment() const { return _old_gen_alignment; } - // The alignment used for eden and survivors within the young gen. - size_t intra_generation_alignment() const { return 64 * K; } + // The alignment used for eden and survivors within the young gen + // and for boundary between young gen and old gen. + size_t intra_heap_alignment() const { return 64 * K; } size_t capacity() const; size_t used() const; @@ -217,6 +217,6 @@ inline size_t ParallelScavengeHeap::set_alignment(size_t& var, size_t val) { assert(is_power_of_2((intptr_t)val), "must be a power of 2"); - var = round_to(val, intra_generation_alignment()); + var = round_to(val, intra_heap_alignment()); return var; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -413,7 +413,7 @@ const size_t rs_align = page_sz == (size_t) os::vm_page_size() ? 
0 : MAX2(page_sz, granularity); - ReservedSpace rs(bytes, rs_align, false); + ReservedSpace rs(bytes, rs_align, rs_align > 0); os::trace_page_sizes("par compact", raw_bytes, raw_bytes, page_sz, rs.base(), rs.size()); PSVirtualSpace* vspace = new PSVirtualSpace(rs, page_sz); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp --- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -88,7 +88,7 @@ // Compute maximum space sizes for performance counters ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); - size_t alignment = heap->intra_generation_alignment(); + size_t alignment = heap->intra_heap_alignment(); size_t size = _virtual_space->reserved_size(); size_t max_survivor_size; @@ -141,7 +141,7 @@ assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); // Compute sizes - size_t alignment = heap->intra_generation_alignment(); + size_t alignment = heap->intra_heap_alignment(); size_t size = _virtual_space->committed_size(); size_t survivor_size = size / InitialSurvivorRatio; @@ -192,7 +192,7 @@ #ifndef PRODUCT void PSYoungGen::space_invariants() { ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); - const size_t alignment = heap->intra_generation_alignment(); + const size_t alignment = heap->intra_heap_alignment(); // Currently, our eden size cannot shrink to zero guarantee(eden_space()->capacity_in_bytes() >= alignment, "eden too small"); @@ -392,7 +392,7 @@ char* to_end = (char*)to_space()->end(); ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); - const size_t alignment = heap->intra_generation_alignment(); + const size_t alignment = heap->intra_heap_alignment(); const bool maintain_minimum = (requested_eden_size + 2 * requested_survivor_size) <= min_gen_size(); @@ -708,7 +708,7 @@ size_t 
PSYoungGen::available_to_live() { size_t delta_in_survivor = 0; ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); - const size_t space_alignment = heap->intra_generation_alignment(); + const size_t space_alignment = heap->intra_heap_alignment(); const size_t gen_alignment = heap->young_gen_alignment(); MutableSpace* space_shrinking = NULL; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/shared/allocationStats.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/allocationStats.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -0,0 +1,30 @@ +/* + * Copyright 2005 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +# include "incls/_precompiled.incl" +# include "incls/_allocationStats.cpp.incl" + +// Technically this should be derived from machine speed, and +// ideally it would be dynamically adjusted. 
+float AllocationStats::_threshold = ((float)CMS_SweepTimerThresholdMillis)/1000; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/gc_implementation/shared/allocationStats.hpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/share/vm/gc_implementation/shared/allocationStats.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -0,0 +1,138 @@ +/* + * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class AllocationStats VALUE_OBJ_CLASS_SPEC { + // A duration threshold (in seconds) used to filter + // possibly unreliable samples. + static float _threshold; + + // We measure the demand between the end of the previous sweep and + // beginning of this sweep: + // Count(end_last_sweep) - Count(start_this_sweep) + // + splitBirths(between) - splitDeaths(between) + // The above number divided by the time since the start [END???] of the + // previous sweep gives us a time rate of demand for blocks + // of this size.
We compute a padded average of this rate as + // our current estimate for the time rate of demand for blocks + // of this size. Similarly, we keep a padded average for the time + // between sweeps. Our current estimate for demand for blocks of + // this size is then simply computed as the product of these two + // estimates. + AdaptivePaddedAverage _demand_rate_estimate; + + ssize_t _desired; // Estimate computed as described above + ssize_t _coalDesired; // desired +/- small-percent for tuning coalescing + + ssize_t _surplus; // count - (desired +/- small-percent), + // used to tune splitting in best fit + ssize_t _bfrSurp; // surplus at start of current sweep + ssize_t _prevSweep; // count from end of previous sweep + ssize_t _beforeSweep; // count from before current sweep + ssize_t _coalBirths; // additional chunks from coalescing + ssize_t _coalDeaths; // loss from coalescing + ssize_t _splitBirths; // additional chunks from splitting + ssize_t _splitDeaths; // loss from splitting + size_t _returnedBytes; // number of bytes returned to list. + public: + void initialize() { + AdaptivePaddedAverage* dummy = + new (&_demand_rate_estimate) AdaptivePaddedAverage(CMS_FLSWeight, + CMS_FLSPadding); + _desired = 0; + _coalDesired = 0; + _surplus = 0; + _bfrSurp = 0; + _prevSweep = 0; + _beforeSweep = 0; + _coalBirths = 0; + _coalDeaths = 0; + _splitBirths = 0; + _splitDeaths = 0; + _returnedBytes = 0; + } + + AllocationStats() { + initialize(); + } + // The rate estimate is in blocks per second. + void compute_desired(size_t count, + float inter_sweep_current, + float inter_sweep_estimate) { + // If the latest inter-sweep time is below our granularity + // of measurement, we may call in here with + // inter_sweep_current == 0. 
However, even for suitably small + // but non-zero inter-sweep durations, we may not trust the accuracy + // of accumulated data, since it has not been "integrated" + // (read "low-pass-filtered") long enough, and would be + // vulnerable to noisy glitches. In such cases, we + // ignore the current sample and use currently available + // historical estimates. + if (inter_sweep_current > _threshold) { + ssize_t demand = prevSweep() - count + splitBirths() - splitDeaths(); + float rate = ((float)demand)/inter_sweep_current; + _demand_rate_estimate.sample(rate); + _desired = (ssize_t)(_demand_rate_estimate.padded_average() + *inter_sweep_estimate); + } + } + + ssize_t desired() const { return _desired; } + void set_desired(ssize_t v) { _desired = v; } + + ssize_t coalDesired() const { return _coalDesired; } + void set_coalDesired(ssize_t v) { _coalDesired = v; } + + ssize_t surplus() const { return _surplus; } + void set_surplus(ssize_t v) { _surplus = v; } + void increment_surplus() { _surplus++; } + void decrement_surplus() { _surplus--; } + + ssize_t bfrSurp() const { return _bfrSurp; } + void set_bfrSurp(ssize_t v) { _bfrSurp = v; } + ssize_t prevSweep() const { return _prevSweep; } + void set_prevSweep(ssize_t v) { _prevSweep = v; } + ssize_t beforeSweep() const { return _beforeSweep; } + void set_beforeSweep(ssize_t v) { _beforeSweep = v; } + + ssize_t coalBirths() const { return _coalBirths; } + void set_coalBirths(ssize_t v) { _coalBirths = v; } + void increment_coalBirths() { _coalBirths++; } + + ssize_t coalDeaths() const { return _coalDeaths; } + void set_coalDeaths(ssize_t v) { _coalDeaths = v; } + void increment_coalDeaths() { _coalDeaths++; } + + ssize_t splitBirths() const { return _splitBirths; } + void set_splitBirths(ssize_t v) { _splitBirths = v; } + void increment_splitBirths() { _splitBirths++; } + + ssize_t splitDeaths() const { return _splitDeaths; } + void set_splitDeaths(ssize_t v) { _splitDeaths = v; } + void increment_splitDeaths() { 
_splitDeaths++; } + + NOT_PRODUCT( + size_t returnedBytes() const { return _returnedBytes; } + void set_returnedBytes(size_t v) { _returnedBytes = v; } + ) +}; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/includeDB_compiler2 --- a/src/share/vm/includeDB_compiler2 Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/includeDB_compiler2 Tue Apr 22 15:36:18 2008 -0700 @@ -19,7 +19,7 @@ // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, // CA 95054 USA or visit www.sun.com if you need additional information or // have any questions. -// +// // ad_.cpp adGlobals_.hpp @@ -164,6 +164,7 @@ callGenerator.hpp type.hpp callnode.cpp callnode.hpp +callnode.cpp bcEscapeAnalyzer.hpp callnode.cpp escape.hpp callnode.cpp locknode.hpp callnode.cpp machnode.hpp @@ -176,7 +177,6 @@ callnode.cpp runtime.hpp callnode.hpp connode.hpp -callnode.hpp escape.hpp callnode.hpp mulnode.hpp callnode.hpp multnode.hpp callnode.hpp opcodes.hpp @@ -347,7 +347,6 @@ connode.cpp allocation.inline.hpp connode.cpp compile.hpp connode.cpp connode.hpp -connode.cpp escape.hpp connode.cpp machnode.hpp connode.cpp matcher.hpp connode.cpp memnode.hpp @@ -410,6 +409,7 @@ escape.cpp allocation.hpp escape.cpp bcEscapeAnalyzer.hpp +escape.cpp c2compiler.hpp escape.cpp callnode.hpp escape.cpp cfgnode.hpp escape.cpp compile.hpp @@ -843,7 +843,6 @@ phaseX.cpp callnode.hpp phaseX.cpp cfgnode.hpp phaseX.cpp connode.hpp -phaseX.cpp escape.hpp phaseX.cpp loopnode.hpp phaseX.cpp machnode.hpp phaseX.cpp opcodes.hpp @@ -990,6 +989,7 @@ subnode.cpp addnode.hpp subnode.cpp allocation.inline.hpp +subnode.cpp callnode.hpp subnode.cpp cfgnode.hpp subnode.cpp compileLog.hpp subnode.cpp connode.hpp @@ -1086,7 +1086,7 @@ idealGraphPrinter.hpp ostream.hpp idealGraphPrinter.cpp idealGraphPrinter.hpp -idealGraphPrinter.cpp chaitin.hpp +idealGraphPrinter.cpp chaitin.hpp idealGraphPrinter.cpp machnode.hpp idealGraphPrinter.cpp parse.hpp idealGraphPrinter.cpp threadCritical.hpp @@ -1098,4 +1098,4 @@ parse1.cpp 
idealGraphPrinter.hpp matcher.cpp idealGraphPrinter.hpp loopnode.cpp idealGraphPrinter.hpp -chaitin.cpp idealGraphPrinter.hpp +chaitin.cpp idealGraphPrinter.hpp diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/includeDB_core --- a/src/share/vm/includeDB_core Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/includeDB_core Tue Apr 22 15:36:18 2008 -0700 @@ -19,7 +19,7 @@ // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, // CA 95054 USA or visit www.sun.com if you need additional information or // have any questions. -// +// // // NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps! @@ -46,13 +46,13 @@ // as dependencies. Header files named H.inline.hpp generally contain // bodies for inline functions declared in H.hpp. // -// NOTE: Files that use the token "generate_platform_dependent_include" +// NOTE: Files that use the token "generate_platform_dependent_include" // are expected to contain macro references like , , ... and // makedeps has a dependency on these platform files looking like: -// foo_.trailing_string +// foo_.trailing_string // (where "trailing_string" can be any legal filename strings but typically // is "hpp" or "inline.hpp"). -// +// // The dependency in makedeps (and enforced) is that an underscore // will precedure the macro invocation. 
Note that this restriction // is only enforced on filenames that have the dependency token @@ -148,12 +148,6 @@ allocation.inline.hpp os.hpp -allocationStats.cpp allocationStats.hpp - -allocationStats.hpp allocation.hpp -allocationStats.hpp gcUtil.hpp -allocationStats.hpp globalDefinitions.hpp - aprofiler.cpp aprofiler.hpp aprofiler.cpp collectedHeap.inline.hpp aprofiler.cpp oop.inline.hpp @@ -720,6 +714,11 @@ ciObjArray.hpp ciClassList.hpp ciObjArray.hpp objArrayOop.hpp +ciObjArray.cpp ciObjArray.hpp +ciObjArray.cpp ciNullObject.hpp +ciObjArray.cpp ciUtilities.hpp +ciObjArray.cpp objArrayOop.hpp + ciObjArrayKlass.cpp ciInstanceKlass.hpp ciObjArrayKlass.cpp ciObjArrayKlass.hpp ciObjArrayKlass.cpp ciObjArrayKlassKlass.hpp @@ -1935,7 +1934,7 @@ init.cpp bytecodes.hpp init.cpp collectedHeap.hpp -init.cpp handles.inline.hpp +init.cpp handles.inline.hpp init.cpp icBuffer.hpp init.cpp icache.hpp init.cpp init.hpp @@ -3068,6 +3067,7 @@ oopMapCache.cpp allocation.inline.hpp oopMapCache.cpp handles.inline.hpp +oopMapCache.cpp jvmtiRedefineClassesTrace.hpp oopMapCache.cpp oop.inline.hpp oopMapCache.cpp oopMapCache.hpp oopMapCache.cpp resourceArea.hpp diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/interpreter/oopMapCache.cpp --- a/src/share/vm/interpreter/oopMapCache.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/interpreter/oopMapCache.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -532,6 +532,10 @@ if (!_array[i].is_empty() && _array[i].method()->is_old()) { // Cache entry is occupied by an old redefined method and we don't want // to pin it down so flush the entry. + RC_TRACE(0x08000000, ("flush: %s(%s): cached entry @%d", + _array[i].method()->name()->as_C_string(), + _array[i].method()->signature()->as_C_string(), i)); + _array[i].flush(); } } @@ -577,6 +581,15 @@ // Entry is not in hashtable. // Compute entry and return it + if (method->should_not_be_cached()) { + // It is either not safe or not a good idea to cache this methodOop + // at this time. 
We give the caller of lookup() a copy of the + // interesting info via parameter entry_for, but we don't add it to + // the cache. See the gory details in methodOop.cpp. + compute_one_oop_map(method, bci, entry_for); + return; + } + // First search for an empty slot for(i = 0; i < _probe_depth; i++) { entry = entry_at(probe + i); @@ -584,12 +597,6 @@ entry->fill(method, bci); entry_for->resource_copy(entry); assert(!entry_for->is_empty(), "A non-empty oop map should be returned"); - if (method->is_old()) { - // The caller of lookup() will receive a copy of the interesting - // info via entry_for, but we don't keep an old redefined method in - // the cache to avoid pinning down the method. - entry->flush(); - } return; } } @@ -623,13 +630,6 @@ } assert(!entry_for->is_empty(), "A non-empty oop map should be returned"); - if (method->is_old()) { - // The caller of lookup() will receive a copy of the interesting - // info via entry_for, but we don't keep an old redefined method in - // the cache to avoid pinning down the method. - entry->flush(); - } - return; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/allocationStats.cpp --- a/src/share/vm/memory/allocationStats.cpp Wed Apr 09 11:18:58 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -/* - * Copyright 2005 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - */ - -# include "incls/_precompiled.incl" -# include "incls/_allocationStats.cpp.incl" - -// Technically this should be derived from machine speed, and -// ideally it would be dynamically adjusted. -float AllocationStats::_threshold = ((float)CMS_SweepTimerThresholdMillis)/1000; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/allocationStats.hpp --- a/src/share/vm/memory/allocationStats.hpp Wed Apr 09 11:18:58 2008 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,136 +0,0 @@ -/* - * Copyright 2001-2005 Sun Microsystems, Inc. All Rights Reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. 
- * - */ - -class AllocationStats VALUE_OBJ_CLASS_SPEC { - // A duration threshold (in ms) used to filter - // possibly unreliable samples. - static float _threshold; - - // We measure the demand between the end of the previous sweep and - // beginning of this sweep: - // Count(end_last_sweep) - Count(start_this_sweep) - // + splitBirths(between) - splitDeaths(between) - // The above number divided by the time since the start [END???] of the - // previous sweep gives us a time rate of demand for blocks - // of this size. We compute a padded average of this rate as - // our current estimate for the time rate of demand for blocks - // of this size. Similarly, we keep a padded average for the time - // between sweeps. Our current estimate for demand for blocks of - // this size is then simply computed as the product of these two - // estimates. - AdaptivePaddedAverage _demand_rate_estimate; - - ssize_t _desired; // Estimate computed as described above - ssize_t _coalDesired; // desired +/- small-percent for tuning coalescing - - ssize_t _surplus; // count - (desired +/- small-percent), - // used to tune splitting in best fit - ssize_t _bfrSurp; // surplus at start of current sweep - ssize_t _prevSweep; // count from end of previous sweep - ssize_t _beforeSweep; // count from before current sweep - ssize_t _coalBirths; // additional chunks from coalescing - ssize_t _coalDeaths; // loss from coalescing - ssize_t _splitBirths; // additional chunks from splitting - ssize_t _splitDeaths; // loss from splitting - size_t _returnedBytes; // number of bytes returned to list. 
- public: - void initialize() { - AdaptivePaddedAverage* dummy = - new (&_demand_rate_estimate) AdaptivePaddedAverage(CMS_FLSWeight, - CMS_FLSPadding); - _desired = 0; - _coalDesired = 0; - _surplus = 0; - _bfrSurp = 0; - _prevSweep = 0; - _beforeSweep = 0; - _coalBirths = 0; - _coalDeaths = 0; - _splitBirths = 0; - _splitDeaths = 0; - _returnedBytes = 0; - } - - AllocationStats() { - initialize(); - } - // The rate estimate is in blocks per second. - void compute_desired(size_t count, - float inter_sweep_current, - float inter_sweep_estimate) { - // If the latest inter-sweep time is below our granularity - // of measurement, we may call in here with - // inter_sweep_current == 0. However, even for suitably small - // but non-zero inter-sweep durations, we may not trust the accuracy - // of accumulated data, since it has not been "integrated" - // (read "low-pass-filtered") long enough, and would be - // vulnerable to noisy glitches. In such cases, we - // ignore the current sample and use currently available - // historical estimates. 
- if (inter_sweep_current > _threshold) { - ssize_t demand = prevSweep() - count + splitBirths() - splitDeaths(); - float rate = ((float)demand)/inter_sweep_current; - _demand_rate_estimate.sample(rate); - _desired = (ssize_t)(_demand_rate_estimate.padded_average() - *inter_sweep_estimate); - } - } - - ssize_t desired() const { return _desired; } - ssize_t coalDesired() const { return _coalDesired; } - void set_coalDesired(ssize_t v) { _coalDesired = v; } - - ssize_t surplus() const { return _surplus; } - void set_surplus(ssize_t v) { _surplus = v; } - void increment_surplus() { _surplus++; } - void decrement_surplus() { _surplus--; } - - ssize_t bfrSurp() const { return _bfrSurp; } - void set_bfrSurp(ssize_t v) { _bfrSurp = v; } - ssize_t prevSweep() const { return _prevSweep; } - void set_prevSweep(ssize_t v) { _prevSweep = v; } - ssize_t beforeSweep() const { return _beforeSweep; } - void set_beforeSweep(ssize_t v) { _beforeSweep = v; } - - ssize_t coalBirths() const { return _coalBirths; } - void set_coalBirths(ssize_t v) { _coalBirths = v; } - void increment_coalBirths() { _coalBirths++; } - - ssize_t coalDeaths() const { return _coalDeaths; } - void set_coalDeaths(ssize_t v) { _coalDeaths = v; } - void increment_coalDeaths() { _coalDeaths++; } - - ssize_t splitBirths() const { return _splitBirths; } - void set_splitBirths(ssize_t v) { _splitBirths = v; } - void increment_splitBirths() { _splitBirths++; } - - ssize_t splitDeaths() const { return _splitDeaths; } - void set_splitDeaths(ssize_t v) { _splitDeaths = v; } - void increment_splitDeaths() { _splitDeaths++; } - - NOT_PRODUCT( - size_t returnedBytes() const { return _returnedBytes; } - void set_returnedBytes(size_t v) { _returnedBytes = v; } - ) -}; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/cardTableModRefBS.cpp --- a/src/share/vm/memory/cardTableModRefBS.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/cardTableModRefBS.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -51,7 +51,7 @@ 
_whole_heap(whole_heap), _guard_index(cards_required(whole_heap.word_size()) - 1), _last_valid_index(_guard_index - 1), - _page_size(os::page_size_for_region(_guard_index + 1, _guard_index + 1, 1)), + _page_size(os::vm_page_size()), _byte_map_size(compute_byte_map_size()) { _kind = BarrierSet::CardTableModRef; @@ -196,8 +196,8 @@ assert(_whole_heap.contains(new_region), "attempt to cover area not in reserved area"); debug_only(verify_guard();) - int ind = find_covering_region_by_base(new_region.start()); - MemRegion old_region = _covered[ind]; + int const ind = find_covering_region_by_base(new_region.start()); + MemRegion const old_region = _covered[ind]; assert(old_region.start() == new_region.start(), "just checking"); if (new_region.word_size() != old_region.word_size()) { // Commit new or uncommit old pages, if necessary. @@ -205,21 +205,21 @@ // Extend the end of this _commited region // to cover the end of any lower _committed regions. // This forms overlapping regions, but never interior regions. - HeapWord* max_prev_end = largest_prev_committed_end(ind); + HeapWord* const max_prev_end = largest_prev_committed_end(ind); if (max_prev_end > cur_committed.end()) { cur_committed.set_end(max_prev_end); } // Align the end up to a page size (starts are already aligned). - jbyte* new_end = byte_after(new_region.last()); - HeapWord* new_end_aligned = - (HeapWord*)align_size_up((uintptr_t)new_end, _page_size); + jbyte* const new_end = byte_after(new_region.last()); + HeapWord* const new_end_aligned = + (HeapWord*) align_size_up((uintptr_t)new_end, _page_size); assert(new_end_aligned >= (HeapWord*) new_end, "align up, but less"); // The guard page is always committed and should not be committed over. - HeapWord* new_end_for_commit = MIN2(new_end_aligned, _guard_region.start()); + HeapWord* const new_end_for_commit = MIN2(new_end_aligned, _guard_region.start()); if (new_end_for_commit > cur_committed.end()) { // Must commit new pages. 
- MemRegion new_committed = + MemRegion const new_committed = MemRegion(cur_committed.end(), new_end_for_commit); assert(!new_committed.is_empty(), "Region should not be empty here"); @@ -233,7 +233,7 @@ // the cur_committed region may include the guard region. } else if (new_end_aligned < cur_committed.end()) { // Must uncommit pages. - MemRegion uncommit_region = + MemRegion const uncommit_region = committed_unique_to_self(ind, MemRegion(new_end_aligned, cur_committed.end())); if (!uncommit_region.is_empty()) { @@ -257,7 +257,7 @@ } assert(index_for(new_region.last()) < (int) _guard_index, "The guard card will be overwritten"); - jbyte* end = byte_after(new_region.last()); + jbyte* const end = byte_after(new_region.last()); // do nothing if we resized downward. if (entry < end) { memset(entry, clean_card, pointer_delta(end, entry, sizeof(jbyte))); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/cardTableRS.cpp --- a/src/share/vm/memory/cardTableRS.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/cardTableRS.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -556,10 +556,16 @@ } -void CardTableRS::verify_empty(MemRegion mr) { +void CardTableRS::verify_aligned_region_empty(MemRegion mr) { if (!mr.is_empty()) { jbyte* cur_entry = byte_for(mr.start()); jbyte* limit = byte_after(mr.last()); + // The region mr may not start on a card boundary so + // the first card may reflect a write to the space + // just prior to mr. 
+ if (!is_aligned(mr.start())) { + cur_entry++; + } for (;cur_entry < limit; cur_entry++) { guarantee(*cur_entry == CardTableModRefBS::clean_card, "Unexpected dirty card found"); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/cardTableRS.hpp --- a/src/share/vm/memory/cardTableRS.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/cardTableRS.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -126,7 +126,7 @@ } void verify(); - void verify_empty(MemRegion mr); + void verify_aligned_region_empty(MemRegion mr); void clear(MemRegion mr) { _ct_bs.clear(mr); } void clear_into_younger(Generation* gen, bool clear_perm); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/collectorPolicy.cpp --- a/src/share/vm/memory/collectorPolicy.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/collectorPolicy.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -57,45 +57,51 @@ // User inputs from -mx and ms are aligned _initial_heap_byte_size = align_size_up(Arguments::initial_heap_size(), min_alignment()); - _min_heap_byte_size = align_size_up(Arguments::min_heap_size(), - min_alignment()); - _max_heap_byte_size = align_size_up(MaxHeapSize, max_alignment()); + set_min_heap_byte_size(align_size_up(Arguments::min_heap_size(), + min_alignment())); + set_max_heap_byte_size(align_size_up(MaxHeapSize, max_alignment())); // Check validity of heap parameters from launcher - if (_initial_heap_byte_size == 0) { - _initial_heap_byte_size = NewSize + OldSize; + if (initial_heap_byte_size() == 0) { + set_initial_heap_byte_size(NewSize + OldSize); } else { - Universe::check_alignment(_initial_heap_byte_size, min_alignment(), + Universe::check_alignment(initial_heap_byte_size(), min_alignment(), "initial heap"); } - if (_min_heap_byte_size == 0) { - _min_heap_byte_size = NewSize + OldSize; + if (min_heap_byte_size() == 0) { + set_min_heap_byte_size(NewSize + OldSize); } else { - Universe::check_alignment(_min_heap_byte_size, min_alignment(), + Universe::check_alignment(min_heap_byte_size(), 
min_alignment(), "initial heap"); } // Check heap parameter properties - if (_initial_heap_byte_size < M) { + if (initial_heap_byte_size() < M) { vm_exit_during_initialization("Too small initial heap"); } // Check heap parameter properties - if (_min_heap_byte_size < M) { + if (min_heap_byte_size() < M) { vm_exit_during_initialization("Too small minimum heap"); } - if (_initial_heap_byte_size <= NewSize) { + if (initial_heap_byte_size() <= NewSize) { // make sure there is at least some room in old space vm_exit_during_initialization("Too small initial heap for new size specified"); } - if (_max_heap_byte_size < _min_heap_byte_size) { + if (max_heap_byte_size() < min_heap_byte_size()) { vm_exit_during_initialization("Incompatible minimum and maximum heap sizes specified"); } - if (_initial_heap_byte_size < _min_heap_byte_size) { + if (initial_heap_byte_size() < min_heap_byte_size()) { vm_exit_during_initialization("Incompatible minimum and initial heap sizes specified"); } - if (_max_heap_byte_size < _initial_heap_byte_size) { + if (max_heap_byte_size() < initial_heap_byte_size()) { vm_exit_during_initialization("Incompatible initial and maximum heap sizes specified"); } + + if (PrintGCDetails && Verbose) { + gclog_or_tty->print_cr("Minimum heap " SIZE_FORMAT " Initial heap " + SIZE_FORMAT " Maximum heap " SIZE_FORMAT, + min_heap_byte_size(), initial_heap_byte_size(), max_heap_byte_size()); + } } void CollectorPolicy::initialize_perm_generation(PermGen::Name pgnm) { @@ -128,10 +134,26 @@ // GenCollectorPolicy methods. +size_t GenCollectorPolicy::scale_by_NewRatio_aligned(size_t base_size) { + size_t x = base_size / (NewRatio+1); + size_t new_gen_size = x > min_alignment() ? 
+ align_size_down(x, min_alignment()) : + min_alignment(); + return new_gen_size; +} + +size_t GenCollectorPolicy::bound_minus_alignment(size_t desired_size, + size_t maximum_size) { + size_t alignment = min_alignment(); + size_t max_minus = maximum_size - alignment; + return desired_size < max_minus ? desired_size : max_minus; +} + + void GenCollectorPolicy::initialize_size_policy(size_t init_eden_size, size_t init_promo_size, size_t init_survivor_size) { - double max_gc_minor_pause_sec = ((double) MaxGCMinorPauseMillis)/1000.0; + const double max_gc_minor_pause_sec = ((double) MaxGCMinorPauseMillis)/1000.0; _size_policy = new AdaptiveSizePolicy(init_eden_size, init_promo_size, init_survivor_size, @@ -210,74 +232,260 @@ assert(MaxHeapSize % max_alignment() == 0, "maximum heap alignment"); } +// Values set on the command line win over any ergonomically +// set command line parameters. +// Ergonomic choice of parameters are done before this +// method is called. Values for command line parameters such as NewSize +// and MaxNewSize feed those ergonomic choices into this method. +// This method makes the final generation sizings consistent with +// themselves and with overall heap sizings. +// In the absence of explicitly set command line flags, policies +// such as the use of NewRatio are used to size the generation. void GenCollectorPolicy::initialize_size_info() { CollectorPolicy::initialize_size_info(); - // Minimum sizes of the generations may be different than - // the initial sizes. - if (!FLAG_IS_DEFAULT(NewSize)) { - _min_gen0_size = NewSize; + // min_alignment() is used for alignment within a generation. + // There is additional alignment done down stream for some + // collectors that sometimes causes unwanted rounding up of + // generations sizes. 
+ + // Determine maximum size of gen0 + + size_t max_new_size = 0; + if (FLAG_IS_CMDLINE(MaxNewSize)) { + if (MaxNewSize < min_alignment()) { + max_new_size = min_alignment(); + } else if (MaxNewSize >= max_heap_byte_size()) { + max_new_size = align_size_down(max_heap_byte_size() - min_alignment(), + min_alignment()); + warning("MaxNewSize (" SIZE_FORMAT "k) is equal to or " + "greater than the entire heap (" SIZE_FORMAT "k). A " + "new generation size of " SIZE_FORMAT "k will be used.", + MaxNewSize/K, max_heap_byte_size()/K, max_new_size/K); + } else { + max_new_size = align_size_down(MaxNewSize, min_alignment()); + } + + // The case for FLAG_IS_ERGO(MaxNewSize) could be treated + // specially at this point to just use an ergonomically set + // MaxNewSize to set max_new_size. For cases with small + // heaps such a policy often did not work because the MaxNewSize + // was larger than the entire heap. The interpretation given + // to ergonomically set flags is that the flags are set + // by different collectors for their own special needs but + // are not allowed to badly shape the heap. This allows the + // different collectors to decide what's best for themselves + // without having to factor in the overall heap shape. It + // can be the case in the future that the collectors would + // only make "wise" ergonomics choices and this policy could + // just accept those choices. The choices currently made are + // not always "wise". } else { - _min_gen0_size = align_size_down(_min_heap_byte_size / (NewRatio+1), - min_alignment()); - // We bound the minimum size by NewSize below (since it historically + max_new_size = scale_by_NewRatio_aligned(max_heap_byte_size()); + // Bound the maximum size by NewSize below (since it historically // would have been NewSize and because the NewRatio calculation could // yield a size that is too small) and bound it by MaxNewSize above. - // This is not always best. 
The NewSize calculated by CMS (which has - // a fixed minimum of 16m) can sometimes be "too" large. Consider - // the case where -Xmx32m. The CMS calculated NewSize would be about - // half the entire heap which seems too large. But the counter - // example is seen when the client defaults for NewRatio are used. - // An initial young generation size of 640k was observed - // with -Xmx128m -XX:MaxNewSize=32m when NewSize was not used - // as a lower bound as with - // _min_gen0_size = MIN2(_min_gen0_size, MaxNewSize); - // and 640k seemed too small a young generation. - _min_gen0_size = MIN2(MAX2(_min_gen0_size, NewSize), MaxNewSize); + // Ergonomics plays here by previously calculating the desired + // NewSize and MaxNewSize. + max_new_size = MIN2(MAX2(max_new_size, NewSize), MaxNewSize); + } + assert(max_new_size > 0, "All paths should set max_new_size"); + + // Given the maximum gen0 size, determine the initial and + // minimum sizes. + + if (max_heap_byte_size() == min_heap_byte_size()) { + // The maximum and minimum heap sizes are the same so + // the generations minimum and initial must be the + // same as its maximum. + set_min_gen0_size(max_new_size); + set_initial_gen0_size(max_new_size); + set_max_gen0_size(max_new_size); + } else { + size_t desired_new_size = 0; + if (!FLAG_IS_DEFAULT(NewSize)) { + // If NewSize is set ergonomically (for example by cms), it + // would make sense to use it. If it is used, also use it + // to set the initial size. Although there is no reason + // the minimum size and the initial size have to be the same, + // the current implementation gets into trouble during the calculation + // of the tenured generation sizes if they are different. + // Note that this makes the initial size and the minimum size + // generally small compared to the NewRatio calculation. 
+ _min_gen0_size = NewSize; + desired_new_size = NewSize; + max_new_size = MAX2(max_new_size, NewSize); + } else { + // For the case where NewSize is the default, use NewRatio + // to size the minimum and initial generation sizes. + // Use the default NewSize as the floor for these values. If + // NewRatio is overly large, the resulting sizes can be too + // small. + _min_gen0_size = MAX2(scale_by_NewRatio_aligned(min_heap_byte_size()), + NewSize); + desired_new_size = + MAX2(scale_by_NewRatio_aligned(initial_heap_byte_size()), + NewSize); + } + + assert(_min_gen0_size > 0, "Sanity check"); + set_initial_gen0_size(desired_new_size); + set_max_gen0_size(max_new_size); + + // At this point the desirable initial and minimum sizes have been + // determined without regard to the maximum sizes. + + // Bound the sizes by the corresponding overall heap sizes. + set_min_gen0_size( + bound_minus_alignment(_min_gen0_size, min_heap_byte_size())); + set_initial_gen0_size( + bound_minus_alignment(_initial_gen0_size, initial_heap_byte_size())); + set_max_gen0_size( + bound_minus_alignment(_max_gen0_size, max_heap_byte_size())); + + // At this point all three sizes have been checked against the + // maximum sizes but have not been checked for consistency + // among the three. + + // Final check min <= initial <= max + set_min_gen0_size(MIN2(_min_gen0_size, _max_gen0_size)); + set_initial_gen0_size( + MAX2(MIN2(_initial_gen0_size, _max_gen0_size), _min_gen0_size)); + set_min_gen0_size(MIN2(_min_gen0_size, _initial_gen0_size)); } - // Parameters are valid, compute area sizes.
- size_t max_new_size = align_size_down(_max_heap_byte_size / (NewRatio+1), - min_alignment()); - max_new_size = MIN2(MAX2(max_new_size, _min_gen0_size), MaxNewSize); + if (PrintGCDetails && Verbose) { + gclog_or_tty->print_cr("Minimum gen0 " SIZE_FORMAT " Initial gen0 " + SIZE_FORMAT " Maximum gen0 " SIZE_FORMAT, + min_gen0_size(), initial_gen0_size(), max_gen0_size()); + } +} - // desired_new_size is used to set the initial size. The - // initial size must be greater than the minimum size. - size_t desired_new_size = - align_size_down(_initial_heap_byte_size / (NewRatio+1), - min_alignment()); +// Call this method during the sizing of the gen1 to make +// adjustments to gen0 because of gen1 sizing policy. gen0 initially has +// the most freedom in sizing because it is done before the +// policy for gen1 is applied. Once gen1 policies have been applied, +// there may be conflicts in the shape of the heap and this method +// is used to make the needed adjustments. The application of the +// policies could be more sophisticated (iterative for example) but +// keeping it simple also seems a worthwhile goal. 
+bool TwoGenerationCollectorPolicy::adjust_gen0_sizes(size_t* gen0_size_ptr, + size_t* gen1_size_ptr, + size_t heap_size, + size_t min_gen0_size) { + bool result = false; + if ((*gen1_size_ptr + *gen0_size_ptr) > heap_size) { + if (((*gen0_size_ptr + OldSize) > heap_size) && + (heap_size - min_gen0_size) >= min_alignment()) { + // Adjust gen0 down to accommodate OldSize + *gen0_size_ptr = heap_size - min_gen0_size; + *gen0_size_ptr = + MAX2((uintx)align_size_down(*gen0_size_ptr, min_alignment()), + min_alignment()); + assert(*gen0_size_ptr > 0, "Min gen0 is too large"); + result = true; + } else { + *gen1_size_ptr = heap_size - *gen0_size_ptr; + *gen1_size_ptr = + MAX2((uintx)align_size_down(*gen1_size_ptr, min_alignment()), + min_alignment()); + } + } + return result; +} - size_t new_size = MIN2(MAX2(desired_new_size, _min_gen0_size), max_new_size); - - _initial_gen0_size = new_size; - _max_gen0_size = max_new_size; -} +// Minimum sizes of the generations may be different than +// the initial sizes. An inconsistency is permitted here +// in the total size that can be specified explicitly by +// command line specification of OldSize and NewSize and +// also a command line specification of -Xms. Issue a warning +// but allow the values to pass. void TwoGenerationCollectorPolicy::initialize_size_info() { GenCollectorPolicy::initialize_size_info(); - // Minimum sizes of the generations may be different than - // the initial sizes. An inconsistently is permitted here - // in the total size that can be specified explicitly by - // command line specification of OldSize and NewSize and - // also a command line specification of -Xms. Issue a warning - // but allow the values to pass. - if (!FLAG_IS_DEFAULT(OldSize)) { - _min_gen1_size = OldSize; + // At this point the minimum, initial and maximum sizes + // of the overall heap and of gen0 have been determined.
+ // The maximum gen1 size can be determined from the maximum gen0 + // and maximum heap size since no explicit flags exist + // for setting the gen1 maximum. + _max_gen1_size = max_heap_byte_size() - _max_gen0_size; + _max_gen1_size = + MAX2((uintx)align_size_down(_max_gen1_size, min_alignment()), + min_alignment()); + // If no explicit command line flag has been set for the + // gen1 size, use what is left for gen1. + if (FLAG_IS_DEFAULT(OldSize) || FLAG_IS_ERGO(OldSize)) { + // The user has not specified any value or ergonomics + // has chosen a value (which may or may not be consistent + // with the overall heap size). In either case make + // the minimum, maximum and initial sizes consistent + // with the gen0 sizes and the overall heap sizes. + assert(min_heap_byte_size() > _min_gen0_size, + "gen0 has an unexpected minimum size"); + set_min_gen1_size(min_heap_byte_size() - min_gen0_size()); + set_min_gen1_size( + MAX2((uintx)align_size_down(_min_gen1_size, min_alignment()), + min_alignment())); + set_initial_gen1_size(initial_heap_byte_size() - initial_gen0_size()); + set_initial_gen1_size( + MAX2((uintx)align_size_down(_initial_gen1_size, min_alignment()), + min_alignment())); + + } else { + // It's been explicitly set on the command line. Use the + // OldSize and then determine the consequences. + set_min_gen1_size(OldSize); + set_initial_gen1_size(OldSize); + + // If the user has explicitly set an OldSize that is inconsistent + // with other command line flags, issue a warning. // The generation minimums and the overall heap mimimum should // be within one heap alignment.
- if ((_min_gen1_size + _min_gen0_size + max_alignment()) < - _min_heap_byte_size) { + if ((_min_gen1_size + _min_gen0_size + min_alignment()) < + min_heap_byte_size()) { warning("Inconsistency between minimum heap size and minimum " - "generation sizes: using min heap = " SIZE_FORMAT, - _min_heap_byte_size); + "generation sizes: using minimum heap = " SIZE_FORMAT, + min_heap_byte_size()); + } + if ((OldSize > _max_gen1_size)) { + warning("Inconsistency between maximum heap size and maximum " + "generation sizes: using maximum heap = " SIZE_FORMAT + " -XX:OldSize flag is being ignored", + max_heap_byte_size()); + } + // If there is an inconsistency between the OldSize and the minimum and/or + // initial size of gen0, since OldSize was explicitly set, OldSize wins. + if (adjust_gen0_sizes(&_min_gen0_size, &_min_gen1_size, + min_heap_byte_size(), OldSize)) { + if (PrintGCDetails && Verbose) { + gclog_or_tty->print_cr("Minimum gen0 " SIZE_FORMAT " Initial gen0 " + SIZE_FORMAT " Maximum gen0 " SIZE_FORMAT, + min_gen0_size(), initial_gen0_size(), max_gen0_size()); + } } - } else { - _min_gen1_size = _min_heap_byte_size - _min_gen0_size; + // Initial size + if (adjust_gen0_sizes(&_initial_gen0_size, &_initial_gen1_size, + initial_heap_byte_size(), OldSize)) { + if (PrintGCDetails && Verbose) { + gclog_or_tty->print_cr("Minimum gen0 " SIZE_FORMAT " Initial gen0 " + SIZE_FORMAT " Maximum gen0 " SIZE_FORMAT, + min_gen0_size(), initial_gen0_size(), max_gen0_size()); + } + } } + // Enforce the maximum gen1 size. 
+ set_min_gen1_size(MIN2(_min_gen1_size, _max_gen1_size)); - _initial_gen1_size = _initial_heap_byte_size - _initial_gen0_size; - _max_gen1_size = _max_heap_byte_size - _max_gen0_size; + // Check that min gen1 <= initial gen1 <= max gen1 + set_initial_gen1_size(MAX2(_initial_gen1_size, _min_gen1_size)); + set_initial_gen1_size(MIN2(_initial_gen1_size, _max_gen1_size)); + + if (PrintGCDetails && Verbose) { + gclog_or_tty->print_cr("Minimum gen1 " SIZE_FORMAT " Initial gen1 " + SIZE_FORMAT " Maximum gen1 " SIZE_FORMAT, + min_gen1_size(), initial_gen1_size(), max_gen1_size()); + } } HeapWord* GenCollectorPolicy::mem_allocate_work(size_t size, diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/collectorPolicy.hpp --- a/src/share/vm/memory/collectorPolicy.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/collectorPolicy.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -82,8 +82,11 @@ size_t max_alignment() { return _max_alignment; } size_t initial_heap_byte_size() { return _initial_heap_byte_size; } + void set_initial_heap_byte_size(size_t v) { _initial_heap_byte_size = v; } size_t max_heap_byte_size() { return _max_heap_byte_size; } + void set_max_heap_byte_size(size_t v) { _max_heap_byte_size = v; } size_t min_heap_byte_size() { return _min_heap_byte_size; } + void set_min_heap_byte_size(size_t v) { _min_heap_byte_size = v; } enum Name { CollectorPolicyKind, @@ -182,8 +185,24 @@ // compute max heap alignment size_t compute_max_alignment(); + // Scale the base_size by NewRatio according to + // result = base_size / (NewRatio + 1) + // and align by min_alignment() + size_t scale_by_NewRatio_aligned(size_t base_size); + + // Bound the value by the given maximum minus the + // min_alignment.
+ size_t bound_minus_alignment(size_t desired_size, size_t maximum_size); public: + // Accessors + size_t min_gen0_size() { return _min_gen0_size; } + void set_min_gen0_size(size_t v) { _min_gen0_size = v; } + size_t initial_gen0_size() { return _initial_gen0_size; } + void set_initial_gen0_size(size_t v) { _initial_gen0_size = v; } + size_t max_gen0_size() { return _max_gen0_size; } + void set_max_gen0_size(size_t v) { _max_gen0_size = v; } + virtual int number_of_generations() = 0; virtual GenerationSpec **generations() { @@ -236,6 +255,14 @@ void initialize_generations() { ShouldNotReachHere(); } public: + // Accessors + size_t min_gen1_size() { return _min_gen1_size; } + void set_min_gen1_size(size_t v) { _min_gen1_size = v; } + size_t initial_gen1_size() { return _initial_gen1_size; } + void set_initial_gen1_size(size_t v) { _initial_gen1_size = v; } + size_t max_gen1_size() { return _max_gen1_size; } + void set_max_gen1_size(size_t v) { _max_gen1_size = v; } + // Inherited methods TwoGenerationCollectorPolicy* as_two_generation_policy() { return this; } @@ -246,6 +273,10 @@ virtual CollectorPolicy::Name kind() { return CollectorPolicy::TwoGenerationCollectorPolicyKind; } + + // Returns true if gen0 sizes were adjusted + bool adjust_gen0_sizes(size_t* gen0_size_ptr, size_t* gen1_size_ptr, + size_t heap_size, size_t min_gen1_size); }; class MarkSweepPolicy : public TwoGenerationCollectorPolicy { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/compactingPermGenGen.cpp --- a/src/share/vm/memory/compactingPermGenGen.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/compactingPermGenGen.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -26,9 +26,27 @@ #include "incls/_compactingPermGenGen.cpp.incl" -// Recursively adjust all pointers in an object and all objects by -// referenced it. Clear marks on objects in order to prevent visiting -// any object twice.
+// An ObjectClosure helper: Recursively adjust all pointers in an object +// and all objects by referenced it. Clear marks on objects in order to +// prevent visiting any object twice. This helper is used when the +// RedefineClasses() API has been called. + +class AdjustSharedObjectClosure : public ObjectClosure { +public: + void do_object(oop obj) { + if (obj->is_shared_readwrite()) { + if (obj->mark()->is_marked()) { + obj->init_mark(); // Don't revisit this object. + obj->adjust_pointers(); // Adjust this object's references. + } + } + } +}; + + +// An OopClosure helper: Recursively adjust all pointers in an object +// and all objects by referenced it. Clear marks on objects in order +// to prevent visiting any object twice. class RecursiveAdjustSharedObjectClosure : public OopClosure { public: @@ -274,15 +292,34 @@ // objects in the space will page in more objects than we need. // Instead, use the system dictionary as strong roots into the read // write space. +// +// If a RedefineClasses() call has been made, then we have to iterate +// over the entire shared read-write space in order to find all the +// objects that need to be forwarded. For example, it is possible for +// an nmethod to be found and marked in GC phase-1 only for the nmethod +// to be freed by the time we reach GC phase-3. The underlying method +// is still marked, but we can't (easily) find it in GC phase-3 so we +// blow up in GC phase-4. With RedefineClasses() we want replaced code +// (EMCP or obsolete) to go away (i.e., be collectible) once it is no +// longer being executed by any thread so we keep minimal attachments +// to the replaced code. However, we can't guarantee when those EMCP +// or obsolete methods will be collected so they may still be out there +// even after we've severed our minimal attachments. 
void CompactingPermGenGen::pre_adjust_pointers() { if (spec()->enable_shared_spaces()) { - RecursiveAdjustSharedObjectClosure blk; - Universe::oops_do(&blk); - StringTable::oops_do(&blk); - SystemDictionary::always_strong_classes_do(&blk); - TraversePlaceholdersClosure tpc; - SystemDictionary::placeholders_do(&tpc); + if (JvmtiExport::has_redefined_a_class()) { + // RedefineClasses() requires a brute force approach + AdjustSharedObjectClosure blk; + rw_space()->object_iterate(&blk); + } else { + RecursiveAdjustSharedObjectClosure blk; + Universe::oops_do(&blk); + StringTable::oops_do(&blk); + SystemDictionary::always_strong_classes_do(&blk); + TraversePlaceholdersClosure tpc; + SystemDictionary::placeholders_do(&tpc); + } } } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/genRemSet.hpp --- a/src/share/vm/memory/genRemSet.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/genRemSet.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -91,8 +91,15 @@ virtual void verify() = 0; // Verify that the remembered set has no entries for - // the heap interval denoted by mr. - virtual void verify_empty(MemRegion mr) = 0; + // the heap interval denoted by mr. If there are any + // alignment constraints on the remembered set, only the + // part of the region that is aligned is checked. + // + // alignment boundaries + // +--------+-------+--------+-------+ + // [ region mr ) + // [ part checked ) + virtual void verify_aligned_region_empty(MemRegion mr) = 0; // If appropriate, print some information about the remset on "tty". virtual void print() {} diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/heap.cpp --- a/src/share/vm/memory/heap.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/heap.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -102,8 +102,9 @@ _log2_segment_size = exact_log2(segment_size); // Reserve and initialize space for _memory. 
- const size_t page_size = os::page_size_for_region(committed_size, - reserved_size, 8); + const size_t page_size = os::can_execute_large_page_memory() ? + os::page_size_for_region(committed_size, reserved_size, 8) : + os::vm_page_size(); const size_t granularity = os::vm_allocation_granularity(); const size_t r_align = MAX2(page_size, granularity); const size_t r_size = align_size_up(reserved_size, r_align); @@ -111,7 +112,7 @@ const size_t rs_align = page_size == (size_t) os::vm_page_size() ? 0 : MAX2(page_size, granularity); - ReservedSpace rs(r_size, rs_align, false); + ReservedSpace rs(r_size, rs_align, rs_align > 0); os::trace_page_sizes("code heap", committed_size, reserved_size, page_size, rs.base(), rs.size()); if (!_memory.initialize(rs, c_size)) { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/heapInspection.cpp --- a/src/share/vm/memory/heapInspection.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/heapInspection.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -65,7 +65,7 @@ name = ""; } // simplify the formatting (ILP32 vs LP64) - always cast the numbers to 64-bit - st->print_cr("%13" FORMAT64_MODIFIER "d %13" FORMAT64_MODIFIER "u %s", + st->print_cr(INT64_FORMAT_W(13) " " UINT64_FORMAT_W(13) " %s", (jlong) _instance_count, (julong) _instance_words * HeapWordSize, name); @@ -80,7 +80,10 @@ elt = elt->next(); } elt = new KlassInfoEntry(k, list()); - set_list(elt); + // We may be out of space to allocate the new entry. 
+ if (elt != NULL) { + set_list(elt); + } return elt; } @@ -103,21 +106,25 @@ } KlassInfoTable::KlassInfoTable(int size, HeapWord* ref) { - _size = size; + _size = 0; _ref = ref; - _buckets = NEW_C_HEAP_ARRAY(KlassInfoBucket, _size); - - for (int index = 0; index < _size; index++) { - _buckets[index].initialize(); + _buckets = NEW_C_HEAP_ARRAY(KlassInfoBucket, size); + if (_buckets != NULL) { + _size = size; + for (int index = 0; index < _size; index++) { + _buckets[index].initialize(); + } } } KlassInfoTable::~KlassInfoTable() { - for (int index = 0; index < _size; index++) { - _buckets[index].empty(); + if (_buckets != NULL) { + for (int index = 0; index < _size; index++) { + _buckets[index].empty(); + } + FREE_C_HEAP_ARRAY(KlassInfoBucket, _buckets); + _size = 0; } - FREE_C_HEAP_ARRAY(KlassInfoBucket, _buckets); - _size = 0; } uint KlassInfoTable::hash(klassOop p) { @@ -127,19 +134,32 @@ KlassInfoEntry* KlassInfoTable::lookup(const klassOop k) { uint idx = hash(k) % _size; + assert(_buckets != NULL, "Allocation failure should have been caught"); KlassInfoEntry* e = _buckets[idx].lookup(k); - assert(k == e->klass(), "must be equal"); + // Lookup may fail if this is a new klass for which we + // could not allocate space for a new entry. + assert(e == NULL || k == e->klass(), "must be equal"); return e; } -void KlassInfoTable::record_instance(const oop obj) { +// Return false if the entry could not be recorded on account +// of running out of space required to create a new entry. +bool KlassInfoTable::record_instance(const oop obj) { klassOop k = obj->klass(); KlassInfoEntry* elt = lookup(k); - elt->set_count(elt->count() + 1); - elt->set_words(elt->words() + obj->size()); + // elt may be NULL if it's a new klass for which we + // could not allocate space for a new entry in the hashtable. 
+ if (elt != NULL) { + elt->set_count(elt->count() + 1); + elt->set_words(elt->words() + obj->size()); + return true; + } else { + return false; + } } void KlassInfoTable::iterate(KlassInfoClosure* cic) { + assert(_size == 0 || _buckets != NULL, "Allocation failure should have been caught"); for (int index = 0; index < _size; index++) { _buckets[index].iterate(cic); } @@ -176,7 +196,7 @@ total += elements()->at(i)->count(); totalw += elements()->at(i)->words(); } - st->print_cr("Total %13" FORMAT64_MODIFIER "d %13" FORMAT64_MODIFIER "u", + st->print_cr("Total " INT64_FORMAT_W(13) " " UINT64_FORMAT_W(13), total, totalw * HeapWordSize); } @@ -199,12 +219,18 @@ class RecordInstanceClosure : public ObjectClosure { private: KlassInfoTable* _cit; + size_t _missed_count; public: - RecordInstanceClosure(KlassInfoTable* cit) : _cit(cit) {} + RecordInstanceClosure(KlassInfoTable* cit) : + _cit(cit), _missed_count(0) {} void do_object(oop obj) { - _cit->record_instance(obj); + if (!_cit->record_instance(obj)) { + _missed_count++; + } } + + size_t missed_count() { return _missed_count; } }; void HeapInspection::heap_inspection(outputStream* st) { @@ -230,21 +256,32 @@ ShouldNotReachHere(); // Unexpected heap kind for this op } // Collect klass instance info - - // Iterate over objects in the heap KlassInfoTable cit(KlassInfoTable::cit_size, ref); - RecordInstanceClosure ric(&cit); - Universe::heap()->object_iterate(&ric); + if (!cit.allocation_failed()) { + // Iterate over objects in the heap + RecordInstanceClosure ric(&cit); + Universe::heap()->object_iterate(&ric); - // Sort and print klass instance info - KlassInfoHisto histo("\n" - " num #instances #bytes class name\n" - "----------------------------------------------", - KlassInfoHisto::histo_initial_size); - HistoClosure hc(&histo); - cit.iterate(&hc); - histo.sort(); - histo.print_on(st); + // Report if certain classes are not counted because of + // running out of C-heap for the histogram. 
+ size_t missed_count = ric.missed_count(); + if (missed_count != 0) { + st->print_cr("WARNING: Ran out of C-heap; undercounted " SIZE_FORMAT + " total instances in data below", + missed_count); + } + // Sort and print klass instance info + KlassInfoHisto histo("\n" + " num #instances #bytes class name\n" + "----------------------------------------------", + KlassInfoHisto::histo_initial_size); + HistoClosure hc(&histo); + cit.iterate(&hc); + histo.sort(); + histo.print_on(st); + } else { + st->print_cr("WARNING: Ran out of C-heap; histogram not generated"); + } st->flush(); if (Universe::heap()->kind() == CollectedHeap::GenCollectedHeap) { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/heapInspection.hpp --- a/src/share/vm/memory/heapInspection.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/heapInspection.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -98,8 +98,9 @@ }; KlassInfoTable(int size, HeapWord* ref); ~KlassInfoTable(); - void record_instance(const oop obj); + bool record_instance(const oop obj); void iterate(KlassInfoClosure* cic); + bool allocation_failed() { return _buckets == NULL; } }; class KlassInfoHisto : public StackObj { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/referenceProcessor.cpp --- a/src/share/vm/memory/referenceProcessor.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/referenceProcessor.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -85,7 +85,7 @@ ReferenceProcessor* rp = new ReferenceProcessor(span, atomic_discovery, mt_discovery, mt_degree, - mt_processing); + mt_processing && (parallel_gc_threads > 0)); if (rp == NULL) { vm_exit_during_initialization("Could not allocate ReferenceProcessor object"); } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/memory/tenuredGeneration.cpp --- a/src/share/vm/memory/tenuredGeneration.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/memory/tenuredGeneration.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -409,10 +409,11 @@ void 
TenuredGeneration::verify_alloc_buffers_clean() { if (UseParNewGC) { for (uint i = 0; i < ParallelGCThreads; i++) { - _rs->verify_empty(_alloc_buffers[i]->range()); + _rs->verify_aligned_region_empty(_alloc_buffers[i]->range()); } } } + #else // SERIALGC void TenuredGeneration::retire_alloc_buffers_before_full_gc() {} void TenuredGeneration::verify_alloc_buffers_clean() {} diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/arrayOop.hpp --- a/src/share/vm/oops/arrayOop.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/arrayOop.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -58,11 +58,11 @@ // alignments. It gets the scale from the type2aelembytes array. static int32_t max_array_length(BasicType type) { assert(type >= 0 && type < T_CONFLICT, "wrong type"); - assert(type2aelembytes[type] != 0, "wrong type"); + assert(type2aelembytes(type) != 0, "wrong type"); // We use max_jint, since object_size is internally represented by an 'int' // This gives us an upper bound of max_jint words for the size of the oop. int32_t max_words = (max_jint - header_size(type) - 2); - int elembytes = (type == T_OBJECT) ? T_OBJECT_aelem_bytes : type2aelembytes[type]; + int elembytes = (type == T_OBJECT) ? T_OBJECT_aelem_bytes : type2aelembytes(type); jlong len = ((jlong)max_words * HeapWordSize) / elembytes; return (len > max_jint) ? max_jint : (int32_t)len; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/instanceKlass.cpp --- a/src/share/vm/oops/instanceKlass.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/instanceKlass.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -791,17 +791,39 @@ } +static int compare_fields_by_offset(int* a, int* b) { + return a[0] - b[0]; +} + void instanceKlass::do_nonstatic_fields(FieldClosure* cl) { - fieldDescriptor fd; instanceKlass* super = superklass(); if (super != NULL) { super->do_nonstatic_fields(cl); } + fieldDescriptor fd; int length = fields()->length(); + // In DebugInfo nonstatic fields are sorted by offset. 
+ int* fields_sorted = NEW_C_HEAP_ARRAY(int, 2*(length+1)); + int j = 0; for (int i = 0; i < length; i += next_offset) { fd.initialize(as_klassOop(), i); - if (!(fd.is_static())) cl->do_field(&fd); + if (!fd.is_static()) { + fields_sorted[j + 0] = fd.offset(); + fields_sorted[j + 1] = i; + j += 2; + } } + if (j > 0) { + length = j; + // _sort_Fn is defined in growableArray.hpp. + qsort(fields_sorted, length/2, 2*sizeof(int), (_sort_Fn)compare_fields_by_offset); + for (int i = 0; i < length; i += 2) { + fd.initialize(as_klassOop(), fields_sorted[i + 1]); + assert(!fd.is_static() && fd.offset() == fields_sorted[i], "only nonstatic fields"); + cl->do_field(&fd); + } + } + FREE_C_HEAP_ARRAY(int, fields_sorted); } @@ -950,7 +972,6 @@ // These allocations will have to be freed if they are unused. // Allocate a new array of methods. - jmethodID* to_dealloc_jmeths = NULL; jmethodID* new_jmeths = NULL; if (length <= idnum) { // A new array will be needed (unless some other thread beats us to it) @@ -961,7 +982,6 @@ } // Allocate a new method ID. - jmethodID to_dealloc_id = NULL; jmethodID new_id = NULL; if (method_h->is_old() && !method_h->is_obsolete()) { // The method passed in is old (but not obsolete), we need to use the current version @@ -975,40 +995,51 @@ new_id = JNIHandles::make_jmethod_id(method_h); } - { + if (Threads::number_of_threads() == 0 || SafepointSynchronize::is_at_safepoint()) { + // No need and unsafe to lock the JmethodIdCreation_lock at safepoint. + id = get_jmethod_id(ik_h, idnum, new_id, new_jmeths); + } else { MutexLocker ml(JmethodIdCreation_lock); + id = get_jmethod_id(ik_h, idnum, new_id, new_jmeths); + } + } + return id; +} - // We must not go to a safepoint while holding this lock. 
- debug_only(No_Safepoint_Verifier nosafepoints;) + +jmethodID instanceKlass::get_jmethod_id(instanceKlassHandle ik_h, size_t idnum, + jmethodID new_id, jmethodID* new_jmeths) { + // Retry lookup after we got the lock or ensured we are at safepoint + jmethodID* jmeths = ik_h->methods_jmethod_ids_acquire(); + jmethodID id = NULL; + jmethodID to_dealloc_id = NULL; + jmethodID* to_dealloc_jmeths = NULL; + size_t length; - // Retry lookup after we got the lock - jmeths = ik_h->methods_jmethod_ids_acquire(); - if (jmeths == NULL || (length = (size_t)jmeths[0]) <= idnum) { - if (jmeths != NULL) { - // We have grown the array: copy the existing entries, and delete the old array - for (size_t index = 0; index < length; index++) { - new_jmeths[index+1] = jmeths[index+1]; - } - to_dealloc_jmeths = jmeths; // using the new jmeths, deallocate the old one - } - ik_h->release_set_methods_jmethod_ids(jmeths = new_jmeths); - } else { - id = jmeths[idnum+1]; - to_dealloc_jmeths = new_jmeths; // using the old jmeths, deallocate the new one + if (jmeths == NULL || (length = (size_t)jmeths[0]) <= idnum) { + if (jmeths != NULL) { + // We have grown the array: copy the existing entries, and delete the old array + for (size_t index = 0; index < length; index++) { + new_jmeths[index+1] = jmeths[index+1]; } - if (id == NULL) { - id = new_id; - jmeths[idnum+1] = id; // install the new method ID - } else { - to_dealloc_id = new_id; // the new id wasn't used, mark it for deallocation - } + to_dealloc_jmeths = jmeths; // using the new jmeths, deallocate the old one } + ik_h->release_set_methods_jmethod_ids(jmeths = new_jmeths); + } else { + id = jmeths[idnum+1]; + to_dealloc_jmeths = new_jmeths; // using the old jmeths, deallocate the new one + } + if (id == NULL) { + id = new_id; + jmeths[idnum+1] = id; // install the new method ID + } else { + to_dealloc_id = new_id; // the new id wasn't used, mark it for deallocation + } - // Free up unneeded or no longer needed resources - 
FreeHeap(to_dealloc_jmeths); - if (to_dealloc_id != NULL) { - JNIHandles::destroy_jmethod_id(to_dealloc_id); - } + // Free up unneeded or no longer needed resources + FreeHeap(to_dealloc_jmeths); + if (to_dealloc_id != NULL) { + JNIHandles::destroy_jmethod_id(to_dealloc_id); } return id; } @@ -2165,12 +2196,20 @@ RC_TRACE(0x00000100, ("adding previous version ref for %s @%d, EMCP_cnt=%d", ikh->external_name(), _previous_versions->length(), emcp_method_count)); constantPoolHandle cp_h(ikh->constants()); - jweak cp_ref = JNIHandles::make_weak_global(cp_h); + jobject cp_ref; + if (cp_h->is_shared()) { + // a shared ConstantPool requires a regular reference; a weak + // reference would be collectible + cp_ref = JNIHandles::make_global(cp_h); + } else { + cp_ref = JNIHandles::make_weak_global(cp_h); + } PreviousVersionNode * pv_node = NULL; objArrayOop old_methods = ikh->methods(); if (emcp_method_count == 0) { - pv_node = new PreviousVersionNode(cp_ref, NULL); + // non-shared ConstantPool gets a weak reference + pv_node = new PreviousVersionNode(cp_ref, !cp_h->is_shared(), NULL); RC_TRACE(0x00000400, ("add: all methods are obsolete; flushing any EMCP weak refs")); } else { @@ -2190,7 +2229,8 @@ } } } - pv_node = new PreviousVersionNode(cp_ref, method_refs); + // non-shared ConstantPool gets a weak reference + pv_node = new PreviousVersionNode(cp_ref, !cp_h->is_shared(), method_refs); } _previous_versions->append(pv_node); @@ -2208,7 +2248,7 @@ // check the previous versions array for a GC'ed weak refs pv_node = _previous_versions->at(i); cp_ref = pv_node->prev_constant_pool(); - assert(cp_ref != NULL, "weak cp ref was unexpectedly cleared"); + assert(cp_ref != NULL, "cp ref was unexpectedly cleared"); if (cp_ref == NULL) { delete pv_node; _previous_versions->remove_at(i); @@ -2281,7 +2321,7 @@ // check the previous versions array for a GC'ed weak refs pv_node = _previous_versions->at(j); cp_ref = pv_node->prev_constant_pool(); - assert(cp_ref != NULL, "weak cp ref was 
unexpectedly cleared"); + assert(cp_ref != NULL, "cp ref was unexpectedly cleared"); if (cp_ref == NULL) { delete pv_node; _previous_versions->remove_at(j); @@ -2379,8 +2419,8 @@ // been GC'ed PreviousVersionNode * pv_node = _previous_versions->at(i); - jweak cp_ref = pv_node->prev_constant_pool(); - assert(cp_ref != NULL, "weak reference was unexpectedly cleared"); + jobject cp_ref = pv_node->prev_constant_pool(); + assert(cp_ref != NULL, "cp reference was unexpectedly cleared"); if (cp_ref == NULL) { continue; // robustness } @@ -2440,10 +2480,11 @@ // Construct a PreviousVersionNode entry for the array hung off // the instanceKlass. -PreviousVersionNode::PreviousVersionNode(jweak prev_constant_pool, - GrowableArray* prev_EMCP_methods) { +PreviousVersionNode::PreviousVersionNode(jobject prev_constant_pool, + bool prev_cp_is_weak, GrowableArray* prev_EMCP_methods) { _prev_constant_pool = prev_constant_pool; + _prev_cp_is_weak = prev_cp_is_weak; _prev_EMCP_methods = prev_EMCP_methods; } @@ -2451,7 +2492,11 @@ // Destroy a PreviousVersionNode PreviousVersionNode::~PreviousVersionNode() { if (_prev_constant_pool != NULL) { - JNIHandles::destroy_weak_global(_prev_constant_pool); + if (_prev_cp_is_weak) { + JNIHandles::destroy_weak_global(_prev_constant_pool); + } else { + JNIHandles::destroy_global(_prev_constant_pool); + } } if (_prev_EMCP_methods != NULL) { @@ -2471,8 +2516,8 @@ _prev_constant_pool_handle = constantPoolHandle(); // NULL handle _prev_EMCP_method_handles = NULL; - jweak cp_ref = pv_node->prev_constant_pool(); - assert(cp_ref != NULL, "weak constant pool ref was unexpectedly cleared"); + jobject cp_ref = pv_node->prev_constant_pool(); + assert(cp_ref != NULL, "constant pool ref was unexpectedly cleared"); if (cp_ref == NULL) { return; // robustness } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/instanceKlass.hpp --- a/src/share/vm/oops/instanceKlass.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/instanceKlass.hpp Tue Apr 22 
15:36:18 2008 -0700 @@ -432,6 +432,8 @@ _enclosing_method_method_index = method_index; } // jmethodID support + static jmethodID get_jmethod_id(instanceKlassHandle ik_h, size_t idnum, + jmethodID new_id, jmethodID* new_jmeths); static jmethodID jmethod_id_for_impl(instanceKlassHandle ik_h, methodHandle method_h); jmethodID jmethod_id_or_null(methodOop method); @@ -838,11 +840,20 @@ // A collection point for interesting information about the previous // version(s) of an instanceKlass. This class uses weak references to // the information so that the information may be collected as needed -// by the system. A GrowableArray of PreviousVersionNodes is attached +// by the system. If the information is shared, then a regular +// reference must be used because a weak reference would be seen as +// collectible. A GrowableArray of PreviousVersionNodes is attached // to the instanceKlass as needed. See PreviousVersionWalker below. class PreviousVersionNode : public CHeapObj { private: - jweak _prev_constant_pool; + // A shared ConstantPool is never collected so we'll always have + // a reference to it so we can update items in the cache. We'll + // have a weak reference to a non-shared ConstantPool until all + // of the methods (EMCP or obsolete) have been collected; the + // non-shared ConstantPool becomes collectible at that point. + jobject _prev_constant_pool; // regular or weak reference + bool _prev_cp_is_weak; // true if not a shared ConstantPool + // If the previous version of the instanceKlass doesn't have any // EMCP methods, then _prev_EMCP_methods will be NULL. 
If all the // EMCP methods have been collected, then _prev_EMCP_methods can @@ -850,10 +861,10 @@ GrowableArray* _prev_EMCP_methods; public: - PreviousVersionNode(jweak prev_constant_pool, + PreviousVersionNode(jobject prev_constant_pool, bool prev_cp_is_weak, GrowableArray* prev_EMCP_methods); ~PreviousVersionNode(); - jweak prev_constant_pool() const { + jobject prev_constant_pool() const { return _prev_constant_pool; } GrowableArray* prev_EMCP_methods() const { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/klass.cpp --- a/src/share/vm/oops/klass.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/klass.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -182,7 +182,7 @@ assert(etype >= T_BOOLEAN && etype <= T_OBJECT, "valid etype"); // Note that T_ARRAY is not allowed here. int hsize = arrayOopDesc::base_offset_in_bytes(etype); - int esize = type2aelembytes[etype]; + int esize = type2aelembytes(etype); bool isobj = (etype == T_OBJECT); int tag = isobj ? _lh_array_tag_obj_value : _lh_array_tag_type_value; int lh = array_layout_helper(tag, hsize, etype, exact_log2(esize)); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/klassVtable.cpp --- a/src/share/vm/oops/klassVtable.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/klassVtable.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -735,7 +735,7 @@ } } - // This lenght of the itable was either zero, or it has not yet been initialized. + // The length of the itable was either zero, or it has not yet been initialized. 
_table_offset = 0; _size_offset_table = 0; _size_method_table = 0; @@ -870,16 +870,19 @@ // Initialization void klassItable::initialize_itable(bool checkconstraints, TRAPS) { - // Cannot be setup doing bootstrapping - if (Universe::is_bootstrapping()) return; + // Cannot be set up during bootstrapping, interfaces don't have + // itables, and klasses with only one entry have empty itables + if (Universe::is_bootstrapping() || + _klass->is_interface() || + _klass->itable_length() == itableOffsetEntry::size()) return; - int num_interfaces = nof_interfaces(); + // There's always an extra itable entry so we can null-terminate it. + guarantee(size_offset_table() >= 1, "too small"); + int num_interfaces = size_offset_table() - 1; if (num_interfaces > 0) { - if (TraceItables) tty->print_cr("%3d: Initializing itables for %s", ++initialize_count, _klass->name()->as_C_string()); + if (TraceItables) tty->print_cr("%3d: Initializing itables for %s", ++initialize_count, + _klass->name()->as_C_string()); - // In debug mode, we got an extra NULL/NULL entry - debug_only(num_interfaces--); - assert(num_interfaces > 0, "to few interfaces in offset itable"); // Interate through all interfaces int i; @@ -890,12 +893,10 @@ initialize_itable_for_interface(ioe->offset(), interf_h, checkconstraints, CHECK); } -#ifdef ASSERT - // Check that the last entry is empty - itableOffsetEntry* ioe = offset_entry(i); - assert(ioe->interface_klass() == NULL && ioe->offset() == 0, "terminator entry missing"); -#endif } + // Check that the last entry is empty + itableOffsetEntry* ioe = offset_entry(size_offset_table() - 1); + guarantee(ioe->interface_klass() == NULL && ioe->offset() == 0, "terminator entry missing"); } @@ -972,7 +973,7 @@ } } -// Update entry for specic methodOop +// Update entry for specific methodOop void klassItable::initialize_with_method(methodOop m) { itableMethodEntry* ime = method_entry(0); for(int i = 0; i < _size_method_table; i++) { @@ -1085,12 +1086,8 @@ CountInterfacesClosure 
cic; visit_all_interfaces(transitive_interfaces(), &cic); - // Add one extra entry in debug mode, so we can null-terminate the table - int nof_methods = cic.nof_methods(); - int nof_interfaces = cic.nof_interfaces(); - debug_only(if (nof_interfaces > 0) nof_interfaces++); - - int itable_size = calc_itable_size(nof_interfaces, nof_methods); + // There's always an extra itable entry so we can null-terminate it. + int itable_size = calc_itable_size(cic.nof_interfaces() + 1, cic.nof_methods()); // Statistics update_stats(itable_size * HeapWordSize); @@ -1110,8 +1107,8 @@ int nof_methods = cic.nof_methods(); int nof_interfaces = cic.nof_interfaces(); - // Add one extra entry in debug mode, so we can null-terminate the table - debug_only(if (nof_interfaces > 0) nof_interfaces++); + // Add one extra entry so we can null-terminate the table + nof_interfaces++; assert(compute_itable_size(objArrayHandle(klass->transitive_interfaces())) == calc_itable_size(nof_interfaces, nof_methods), diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/klassVtable.hpp --- a/src/share/vm/oops/klassVtable.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/klassVtable.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -259,7 +259,7 @@ itableMethodEntry* method_entry(int i) { assert(0 <= i && i <= _size_method_table, "index out of bounds"); return &((itableMethodEntry*)method_start())[i]; } - int nof_interfaces() { return _size_offset_table; } + int size_offset_table() { return _size_offset_table; } // Initialization void initialize_itable(bool checkconstraints, TRAPS); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/markOop.cpp --- a/src/share/vm/oops/markOop.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/markOop.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -37,3 +37,32 @@ st->print("age %d)", age()); } } + + +// Give advice about whether the oop that contains this markOop +// should be cached or not. 
+bool markOopDesc::should_not_be_cached() const { + // the cast is because decode_pointer() isn't marked const + if (is_marked() && ((markOopDesc *)this)->decode_pointer() != NULL) { + // If the oop containing this markOop is being forwarded, then + // we are in the middle of GC and we do not want the containing + // oop to be added to a cache. We have no way of knowing whether + // the cache has already been visited by the current GC phase so + // we don't know whether the forwarded oop will be properly + // processed in this phase. If the forwarded oop is not properly + // processed, then we'll see strange crashes or asserts during + // the next GC run because the markOop will contain an unexpected + // value. + // + // This situation has been seen when we are GC'ing a methodOop + // because we use the methodOop while we're GC'ing it. Scary + // stuff. Some of the uses of the methodOop cause the methodOop to + // be added to the OopMapCache in the instanceKlass as a side + // effect. This check lets the cache maintainer know when a + // cache addition would not be safe. + return true; + } + + // caching the containing oop should be just fine + return false; +} diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/markOop.hpp --- a/src/share/vm/oops/markOop.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/markOop.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -357,4 +357,7 @@ // Recover address of oop from encoded form used in mark inline void* decode_pointer() { if (UseBiasedLocking && has_bias_pattern()) return NULL; return clear_lock_bits(); } + + // see the definition in markOop.cpp for the gory details + bool should_not_be_cached() const; }; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/methodDataOop.cpp --- a/src/share/vm/oops/methodDataOop.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/methodDataOop.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -32,7 +32,7 @@ // Some types of data layouts need a length field. 
bool DataLayout::needs_array_len(u1 tag) { - return (tag == multi_branch_data_tag); + return (tag == multi_branch_data_tag) || (tag == arg_info_data_tag); } // Perform generic initialization of the data. More specific @@ -404,6 +404,17 @@ } #endif +#ifndef PRODUCT +void ArgInfoData::print_data_on(outputStream* st) { + print_shared(st, "ArgInfoData"); + int nargs = number_of_args(); + for (int i = 0; i < nargs; i++) { + st->print(" 0x%x", arg_modified(i)); + } + st->cr(); +} + +#endif // ================================================================== // methodDataOop // @@ -508,6 +519,9 @@ int extra_data_count = compute_extra_data_count(data_size, empty_bc_count); object_size += extra_data_count * DataLayout::compute_size_in_bytes(0); + // Add a cell to record information about modified arguments. + int arg_size = method->size_of_parameters(); + object_size += DataLayout::compute_size_in_bytes(arg_size+1); return object_size; } @@ -626,6 +640,8 @@ return new BranchData(data_layout); case DataLayout::multi_branch_data_tag: return new MultiBranchData(data_layout); + case DataLayout::arg_info_data_tag: + return new ArgInfoData(data_layout); }; } @@ -681,7 +697,17 @@ // Add some extra DataLayout cells (at least one) to track stray traps. int extra_data_count = compute_extra_data_count(data_size, empty_bc_count); - object_size += extra_data_count * DataLayout::compute_size_in_bytes(0); + int extra_size = extra_data_count * DataLayout::compute_size_in_bytes(0); + + // Add a cell to record information about modified arguments. + // Set up _args_modified array after traps cells so that + // the code for traps cells works. + DataLayout *dp = data_layout_at(data_size + extra_size); + + int arg_size = method->size_of_parameters(); + dp->initialize(DataLayout::arg_info_data_tag, 0, arg_size+1); + + object_size += extra_size + DataLayout::compute_size_in_bytes(arg_size+1); // Set an initial hint. 
Don't use set_hint_di() because // first_di() may be out of bounds if data_size is 0. @@ -764,6 +790,10 @@ // No need for "OrderAccess::load_acquire" ops, // since the data structure is monotonic. if (dp->tag() == DataLayout::no_tag) break; + if (dp->tag() == DataLayout::arg_info_data_tag) { + dp = end; // ArgInfoData is at the end of extra data section. + break; + } if (dp->bci() == bci) { assert(dp->tag() == DataLayout::bit_data_tag, "sane"); return new BitData(dp); @@ -785,6 +815,16 @@ return NULL; } +ArgInfoData *methodDataOopDesc::arg_info() { + DataLayout* dp = extra_data_base(); + DataLayout* end = extra_data_limit(); + for (; dp < end; dp = next_extra(dp)) { + if (dp->tag() == DataLayout::arg_info_data_tag) + return new ArgInfoData(dp); + } + return NULL; +} + #ifndef PRODUCT void methodDataOopDesc::print_data_on(outputStream* st) { ResourceMark rm; @@ -794,15 +834,20 @@ st->fill_to(6); data->print_data_on(st); } + st->print_cr("--- Extra data:"); DataLayout* dp = extra_data_base(); DataLayout* end = extra_data_limit(); for (; dp < end; dp = next_extra(dp)) { // No need for "OrderAccess::load_acquire" ops, // since the data structure is monotonic. - if (dp->tag() == DataLayout::no_tag) break; - if (dp == extra_data_base()) - st->print_cr("--- Extra data:"); - data = new BitData(dp); + if (dp->tag() == DataLayout::no_tag) continue; + if (dp->tag() == DataLayout::bit_data_tag) { + data = new BitData(dp); + } else { + assert(dp->tag() == DataLayout::arg_info_data_tag, "must be BitData or ArgInfo"); + data = new ArgInfoData(dp); + dp = end; // ArgInfoData is at the end of extra data section. 
+ } st->print("%d", dp_to_di(data->dp())); st->fill_to(6); data->print_data_on(st); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/methodDataOop.hpp --- a/src/share/vm/oops/methodDataOop.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/methodDataOop.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -101,7 +101,8 @@ virtual_call_data_tag, ret_data_tag, branch_data_tag, - multi_branch_data_tag + multi_branch_data_tag, + arg_info_data_tag }; enum { @@ -245,6 +246,7 @@ class BranchData; class ArrayData; class MultiBranchData; +class ArgInfoData; // ProfileData @@ -376,6 +378,8 @@ virtual bool is_BranchData() { return false; } virtual bool is_ArrayData() { return false; } virtual bool is_MultiBranchData() { return false; } + virtual bool is_ArgInfoData() { return false; } + BitData* as_BitData() { assert(is_BitData(), "wrong type"); @@ -413,6 +417,10 @@ assert(is_MultiBranchData(), "wrong type"); return is_MultiBranchData() ? (MultiBranchData*)this : NULL; } + ArgInfoData* as_ArgInfoData() { + assert(is_ArgInfoData(), "wrong type"); + return is_ArgInfoData() ? 
(ArgInfoData*)this : NULL; + } // Subclass specific initialization @@ -1047,6 +1055,33 @@ #endif }; +class ArgInfoData : public ArrayData { + +public: + ArgInfoData(DataLayout* layout) : ArrayData(layout) { + assert(layout->tag() == DataLayout::arg_info_data_tag, "wrong type"); + } + + virtual bool is_ArgInfoData() { return true; } + + + int number_of_args() { + return array_len(); + } + + uint arg_modified(int arg) { + return array_uint_at(arg); + } + + void set_arg_modified(int arg, uint val) { + array_set_int_at(arg, val); + } + +#ifndef PRODUCT + void print_data_on(outputStream* st); +#endif +}; + // methodDataOop // // A methodDataOop holds information which has been collected about @@ -1183,6 +1218,9 @@ // Find or create an extra ProfileData: ProfileData* bci_to_extra_data(int bci, bool create_if_missing); + // return the argument info cell + ArgInfoData *arg_info(); + public: static int header_size() { return sizeof(methodDataOopDesc)/wordSize; @@ -1215,18 +1253,28 @@ // Support for interprocedural escape analysis, from Thomas Kotzmann. 
enum EscapeFlag { estimated = 1 << 0, - return_local = 1 << 1 + return_local = 1 << 1, + return_allocated = 1 << 2, + allocated_escapes = 1 << 3, + unknown_modified = 1 << 4 }; intx eflags() { return _eflags; } intx arg_local() { return _arg_local; } intx arg_stack() { return _arg_stack; } intx arg_returned() { return _arg_returned; } + uint arg_modified(int a) { ArgInfoData *aid = arg_info(); + assert(a >= 0 && a < aid->number_of_args(), "valid argument number"); + return aid->arg_modified(a); } void set_eflags(intx v) { _eflags = v; } void set_arg_local(intx v) { _arg_local = v; } void set_arg_stack(intx v) { _arg_stack = v; } void set_arg_returned(intx v) { _arg_returned = v; } + void set_arg_modified(int a, uint v) { ArgInfoData *aid = arg_info(); + assert(a >= 0 && a < aid->number_of_args(), "valid argument number"); + + aid->set_arg_modified(a, v); } void clear_escape_info() { _eflags = _arg_local = _arg_stack = _arg_returned = 0; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/methodOop.cpp --- a/src/share/vm/oops/methodOop.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/methodOop.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -765,6 +765,28 @@ } +// give advice about whether this methodOop should be cached or not +bool methodOopDesc::should_not_be_cached() const { + if (is_old()) { + // This method has been redefined. It is either EMCP or obsolete + // and we don't want to cache it because that would pin the method + // down and prevent it from being collectible if and when it + // finishes executing. + return true; + } + + if (mark()->should_not_be_cached()) { + // It is either not safe or not a good idea to cache this + // method at this time because of the state of the embedded + // markOop. See markOop.cpp for the gory details. 
+ return true; + } + + // caching this method should be just fine + return false; +} + + methodHandle methodOopDesc:: clone_with_new_data(methodHandle m, u_char* new_code, int new_code_length, u_char* new_compressed_linenumber_table, int new_compressed_linenumber_size, TRAPS) { // Code below does not work for native methods - they should never get rewritten anyway diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/oops/methodOop.hpp --- a/src/share/vm/oops/methodOop.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/oops/methodOop.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -524,6 +524,8 @@ void set_is_old() { _access_flags.set_is_old(); } bool is_obsolete() const { return access_flags().is_obsolete(); } void set_is_obsolete() { _access_flags.set_is_obsolete(); } + // see the definition in methodOop.cpp for the gory details + bool should_not_be_cached() const; // JVMTI Native method prefixing support: bool is_prefixed_native() const { return access_flags().is_prefixed_native(); } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/addnode.cpp --- a/src/share/vm/opto/addnode.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/addnode.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -505,15 +505,25 @@ const Type *temp_t2 = phase->type( in(Offset) ); if( temp_t2 == Type::TOP ) return NULL; const TypeX *t2 = temp_t2->is_intptr_t(); + Node* address; + Node* offset; if( t2->is_con() ) { // The Add of the flattened expression - set_req(Address, addp->in(Address)); - set_req(Offset , phase->MakeConX(t2->get_con() + t12->get_con())); - return this; // Made progress + address = addp->in(Address); + offset = phase->MakeConX(t2->get_con() + t12->get_con()); + } else { + // Else move the constant to the right. ((A+con)+B) into ((A+B)+con) + address = phase->transform(new (phase->C, 4) AddPNode(in(Base),addp->in(Address),in(Offset))); + offset = addp->in(Offset); } - // Else move the constant to the right. 
((A+con)+B) into ((A+B)+con) - set_req(Address, phase->transform(new (phase->C, 4) AddPNode(in(Base),addp->in(Address),in(Offset)))); - set_req(Offset , addp->in(Offset)); + PhaseIterGVN *igvn = phase->is_IterGVN(); + if( igvn ) { + set_req_X(Address,address,igvn); + set_req_X(Offset,offset,igvn); + } else { + set_req(Address,address); + set_req(Offset,offset); + } return this; } } @@ -608,6 +618,28 @@ return NULL; } +//------------------------------unpack_offsets---------------------------------- +// Collect the AddP offset values into the elements array, giving up +// if there are more than length. +int AddPNode::unpack_offsets(Node* elements[], int length) { + int count = 0; + Node* addr = this; + Node* base = addr->in(AddPNode::Base); + while (addr->is_AddP()) { + if (addr->in(AddPNode::Base) != base) { + // give up + return -1; + } + elements[count++] = addr->in(AddPNode::Offset); + if (count == length) { + // give up + return -1; + } + addr = addr->in(AddPNode::Address); + } + return count; +} + //------------------------------match_edge------------------------------------- // Do we Match on this edge index or not? Do not match base pointer edge uint AddPNode::match_edge(uint idx) const { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/addnode.hpp --- a/src/share/vm/opto/addnode.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/addnode.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -144,6 +144,11 @@ static Node* Ideal_base_and_offset(Node* ptr, PhaseTransform* phase, // second return value: intptr_t& offset); + + // Collect the AddP offset values into the elements array, giving up + // if there are more than length. 
+ int unpack_offsets(Node* elements[], int length); + // Do not match base-ptr edge virtual uint match_edge(uint idx) const; static const Type *mach_bottom_type(const MachNode* n); // used by ad_.hpp diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/bytecodeInfo.cpp --- a/src/share/vm/opto/bytecodeInfo.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/bytecodeInfo.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -79,8 +79,20 @@ for (int i = depth; i != 0; --i) tty->print(" "); } +static bool is_init_with_ea(ciMethod* callee_method, + ciMethod* caller_method, Compile* C) { + // True when EA is ON and a java constructor is called or + // a super constructor is called from an inlined java constructor. + return DoEscapeAnalysis && EliminateAllocations && + ( callee_method->is_initializer() || + (caller_method->is_initializer() && + caller_method != C->method() && + caller_method->holder()->is_subclass_of(callee_method->holder())) + ); +} + // positive filter: should send be inlined? returns NULL, if yes, or rejection msg -const char* InlineTree::shouldInline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const { +const char* InlineTree::shouldInline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const { // Allows targeted inlining if(callee_method->should_inline()) { *wci_result = *(WarmCallInfo::always_hot()); @@ -97,7 +109,8 @@ int size = callee_method->code_size(); // Check for too many throws (and not too huge) - if(callee_method->interpreter_throwout_count() > InlineThrowCount && size < InlineThrowMaxSize ) { + if(callee_method->interpreter_throwout_count() > InlineThrowCount && + size < InlineThrowMaxSize ) { wci_result->set_profit(wci_result->profit() * 100); if (PrintInlining && Verbose) { print_indent(inline_depth()); @@ -114,8 +127,12 @@ int invoke_count = method()->interpreter_invocation_count(); assert( invoke_count != 0, "Require invokation 
count greater than zero"); int freq = call_site_count/invoke_count; + // bump the max size if the call is frequent - if ((freq >= InlineFrequencyRatio) || (call_site_count >= InlineFrequencyCount)) { + if ((freq >= InlineFrequencyRatio) || + (call_site_count >= InlineFrequencyCount) || + is_init_with_ea(callee_method, caller_method, C)) { + max_size = C->freq_inline_size(); if (size <= max_size && TraceFrequencyInlining) { print_indent(inline_depth()); @@ -126,7 +143,8 @@ } } else { // Not hot. Check for medium-sized pre-existing nmethod at cold sites. - if (callee_method->has_compiled_code() && callee_method->instructions_size() > InlineSmallCode/4) + if (callee_method->has_compiled_code() && + callee_method->instructions_size() > InlineSmallCode/4) return "already compiled into a medium method"; } if (size > max_size) { @@ -139,7 +157,7 @@ // negative filter: should send NOT be inlined? returns NULL, ok to inline, or rejection msg -const char* InlineTree::shouldNotInline(ciMethod *callee_method, WarmCallInfo* wci_result) const { +const char* InlineTree::shouldNotInline(ciMethod *callee_method, ciMethod* caller_method, WarmCallInfo* wci_result) const { // negative filter: should send NOT be inlined? 
returns NULL (--> inline) or rejection msg if (!UseOldInlining) { const char* fail = NULL; @@ -204,9 +222,23 @@ // use frequency-based objections only for non-trivial methods if (callee_method->code_size() <= MaxTrivialSize) return NULL; - if (UseInterpreter && !CompileTheWorld) { // don't use counts with -Xcomp or CTW - if (!callee_method->has_compiled_code() && !callee_method->was_executed_more_than(0)) return "never executed"; - if (!callee_method->was_executed_more_than(MIN2(MinInliningThreshold, CompileThreshold >> 1))) return "executed < MinInliningThreshold times"; + + // don't use counts with -Xcomp or CTW + if (UseInterpreter && !CompileTheWorld) { + + if (!callee_method->has_compiled_code() && + !callee_method->was_executed_more_than(0)) { + return "never executed"; + } + + if (is_init_with_ea(callee_method, caller_method, C)) { + + // Escape Analysis: inline all executed constructors + + } else if (!callee_method->was_executed_more_than(MIN2(MinInliningThreshold, + CompileThreshold >> 1))) { + return "executed < MinInliningThreshold times"; + } } if (callee_method->should_not_inline()) { @@ -219,8 +251,7 @@ //-----------------------------try_to_inline----------------------------------- // return NULL if ok, reason for not inlining otherwise // Relocated from "InliningClosure::try_to_inline" -const char* InlineTree::try_to_inline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) { - ciMethod* caller_method = method(); +const char* InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) { // Old algorithm had funny accumulating BC-size counters if (UseOldInlining && ClipInlining @@ -229,25 +260,47 @@ } const char *msg = NULL; - if ((msg = shouldInline(callee_method, caller_bci, profile, wci_result)) != NULL) return msg; - if ((msg = shouldNotInline(callee_method, wci_result)) != NULL) return msg; + if ((msg = 
shouldInline(callee_method, caller_method, caller_bci, + profile, wci_result)) != NULL) { + return msg; + } + if ((msg = shouldNotInline(callee_method, caller_method, + wci_result)) != NULL) { + return msg; + } bool is_accessor = InlineAccessors && callee_method->is_accessor(); // suppress a few checks for accessors and trivial methods if (!is_accessor && callee_method->code_size() > MaxTrivialSize) { + // don't inline into giant methods - if (C->unique() > (uint)NodeCountInliningCutoff) return "NodeCountInliningCutoff"; + if (C->unique() > (uint)NodeCountInliningCutoff) { + return "NodeCountInliningCutoff"; + } - // don't inline unreached call sites - if (profile.count() == 0) return "call site not reached"; + if ((!UseInterpreter || CompileTheWorld) && + is_init_with_ea(callee_method, caller_method, C)) { + + // Escape Analysis stress testing when running Xcomp or CTW: + // inline constructors even if they are not reached. + + } else if (profile.count() == 0) { + // don't inline unreached call sites + return "call site not reached"; + } } - if (!C->do_inlining() && InlineAccessors && !is_accessor) return "not an accessor"; - - if( inline_depth() > MaxInlineLevel ) return "inlining too deep"; + if (!C->do_inlining() && InlineAccessors && !is_accessor) { + return "not an accessor"; + } + if( inline_depth() > MaxInlineLevel ) { + return "inlining too deep"; + } if( method() == callee_method && - inline_depth() > MaxRecursiveInlineLevel ) return "recursively inlining too deep"; + inline_depth() > MaxRecursiveInlineLevel ) { + return "recursively inlining too deep"; + } int size = callee_method->code_size(); @@ -336,7 +389,7 @@ // Check if inlining policy says no. 
WarmCallInfo wci = *(initial_wci); - failure_msg = try_to_inline(callee_method, caller_bci, profile, &wci); + failure_msg = try_to_inline(callee_method, caller_method, caller_bci, profile, &wci); if (failure_msg != NULL && C->log() != NULL) { C->log()->begin_elem("inline_fail reason='"); C->log()->text("%s", failure_msg); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/c2_globals.hpp --- a/src/share/vm/opto/c2_globals.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/c2_globals.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -367,6 +367,12 @@ notproduct(bool, PrintEliminateLocks, false, \ "Print out when locks are eliminated") \ \ + diagnostic(bool, EliminateAutoBox, false, \ + "Private flag to control optimizations for autobox elimination") \ + \ + product(intx, AutoBoxCacheMax, 128, \ + "Sets max value cached by the java.lang.Integer autobox cache") \ + \ product(bool, DoEscapeAnalysis, false, \ "Perform escape analysis") \ \ @@ -376,6 +382,12 @@ product(bool, EliminateAllocations, true, \ "Use escape analysis to eliminate allocations") \ \ + notproduct(bool, PrintEliminateAllocations, false, \ + "Print out when allocations are eliminated") \ + \ + product(intx, EliminateAllocationArraySizeLimit, 64, \ + "Array size (number of elements) limit for scalar replacement") \ + \ product(intx, MaxLabelRootDepth, 1100, \ "Maximum times call Label_Root to prevent stack overflow") \ diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/c2compiler.cpp --- a/src/share/vm/opto/c2compiler.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/c2compiler.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -35,6 +35,9 @@ const char* C2Compiler::retry_no_subsuming_loads() { return "retry without subsuming loads"; } +const char* C2Compiler::retry_no_escape_analysis() { + return "retry without escape analysis"; +} void C2Compiler::initialize_runtime() { // Check assumptions used while running ADLC @@ -101,17 +104,23 @@ initialize(); } bool subsume_loads = true; + bool 
do_escape_analysis = DoEscapeAnalysis; while (!env->failing()) { // Attempt to compile while subsuming loads into machine instructions. - Compile C(env, this, target, entry_bci, subsume_loads); + Compile C(env, this, target, entry_bci, subsume_loads, do_escape_analysis); // Check result and retry if appropriate. if (C.failure_reason() != NULL) { - if (C.failure_reason_is(retry_no_subsuming_loads())) { + if (C.failure_reason_is(retry_no_subsuming_loads())) { assert(subsume_loads, "must make progress"); subsume_loads = false; continue; // retry } + if (C.failure_reason_is(retry_no_escape_analysis())) { + assert(do_escape_analysis, "must make progress"); + do_escape_analysis = false; + continue; // retry + } // Pass any other failure reason up to the ciEnv. // Note that serious, irreversible failures are already logged // on the ciEnv via env->record_method_not_compilable(). diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/c2compiler.hpp --- a/src/share/vm/opto/c2compiler.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/c2compiler.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -50,6 +50,7 @@ // sentinel value used to trigger backtracking in compile_method(). 
static const char* retry_no_subsuming_loads(); + static const char* retry_no_escape_analysis(); // Print compilation timers and statistics void print_timers(); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/callnode.cpp --- a/src/share/vm/opto/callnode.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/callnode.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -230,6 +230,7 @@ _locoff = TypeFunc::Parms; _stkoff = _locoff + _method->max_locals(); _monoff = _stkoff + _method->max_stack(); + _scloff = _monoff; _endoff = _monoff; _sp = 0; } @@ -242,6 +243,7 @@ _locoff = TypeFunc::Parms; _stkoff = _locoff; _monoff = _stkoff + stack_size; + _scloff = _monoff; _endoff = _monoff; _sp = 0; } @@ -297,12 +299,22 @@ return total; } +#ifndef PRODUCT + //------------------------------format_helper---------------------------------- // Given an allocation (a Chaitin object) and a Node decide if the Node carries // any defined value or not. If it does, print out the register or constant. -#ifndef PRODUCT -static void format_helper( PhaseRegAlloc *regalloc, outputStream* st, Node *n, const char *msg, uint i ) { +static void format_helper( PhaseRegAlloc *regalloc, outputStream* st, Node *n, const char *msg, uint i, GrowableArray *scobjs ) { if (n == NULL) { st->print(" NULL"); return; } + if (n->is_SafePointScalarObject()) { + // Scalar replacement. 
+ SafePointScalarObjectNode* spobj = n->as_SafePointScalarObject(); + scobjs->append_if_missing(spobj); + int sco_n = scobjs->find(spobj); + assert(sco_n >= 0, ""); + st->print(" %s%d]=#ScObj" INT32_FORMAT, msg, i, sco_n); + return; + } if( OptoReg::is_valid(regalloc->get_reg_first(n))) { // Check for undefined char buf[50]; regalloc->dump_register(n,buf); @@ -342,10 +354,8 @@ } } } -#endif //------------------------------format----------------------------------------- -#ifndef PRODUCT void JVMState::format(PhaseRegAlloc *regalloc, const Node *n, outputStream* st) const { st->print(" #"); if( _method ) { @@ -356,24 +366,25 @@ return; } if (n->is_MachSafePoint()) { + GrowableArray scobjs; MachSafePointNode *mcall = n->as_MachSafePoint(); uint i; // Print locals for( i = 0; i < (uint)loc_size(); i++ ) - format_helper( regalloc, st, mcall->local(this, i), "L[", i ); + format_helper( regalloc, st, mcall->local(this, i), "L[", i, &scobjs ); // Print stack for (i = 0; i < (uint)stk_size(); i++) { if ((uint)(_stkoff + i) >= mcall->len()) st->print(" oob "); else - format_helper( regalloc, st, mcall->stack(this, i), "STK[", i ); + format_helper( regalloc, st, mcall->stack(this, i), "STK[", i, &scobjs ); } for (i = 0; (int)i < nof_monitors(); i++) { Node *box = mcall->monitor_box(this, i); Node *obj = mcall->monitor_obj(this, i); if ( OptoReg::is_valid(regalloc->get_reg_first(box)) ) { while( !box->is_BoxLock() ) box = box->in(1); - format_helper( regalloc, st, box, "MON-BOX[", i ); + format_helper( regalloc, st, box, "MON-BOX[", i, &scobjs ); } else { OptoReg::Name box_reg = BoxLockNode::stack_slot(box); st->print(" MON-BOX%d=%s+%d", @@ -381,15 +392,71 @@ OptoReg::regname(OptoReg::c_frame_pointer), regalloc->reg2offset(box_reg)); } - format_helper( regalloc, st, obj, "MON-OBJ[", i ); + format_helper( regalloc, st, obj, "MON-OBJ[", i, &scobjs ); + } + + for (i = 0; i < (uint)scobjs.length(); i++) { + // Scalar replaced objects. 
+ st->print_cr(""); + st->print(" # ScObj" INT32_FORMAT " ", i); + SafePointScalarObjectNode* spobj = scobjs.at(i); + ciKlass* cik = spobj->bottom_type()->is_oopptr()->klass(); + assert(cik->is_instance_klass() || + cik->is_array_klass(), "Not supported allocation."); + ciInstanceKlass *iklass = NULL; + if (cik->is_instance_klass()) { + cik->print_name_on(st); + iklass = cik->as_instance_klass(); + } else if (cik->is_type_array_klass()) { + cik->as_array_klass()->base_element_type()->print_name_on(st); + st->print("[%d]=", spobj->n_fields()); + } else if (cik->is_obj_array_klass()) { + ciType* cie = cik->as_array_klass()->base_element_type(); + int ndim = 1; + while (cie->is_obj_array_klass()) { + ndim += 1; + cie = cie->as_array_klass()->base_element_type(); + } + cie->print_name_on(st); + while (ndim-- > 0) { + st->print("[]"); + } + st->print("[%d]=", spobj->n_fields()); + } + st->print("{"); + uint nf = spobj->n_fields(); + if (nf > 0) { + uint first_ind = spobj->first_index(); + Node* fld_node = mcall->in(first_ind); + ciField* cifield; + if (iklass != NULL) { + st->print(" ["); + cifield = iklass->nonstatic_field_at(0); + cifield->print_name_on(st); + format_helper( regalloc, st, fld_node, ":", 0, &scobjs ); + } else { + format_helper( regalloc, st, fld_node, "[", 0, &scobjs ); + } + for (uint j = 1; j < nf; j++) { + fld_node = mcall->in(first_ind+j); + if (iklass != NULL) { + st->print(", ["); + cifield = iklass->nonstatic_field_at(j); + cifield->print_name_on(st); + format_helper( regalloc, st, fld_node, ":", j, &scobjs ); + } else { + format_helper( regalloc, st, fld_node, ", [", j, &scobjs ); + } + } + } + st->print(" }"); } } st->print_cr(""); if (caller() != NULL) caller()->format(regalloc, n, st); } -#endif -#ifndef PRODUCT + void JVMState::dump_spec(outputStream *st) const { if (_method != NULL) { bool printed = false; @@ -419,9 +486,8 @@ } if (caller() != NULL) caller()->dump_spec(st); } -#endif -#ifndef PRODUCT + void JVMState::dump_on(outputStream* 
st) const { if (_map && !((uintptr_t)_map & 1)) { if (_map->len() > _map->req()) { // _map->has_exceptions() @@ -434,8 +500,8 @@ } _map->dump(2); } - st->print("JVMS depth=%d loc=%d stk=%d mon=%d end=%d mondepth=%d sp=%d bci=%d method=", - depth(), locoff(), stkoff(), monoff(), endoff(), monitor_depth(), sp(), bci()); + st->print("JVMS depth=%d loc=%d stk=%d mon=%d scalar=%d end=%d mondepth=%d sp=%d bci=%d method=", + depth(), locoff(), stkoff(), monoff(), scloff(), endoff(), monitor_depth(), sp(), bci()); if (_method == NULL) { st->print_cr("(none)"); } else { @@ -465,6 +531,7 @@ n->set_locoff(_locoff); n->set_stkoff(_stkoff); n->set_monoff(_monoff); + n->set_scloff(_scloff); n->set_endoff(_endoff); n->set_sp(_sp); n->set_map(_map); @@ -557,6 +624,107 @@ return 0; } +// +// Determine whether the call could modify the field of the specified +// instance at the specified offset. +// +bool CallNode::may_modify(const TypePtr *addr_t, PhaseTransform *phase) { + const TypeOopPtr *adrInst_t = addr_t->isa_oopptr(); + + // if not an InstPtr or not an instance type, assume the worst + if (adrInst_t == NULL || !adrInst_t->is_instance_field()) { + return true; + } + Compile *C = phase->C; + int offset = adrInst_t->offset(); + assert(offset >= 0, "should be valid offset"); + ciKlass* adr_k = adrInst_t->klass(); + assert(adr_k->is_loaded() && + adr_k->is_java_klass() && + !adr_k->is_interface(), + "only non-abstract classes are expected"); + + int base_idx = C->get_alias_index(adrInst_t); + int size = BytesPerLong; // If we don't know the size, assume largest. + if (adrInst_t->isa_instptr()) { + ciField* field = C->alias_type(base_idx)->field(); + if (field != NULL) { + size = field->size_in_bytes(); + } + } else { + assert(adrInst_t->isa_aryptr(), "only arrays are expected"); + size = type2aelembytes(adr_k->as_array_klass()->element_type()->basic_type()); + } + + ciMethod * meth = is_CallStaticJava() ? 
as_CallStaticJava()->method() : NULL; + BCEscapeAnalyzer *bcea = (meth != NULL) ? meth->get_bcea() : NULL; + + const TypeTuple * d = tf()->domain(); + for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { + const Type* t = d->field_at(i); + Node *arg = in(i); + const Type *at = phase->type(arg); + if (at == TypePtr::NULL_PTR || at == Type::TOP) + continue; // null can't affect anything + + const TypeOopPtr *at_ptr = at->isa_oopptr(); + if (!arg->is_top() && (t->isa_oopptr() != NULL || + t->isa_ptr() && at_ptr != NULL)) { + assert(at_ptr != NULL, "expecting an OopPtr"); + ciKlass* at_k = at_ptr->klass(); + if ((adrInst_t->base() == at_ptr->base()) && + at_k->is_loaded() && + at_k->is_java_klass() && + !at_k->is_interface()) { + // If we have found an argument matching addr_t, check if the field + // at the specified offset is modified. + int at_idx = C->get_alias_index(at_ptr->add_offset(offset)->isa_oopptr()); + if (base_idx == at_idx && + (bcea == NULL || + bcea->is_arg_modified(i - TypeFunc::Parms, offset, size))) { + return true; + } + } + } + } + return false; +} + +// Does this call have a direct reference to n other than debug information? +bool CallNode::has_non_debug_use(Node *n) { + const TypeTuple * d = tf()->domain(); + for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { + Node *arg = in(i); + if (arg == n) { + return true; + } + } + return false; +} + +// Returns the unique CheckCastPP of a call +// or 'this' if there are several CheckCastPP +// or returns NULL if there is no one. 
+Node *CallNode::result_cast() { + Node *cast = NULL; + + Node *p = proj_out(TypeFunc::Parms); + if (p == NULL) + return NULL; + + for (DUIterator_Fast imax, i = p->fast_outs(imax); i < imax; i++) { + Node *use = p->fast_out(i); + if (use->is_CheckCastPP()) { + if (cast != NULL) { + return this; // more than 1 CheckCastPP + } + cast = use; + } + } + return cast; +} + + //============================================================================= uint CallJavaNode::size_of() const { return sizeof(*this); } uint CallJavaNode::cmp( const Node &n ) const { @@ -765,6 +933,7 @@ void SafePointNode::grow_stack(JVMState* jvms, uint grow_by) { assert((int)grow_by > 0, "sanity"); int monoff = jvms->monoff(); + int scloff = jvms->scloff(); int endoff = jvms->endoff(); assert(endoff == (int)req(), "no other states or debug info after me"); Node* top = Compile::current()->top(); @@ -772,6 +941,7 @@ ins_req(monoff, top); } jvms->set_monoff(monoff + grow_by); + jvms->set_scloff(scloff + grow_by); jvms->set_endoff(endoff + grow_by); } @@ -781,6 +951,7 @@ const int MonitorEdges = 2; assert(JVMState::logMonitorEdges == exact_log2(MonitorEdges), "correct MonitorEdges"); assert(req() == jvms()->endoff(), "correct sizing"); + int nextmon = jvms()->scloff(); if (GenerateSynchronizationCode) { add_req(lock->box_node()); add_req(lock->obj_node()); @@ -788,6 +959,7 @@ add_req(NULL); add_req(NULL); } + jvms()->set_scloff(nextmon+MonitorEdges); jvms()->set_endoff(req()); } @@ -795,10 +967,13 @@ // Delete last monitor from debug info debug_only(int num_before_pop = jvms()->nof_monitors()); const int MonitorEdges = (1<scloff(); int endoff = jvms()->endoff(); + int new_scloff = scloff - MonitorEdges; int new_endoff = endoff - MonitorEdges; + jvms()->set_scloff(new_scloff); jvms()->set_endoff(new_endoff); - while (endoff > new_endoff) del_req(--endoff); + while (scloff > new_scloff) del_req(--scloff); assert(jvms()->nof_monitors() == num_before_pop-1, ""); } @@ -822,6 +997,63 @@ return 
(TypeFunc::Parms == idx); } +//============== SafePointScalarObjectNode ============== + +SafePointScalarObjectNode::SafePointScalarObjectNode(const TypeOopPtr* tp, +#ifdef ASSERT + AllocateNode* alloc, +#endif + uint first_index, + uint n_fields) : + TypeNode(tp, 1), // 1 control input -- seems required. Get from root. +#ifdef ASSERT + _alloc(alloc), +#endif + _first_index(first_index), + _n_fields(n_fields) +{ + init_class_id(Class_SafePointScalarObject); +} + + +uint SafePointScalarObjectNode::ideal_reg() const { + return 0; // No matching to machine instruction +} + +const RegMask &SafePointScalarObjectNode::in_RegMask(uint idx) const { + return *(Compile::current()->matcher()->idealreg2debugmask[in(idx)->ideal_reg()]); +} + +const RegMask &SafePointScalarObjectNode::out_RegMask() const { + return RegMask::Empty; +} + +uint SafePointScalarObjectNode::match_edge(uint idx) const { + return 0; +} + +SafePointScalarObjectNode* +SafePointScalarObjectNode::clone(int jvms_adj, Dict* sosn_map) const { + void* cached = (*sosn_map)[(void*)this]; + if (cached != NULL) { + return (SafePointScalarObjectNode*)cached; + } + Compile* C = Compile::current(); + SafePointScalarObjectNode* res = (SafePointScalarObjectNode*)Node::clone(); + res->_first_index += jvms_adj; + sosn_map->Insert((void*)this, (void*)res); + return res; +} + + +#ifndef PRODUCT +void SafePointScalarObjectNode::dump_spec(outputStream *st) const { + st->print(" # fields@[%d..%d]", first_index(), + first_index() + n_fields() - 1); +} + +#endif + //============================================================================= uint AllocateNode::size_of() const { return sizeof(*this); } @@ -832,6 +1064,7 @@ { init_class_id(Class_Allocate); init_flags(Flag_is_macro); + _is_scalar_replaceable = false; Node *topnode = C->top(); init_req( TypeFunc::Control , ctrl ); @@ -1151,7 +1384,7 @@ //============================================================================= Node *LockNode::Ideal(PhaseGVN *phase, bool 
can_reshape) { - // perform any generic optimizations first + // perform any generic optimizations first (returns 'this' or NULL) Node *result = SafePointNode::Ideal(phase, can_reshape); // Now see if we can optimize away this lock. We don't actually @@ -1159,7 +1392,20 @@ // prevents macro expansion from expanding the lock. Since we don't // modify the graph, the value returned from this function is the // one computed above. - if (EliminateLocks && !is_eliminated()) { + if (result == NULL && can_reshape && EliminateLocks && !is_eliminated()) { + // + // If we are locking an unescaped object, the lock/unlock is unnecessary + // + ConnectionGraph *cgr = Compile::current()->congraph(); + PointsToNode::EscapeState es = PointsToNode::GlobalEscape; + if (cgr != NULL) + es = cgr->escape_state(obj_node(), phase); + if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) { + // Mark it eliminated to update any counters + this->set_eliminated(); + return result; + } + // // Try lock coarsening // @@ -1199,8 +1445,10 @@ int unlocks = 0; for (int i = 0; i < lock_ops.length(); i++) { AbstractLockNode* lock = lock_ops.at(i); - if (lock->Opcode() == Op_Lock) locks++; - else unlocks++; + if (lock->Opcode() == Op_Lock) + locks++; + else + unlocks++; if (Verbose) { lock->dump(1); } @@ -1237,7 +1485,7 @@ //============================================================================= Node *UnlockNode::Ideal(PhaseGVN *phase, bool can_reshape) { - // perform any generic optimizations first + // perform any generic optimizations first (returns 'this' or NULL) Node * result = SafePointNode::Ideal(phase, can_reshape); // Now see if we can optimize away this unlock. We don't actually @@ -1245,66 +1493,18 @@ // prevents macro expansion from expanding the unlock. Since we don't // modify the graph, the value returned from this function is the // one computed above. - if (EliminateLocks && !is_eliminated()) { + // Escape state is defined after Parse phase. 
+ if (result == NULL && can_reshape && EliminateLocks && !is_eliminated()) { // - // If we are unlocking an unescaped object, the lock/unlock is unnecessary - // We can eliminate them if there are no safepoints in the locked region. + // If we are unlocking an unescaped object, the lock/unlock is unnecessary. // ConnectionGraph *cgr = Compile::current()->congraph(); - if (cgr != NULL && cgr->escape_state(obj_node(), phase) == PointsToNode::NoEscape) { - GrowableArray lock_ops; - LockNode *lock = find_matching_lock(this); - if (lock != NULL) { - lock_ops.append(this); - lock_ops.append(lock); - // find other unlocks which pair with the lock we found and add them - // to the list - Node * box = box_node(); - - for (DUIterator_Fast imax, i = box->fast_outs(imax); i < imax; i++) { - Node *use = box->fast_out(i); - if (use->is_Unlock() && use != this) { - UnlockNode *unlock1 = use->as_Unlock(); - if (!unlock1->is_eliminated()) { - LockNode *lock1 = find_matching_lock(unlock1); - if (lock == lock1) - lock_ops.append(unlock1); - else if (lock1 == NULL) { - // we can't find a matching lock, we must assume the worst - lock_ops.trunc_to(0); - break; - } - } - } - } - if (lock_ops.length() > 0) { - - #ifndef PRODUCT - if (PrintEliminateLocks) { - int locks = 0; - int unlocks = 0; - for (int i = 0; i < lock_ops.length(); i++) { - AbstractLockNode* lock = lock_ops.at(i); - if (lock->Opcode() == Op_Lock) locks++; - else unlocks++; - if (Verbose) { - lock->dump(1); - } - } - tty->print_cr("***Eliminated %d unescaped unlocks and %d unescaped locks", unlocks, locks); - } - #endif - - // for each of the identified locks, mark them - // as eliminatable - for (int i = 0; i < lock_ops.length(); i++) { - AbstractLockNode* lock = lock_ops.at(i); - - // Mark it eliminated to update any counters - lock->set_eliminated(); - } - } - } + PointsToNode::EscapeState es = PointsToNode::GlobalEscape; + if (cgr != NULL) + es = cgr->escape_state(obj_node(), phase); + if (es != 
PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) { + // Mark it eliminated to update any counters + this->set_eliminated(); } } return result; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/callnode.hpp --- a/src/share/vm/opto/callnode.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/callnode.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -38,7 +38,7 @@ class CallLeafNode; class CallLeafNoFPNode; class AllocateNode; -class AllocateArrayNode; +class AllocateArrayNode; class LockNode; class UnlockNode; class JVMState; @@ -91,7 +91,9 @@ class ParmNode : public ProjNode { static const char * const names[TypeFunc::Parms+1]; public: - ParmNode( StartNode *src, uint con ) : ProjNode(src,con) {} + ParmNode( StartNode *src, uint con ) : ProjNode(src,con) { + init_class_id(Class_Parm); + } virtual int Opcode() const; virtual bool is_CFG() const { return (_con == TypeFunc::Control); } virtual uint ideal_reg() const; @@ -182,6 +184,7 @@ uint _locoff; // Offset to locals in input edge mapping uint _stkoff; // Offset to stack in input edge mapping uint _monoff; // Offset to monitors in input edge mapping + uint _scloff; // Offset to fields of scalar objs in input edge mapping uint _endoff; // Offset to end of input edge mapping uint _sp; // Jave Expression Stack Pointer for this state int _bci; // Byte Code Index of this JVM point @@ -205,16 +208,19 @@ uint stkoff() const { return _stkoff; } uint argoff() const { return _stkoff + _sp; } uint monoff() const { return _monoff; } + uint scloff() const { return _scloff; } uint endoff() const { return _endoff; } uint oopoff() const { return debug_end(); } int loc_size() const { return _stkoff - _locoff; } int stk_size() const { return _monoff - _stkoff; } - int mon_size() const { return _endoff - _monoff; } + int mon_size() const { return _scloff - _monoff; } + int scl_size() const { return _endoff - _scloff; } bool is_loc(uint i) const { return i >= _locoff && i < _stkoff; } bool is_stk(uint i) const { return 
i >= _stkoff && i < _monoff; } - bool is_mon(uint i) const { return i >= _monoff && i < _endoff; } + bool is_mon(uint i) const { return i >= _monoff && i < _scloff; } + bool is_scl(uint i) const { return i >= _scloff && i < _endoff; } uint sp() const { return _sp; } int bci() const { return _bci; } @@ -225,7 +231,9 @@ uint depth() const { return _depth; } uint debug_start() const; // returns locoff of root caller uint debug_end() const; // returns endoff of self - uint debug_size() const { return loc_size() + sp() + mon_size(); } + uint debug_size() const { + return loc_size() + sp() + mon_size() + scl_size(); + } uint debug_depth() const; // returns sum of debug_size values at all depths // Returns the JVM state at the desired depth (1 == root). @@ -252,8 +260,11 @@ void set_locoff(uint off) { _locoff = off; } void set_stkoff(uint off) { _stkoff = off; } void set_monoff(uint off) { _monoff = off; } + void set_scloff(uint off) { _scloff = off; } void set_endoff(uint off) { _endoff = off; } - void set_offsets(uint off) { _locoff = _stkoff = _monoff = _endoff = off; } + void set_offsets(uint off) { + _locoff = _stkoff = _monoff = _scloff = _endoff = off; + } void set_map(SafePointNode *map) { _map = map; } void set_sp(uint sp) { _sp = sp; } void set_bci(int bci) { _bci = bci; } @@ -377,6 +388,9 @@ void set_next_exception(SafePointNode* n); bool has_exceptions() const { return next_exception() != NULL; } + // Does this node have a use of n other than in debug information? + virtual bool has_non_debug_use(Node *n) {return false; } + // Standard Node stuff virtual int Opcode() const; virtual bool pinned() const { return true; } @@ -397,6 +411,47 @@ #endif }; +//------------------------------SafePointScalarObjectNode---------------------- +// A SafePointScalarObjectNode represents the state of a scalarized object +// at a safepoint. 
+ +class SafePointScalarObjectNode: public TypeNode { + uint _first_index; // First input edge index of a SafePoint node where + // states of the scalarized object fields are collected. + uint _n_fields; // Number of non-static fields of the scalarized object. + DEBUG_ONLY(AllocateNode* _alloc;) +public: + SafePointScalarObjectNode(const TypeOopPtr* tp, +#ifdef ASSERT + AllocateNode* alloc, +#endif + uint first_index, uint n_fields); + virtual int Opcode() const; + virtual uint ideal_reg() const; + virtual const RegMask &in_RegMask(uint) const; + virtual const RegMask &out_RegMask() const; + virtual uint match_edge(uint idx) const; + + uint first_index() const { return _first_index; } + uint n_fields() const { return _n_fields; } + DEBUG_ONLY(AllocateNode* alloc() const { return _alloc; }) + + virtual uint size_of() const { return sizeof(*this); } + + // Assumes that "this" is an argument to a safepoint node "s", and that + // "new_call" is being created to correspond to "s". But the difference + // between the start index of the jvmstates of "new_call" and "s" is + // "jvms_adj". Produce and return a SafePointScalarObjectNode that + // corresponds appropriately to "this" in "new_call". Assumes that + // "sosn_map" is a map, specific to the translation of "s" to "new_call", + // mapping old SafePointScalarObjectNodes to new, to avoid multiple copies. + SafePointScalarObjectNode* clone(int jvms_adj, Dict* sosn_map) const; + +#ifndef PRODUCT + virtual void dump_spec(outputStream *st) const; +#endif +}; + //------------------------------CallNode--------------------------------------- // Call nodes now subsume the function of debug nodes at callsites, so they // contain the functionality of a full scope chain of debug nodes. 
@@ -405,7 +460,6 @@ const TypeFunc *_tf; // Function type address _entry_point; // Address of method being called float _cnt; // Estimate of number of times called - PointsToNode::EscapeState _escape_state; CallNode(const TypeFunc* tf, address addr, const TypePtr* adr_type) : SafePointNode(tf->domain()->cnt(), NULL, adr_type), @@ -415,7 +469,6 @@ { init_class_id(Class_Call); init_flags(Flag_is_Call); - _escape_state = PointsToNode::UnknownEscape; } const TypeFunc* tf() const { return _tf; } @@ -441,6 +494,15 @@ // the node the JVMState must be cloned. virtual void clone_jvms() { } // default is not to clone + // Returns true if the call may modify n + virtual bool may_modify(const TypePtr *addr_t, PhaseTransform *phase); + // Does this node have a use of n other than in debug information? + virtual bool has_non_debug_use(Node *n); + // Returns the unique CheckCastPP of a call + // or result projection is there are several CheckCastPP + // or returns NULL if there is no one. + Node *result_cast(); + virtual uint match_edge(uint idx) const; #ifndef PRODUCT @@ -624,6 +686,8 @@ return TypeFunc::make(domain, range); } + bool _is_scalar_replaceable; // Result of Escape Analysis + virtual uint size_of() const; // Size is bigger AllocateNode(Compile* C, const TypeFunc *atype, Node *ctrl, Node *mem, Node *abio, Node *size, Node *klass_node, Node *initial_test); @@ -635,6 +699,9 @@ virtual uint ideal_reg() const { return Op_RegP; } virtual bool guaranteed_safepoint() { return false; } + // allocations do not modify their arguments + virtual bool may_modify(const TypePtr *addr_t, PhaseTransform *phase) { return false;} + // Pattern-match a possible usage of AllocateNode. // Return null if no allocation is recognized. // The operand is the pointer produced by the (possible) allocation. 
@@ -747,6 +814,9 @@ // mark node as eliminated and update the counter if there is one void set_eliminated(); + // locking does not modify its arguments + virtual bool may_modify(const TypePtr *addr_t, PhaseTransform *phase){ return false;} + #ifndef PRODUCT void create_lock_counter(JVMState* s); NamedCounter* counter() const { return _counter; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/cfgnode.cpp --- a/src/share/vm/opto/cfgnode.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/cfgnode.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -704,6 +704,61 @@ return mem; } +//------------------------split_out_instance----------------------------------- +// Split out an instance type from a bottom phi. +PhiNode* PhiNode::split_out_instance(const TypePtr* at, PhaseIterGVN *igvn) const { + assert(type() == Type::MEMORY && (adr_type() == TypePtr::BOTTOM || + adr_type() == TypeRawPtr::BOTTOM) , "bottom or raw memory required"); + + // Check if an appropriate node already exists. + Node *region = in(0); + for (DUIterator_Fast kmax, k = region->fast_outs(kmax); k < kmax; k++) { + Node* use = region->fast_out(k); + if( use->is_Phi()) { + PhiNode *phi2 = use->as_Phi(); + if (phi2->type() == Type::MEMORY && phi2->adr_type() == at) { + return phi2; + } + } + } + Compile *C = igvn->C; + Arena *a = Thread::current()->resource_area(); + Node_Array node_map = new Node_Array(a); + Node_Stack stack(a, C->unique() >> 4); + PhiNode *nphi = slice_memory(at); + igvn->register_new_node_with_optimizer( nphi ); + node_map.map(_idx, nphi); + stack.push((Node *)this, 1); + while(!stack.is_empty()) { + PhiNode *ophi = stack.node()->as_Phi(); + uint i = stack.index(); + assert(i >= 1, "not control edge"); + stack.pop(); + nphi = node_map[ophi->_idx]->as_Phi(); + for (; i < ophi->req(); i++) { + Node *in = ophi->in(i); + if (in == NULL || igvn->type(in) == Type::TOP) + continue; + Node *opt = MemNode::optimize_simple_memory_chain(in, at, igvn); + PhiNode *optphi = opt->is_Phi() ? 
opt->as_Phi() : NULL; + if (optphi != NULL && optphi->adr_type() == TypePtr::BOTTOM) { + opt = node_map[optphi->_idx]; + if (opt == NULL) { + stack.push(ophi, i); + nphi = optphi->slice_memory(at); + igvn->register_new_node_with_optimizer( nphi ); + node_map.map(optphi->_idx, nphi); + ophi = optphi; + i = 0; // will get incremented at top of loop + continue; + } + } + nphi->set_req(i, opt); + } + } + return nphi; +} + //------------------------verify_adr_type-------------------------------------- #ifdef ASSERT void PhiNode::verify_adr_type(VectorSet& visited, const TypePtr* at) const { @@ -1736,6 +1791,18 @@ return result; } } + // + // Other optimizations on the memory chain + // + const TypePtr* at = adr_type(); + for( uint i=1; ireq()), _adr_type(at) { + PhiNode( Node *r, const Type *t, const TypePtr* at = NULL, + const int iid = TypeOopPtr::UNKNOWN_INSTANCE, + const int iidx = Compile::AliasIdxTop, + const int ioffs = Type::OffsetTop ) + : TypeNode(t,r->req()), + _adr_type(at), + _inst_id(iid), + _inst_index(iidx), + _inst_offset(ioffs) + { init_class_id(Class_Phi); init_req(0, r); verify_adr_type(); @@ -139,6 +148,7 @@ static PhiNode* make( Node* r, Node* x, const Type *t, const TypePtr* at = NULL ); // create a new phi with narrowed memory type PhiNode* slice_memory(const TypePtr* adr_type) const; + PhiNode* split_out_instance(const TypePtr* at, PhaseIterGVN *igvn) const; // like make(r, x), but does not initialize the in edges to x static PhiNode* make_blank( Node* r, Node* x ); @@ -152,6 +162,10 @@ return NULL; // not a copy! } + // Determine a unique non-trivial input, if any. + // Ignore casts if it helps. Return NULL on failure. + Node* unique_input(PhaseTransform *phase); + // Check for a simple dead loop. 
enum LoopSafety { Safe = 0, Unsafe, UnsafeLoop }; LoopSafety simple_data_loop_check(Node *in) const; @@ -161,6 +175,18 @@ virtual int Opcode() const; virtual bool pinned() const { return in(0) != 0; } virtual const TypePtr *adr_type() const { verify_adr_type(true); return _adr_type; } + + const int inst_id() const { return _inst_id; } + const int inst_index() const { return _inst_index; } + const int inst_offset() const { return _inst_offset; } + bool is_same_inst_field(const Type* tp, int id, int index, int offset) { + return type()->basic_type() == tp->basic_type() && + inst_id() == id && + inst_index() == index && + inst_offset() == offset && + type()->higher_equal(tp); + } + virtual const Type *Value( PhaseTransform *phase ) const; virtual Node *Identity( PhaseTransform *phase ); virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); @@ -310,8 +336,14 @@ virtual const RegMask &out_RegMask() const; void dominated_by(Node* prev_dom, PhaseIterGVN* igvn); int is_range_check(Node* &range, Node* &index, jint &offset); + Node* fold_compares(PhaseGVN* phase); static Node* up_one_dom(Node* curr, bool linear_only = false); + // Takes the type of val and filters it through the test represented + // by if_proj and returns a more refined type if one is produced. + // Returns NULL is it couldn't improve the type. 
+ static const TypeInt* filtered_int_type(PhaseGVN* phase, Node* val, Node* if_proj); + #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/chaitin.hpp --- a/src/share/vm/opto/chaitin.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/chaitin.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -457,7 +457,8 @@ bool may_be_copy_of_callee( Node *def ) const; // If nreg already contains the same constant as val then eliminate it - bool eliminate_copy_of_constant(Node* val, Block *current_block, Node_List& value, Node_List &regnd, + bool eliminate_copy_of_constant(Node* val, Node* n, + Block *current_block, Node_List& value, Node_List &regnd, OptoReg::Name nreg, OptoReg::Name nreg2); // Extend the node to LRG mapping void add_reference( const Node *node, const Node *old_node); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/classes.hpp --- a/src/share/vm/opto/classes.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/classes.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -185,6 +185,7 @@ macro(RoundDouble) macro(RoundFloat) macro(SafePoint) +macro(SafePointScalarObject) macro(SCMemProj) macro(SinD) macro(SqrtD) diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/compile.cpp --- a/src/share/vm/opto/compile.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/compile.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -333,6 +333,12 @@ tty->print_cr("** Bailout: Recompile without subsuming loads **"); tty->print_cr("*********************************************************"); } + if (_do_escape_analysis != DoEscapeAnalysis && PrintOpto) { + // Recompiling without escape analysis + tty->print_cr("*********************************************************"); + tty->print_cr("** Bailout: Recompile without escape analysis **"); + tty->print_cr("*********************************************************"); + } if (env()->break_at_compile()) { // Open the debugger when compiing this method. 
tty->print("### Breaking when compiling: "); @@ -401,11 +407,6 @@ return buf.code_size(); } -void Compile::record_for_escape_analysis(Node* n) { - if (_congraph != NULL) - _congraph->record_for_escape_analysis(n); -} - // ============================================================================ //------------------------------Compile standard------------------------------- @@ -415,7 +416,7 @@ // the continuation bci for on stack replacement. -Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci, bool subsume_loads ) +Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci, bool subsume_loads, bool do_escape_analysis ) : Phase(Compiler), _env(ci_env), _log(ci_env->log()), @@ -430,6 +431,7 @@ _for_igvn(NULL), _warm_calls(NULL), _subsume_loads(subsume_loads), + _do_escape_analysis(do_escape_analysis), _failure_reason(NULL), _code_buffer("Compile::Fill_buffer"), _orig_pc_slot(0), @@ -487,9 +489,6 @@ PhaseGVN gvn(node_arena(), estimated_size); set_initial_gvn(&gvn); - if (DoEscapeAnalysis) - _congraph = new ConnectionGraph(this); - { // Scope for timing the parser TracePhase t3("parse", &_t_parser, true); @@ -574,9 +573,13 @@ NOT_PRODUCT( verify_graph_edges(); ) // Perform escape analysis + if (_do_escape_analysis) + _congraph = new ConnectionGraph(this); if (_congraph != NULL) { NOT_PRODUCT( TracePhase t2("escapeAnalysis", &_t_escapeAnalysis, TimeCompiler); ) _congraph->compute_escape(); + if (failing()) return; + #ifndef PRODUCT if (PrintEscapeAnalysis) { _congraph->dump(); @@ -675,6 +678,7 @@ _orig_pc_slot(0), _orig_pc_slot_offset_in_bytes(0), _subsume_loads(true), + _do_escape_analysis(false), _failure_reason(NULL), _code_buffer("Compile::Fill_buffer"), _node_bundling_limit(0), @@ -822,7 +826,7 @@ // Type::update_loaded_types(_method, _method->constants()); // Init alias_type map. 
- if (!DoEscapeAnalysis && aliaslevel == 3) + if (!_do_escape_analysis && aliaslevel == 3) aliaslevel = 2; // No unique types without escape analysis _AliasLevel = aliaslevel; const int grow_ats = 16; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/compile.hpp --- a/src/share/vm/opto/compile.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/compile.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -31,6 +31,7 @@ class Int_Array; class Matcher; class MachNode; +class MachSafePointNode; class Node; class Node_Array; class Node_Notes; @@ -52,9 +53,6 @@ class Unique_Node_List; class nmethod; class WarmCallInfo; -#ifdef ENABLE_ZAP_DEAD_LOCALS -class MachSafePointNode; -#endif //------------------------------Compile---------------------------------------- // This class defines a top-level Compiler invocation. @@ -127,6 +125,7 @@ const int _compile_id; const bool _save_argument_registers; // save/restore arg regs for trampolines const bool _subsume_loads; // Load can be matched as part of a larger op. + const bool _do_escape_analysis; // Do escape analysis. ciMethod* _method; // The method being compiled. int _entry_bci; // entry bci for osr methods. const TypeFunc* _tf; // My kind of signature @@ -260,6 +259,8 @@ // instructions that subsume a load may result in an unschedulable // instruction sequence. bool subsume_loads() const { return _subsume_loads; } + // Do escape analysis. + bool do_escape_analysis() const { return _do_escape_analysis; } bool save_argument_registers() const { return _save_argument_registers; } @@ -484,7 +485,6 @@ PhaseGVN* initial_gvn() { return _initial_gvn; } Unique_Node_List* for_igvn() { return _for_igvn; } inline void record_for_igvn(Node* n); // Body is after class Unique_Node_List. 
- void record_for_escape_analysis(Node* n); void set_initial_gvn(PhaseGVN *gvn) { _initial_gvn = gvn; } void set_for_igvn(Unique_Node_List *for_igvn) { _for_igvn = for_igvn; } @@ -560,7 +560,7 @@ // replacement, entry_bci indicates the bytecode for which to compile a // continuation. Compile(ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, - int entry_bci, bool subsume_loads); + int entry_bci, bool subsume_loads, bool do_escape_analysis); // Second major entry point. From the TypeFunc signature, generate code // to pass arguments from the Java calling convention to the C calling @@ -605,8 +605,20 @@ // Build OopMaps for each GC point void BuildOopMaps(); - // Append debug info for the node to the array - void FillLocArray( int idx, Node *local, GrowableArray *array ); + + // Append debug info for the node "local" at safepoint node "sfpt" to the + // "array", May also consult and add to "objs", which describes the + // scalar-replaced objects. + void FillLocArray( int idx, MachSafePointNode* sfpt, + Node *local, GrowableArray *array, + GrowableArray *objs ); + + // If "objs" contains an ObjectValue whose id is "id", returns it, else NULL. + static ObjectValue* sv_for_node_id(GrowableArray *objs, int id); + // Requres that "objs" does not contains an ObjectValue whose id matches + // that of "sv. Appends "sv". + static void set_sv_for_object_node(GrowableArray *objs, + ObjectValue* sv ); // Process an OopMap Element while emitting nodes void Process_OopMap_Node(MachNode *mach, int code_offset); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/connode.cpp --- a/src/share/vm/opto/connode.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/connode.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -982,34 +982,9 @@ return new (phase->C, 3) AddINode(add1,add2); } - // Fold up with a prior LoadL: LoadL->ConvL2I ==> LoadI - // Requires we understand the 'endianess' of Longs. 
- if( andl_op == Op_LoadL ) { - Node *adr = andl->in(MemNode::Address); - // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles -#ifndef VM_LITTLE_ENDIAN - // The transformation can cause problems on BIG_ENDIAN architectures - // where the jint is not the same address as the jlong. Specifically, we - // will fail to insert an anti-dependence in GCM between the LoadI and a - // subsequent StoreL because different memory offsets provoke - // flatten_alias_type() into indicating two different types. See bug - // 4755222. - - // Node *base = adr->is_AddP() ? adr->in(AddPNode::Base) : adr; - // adr = phase->transform( new (phase->C, 4) AddPNode(base,adr,phase->MakeConX(sizeof(jint)))); - return NULL; -#else - if (phase->C->alias_type(andl->adr_type())->is_volatile()) { - // Picking up the low half by itself bypasses the atomic load and we could - // end up with more than one non-atomic load. See bugs 4432655 and 4526490. - // We could go to the trouble of iterating over andl's output edges and - // punting only if there's more than one real use, but we don't bother. - return NULL; - } - return new (phase->C, 3) LoadINode(andl->in(MemNode::Control),andl->in(MemNode::Memory),adr,((LoadLNode*)andl)->raw_adr_type()); -#endif - } - + // Disable optimization: LoadL->ConvL2I ==> LoadI. + // It causes problems (sizes of Load and Store nodes do not match) + // in objects initialization code and Escape Analysis. 
return NULL; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/doCall.cpp --- a/src/share/vm/opto/doCall.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/doCall.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -390,6 +390,8 @@ } if (cg->is_inline()) { + // Accumulate has_loops estimate + C->set_has_loops(C->has_loops() || call_method->has_loops()); C->env()->notice_inlined_method(call_method); } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/escape.cpp --- a/src/share/vm/opto/escape.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/escape.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -51,21 +51,21 @@ } #ifndef PRODUCT -static char *node_type_names[] = { +static const char *node_type_names[] = { "UnknownType", "JavaObject", "LocalVar", "Field" }; -static char *esc_names[] = { +static const char *esc_names[] = { "UnknownEscape", - "NoEscape ", - "ArgEscape ", - "GlobalEscape " + "NoEscape", + "ArgEscape", + "GlobalEscape" }; -static char *edge_type_suffix[] = { +static const char *edge_type_suffix[] = { "?", // UnknownEdge "P", // PointsToEdge "D", // DeferredEdge @@ -75,7 +75,7 @@ void PointsToNode::dump() const { NodeType nt = node_type(); EscapeState es = escape_state(); - tty->print("%s %s [[", node_type_names[(int) nt], esc_names[(int) es]); + tty->print("%s %s %s [[", node_type_names[(int) nt], esc_names[(int) es], _scalar_replaceable ? 
"" : "NSR"); for (uint i = 0; i < edge_count(); i++) { tty->print(" %d%s", edge_target(i), edge_type_suffix[(int) edge_type(i)]); } @@ -91,9 +91,11 @@ _collecting = true; this->_compile = C; const PointsToNode &dummy = PointsToNode(); - _nodes = new(C->comp_arena()) GrowableArray(C->comp_arena(), (int) INITIAL_NODE_COUNT, 0, dummy); + int sz = C->unique(); + _nodes = new(C->comp_arena()) GrowableArray(C->comp_arena(), sz, sz, dummy); _phantom_object = C->top()->_idx; PointsToNode *phn = ptnode_adr(_phantom_object); + phn->_node = C->top(); phn->set_node_type(PointsToNode::JavaObject); phn->set_escape_state(PointsToNode::GlobalEscape); } @@ -121,8 +123,20 @@ f->add_edge(to_i, PointsToNode::DeferredEdge); } -int ConnectionGraph::type_to_offset(const Type *t) { - const TypePtr *t_ptr = t->isa_ptr(); +int ConnectionGraph::address_offset(Node* adr, PhaseTransform *phase) { + const Type *adr_type = phase->type(adr); + if (adr->is_AddP() && adr_type->isa_oopptr() == NULL && + adr->in(AddPNode::Address)->is_Proj() && + adr->in(AddPNode::Address)->in(0)->is_Allocate()) { + // We are computing a raw address for a store captured by an Initialize + // compute an appropriate address type. AddP cases #3 and #5 (see below). 
+ int offs = (int)phase->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot); + assert(offs != Type::OffsetBot || + adr->in(AddPNode::Address)->in(0)->is_AllocateArray(), + "offset must be a constant or it is initialization of array"); + return offs; + } + const TypePtr *t_ptr = adr_type->isa_ptr(); assert(t_ptr != NULL, "must be a pointer type"); return t_ptr->offset(); } @@ -147,12 +161,28 @@ npt->set_escape_state(es); } +void ConnectionGraph::add_node(Node *n, PointsToNode::NodeType nt, + PointsToNode::EscapeState es, bool done) { + PointsToNode* ptadr = ptnode_adr(n->_idx); + ptadr->_node = n; + ptadr->set_node_type(nt); + + // inline set_escape_state(idx, es); + PointsToNode::EscapeState old_es = ptadr->escape_state(); + if (es > old_es) + ptadr->set_escape_state(es); + + if (done) + _processed.set(n->_idx); +} + PointsToNode::EscapeState ConnectionGraph::escape_state(Node *n, PhaseTransform *phase) { uint idx = n->_idx; PointsToNode::EscapeState es; - // If we are still collecting we don't know the answer yet - if (_collecting) + // If we are still collecting or there were no non-escaping allocations + // we don't know the answer yet + if (_collecting || !_has_allocations) return PointsToNode::UnknownEscape; // if the node was created after the escape computation, return @@ -169,9 +199,9 @@ // compute max escape state of anything this node could point to VectorSet ptset(Thread::current()->resource_area()); PointsTo(ptset, n, phase); - for( VectorSetI i(&ptset); i.test() && es != PointsToNode::GlobalEscape; ++i ) { + for(VectorSetI i(&ptset); i.test() && es != PointsToNode::GlobalEscape; ++i) { uint pt = i.elem; - PointsToNode::EscapeState pes = _nodes->at(pt).escape_state(); + PointsToNode::EscapeState pes = _nodes->adr_at(pt)->escape_state(); if (pes > es) es = pes; } @@ -185,7 +215,7 @@ VectorSet visited(Thread::current()->resource_area()); GrowableArray worklist; - n = skip_casts(n); + n = n->uncast(); PointsToNode npt = _nodes->at_grow(n->_idx); 
// If we have a JavaObject, return just that object @@ -193,39 +223,33 @@ ptset.set(n->_idx); return; } - // we may have a Phi which has not been processed - if (npt._node == NULL) { - assert(n->is_Phi(), "unprocessed node must be a Phi"); - record_for_escape_analysis(n); - npt = _nodes->at(n->_idx); - } + assert(npt._node != NULL, "unregistered node"); + worklist.push(n->_idx); while(worklist.length() > 0) { int ni = worklist.pop(); PointsToNode pn = _nodes->at_grow(ni); - if (!visited.test(ni)) { - visited.set(ni); - + if (!visited.test_set(ni)) { // ensure that all inputs of a Phi have been processed - if (_collecting && pn._node->is_Phi()) { - PhiNode *phi = pn._node->as_Phi(); - process_phi_escape(phi, phase); - } + assert(!_collecting || !pn._node->is_Phi() || _processed.test(ni),""); int edges_processed = 0; for (uint e = 0; e < pn.edge_count(); e++) { + uint etgt = pn.edge_target(e); PointsToNode::EdgeType et = pn.edge_type(e); if (et == PointsToNode::PointsToEdge) { - ptset.set(pn.edge_target(e)); + ptset.set(etgt); edges_processed++; } else if (et == PointsToNode::DeferredEdge) { - worklist.push(pn.edge_target(e)); + worklist.push(etgt); edges_processed++; + } else { + assert(false,"neither PointsToEdge or DeferredEdge"); } } if (edges_processed == 0) { - // no deferred or pointsto edges found. Assume the value was set outside - // this method. Add the phantom object to the pointsto set. + // no deferred or pointsto edges found. Assume the value was set + // outside this method. Add the phantom object to the pointsto set. 
ptset.set(_phantom_object); } } @@ -239,20 +263,23 @@ PointsToNode *ptn = ptnode_adr(ni); while(i < ptn->edge_count()) { + uint t = ptn->edge_target(i); + PointsToNode *ptt = ptnode_adr(t); if (ptn->edge_type(i) != PointsToNode::DeferredEdge) { i++; } else { - uint t = ptn->edge_target(i); - PointsToNode *ptt = ptnode_adr(t); ptn->remove_edge(t, PointsToNode::DeferredEdge); - if(!visited.test(t)) { - visited.set(t); + if(!visited.test_set(t)) { for (uint j = 0; j < ptt->edge_count(); j++) { uint n1 = ptt->edge_target(j); PointsToNode *pt1 = ptnode_adr(n1); switch(ptt->edge_type(j)) { case PointsToNode::PointsToEdge: - add_pointsto_edge(ni, n1); + add_pointsto_edge(ni, n1); + if(n1 == _phantom_object) { + // Special case - field set outside (globally escaping). + ptn->set_escape_state(PointsToNode::GlobalEscape); + } break; case PointsToNode::DeferredEdge: add_deferred_edge(ni, n1); @@ -291,8 +318,8 @@ } } -// Add a deferred edge from node given by "from_i" to any field of adr_i whose offset -// matches "offset" +// Add a deferred edge from node given by "from_i" to any field of adr_i +// whose offset matches "offset". void ConnectionGraph::add_deferred_edge_to_fields(uint from_i, uint adr_i, int offs) { PointsToNode an = _nodes->at_grow(adr_i); for (uint fe = 0; fe < an.edge_count(); fe++) { @@ -310,25 +337,115 @@ } } -// -// Search memory chain of "mem" to find a MemNode whose address -// is the specified alias index. Returns the MemNode found or the -// first non-MemNode encountered. 
-// -Node *ConnectionGraph::find_mem(Node *mem, int alias_idx, PhaseGVN *igvn) { - if (mem == NULL) - return mem; - while (mem->is_Mem()) { - const Type *at = igvn->type(mem->in(MemNode::Address)); - if (at != Type::TOP) { - assert (at->isa_ptr() != NULL, "pointer type required."); - int idx = _compile->get_alias_index(at->is_ptr()); - if (idx == alias_idx) - break; - } - mem = mem->in(MemNode::Memory); +// Helper functions + +static Node* get_addp_base(Node *addp) { + assert(addp->is_AddP(), "must be AddP"); + // + // AddP cases for Base and Address inputs: + // case #1. Direct object's field reference: + // Allocate + // | + // Proj #5 ( oop result ) + // | + // CheckCastPP (cast to instance type) + // | | + // AddP ( base == address ) + // + // case #2. Indirect object's field reference: + // Phi + // | + // CastPP (cast to instance type) + // | | + // AddP ( base == address ) + // + // case #3. Raw object's field reference for Initialize node: + // Allocate + // | + // Proj #5 ( oop result ) + // top | + // \ | + // AddP ( base == top ) + // + // case #4. Array's element reference: + // {CheckCastPP | CastPP} + // | | | + // | AddP ( array's element offset ) + // | | + // AddP ( array's offset ) + // + // case #5. Raw object's field reference for arraycopy stub call: + // The inline_native_clone() case when the arraycopy stub is called + // after the allocation before Initialize and CheckCastPP nodes. + // Allocate + // | + // Proj #5 ( oop result ) + // | | + // AddP ( base == address ) + // + // case #6. Constant Pool, ThreadLocal, CastX2P or + // Raw object's field reference: + // {ConP, ThreadLocal, CastX2P, raw Load} + // top | + // \ | + // AddP ( base == top ) + // + // case #7. Klass's field reference. + // LoadKlass + // | | + // AddP ( base == address ) + // + Node *base = addp->in(AddPNode::Base)->uncast(); + if (base->is_top()) { // The AddP case #3 and #6. 
+ base = addp->in(AddPNode::Address)->uncast(); + assert(base->Opcode() == Op_ConP || base->Opcode() == Op_ThreadLocal || + base->Opcode() == Op_CastX2P || + (base->is_Mem() && base->bottom_type() == TypeRawPtr::NOTNULL) || + (base->is_Proj() && base->in(0)->is_Allocate()), "sanity"); } - return mem; + return base; +} + +static Node* find_second_addp(Node* addp, Node* n) { + assert(addp->is_AddP() && addp->outcnt() > 0, "Don't process dead nodes"); + + Node* addp2 = addp->raw_out(0); + if (addp->outcnt() == 1 && addp2->is_AddP() && + addp2->in(AddPNode::Base) == n && + addp2->in(AddPNode::Address) == addp) { + + assert(addp->in(AddPNode::Base) == n, "expecting the same base"); + // + // Find array's offset to push it on worklist first and + // as result process an array's element offset first (pushed second) + // to avoid CastPP for the array's offset. + // Otherwise the inserted CastPP (LocalVar) will point to what + // the AddP (Field) points to. Which would be wrong since + // the algorithm expects the CastPP has the same point as + // as AddP's base CheckCastPP (LocalVar). 
+ // + // ArrayAllocation + // | + // CheckCastPP + // | + // memProj (from ArrayAllocation CheckCastPP) + // | || + // | || Int (element index) + // | || | ConI (log(element size)) + // | || | / + // | || LShift + // | || / + // | AddP (array's element offset) + // | | + // | | ConI (array's offset: #12(32-bits) or #24(64-bits)) + // | / / + // AddP (array's offset) + // | + // Load/Store (memory operation on array's element) + // + return addp2; + } + return NULL; } // @@ -336,24 +453,33 @@ // address of a field of an instance // void ConnectionGraph::split_AddP(Node *addp, Node *base, PhaseGVN *igvn) { + const TypeOopPtr *base_t = igvn->type(base)->isa_oopptr(); + assert(base_t != NULL && base_t->is_instance(), "expecting instance oopptr"); const TypeOopPtr *t = igvn->type(addp)->isa_oopptr(); - const TypeOopPtr *base_t = igvn->type(base)->isa_oopptr(); - assert(t != NULL, "expecting oopptr"); - assert(base_t != NULL && base_t->is_instance(), "expecting instance oopptr"); + if (t == NULL) { + // We are computing a raw address for a store captured by an Initialize + // compute an appropriate address type. 
+ assert(igvn->type(addp) == TypeRawPtr::NOTNULL, "must be raw pointer"); + assert(addp->in(AddPNode::Address)->is_Proj(), "base of raw address must be result projection from allocation"); + int offs = (int)igvn->find_intptr_t_con(addp->in(AddPNode::Offset), Type::OffsetBot); + assert(offs != Type::OffsetBot, "offset must be a constant"); + t = base_t->add_offset(offs)->is_oopptr(); + } uint inst_id = base_t->instance_id(); assert(!t->is_instance() || t->instance_id() == inst_id, "old type must be non-instance or match new type"); const TypeOopPtr *tinst = base_t->add_offset(t->offset())->is_oopptr(); - // ensure an alias index is allocated for the instance type + // Do NOT remove the next call: ensure an new alias index is allocated + // for the instance type int alias_idx = _compile->get_alias_index(tinst); igvn->set_type(addp, tinst); // record the allocation in the node map set_map(addp->_idx, get_map(base->_idx)); - // if the Address input is not the appropriate instance type (due to intervening - // casts,) insert a cast + // if the Address input is not the appropriate instance type + // (due to intervening casts,) insert a cast Node *adr = addp->in(AddPNode::Address); const TypeOopPtr *atype = igvn->type(adr)->isa_oopptr(); - if (atype->instance_id() != inst_id) { + if (atype != NULL && atype->instance_id() != inst_id) { assert(!atype->is_instance(), "no conflicting instances"); const TypeOopPtr *new_atype = base_t->add_offset(atype->offset())->isa_oopptr(); Node *acast = new (_compile, 2) CastPPNode(adr, new_atype); @@ -372,8 +498,9 @@ addp->set_req(AddPNode::Base, bcast); addp->set_req(AddPNode::Address, acast); igvn->hash_insert(addp); - record_for_optimizer(addp); } + // Put on IGVN worklist since at least addp's type was changed above. 
+ record_for_optimizer(addp); } // @@ -386,17 +513,25 @@ new_created = false; int phi_alias_idx = C->get_alias_index(orig_phi->adr_type()); // nothing to do if orig_phi is bottom memory or matches alias_idx - if (phi_alias_idx == Compile::AliasIdxBot || phi_alias_idx == alias_idx) { + if (phi_alias_idx == alias_idx) { return orig_phi; } // have we already created a Phi for this alias index? PhiNode *result = get_map_phi(orig_phi->_idx); - const TypePtr *atype = C->get_adr_type(alias_idx); if (result != NULL && C->get_alias_index(result->adr_type()) == alias_idx) { return result; } - + if ((int)C->unique() + 2*NodeLimitFudgeFactor > MaxNodeLimit) { + if (C->do_escape_analysis() == true && !C->failing()) { + // Retry compilation without escape analysis. + // If this is the first failure, the sentinel string will "stick" + // to the Compile object, and the C2Compiler will see it and retry. + C->record_failure(C2Compiler::retry_no_escape_analysis()); + } + return NULL; + } orig_phi_worklist.append_if_missing(orig_phi); + const TypePtr *atype = C->get_adr_type(alias_idx); result = PhiNode::make(orig_phi->in(0), NULL, Type::MEMORY, atype); set_map_phi(orig_phi->_idx, result); igvn->set_type(result, result->bottom_type()); @@ -414,7 +549,7 @@ assert(alias_idx != Compile::AliasIdxBot, "can't split out bottom memory"); Compile *C = _compile; bool new_phi_created; - PhiNode *result = create_split_phi(orig_phi, alias_idx, orig_phi_worklist, igvn, new_phi_created); + PhiNode *result = create_split_phi(orig_phi, alias_idx, orig_phi_worklist, igvn, new_phi_created); if (!new_phi_created) { return result; } @@ -427,45 +562,149 @@ bool finished = false; while(!finished) { while (idx < phi->req()) { - Node *mem = find_mem(phi->in(idx), alias_idx, igvn); + Node *mem = find_inst_mem(phi->in(idx), alias_idx, orig_phi_worklist, igvn); if (mem != NULL && mem->is_Phi()) { - PhiNode *nphi = create_split_phi(mem->as_Phi(), alias_idx, orig_phi_worklist, igvn, new_phi_created); + PhiNode 
*newphi = create_split_phi(mem->as_Phi(), alias_idx, orig_phi_worklist, igvn, new_phi_created); if (new_phi_created) { // found an phi for which we created a new split, push current one on worklist and begin // processing new one phi_list.push(phi); cur_input.push(idx); phi = mem->as_Phi(); - result = nphi; + result = newphi; idx = 1; continue; } else { - mem = nphi; + mem = newphi; } } + if (C->failing()) { + return NULL; + } result->set_req(idx++, mem); } #ifdef ASSERT // verify that the new Phi has an input for each input of the original assert( phi->req() == result->req(), "must have same number of inputs."); assert( result->in(0) != NULL && result->in(0) == phi->in(0), "regions must match"); +#endif + // Check if all new phi's inputs have specified alias index. + // Otherwise use old phi. for (uint i = 1; i < phi->req(); i++) { - assert((phi->in(i) == NULL) == (result->in(i) == NULL), "inputs must correspond."); + Node* in = result->in(i); + assert((phi->in(i) == NULL) == (in == NULL), "inputs must correspond."); } -#endif // we have finished processing a Phi, see if there are any more to do finished = (phi_list.length() == 0 ); if (!finished) { phi = phi_list.pop(); idx = cur_input.pop(); - PhiNode *prev_phi = get_map_phi(phi->_idx); - prev_phi->set_req(idx++, result); - result = prev_phi; + PhiNode *prev_result = get_map_phi(phi->_idx); + prev_result->set_req(idx++, result); + result = prev_result; } } return result; } + +// +// The next methods are derived from methods in MemNode. +// +static Node *step_through_mergemem(MergeMemNode *mmem, int alias_idx, const TypeOopPtr *tinst) { + Node *mem = mmem; + // TypeInstPtr::NOTNULL+any is an OOP with unknown offset - generally + // means an array I have not precisely typed yet. Do not do any + // alias stuff with it any time soon. 
+ if( tinst->base() != Type::AnyPtr && + !(tinst->klass()->is_java_lang_Object() && + tinst->offset() == Type::OffsetBot) ) { + mem = mmem->memory_at(alias_idx); + // Update input if it is progress over what we have now + } + return mem; +} + +// +// Search memory chain of "mem" to find a MemNode whose address +// is the specified alias index. +// +Node* ConnectionGraph::find_inst_mem(Node *orig_mem, int alias_idx, GrowableArray &orig_phis, PhaseGVN *phase) { + if (orig_mem == NULL) + return orig_mem; + Compile* C = phase->C; + const TypeOopPtr *tinst = C->get_adr_type(alias_idx)->isa_oopptr(); + bool is_instance = (tinst != NULL) && tinst->is_instance(); + Node *prev = NULL; + Node *result = orig_mem; + while (prev != result) { + prev = result; + if (result->is_Mem()) { + MemNode *mem = result->as_Mem(); + const Type *at = phase->type(mem->in(MemNode::Address)); + if (at != Type::TOP) { + assert (at->isa_ptr() != NULL, "pointer type required."); + int idx = C->get_alias_index(at->is_ptr()); + if (idx == alias_idx) + break; + } + result = mem->in(MemNode::Memory); + } + if (!is_instance) + continue; // don't search further for non-instance types + // skip over a call which does not affect this memory slice + if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) { + Node *proj_in = result->in(0); + if (proj_in->is_Call()) { + CallNode *call = proj_in->as_Call(); + if (!call->may_modify(tinst, phase)) { + result = call->in(TypeFunc::Memory); + } + } else if (proj_in->is_Initialize()) { + AllocateNode* alloc = proj_in->as_Initialize()->allocation(); + // Stop if this is the initialization for the object instance which + // which contains this memory slice, otherwise skip over it. 
+ if (alloc == NULL || alloc->_idx != tinst->instance_id()) { + result = proj_in->in(TypeFunc::Memory); + } + } else if (proj_in->is_MemBar()) { + result = proj_in->in(TypeFunc::Memory); + } + } else if (result->is_MergeMem()) { + MergeMemNode *mmem = result->as_MergeMem(); + result = step_through_mergemem(mmem, alias_idx, tinst); + if (result == mmem->base_memory()) { + // Didn't find instance memory, search through general slice recursively. + result = mmem->memory_at(C->get_general_index(alias_idx)); + result = find_inst_mem(result, alias_idx, orig_phis, phase); + if (C->failing()) { + return NULL; + } + mmem->set_memory_at(alias_idx, result); + } + } else if (result->is_Phi() && + C->get_alias_index(result->as_Phi()->adr_type()) != alias_idx) { + Node *un = result->as_Phi()->unique_input(phase); + if (un != NULL) { + result = un; + } else { + break; + } + } + } + if (is_instance && result->is_Phi()) { + PhiNode *mphi = result->as_Phi(); + assert(mphi->bottom_type() == Type::MEMORY, "memory phi required"); + const TypePtr *t = mphi->adr_type(); + if (C->get_alias_index(t) != alias_idx) { + result = split_memory_phi(mphi, alias_idx, orig_phis, phase); + } + } + // the result is either MemNode, PhiNode, InitializeNode. + return result; +} + + // // Convert the types of unescaped object to instance types where possible, // propagate the new type information through the graph, and update memory @@ -564,51 +803,101 @@ VectorSet visited(Thread::current()->resource_area()); VectorSet ptset(Thread::current()->resource_area()); - // Phase 1: Process possible allocations from alloc_worklist. Create instance - // types for the CheckCastPP for allocations where possible. + + // Phase 1: Process possible allocations from alloc_worklist. + // Create instance types for the CheckCastPP for allocations where possible. 
while (alloc_worklist.length() != 0) { Node *n = alloc_worklist.pop(); uint ni = n->_idx; + const TypeOopPtr* tinst = NULL; if (n->is_Call()) { CallNode *alloc = n->as_Call(); // copy escape information to call node - PointsToNode ptn = _nodes->at(alloc->_idx); + PointsToNode* ptn = _nodes->adr_at(alloc->_idx); PointsToNode::EscapeState es = escape_state(alloc, igvn); - alloc->_escape_state = es; + // We have an allocation or call which returns a Java object, + // see if it is unescaped. + if (es != PointsToNode::NoEscape || !ptn->_scalar_replaceable) + continue; + if (alloc->is_Allocate()) { + // Set the scalar_replaceable flag before the next check. + alloc->as_Allocate()->_is_scalar_replaceable = true; + } // find CheckCastPP of call return value - n = alloc->proj_out(TypeFunc::Parms); - if (n != NULL && n->outcnt() == 1) { - n = n->unique_out(); - if (n->Opcode() != Op_CheckCastPP) { + n = alloc->result_cast(); + if (n == NULL || // No uses accept Initialize or + !n->is_CheckCastPP()) // not unique CheckCastPP. + continue; + // The inline code for Object.clone() casts the allocation result to + // java.lang.Object and then to the the actual type of the allocated + // object. Detect this case and use the second cast. 
+ if (alloc->is_Allocate() && n->as_Type()->type() == TypeInstPtr::NOTNULL + && igvn->type(alloc->in(AllocateNode::KlassNode)) != TypeKlassPtr::OBJECT) { + Node *cast2 = NULL; + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node *use = n->fast_out(i); + if (use->is_CheckCastPP()) { + cast2 = use; + break; + } + } + if (cast2 != NULL) { + n = cast2; + } else { continue; } - } else { - continue; } - // we have an allocation or call which returns a Java object, see if it is unescaped - if (es != PointsToNode::NoEscape || !ptn._unique_type) { - continue; // can't make a unique type - } + set_escape_state(n->_idx, es); + // in order for an object to be stackallocatable, it must be: + // - a direct allocation (not a call returning an object) + // - non-escaping + // - eligible to be a unique type + // - not determined to be ineligible by escape analysis set_map(alloc->_idx, n); set_map(n->_idx, alloc); - const TypeInstPtr *t = igvn->type(n)->isa_instptr(); - // Unique types which are arrays are not currently supported. - // The check for AllocateArray is needed in case an array - // allocation is immediately cast to Object - if (t == NULL || alloc->is_AllocateArray()) + const TypeOopPtr *t = igvn->type(n)->isa_oopptr(); + if (t == NULL) continue; // not a TypeInstPtr - const TypeOopPtr *tinst = t->cast_to_instance(ni); + tinst = t->cast_to_instance(ni); igvn->hash_delete(n); igvn->set_type(n, tinst); n->raise_bottom_type(tinst); igvn->hash_insert(n); + record_for_optimizer(n); + if (alloc->is_Allocate() && ptn->_scalar_replaceable && + (t->isa_instptr() || t->isa_aryptr())) { + // An allocation may have an Initialize which has raw stores. Scan + // the users of the raw allocation result and push AddP users + // on alloc_worklist. 
+ Node *raw_result = alloc->proj_out(TypeFunc::Parms); + assert (raw_result != NULL, "must have an allocation result"); + for (DUIterator_Fast imax, i = raw_result->fast_outs(imax); i < imax; i++) { + Node *use = raw_result->fast_out(i); + if (use->is_AddP() && use->outcnt() > 0) { // Don't process dead nodes + Node* addp2 = find_second_addp(use, raw_result); + if (addp2 != NULL) { + assert(alloc->is_AllocateArray(),"array allocation was expected"); + alloc_worklist.append_if_missing(addp2); + } + alloc_worklist.append_if_missing(use); + } else if (use->is_Initialize()) { + memnode_worklist.append_if_missing(use); + } + } + } } else if (n->is_AddP()) { ptset.Clear(); - PointsTo(ptset, n->in(AddPNode::Address), igvn); + PointsTo(ptset, get_addp_base(n), igvn); assert(ptset.Size() == 1, "AddP address is unique"); - Node *base = get_map(ptset.getelem()); + uint elem = ptset.getelem(); // Allocation node's index + if (elem == _phantom_object) + continue; // Assume the value was set outside this method. + Node *base = get_map(elem); // CheckCastPP node split_AddP(n, base, igvn); - } else if (n->is_Phi() || n->Opcode() == Op_CastPP || n->Opcode() == Op_CheckCastPP) { + tinst = igvn->type(base)->isa_oopptr(); + } else if (n->is_Phi() || + n->is_CheckCastPP() || + (n->is_ConstraintCast() && n->Opcode() == Op_CastPP)) { if (visited.test_set(n->_idx)) { assert(n->is_Phi(), "loops only through Phi's"); continue; // already processed @@ -616,17 +905,23 @@ ptset.Clear(); PointsTo(ptset, n, igvn); if (ptset.Size() == 1) { + uint elem = ptset.getelem(); // Allocation node's index + if (elem == _phantom_object) + continue; // Assume the value was set outside this method. 
+ Node *val = get_map(elem); // CheckCastPP node TypeNode *tn = n->as_Type(); - Node *val = get_map(ptset.getelem()); - const TypeInstPtr *val_t = igvn->type(val)->isa_instptr();; - assert(val_t != NULL && val_t->is_instance(), "instance type expected."); - const TypeInstPtr *tn_t = igvn->type(tn)->isa_instptr();; + tinst = igvn->type(val)->isa_oopptr(); + assert(tinst != NULL && tinst->is_instance() && + tinst->instance_id() == elem , "instance type expected."); + const TypeOopPtr *tn_t = igvn->type(tn)->isa_oopptr(); - if (tn_t != NULL && val_t->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE)->higher_equal(tn_t)) { + if (tn_t != NULL && + tinst->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE)->higher_equal(tn_t)) { igvn->hash_delete(tn); - igvn->set_type(tn, val_t); - tn->set_type(val_t); + igvn->set_type(tn, tinst); + tn->set_type(tinst); igvn->hash_insert(tn); + record_for_optimizer(n); } } } else { @@ -636,13 +931,38 @@ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node *use = n->fast_out(i); if(use->is_Mem() && use->in(MemNode::Address) == n) { - memnode_worklist.push(use); - } else if (use->is_AddP() || use->is_Phi() || use->Opcode() == Op_CastPP || use->Opcode() == Op_CheckCastPP) { - alloc_worklist.push(use); + memnode_worklist.append_if_missing(use); + } else if (use->is_Initialize()) { + memnode_worklist.append_if_missing(use); + } else if (use->is_MergeMem()) { + mergemem_worklist.append_if_missing(use); + } else if (use->is_Call() && tinst != NULL) { + // Look for MergeMem nodes for calls which reference unique allocation + // (through CheckCastPP nodes) even for debug info. 
+ Node* m = use->in(TypeFunc::Memory); + uint iid = tinst->instance_id(); + while (m->is_Proj() && m->in(0)->is_Call() && + m->in(0) != use && m->in(0)->_idx != iid) { // stop at 'use' itself or at the call whose _idx is the instance id + m = m->in(0)->in(TypeFunc::Memory); + } + if (m->is_MergeMem()) { + mergemem_worklist.append_if_missing(m); + } + } else if (use->is_AddP() && use->outcnt() > 0) { // No dead nodes + Node* addp2 = find_second_addp(use, n); + if (addp2 != NULL) { + alloc_worklist.append_if_missing(addp2); + } + alloc_worklist.append_if_missing(use); + } else if (use->is_Phi() || + use->is_CheckCastPP() || + (use->is_ConstraintCast() && use->Opcode() == Op_CastPP)) { + alloc_worklist.append_if_missing(use); } } } + // New alias types were created in split_AddP(). uint new_index_end = (uint) _compile->num_alias_types(); // Phase 2: Process MemNode's from memnode_worklist. compute new address type and @@ -651,29 +971,37 @@ if (memnode_worklist.length() == 0) return; // nothing to do - while (memnode_worklist.length() != 0) { Node *n = memnode_worklist.pop(); + if (visited.test_set(n->_idx)) + continue; if (n->is_Phi()) { assert(n->as_Phi()->adr_type() != TypePtr::BOTTOM, "narrow memory slice required"); // we don't need to do anything, but the users must be pushed if we haven't processed // this Phi before - if (visited.test_set(n->_idx)) + } else if (n->is_Initialize()) { + // we don't need to do anything, but the users of the memory projection must be pushed + n = n->as_Initialize()->proj_out(TypeFunc::Memory); + if (n == NULL) continue; } else { assert(n->is_Mem(), "memory node required."); Node *addr = n->in(MemNode::Address); + assert(addr->is_AddP(), "AddP required"); const Type *addr_t = igvn->type(addr); if (addr_t == Type::TOP) continue; assert (addr_t->isa_ptr() != NULL, "pointer type required."); int alias_idx = _compile->get_alias_index(addr_t->is_ptr()); - Node *mem = find_mem(n->in(MemNode::Memory), alias_idx, igvn); - if (mem->is_Phi()) { - mem = split_memory_phi(mem->as_Phi(), alias_idx, orig_phis, 
igvn); + assert ((uint)alias_idx < new_index_end, "wrong alias index"); + Node *mem = find_inst_mem(n->in(MemNode::Memory), alias_idx, orig_phis, igvn); + if (_compile->failing()) { + return; } - if (mem != n->in(MemNode::Memory)) + if (mem != n->in(MemNode::Memory)) { set_map(n->_idx, mem); + _nodes->adr_at(n->_idx)->_node = n; + } if (n->is_Load()) { continue; // don't push users } else if (n->is_LoadStore()) { @@ -692,29 +1020,33 @@ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node *use = n->fast_out(i); if (use->is_Phi()) { - memnode_worklist.push(use); + memnode_worklist.append_if_missing(use); } else if(use->is_Mem() && use->in(MemNode::Memory) == n) { - memnode_worklist.push(use); + memnode_worklist.append_if_missing(use); + } else if (use->is_Initialize()) { + memnode_worklist.append_if_missing(use); } else if (use->is_MergeMem()) { - mergemem_worklist.push(use); + mergemem_worklist.append_if_missing(use); } } } - // Phase 3: Process MergeMem nodes from mergemem_worklist. Walk each memory slice - // moving the first node encountered of each instance type to the - // the input corresponding to its alias index. + // Phase 3: Process MergeMem nodes from mergemem_worklist. + // Walk each memory moving the first node encountered of each + // instance type to the the input corresponding to its alias index. while (mergemem_worklist.length() != 0) { Node *n = mergemem_worklist.pop(); assert(n->is_MergeMem(), "MergeMem node required."); + if (visited.test_set(n->_idx)) + continue; MergeMemNode *nmm = n->as_MergeMem(); // Note: we don't want to use MergeMemStream here because we only want to - // scan inputs which exist at the start, not ones we add during processing + // scan inputs which exist at the start, not ones we add during processing. 
uint nslices = nmm->req(); igvn->hash_delete(nmm); for (uint i = Compile::AliasIdxRaw+1; i < nslices; i++) { - Node * mem = nmm->in(i); - Node * cur = NULL; + Node* mem = nmm->in(i); + Node* cur = NULL; if (mem == NULL || mem->is_top()) continue; while (mem->is_Mem()) { @@ -734,26 +1066,76 @@ mem = mem->in(MemNode::Memory); } nmm->set_memory_at(i, (cur != NULL) ? cur : mem); - if (mem->is_Phi()) { - // We have encountered a Phi, we need to split the Phi for - // any instance of the current type if we haven't encountered - // a value of the instance along the chain. - for (uint ni = new_index_start; ni < new_index_end; ni++) { - if((uint)_compile->get_general_index(ni) == i) { - Node *m = (ni >= nmm->req()) ? nmm->empty_memory() : nmm->in(ni); - if (nmm->is_empty_memory(m)) { - nmm->set_memory_at(ni, split_memory_phi(mem->as_Phi(), ni, orig_phis, igvn)); + // Find any instance of the current type if we haven't encountered + // a value of the instance along the chain. + for (uint ni = new_index_start; ni < new_index_end; ni++) { + if((uint)_compile->get_general_index(ni) == i) { + Node *m = (ni >= nmm->req()) ? nmm->empty_memory() : nmm->in(ni); + if (nmm->is_empty_memory(m)) { + Node* result = find_inst_mem(mem, ni, orig_phis, igvn); + if (_compile->failing()) { + return; + } + nmm->set_memory_at(ni, result); + } + } + } + } + // Find the rest of instances values + for (uint ni = new_index_start; ni < new_index_end; ni++) { + const TypeOopPtr *tinst = igvn->C->get_adr_type(ni)->isa_oopptr(); + Node* result = step_through_mergemem(nmm, ni, tinst); + if (result == nmm->base_memory()) { + // Didn't find instance memory, search through general slice recursively. 
+ result = nmm->memory_at(igvn->C->get_general_index(ni)); + result = find_inst_mem(result, ni, orig_phis, igvn); + if (_compile->failing()) { + return; + } + nmm->set_memory_at(ni, result); + } + } + igvn->hash_insert(nmm); + record_for_optimizer(nmm); + + // Propagate new memory slices to following MergeMem nodes. + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node *use = n->fast_out(i); + if (use->is_Call()) { + CallNode* in = use->as_Call(); + if (in->proj_out(TypeFunc::Memory) != NULL) { + Node* m = in->proj_out(TypeFunc::Memory); + for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) { + Node* mm = m->fast_out(j); + if (mm->is_MergeMem()) { + mergemem_worklist.append_if_missing(mm); + } + } + } + if (use->is_Allocate()) { + use = use->as_Allocate()->initialization(); + if (use == NULL) { + continue; + } + } + } + if (use->is_Initialize()) { + InitializeNode* in = use->as_Initialize(); + if (in->proj_out(TypeFunc::Memory) != NULL) { + Node* m = in->proj_out(TypeFunc::Memory); + for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) { + Node* mm = m->fast_out(j); + if (mm->is_MergeMem()) { + mergemem_worklist.append_if_missing(mm); } } } } } - igvn->hash_insert(nmm); - record_for_optimizer(nmm); } - // Phase 4: Update the inputs of non-instance memory Phis and the Memory input of memnodes - // + // Phase 4: Update the inputs of non-instance memory Phis and + // the Memory input of memnodes // First update the inputs of any non-instance Phi's from // which we split out an instance Phi. 
Note we don't have // to recursively process Phi's encounted on the input memory @@ -765,7 +1147,10 @@ igvn->hash_delete(phi); for (uint i = 1; i < phi->req(); i++) { Node *mem = phi->in(i); - Node *new_mem = find_mem(mem, alias_idx, igvn); + Node *new_mem = find_inst_mem(mem, alias_idx, orig_phis, igvn); + if (_compile->failing()) { + return; + } if (mem != new_mem) { phi->set_req(i, new_mem); } @@ -779,7 +1164,7 @@ for (int i = 0; i < _nodes->length(); i++) { Node *nmem = get_map(i); if (nmem != NULL) { - Node *n = _nodes->at(i)._node; + Node *n = _nodes->adr_at(i)->_node; if (n != NULL && n->is_Mem()) { igvn->hash_delete(n); n->set_req(MemNode::Memory, nmem); @@ -791,59 +1176,110 @@ } void ConnectionGraph::compute_escape() { - GrowableArray worklist; - GrowableArray alloc_worklist; - VectorSet visited(Thread::current()->resource_area()); - PhaseGVN *igvn = _compile->initial_gvn(); + + // 1. Populate Connection Graph with Ideal nodes. + + Unique_Node_List worklist_init; + worklist_init.map(_compile->unique(), NULL); // preallocate space + + // Initialize worklist + if (_compile->root() != NULL) { + worklist_init.push(_compile->root()); + } + + GrowableArray cg_worklist; + PhaseGVN* igvn = _compile->initial_gvn(); + bool has_allocations = false; + + // Push all useful nodes onto CG list and set their type. 
+ for( uint next = 0; next < worklist_init.size(); ++next ) { + Node* n = worklist_init.at(next); + record_for_escape_analysis(n, igvn); + if (n->is_Call() && + _nodes->adr_at(n->_idx)->node_type() == PointsToNode::JavaObject) { + has_allocations = true; + } + if(n->is_AddP()) + cg_worklist.append(n->_idx); + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node* m = n->fast_out(i); // Get user + worklist_init.push(m); + } + } - // process Phi nodes from the deferred list, they may not have - while(_deferred.size() > 0) { - Node * n = _deferred.pop(); - PhiNode * phi = n->as_Phi(); + if (has_allocations) { + _has_allocations = true; + } else { + _has_allocations = false; + _collecting = false; + return; // Nothing to do. + } + + // 2. First pass to create simple CG edges (doesn't require to walk CG). + for( uint next = 0; next < _delayed_worklist.size(); ++next ) { + Node* n = _delayed_worklist.at(next); + build_connection_graph(n, igvn); + } - process_phi_escape(phi, igvn); + // 3. Pass to create fields edges (Allocate -F-> AddP). + for( int next = 0; next < cg_worklist.length(); ++next ) { + int ni = cg_worklist.at(next); + build_connection_graph(_nodes->adr_at(ni)->_node, igvn); + } + + cg_worklist.clear(); + cg_worklist.append(_phantom_object); + + // 4. Build Connection Graph which need + // to walk the connection graph. 
+ for (uint ni = 0; ni < (uint)_nodes->length(); ni++) { + PointsToNode* ptn = _nodes->adr_at(ni); + Node *n = ptn->_node; + if (n != NULL) { // Call, AddP, LoadP, StoreP + build_connection_graph(n, igvn); + if (ptn->node_type() != PointsToNode::UnknownType) + cg_worklist.append(n->_idx); // Collect CG nodes + } } VectorSet ptset(Thread::current()->resource_area()); + GrowableArray alloc_worklist; + GrowableArray worklist; // remove deferred edges from the graph and collect // information we will need for type splitting - for (uint ni = 0; ni < (uint)_nodes->length(); ni++) { - PointsToNode * ptn = _nodes->adr_at(ni); + for( int next = 0; next < cg_worklist.length(); ++next ) { + int ni = cg_worklist.at(next); + PointsToNode* ptn = _nodes->adr_at(ni); PointsToNode::NodeType nt = ptn->node_type(); - - if (nt == PointsToNode::UnknownType) { - continue; // not a node we are interested in - } Node *n = ptn->_node; if (nt == PointsToNode::LocalVar || nt == PointsToNode::Field) { remove_deferred(ni); if (n->is_AddP()) { - // if this AddP computes an address which may point to more that one - // object, nothing the address points to can be a unique type. - Node *base = n->in(AddPNode::Base); + // If this AddP computes an address which may point to more that one + // object, nothing the address points to can be scalar replaceable. 
+ Node *base = get_addp_base(n); ptset.Clear(); PointsTo(ptset, base, igvn); if (ptset.Size() > 1) { for( VectorSetI j(&ptset); j.test(); ++j ) { - PointsToNode *ptaddr = _nodes->adr_at(j.elem); - ptaddr->_unique_type = false; + uint pt = j.elem; + ptnode_adr(pt)->_scalar_replaceable = false; } } } - } else if (n->is_Call()) { - // initialize _escape_state of calls to GlobalEscape - n->as_Call()->_escape_state = PointsToNode::GlobalEscape; - // push call on alloc_worlist (alocations are calls) - // for processing by split_unique_types() - alloc_worklist.push(n); + } else if (nt == PointsToNode::JavaObject && n->is_Call()) { + // Push call on alloc_worlist (alocations are calls) + // for processing by split_unique_types(). + alloc_worklist.append(n); } } + // push all GlobalEscape nodes on the worklist - for (uint nj = 0; nj < (uint)_nodes->length(); nj++) { - if (_nodes->at(nj).escape_state() == PointsToNode::GlobalEscape) { - worklist.append(nj); - } + for( int next = 0; next < cg_worklist.length(); ++next ) { + int nk = cg_worklist.at(next); + if (_nodes->adr_at(nk)->escape_state() == PointsToNode::GlobalEscape) + worklist.append(nk); } // mark all node reachable from GlobalEscape nodes while(worklist.length() > 0) { @@ -851,7 +1287,7 @@ for (uint ei = 0; ei < n.edge_count(); ei++) { uint npi = n.edge_target(ei); PointsToNode *np = ptnode_adr(npi); - if (np->escape_state() != PointsToNode::GlobalEscape) { + if (np->escape_state() < PointsToNode::GlobalEscape) { np->set_escape_state(PointsToNode::GlobalEscape); worklist.append_if_missing(npi); } @@ -859,128 +1295,191 @@ } // push all ArgEscape nodes on the worklist - for (uint nk = 0; nk < (uint)_nodes->length(); nk++) { - if (_nodes->at(nk).escape_state() == PointsToNode::ArgEscape) + for( int next = 0; next < cg_worklist.length(); ++next ) { + int nk = cg_worklist.at(next); + if (_nodes->adr_at(nk)->escape_state() == PointsToNode::ArgEscape) worklist.push(nk); } // mark all node reachable from ArgEscape nodes 
while(worklist.length() > 0) { PointsToNode n = _nodes->at(worklist.pop()); - for (uint ei = 0; ei < n.edge_count(); ei++) { uint npi = n.edge_target(ei); PointsToNode *np = ptnode_adr(npi); - if (np->escape_state() != PointsToNode::ArgEscape) { + if (np->escape_state() < PointsToNode::ArgEscape) { np->set_escape_state(PointsToNode::ArgEscape); worklist.append_if_missing(npi); } } } + + // push all NoEscape nodes on the worklist + for( int next = 0; next < cg_worklist.length(); ++next ) { + int nk = cg_worklist.at(next); + if (_nodes->adr_at(nk)->escape_state() == PointsToNode::NoEscape) + worklist.push(nk); + } + // mark all node reachable from NoEscape nodes + while(worklist.length() > 0) { + PointsToNode n = _nodes->at(worklist.pop()); + for (uint ei = 0; ei < n.edge_count(); ei++) { + uint npi = n.edge_target(ei); + PointsToNode *np = ptnode_adr(npi); + if (np->escape_state() < PointsToNode::NoEscape) { + np->set_escape_state(PointsToNode::NoEscape); + worklist.append_if_missing(npi); + } + } + } + _collecting = false; - // Now use the escape information to create unique types for - // unescaped objects - split_unique_types(alloc_worklist); -} + has_allocations = false; // Are there scalar replaceable allocations? 
-Node * ConnectionGraph::skip_casts(Node *n) { - while(n->Opcode() == Op_CastPP || n->Opcode() == Op_CheckCastPP) { - n = n->in(1); + for( int next = 0; next < alloc_worklist.length(); ++next ) { + Node* n = alloc_worklist.at(next); + uint ni = n->_idx; + PointsToNode* ptn = _nodes->adr_at(ni); + PointsToNode::EscapeState es = ptn->escape_state(); + if (ptn->escape_state() == PointsToNode::NoEscape && + ptn->_scalar_replaceable) { + has_allocations = true; + break; + } } - return n; -} - -void ConnectionGraph::process_phi_escape(PhiNode *phi, PhaseTransform *phase) { - - if (phi->type()->isa_oopptr() == NULL) - return; // nothing to do if not an oop - - PointsToNode *ptadr = ptnode_adr(phi->_idx); - int incount = phi->req(); - int non_null_inputs = 0; + if (!has_allocations) { + return; // Nothing to do. + } - for (int i = 1; i < incount ; i++) { - if (phi->in(i) != NULL) - non_null_inputs++; - } - if (non_null_inputs == ptadr->_inputs_processed) - return; // no new inputs since the last time this node was processed, - // the current information is valid + if(_compile->AliasLevel() >= 3 && EliminateAllocations) { + // Now use the escape information to create unique types for + // unescaped objects + split_unique_types(alloc_worklist); + if (_compile->failing()) return; + + // Clean up after split unique types. 
+ ResourceMark rm; + PhaseRemoveUseless pru(_compile->initial_gvn(), _compile->for_igvn()); - ptadr->_inputs_processed = non_null_inputs; // prevent recursive processing of this node - for (int j = 1; j < incount ; j++) { - Node * n = phi->in(j); - if (n == NULL) - continue; // ignore NULL - n = skip_casts(n); - if (n->is_top() || n == phi) - continue; // ignore top or inputs which go back this node - int nopc = n->Opcode(); - PointsToNode npt = _nodes->at(n->_idx); - if (_nodes->at(n->_idx).node_type() == PointsToNode::JavaObject) { - add_pointsto_edge(phi->_idx, n->_idx); - } else { - add_deferred_edge(phi->_idx, n->_idx); +#ifdef ASSERT + } else if (PrintEscapeAnalysis || PrintEliminateAllocations) { + tty->print("=== No allocations eliminated for "); + C()->method()->print_short_name(); + if(!EliminateAllocations) { + tty->print(" since EliminateAllocations is off ==="); + } else if(_compile->AliasLevel() < 3) { + tty->print(" since AliasLevel < 3 ==="); } + tty->cr(); +#endif } } void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *phase) { - _processed.set(call->_idx); switch (call->Opcode()) { - - // arguments to allocation and locking don't escape +#ifdef ASSERT case Op_Allocate: case Op_AllocateArray: case Op_Lock: case Op_Unlock: + assert(false, "should be done already"); break; +#endif + case Op_CallLeafNoFP: + { + // Stub calls, objects do not escape but they are not scale replaceable. + // Adjust escape state for outgoing arguments. 
+ const TypeTuple * d = call->tf()->domain(); + VectorSet ptset(Thread::current()->resource_area()); + for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { + const Type* at = d->field_at(i); + Node *arg = call->in(i)->uncast(); + const Type *aat = phase->type(arg); + if (!arg->is_top() && at->isa_ptr() && aat->isa_ptr()) { + assert(aat == Type::TOP || aat == TypePtr::NULL_PTR || + aat->isa_ptr() != NULL, "expecting an Ptr"); + set_escape_state(arg->_idx, PointsToNode::ArgEscape); + if (arg->is_AddP()) { + // + // The inline_native_clone() case when the arraycopy stub is called + // after the allocation before Initialize and CheckCastPP nodes. + // + // Set AddP's base (Allocate) as not scalar replaceable since + // pointer to the base (with offset) is passed as argument. + // + arg = get_addp_base(arg); + } + ptset.Clear(); + PointsTo(ptset, arg, phase); + for( VectorSetI j(&ptset); j.test(); ++j ) { + uint pt = j.elem; + set_escape_state(pt, PointsToNode::ArgEscape); + } + } + } + break; + } case Op_CallStaticJava: // For a static call, we know exactly what method is being called. // Use bytecode estimator to record the call's escape affects { ciMethod *meth = call->as_CallJava()->method(); - if (meth != NULL) { + BCEscapeAnalyzer *call_analyzer = (meth !=NULL) ? 
meth->get_bcea() : NULL; + // fall-through if not a Java method or no analyzer information + if (call_analyzer != NULL) { const TypeTuple * d = call->tf()->domain(); - BCEscapeAnalyzer call_analyzer(meth); VectorSet ptset(Thread::current()->resource_area()); + bool copy_dependencies = false; for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { const Type* at = d->field_at(i); int k = i - TypeFunc::Parms; if (at->isa_oopptr() != NULL) { - Node *arg = skip_casts(call->in(i)); - - if (!call_analyzer.is_arg_stack(k)) { - // The argument global escapes, mark everything it could point to - ptset.Clear(); - PointsTo(ptset, arg, phase); - for( VectorSetI j(&ptset); j.test(); ++j ) { - uint pt = j.elem; + Node *arg = call->in(i)->uncast(); - set_escape_state(pt, PointsToNode::GlobalEscape); + bool global_escapes = false; + bool fields_escapes = false; + if (!call_analyzer->is_arg_stack(k)) { + // The argument global escapes, mark everything it could point to + set_escape_state(arg->_idx, PointsToNode::GlobalEscape); + global_escapes = true; + } else { + if (!call_analyzer->is_arg_local(k)) { + // The argument itself doesn't escape, but any fields might + fields_escapes = true; } - } else if (!call_analyzer.is_arg_local(k)) { - // The argument itself doesn't escape, but any fields might - ptset.Clear(); - PointsTo(ptset, arg, phase); - for( VectorSetI j(&ptset); j.test(); ++j ) { - uint pt = j.elem; - add_edge_from_fields(pt, _phantom_object, Type::OffsetBot); + set_escape_state(arg->_idx, PointsToNode::ArgEscape); + copy_dependencies = true; + } + + ptset.Clear(); + PointsTo(ptset, arg, phase); + for( VectorSetI j(&ptset); j.test(); ++j ) { + uint pt = j.elem; + if (global_escapes) { + //The argument global escapes, mark everything it could point to + set_escape_state(pt, PointsToNode::GlobalEscape); + } else { + if (fields_escapes) { + // The argument itself doesn't escape, but any fields might + add_edge_from_fields(pt, _phantom_object, Type::OffsetBot); + } + 
set_escape_state(pt, PointsToNode::ArgEscape); } } } } - call_analyzer.copy_dependencies(C()->dependencies()); + if (copy_dependencies) + call_analyzer->copy_dependencies(C()->dependencies()); break; } - // fall-through if not a Java method } default: - // Some other type of call, assume the worst case: all arguments + // Fall-through here if not a Java method or no analyzer information + // or some other type of call, assume the worst case: all arguments // globally escape. { // adjust escape state for outgoing arguments @@ -988,15 +1487,15 @@ VectorSet ptset(Thread::current()->resource_area()); for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { const Type* at = d->field_at(i); - if (at->isa_oopptr() != NULL) { - Node *arg = skip_casts(call->in(i)); + Node *arg = call->in(i)->uncast(); + set_escape_state(arg->_idx, PointsToNode::GlobalEscape); ptset.Clear(); PointsTo(ptset, arg, phase); for( VectorSetI j(&ptset); j.test(); ++j ) { uint pt = j.elem; - set_escape_state(pt, PointsToNode::GlobalEscape); + PointsToNode *ptadr = ptnode_adr(pt); } } } @@ -1004,15 +1503,9 @@ } } void ConnectionGraph::process_call_result(ProjNode *resproj, PhaseTransform *phase) { - CallNode *call = resproj->in(0)->as_Call(); - PointsToNode *ptadr = ptnode_adr(resproj->_idx); - ptadr->_node = resproj; - ptadr->set_node_type(PointsToNode::LocalVar); - set_escape_state(resproj->_idx, PointsToNode::UnknownEscape); - _processed.set(resproj->_idx); - + CallNode *call = resproj->in(0)->as_Call(); switch (call->Opcode()) { case Op_Allocate: { @@ -1028,36 +1521,40 @@ ciInstanceKlass* ciik = cik->as_instance_klass(); PointsToNode *ptadr = ptnode_adr(call->_idx); - ptadr->set_node_type(PointsToNode::JavaObject); + PointsToNode::EscapeState es; + uint edge_to; if (cik->is_subclass_of(_compile->env()->Thread_klass()) || ciik->has_finalizer()) { - set_escape_state(call->_idx, PointsToNode::GlobalEscape); - add_pointsto_edge(resproj->_idx, _phantom_object); + es = PointsToNode::GlobalEscape; + edge_to 
= _phantom_object; // Could not be worse } else { - set_escape_state(call->_idx, PointsToNode::NoEscape); - add_pointsto_edge(resproj->_idx, call->_idx); + es = PointsToNode::NoEscape; + edge_to = call->_idx; } - _processed.set(call->_idx); + set_escape_state(call->_idx, es); + add_pointsto_edge(resproj->_idx, edge_to); + _processed.set(resproj->_idx); break; } case Op_AllocateArray: { PointsToNode *ptadr = ptnode_adr(call->_idx); - ptadr->set_node_type(PointsToNode::JavaObject); + int length = call->in(AllocateNode::ALength)->find_int_con(-1); + if (length < 0 || length > EliminateAllocationArraySizeLimit) { + // Not scalar replaceable if the length is not constant or too big. + ptadr->_scalar_replaceable = false; + } set_escape_state(call->_idx, PointsToNode::NoEscape); - _processed.set(call->_idx); add_pointsto_edge(resproj->_idx, call->_idx); + _processed.set(resproj->_idx); break; } - case Op_Lock: - case Op_Unlock: - break; - case Op_CallStaticJava: // For a static call, we know exactly what method is being called. // Use bytecode estimator to record whether the call's return value escapes { + bool done = true; const TypeTuple *r = call->tf()->range(); const Type* ret_type = NULL; @@ -1066,32 +1563,45 @@ // Note: we use isa_ptr() instead of isa_oopptr() here because the // _multianewarray functions return a TypeRawPtr. 
- if (ret_type == NULL || ret_type->isa_ptr() == NULL) + if (ret_type == NULL || ret_type->isa_ptr() == NULL) { + _processed.set(resproj->_idx); break; // doesn't return a pointer type - + } ciMethod *meth = call->as_CallJava()->method(); + const TypeTuple * d = call->tf()->domain(); if (meth == NULL) { // not a Java method, assume global escape set_escape_state(call->_idx, PointsToNode::GlobalEscape); if (resproj != NULL) add_pointsto_edge(resproj->_idx, _phantom_object); } else { - BCEscapeAnalyzer call_analyzer(meth); + BCEscapeAnalyzer *call_analyzer = meth->get_bcea(); VectorSet ptset(Thread::current()->resource_area()); + bool copy_dependencies = false; - if (call_analyzer.is_return_local() && resproj != NULL) { + if (call_analyzer->is_return_allocated()) { + // Returns a newly allocated unescaped object, simply + // update dependency information. + // Mark it as NoEscape so that objects referenced by + // it's fields will be marked as NoEscape at least. + set_escape_state(call->_idx, PointsToNode::NoEscape); + if (resproj != NULL) + add_pointsto_edge(resproj->_idx, call->_idx); + copy_dependencies = true; + } else if (call_analyzer->is_return_local() && resproj != NULL) { // determine whether any arguments are returned - const TypeTuple * d = call->tf()->domain(); set_escape_state(call->_idx, PointsToNode::NoEscape); for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { const Type* at = d->field_at(i); if (at->isa_oopptr() != NULL) { - Node *arg = skip_casts(call->in(i)); + Node *arg = call->in(i)->uncast(); - if (call_analyzer.is_arg_returned(i - TypeFunc::Parms)) { + if (call_analyzer->is_arg_returned(i - TypeFunc::Parms)) { PointsToNode *arg_esp = _nodes->adr_at(arg->_idx); - if (arg_esp->node_type() == PointsToNode::JavaObject) + if (arg_esp->node_type() == PointsToNode::UnknownType) + done = false; + else if (arg_esp->node_type() == PointsToNode::JavaObject) add_pointsto_edge(resproj->_idx, arg->_idx); else add_deferred_edge(resproj->_idx, arg->_idx); @@ 
-1099,13 +1609,25 @@ } } } + copy_dependencies = true; } else { set_escape_state(call->_idx, PointsToNode::GlobalEscape); if (resproj != NULL) add_pointsto_edge(resproj->_idx, _phantom_object); + for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { + const Type* at = d->field_at(i); + if (at->isa_oopptr() != NULL) { + Node *arg = call->in(i)->uncast(); + PointsToNode *arg_esp = _nodes->adr_at(arg->_idx); + arg_esp->_hidden_alias = true; + } + } } - call_analyzer.copy_dependencies(C()->dependencies()); + if (copy_dependencies) + call_analyzer->copy_dependencies(C()->dependencies()); } + if (done) + _processed.set(resproj->_idx); break; } @@ -1114,7 +1636,6 @@ // returned value, if any, globally escapes. { const TypeTuple *r = call->tf()->range(); - if (r->cnt() > TypeFunc::Parms) { const Type* ret_type = r->field_at(TypeFunc::Parms); @@ -1122,142 +1643,385 @@ // _multianewarray functions return a TypeRawPtr. if (ret_type->isa_ptr() != NULL) { PointsToNode *ptadr = ptnode_adr(call->_idx); - ptadr->set_node_type(PointsToNode::JavaObject); set_escape_state(call->_idx, PointsToNode::GlobalEscape); if (resproj != NULL) add_pointsto_edge(resproj->_idx, _phantom_object); } } - } - } -} - -void ConnectionGraph::record_for_escape_analysis(Node *n) { - if (_collecting) { - if (n->is_Phi()) { - PhiNode *phi = n->as_Phi(); - const Type *pt = phi->type(); - if ((pt->isa_oopptr() != NULL) || pt == TypePtr::NULL_PTR) { - PointsToNode *ptn = ptnode_adr(phi->_idx); - ptn->set_node_type(PointsToNode::LocalVar); - ptn->_node = n; - _deferred.push(n); - } + _processed.set(resproj->_idx); } } } -void ConnectionGraph::record_escape_work(Node *n, PhaseTransform *phase) { +// Populate Connection Graph with Ideal nodes and create simple +// connection graph edges (do not need to check the node_type of inputs +// or to call PointsTo() to walk the connection graph). 
+void ConnectionGraph::record_for_escape_analysis(Node *n, PhaseTransform *phase) { + if (_processed.test(n->_idx)) + return; // No need to redefine node's state. + + if (n->is_Call()) { + // Arguments to allocation and locking don't escape. + if (n->is_Allocate()) { + add_node(n, PointsToNode::JavaObject, PointsToNode::UnknownEscape, true); + record_for_optimizer(n); + } else if (n->is_Lock() || n->is_Unlock()) { + // Put Lock and Unlock nodes on IGVN worklist to process them during + // the first IGVN optimization when escape information is still available. + record_for_optimizer(n); + _processed.set(n->_idx); + } else { + // Have to process call's arguments first. + PointsToNode::NodeType nt = PointsToNode::UnknownType; + + // Check if a call returns an object. + const TypeTuple *r = n->as_Call()->tf()->range(); + if (r->cnt() > TypeFunc::Parms && + n->as_Call()->proj_out(TypeFunc::Parms) != NULL) { + // Note: use isa_ptr() instead of isa_oopptr() here because + // the _multianewarray functions return a TypeRawPtr. + if (r->field_at(TypeFunc::Parms)->isa_ptr() != NULL) { + nt = PointsToNode::JavaObject; + } + } + add_node(n, nt, PointsToNode::UnknownEscape, false); + } + return; + } + + // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because + // ThreadLocal has RawPrt type. + switch (n->Opcode()) { + case Op_AddP: + { + add_node(n, PointsToNode::Field, PointsToNode::UnknownEscape, false); + break; + } + case Op_CastX2P: + { // "Unsafe" memory access. + add_node(n, PointsToNode::JavaObject, PointsToNode::GlobalEscape, true); + break; + } + case Op_CastPP: + case Op_CheckCastPP: + { + add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false); + int ti = n->in(1)->_idx; + PointsToNode::NodeType nt = _nodes->adr_at(ti)->node_type(); + if (nt == PointsToNode::UnknownType) { + _delayed_worklist.push(n); // Process it later. 
+ break; + } else if (nt == PointsToNode::JavaObject) { + add_pointsto_edge(n->_idx, ti); + } else { + add_deferred_edge(n->_idx, ti); + } + _processed.set(n->_idx); + break; + } + case Op_ConP: + { + // assume all pointer constants globally escape except for null + PointsToNode::EscapeState es; + if (phase->type(n) == TypePtr::NULL_PTR) + es = PointsToNode::NoEscape; + else + es = PointsToNode::GlobalEscape; - int opc = n->Opcode(); + add_node(n, PointsToNode::JavaObject, es, true); + break; + } + case Op_CreateEx: + { + // assume that all exception objects globally escape + add_node(n, PointsToNode::JavaObject, PointsToNode::GlobalEscape, true); + break; + } + case Op_LoadKlass: + { + add_node(n, PointsToNode::JavaObject, PointsToNode::GlobalEscape, true); + break; + } + case Op_LoadP: + { + const Type *t = phase->type(n); + if (t->isa_ptr() == NULL) { + _processed.set(n->_idx); + return; + } + add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false); + break; + } + case Op_Parm: + { + _processed.set(n->_idx); // No need to redefine it state. + uint con = n->as_Proj()->_con; + if (con < TypeFunc::Parms) + return; + const Type *t = n->in(0)->as_Start()->_domain->field_at(con); + if (t->isa_ptr() == NULL) + return; + // We have to assume all input parameters globally escape + // (Note: passing 'false' since _processed is already set). 
+ add_node(n, PointsToNode::JavaObject, PointsToNode::GlobalEscape, false); + break; + } + case Op_Phi: + { + if (n->as_Phi()->type()->isa_ptr() == NULL) { + // nothing to do if not an oop + _processed.set(n->_idx); + return; + } + add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false); + uint i; + for (i = 1; i < n->req() ; i++) { + Node* in = n->in(i); + if (in == NULL) + continue; // ignore NULL + in = in->uncast(); + if (in->is_top() || in == n) + continue; // ignore top or inputs which go back this node + int ti = in->_idx; + PointsToNode::NodeType nt = _nodes->adr_at(ti)->node_type(); + if (nt == PointsToNode::UnknownType) { + break; + } else if (nt == PointsToNode::JavaObject) { + add_pointsto_edge(n->_idx, ti); + } else { + add_deferred_edge(n->_idx, ti); + } + } + if (i >= n->req()) + _processed.set(n->_idx); + else + _delayed_worklist.push(n); + break; + } + case Op_Proj: + { + // we are only interested in the result projection from a call + if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() ) { + add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false); + process_call_result(n->as_Proj(), phase); + if (!_processed.test(n->_idx)) { + // The call's result may need to be processed later if the call + // returns it's argument and the argument is not processed yet. + _delayed_worklist.push(n); + } + } else { + _processed.set(n->_idx); + } + break; + } + case Op_Return: + { + if( n->req() > TypeFunc::Parms && + phase->type(n->in(TypeFunc::Parms))->isa_oopptr() ) { + // Treat Return value as LocalVar with GlobalEscape escape state. + add_node(n, PointsToNode::LocalVar, PointsToNode::GlobalEscape, false); + int ti = n->in(TypeFunc::Parms)->_idx; + PointsToNode::NodeType nt = _nodes->adr_at(ti)->node_type(); + if (nt == PointsToNode::UnknownType) { + _delayed_worklist.push(n); // Process it later. 
+ break; + } else if (nt == PointsToNode::JavaObject) { + add_pointsto_edge(n->_idx, ti); + } else { + add_deferred_edge(n->_idx, ti); + } + } + _processed.set(n->_idx); + break; + } + case Op_StoreP: + { + const Type *adr_type = phase->type(n->in(MemNode::Address)); + if (adr_type->isa_oopptr()) { + add_node(n, PointsToNode::UnknownType, PointsToNode::UnknownEscape, false); + } else { + Node* adr = n->in(MemNode::Address); + if (adr->is_AddP() && phase->type(adr) == TypeRawPtr::NOTNULL && + adr->in(AddPNode::Address)->is_Proj() && + adr->in(AddPNode::Address)->in(0)->is_Allocate()) { + add_node(n, PointsToNode::UnknownType, PointsToNode::UnknownEscape, false); + // We are computing a raw address for a store captured + // by an Initialize compute an appropriate address type. + int offs = (int)phase->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot); + assert(offs != Type::OffsetBot, "offset must be a constant"); + } else { + _processed.set(n->_idx); + return; + } + } + break; + } + case Op_StorePConditional: + case Op_CompareAndSwapP: + { + const Type *adr_type = phase->type(n->in(MemNode::Address)); + if (adr_type->isa_oopptr()) { + add_node(n, PointsToNode::UnknownType, PointsToNode::UnknownEscape, false); + } else { + _processed.set(n->_idx); + return; + } + break; + } + case Op_ThreadLocal: + { + add_node(n, PointsToNode::JavaObject, PointsToNode::ArgEscape, true); + break; + } + default: + ; + // nothing to do + } + return; +} + +void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) { + // Don't set processed bit for AddP, LoadP, StoreP since + // they may need more then one pass to process. + if (_processed.test(n->_idx)) + return; // No need to redefine node's state. 
+ PointsToNode *ptadr = ptnode_adr(n->_idx); - if (_processed.test(n->_idx)) - return; - - ptadr->_node = n; if (n->is_Call()) { CallNode *call = n->as_Call(); process_call_arguments(call, phase); + _processed.set(n->_idx); return; } - switch (opc) { + switch (n->Opcode()) { case Op_AddP: { - Node *base = skip_casts(n->in(AddPNode::Base)); - ptadr->set_node_type(PointsToNode::Field); - - // create a field edge to this node from everything adr could point to + Node *base = get_addp_base(n); + // Create a field edge to this node from everything base could point to. VectorSet ptset(Thread::current()->resource_area()); PointsTo(ptset, base, phase); for( VectorSetI i(&ptset); i.test(); ++i ) { uint pt = i.elem; - add_field_edge(pt, n->_idx, type_to_offset(phase->type(n))); + add_field_edge(pt, n->_idx, address_offset(n, phase)); + } + break; + } + case Op_CastX2P: + { + assert(false, "Op_CastX2P"); + break; + } + case Op_CastPP: + case Op_CheckCastPP: + { + int ti = n->in(1)->_idx; + if (_nodes->adr_at(ti)->node_type() == PointsToNode::JavaObject) { + add_pointsto_edge(n->_idx, ti); + } else { + add_deferred_edge(n->_idx, ti); + } + _processed.set(n->_idx); + break; + } + case Op_ConP: + { + assert(false, "Op_ConP"); + break; + } + case Op_CreateEx: + { + assert(false, "Op_CreateEx"); + break; + } + case Op_LoadKlass: + { + assert(false, "Op_LoadKlass"); + break; + } + case Op_LoadP: + { + const Type *t = phase->type(n); +#ifdef ASSERT + if (t->isa_ptr() == NULL) + assert(false, "Op_LoadP"); +#endif + + Node* adr = n->in(MemNode::Address)->uncast(); + const Type *adr_type = phase->type(adr); + Node* adr_base; + if (adr->is_AddP()) { + adr_base = get_addp_base(adr); + } else { + adr_base = adr; + } + + // For everything "adr_base" could point to, create a deferred edge from + // this node to each field with the same offset. 
+ VectorSet ptset(Thread::current()->resource_area()); + PointsTo(ptset, adr_base, phase); + int offset = address_offset(adr, phase); + for( VectorSetI i(&ptset); i.test(); ++i ) { + uint pt = i.elem; + add_deferred_edge_to_fields(n->_idx, pt, offset); } break; } case Op_Parm: { - ProjNode *nproj = n->as_Proj(); - uint con = nproj->_con; - if (con < TypeFunc::Parms) - return; - const Type *t = nproj->in(0)->as_Start()->_domain->field_at(con); - if (t->isa_ptr() == NULL) - return; - ptadr->set_node_type(PointsToNode::JavaObject); - if (t->isa_oopptr() != NULL) { - set_escape_state(n->_idx, PointsToNode::ArgEscape); - } else { - // this must be the incoming state of an OSR compile, we have to assume anything - // passed in globally escapes - assert(_compile->is_osr_compilation(), "bad argument type for non-osr compilation"); - set_escape_state(n->_idx, PointsToNode::GlobalEscape); - } - _processed.set(n->_idx); + assert(false, "Op_Parm"); break; } case Op_Phi: { - PhiNode *phi = n->as_Phi(); - if (phi->type()->isa_oopptr() == NULL) - return; // nothing to do if not an oop - ptadr->set_node_type(PointsToNode::LocalVar); - process_phi_escape(phi, phase); - break; - } - case Op_CreateEx: - { - // assume that all exception objects globally escape - ptadr->set_node_type(PointsToNode::JavaObject); - set_escape_state(n->_idx, PointsToNode::GlobalEscape); - _processed.set(n->_idx); - break; - } - case Op_ConP: - { - const Type *t = phase->type(n); - ptadr->set_node_type(PointsToNode::JavaObject); - // assume all pointer constants globally escape except for null - if (t == TypePtr::NULL_PTR) - set_escape_state(n->_idx, PointsToNode::NoEscape); - else - set_escape_state(n->_idx, PointsToNode::GlobalEscape); +#ifdef ASSERT + if (n->as_Phi()->type()->isa_ptr() == NULL) + assert(false, "Op_Phi"); +#endif + for (uint i = 1; i < n->req() ; i++) { + Node* in = n->in(i); + if (in == NULL) + continue; // ignore NULL + in = in->uncast(); + if (in->is_top() || in == n) + continue; // 
ignore top or inputs which go back this node + int ti = in->_idx; + if (_nodes->adr_at(in->_idx)->node_type() == PointsToNode::JavaObject) { + add_pointsto_edge(n->_idx, ti); + } else { + add_deferred_edge(n->_idx, ti); + } + } _processed.set(n->_idx); break; } - case Op_LoadKlass: + case Op_Proj: { - ptadr->set_node_type(PointsToNode::JavaObject); - set_escape_state(n->_idx, PointsToNode::GlobalEscape); - _processed.set(n->_idx); + // we are only interested in the result projection from a call + if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() ) { + process_call_result(n->as_Proj(), phase); + assert(_processed.test(n->_idx), "all call results should be processed"); + } else { + assert(false, "Op_Proj"); + } break; } - case Op_LoadP: + case Op_Return: { - const Type *t = phase->type(n); - if (!t->isa_oopptr()) - return; - ptadr->set_node_type(PointsToNode::LocalVar); - set_escape_state(n->_idx, PointsToNode::UnknownEscape); - - Node *adr = skip_casts(n->in(MemNode::Address)); - const Type *adr_type = phase->type(adr); - Node *adr_base = skip_casts((adr->Opcode() == Op_AddP) ? 
adr->in(AddPNode::Base) : adr); - - // For everything "adr" could point to, create a deferred edge from - // this node to each field with the same offset as "adr_type" - VectorSet ptset(Thread::current()->resource_area()); - PointsTo(ptset, adr_base, phase); - // If ptset is empty, then this value must have been set outside - // this method, so we add the phantom node - if (ptset.Size() == 0) - ptset.set(_phantom_object); - for( VectorSetI i(&ptset); i.test(); ++i ) { - uint pt = i.elem; - add_deferred_edge_to_fields(n->_idx, pt, type_to_offset(adr_type)); +#ifdef ASSERT + if( n->req() <= TypeFunc::Parms || + !phase->type(n->in(TypeFunc::Parms))->isa_oopptr() ) { + assert(false, "Op_Return"); } +#endif + int ti = n->in(TypeFunc::Parms)->_idx; + if (_nodes->adr_at(ti)->node_type() == PointsToNode::JavaObject) { + add_pointsto_edge(n->_idx, ti); + } else { + add_deferred_edge(n->_idx, ti); + } + _processed.set(n->_idx); break; } case Op_StoreP: @@ -1265,45 +2029,28 @@ case Op_CompareAndSwapP: { Node *adr = n->in(MemNode::Address); - Node *val = skip_casts(n->in(MemNode::ValueIn)); const Type *adr_type = phase->type(adr); +#ifdef ASSERT if (!adr_type->isa_oopptr()) - return; + assert(phase->type(adr) == TypeRawPtr::NOTNULL, "Op_StoreP"); +#endif - assert(adr->Opcode() == Op_AddP, "expecting an AddP"); - Node *adr_base = adr->in(AddPNode::Base); - - // For everything "adr_base" could point to, create a deferred edge to "val" from each field - // with the same offset as "adr_type" + assert(adr->is_AddP(), "expecting an AddP"); + Node *adr_base = get_addp_base(adr); + Node *val = n->in(MemNode::ValueIn)->uncast(); + // For everything "adr_base" could point to, create a deferred edge + // to "val" from each field with the same offset. 
VectorSet ptset(Thread::current()->resource_area()); PointsTo(ptset, adr_base, phase); for( VectorSetI i(&ptset); i.test(); ++i ) { uint pt = i.elem; - add_edge_from_fields(pt, val->_idx, type_to_offset(adr_type)); + add_edge_from_fields(pt, val->_idx, address_offset(adr, phase)); } break; } - case Op_Proj: + case Op_ThreadLocal: { - ProjNode *nproj = n->as_Proj(); - Node *n0 = nproj->in(0); - // we are only interested in the result projection from a call - if (nproj->_con == TypeFunc::Parms && n0->is_Call() ) { - process_call_result(nproj, phase); - } - - break; - } - case Op_CastPP: - case Op_CheckCastPP: - { - ptadr->set_node_type(PointsToNode::LocalVar); - int ti = n->in(1)->_idx; - if (_nodes->at(ti).node_type() == PointsToNode::JavaObject) { - add_pointsto_edge(n->_idx, ti); - } else { - add_deferred_edge(n->_idx, ti); - } + assert(false, "Op_ThreadLocal"); break; } default: @@ -1312,34 +2059,48 @@ } } -void ConnectionGraph::record_escape(Node *n, PhaseTransform *phase) { - if (_collecting) - record_escape_work(n, phase); -} - #ifndef PRODUCT void ConnectionGraph::dump() { PhaseGVN *igvn = _compile->initial_gvn(); bool first = true; - for (uint ni = 0; ni < (uint)_nodes->length(); ni++) { - PointsToNode *esp = _nodes->adr_at(ni); - if (esp->node_type() == PointsToNode::UnknownType || esp->_node == NULL) + uint size = (uint)_nodes->length(); + for (uint ni = 0; ni < size; ni++) { + PointsToNode *ptn = _nodes->adr_at(ni); + PointsToNode::NodeType ptn_type = ptn->node_type(); + + if (ptn_type != PointsToNode::JavaObject || ptn->_node == NULL) continue; - PointsToNode::EscapeState es = escape_state(esp->_node, igvn); - if (es == PointsToNode::NoEscape || (Verbose && - (es != PointsToNode::UnknownEscape || esp->edge_count() != 0))) { - // don't print null pointer node which almost every method has - if (esp->_node->Opcode() != Op_ConP || igvn->type(esp->_node) != TypePtr::NULL_PTR) { - if (first) { - tty->print("======== Connection graph for "); - 
C()->method()->print_short_name(); - tty->cr(); - first = false; + PointsToNode::EscapeState es = escape_state(ptn->_node, igvn); + if (ptn->_node->is_Allocate() && (es == PointsToNode::NoEscape || Verbose)) { + if (first) { + tty->cr(); + tty->print("======== Connection graph for "); + C()->method()->print_short_name(); + tty->cr(); + first = false; + } + tty->print("%6d ", ni); + ptn->dump(); + // Print all locals which reference this allocation + for (uint li = ni; li < size; li++) { + PointsToNode *ptn_loc = _nodes->adr_at(li); + PointsToNode::NodeType ptn_loc_type = ptn_loc->node_type(); + if ( ptn_loc_type == PointsToNode::LocalVar && ptn_loc->_node != NULL && + ptn_loc->edge_count() == 1 && ptn_loc->edge_target(0) == ni ) { + tty->print("%6d LocalVar [[%d]]", li, ni); + _nodes->adr_at(li)->_node->dump(); } - tty->print("%4d ", ni); - esp->dump(); } + if (Verbose) { + // Print all fields which reference this allocation + for (uint i = 0; i < ptn->edge_count(); i++) { + uint ei = ptn->edge_target(i); + tty->print("%6d Field [[%d]]", ei, ni); + _nodes->adr_at(ei)->_node->dump(); + } + } + tty->cr(); } } } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/escape.hpp --- a/src/share/vm/opto/escape.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/escape.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -25,14 +25,15 @@ // // Adaptation for C2 of the escape analysis algorithm described in: // -// [Choi99] Jong-Deok Shoi, Manish Gupta, Mauricio Seffano, Vugranam C. Sreedhar, -// Sam Midkiff, "Escape Analysis for Java", Procedings of ACM SIGPLAN -// OOPSLA Conference, November 1, 1999 +// [Choi99] Jong-Deok Shoi, Manish Gupta, Mauricio Seffano, +// Vugranam C. Sreedhar, Sam Midkiff, +// "Escape Analysis for Java", Procedings of ACM SIGPLAN +// OOPSLA Conference, November 1, 1999 // // The flow-insensitive analysis described in the paper has been implemented. // -// The analysis requires construction of a "connection graph" (CG) for the method being -// analyzed. 
The nodes of the connection graph are: +// The analysis requires construction of a "connection graph" (CG) for +// the method being analyzed. The nodes of the connection graph are: // // - Java objects (JO) // - Local variables (LV) @@ -40,47 +41,51 @@ // // The CG contains 3 types of edges: // -// - PointsTo (-P>) {LV,OF} to JO -// - Deferred (-D>) from {LV, OF} to {LV, OF} +// - PointsTo (-P>) {LV, OF} to JO +// - Deferred (-D>) from {LV, OF} to {LV, OF} // - Field (-F>) from JO to OF // // The following utility functions is used by the algorithm: // -// PointsTo(n) - n is any CG node, it returns the set of JO that n could -// point to. +// PointsTo(n) - n is any CG node, it returns the set of JO that n could +// point to. // -// The algorithm describes how to construct the connection graph in the following 4 cases: +// The algorithm describes how to construct the connection graph +// in the following 4 cases: // // Case Edges Created // -// (1) p = new T() LV -P> JO -// (2) p = q LV -D> LV -// (3) p.f = q JO -F> OF, OF -D> LV -// (4) p = q.f JO -F> OF, LV -D> OF +// (1) p = new T() LV -P> JO +// (2) p = q LV -D> LV +// (3) p.f = q JO -F> OF, OF -D> LV +// (4) p = q.f JO -F> OF, LV -D> OF // -// In all these cases, p and q are local variables. For static field references, we can -// construct a local variable containing a reference to the static memory. +// In all these cases, p and q are local variables. For static field +// references, we can construct a local variable containing a reference +// to the static memory. // // C2 does not have local variables. However for the purposes of constructing // the connection graph, the following IR nodes are treated as local variables: // Phi (pointer values) // LoadP -// Proj (value returned from callnodes including allocations) -// CheckCastPP +// Proj#5 (value returned from callnodes including allocations) +// CheckCastPP, CastPP // -// The LoadP, Proj and CheckCastPP behave like variables assigned to only once. 
Only -// a Phi can have multiple assignments. Each input to a Phi is treated +// The LoadP, Proj and CheckCastPP behave like variables assigned to only once. +// Only a Phi can have multiple assignments. Each input to a Phi is treated // as an assignment to it. // -// The following note types are JavaObject: +// The following node types are JavaObject: // // top() // Allocate // AllocateArray // Parm (for incoming arguments) +// CastX2P ("unsafe" operations) // CreateEx // ConP // LoadKlass +// ThreadLocal // // AddP nodes are fields. // @@ -89,7 +94,7 @@ // source. This results in a graph with no deferred edges, only: // // LV -P> JO -// OF -P> JO +// OF -P> JO (the object whose oop is stored in the field) // JO -F> OF // // Then, for each node which is GlobalEscape, anything it could point to @@ -110,17 +115,18 @@ friend class ConnectionGraph; public: typedef enum { - UnknownType = 0, - JavaObject = 1, - LocalVar = 2, - Field = 3 + UnknownType = 0, + JavaObject = 1, + LocalVar = 2, + Field = 3 } NodeType; typedef enum { UnknownEscape = 0, - NoEscape = 1, - ArgEscape = 2, - GlobalEscape = 3 + NoEscape = 1, // A scalar replaceable object with unique type. + ArgEscape = 2, // An object passed as argument or referenced by + // argument (and not globally escape during call). + GlobalEscape = 3 // An object escapes the method and thread. } EscapeState; typedef enum { @@ -140,18 +146,24 @@ NodeType _type; EscapeState _escape; - GrowableArray* _edges; // outgoing edges - int _offset; // for fields + GrowableArray* _edges; // outgoing edges - bool _unique_type; // For allocated objects, this node may be a unique type public: - Node* _node; // Ideal node corresponding to this PointsTo node - int _inputs_processed; // the number of Phi inputs that have been processed so far - bool _hidden_alias; // this node is an argument to a function which may return it - // creating a hidden alias + Node* _node; // Ideal node corresponding to this PointsTo node. 
+ int _offset; // Object fields offsets. + bool _scalar_replaceable;// Not escaped object could be replaced with scalar + bool _hidden_alias; // This node is an argument to a function. + // which may return it creating a hidden alias. + PointsToNode(): + _type(UnknownType), + _escape(UnknownEscape), + _edges(NULL), + _node(NULL), + _offset(-1), + _scalar_replaceable(true), + _hidden_alias(false) {} - PointsToNode(): _offset(-1), _type(UnknownType), _escape(UnknownEscape), _edges(NULL), _node(NULL), _inputs_processed(0), _hidden_alias(false), _unique_type(true) {} EscapeState escape_state() const { return _escape; } NodeType node_type() const { return _type;} @@ -182,22 +194,28 @@ class ConnectionGraph: public ResourceObj { private: - enum { - INITIAL_NODE_COUNT = 100 // initial size of _nodes array - }; + GrowableArray* _nodes; // Connection graph nodes indexed + // by ideal node index. + Unique_Node_List _delayed_worklist; // Nodes to be processed before + // the call build_connection_graph(). + + VectorSet _processed; // Records which nodes have been + // processed. - GrowableArray* _nodes; // connection graph nodes Indexed by ideal - // node index - Unique_Node_List _deferred; // Phi's to be processed after parsing - VectorSet _processed; // records which nodes have been processed - bool _collecting; // indicates whether escape information is - // still being collected. If false, no new - // nodes will be processed - uint _phantom_object; // index of globally escaping object that - // pointer values loaded from a field which - // has not been set are assumed to point to - Compile * _compile; // Compile object for current compilation + bool _collecting; // Indicates whether escape information + // is still being collected. If false, + // no new nodes will be processed. + + bool _has_allocations; // Indicates whether method has any + // non-escaping allocations. 
+ + uint _phantom_object; // Index of globally escaping object + // that pointer values loaded from + // a field which has not been set + // are assumed to point to. + + Compile * _compile; // Compile object for current compilation // address of an element in _nodes. Used when the element is to be modified PointsToNode *ptnode_adr(uint idx) { @@ -208,8 +226,11 @@ return _nodes->adr_at(idx); } + // Add node to ConnectionGraph. + void add_node(Node *n, PointsToNode::NodeType nt, PointsToNode::EscapeState es, bool done); + // offset of a field reference - int type_to_offset(const Type *t); + int address_offset(Node* adr, PhaseTransform *phase); // compute the escape state for arguments to a call void process_call_arguments(CallNode *call, PhaseTransform *phase); @@ -217,12 +238,11 @@ // compute the escape state for the return value of a call void process_call_result(ProjNode *resproj, PhaseTransform *phase); - // compute the escape state of a Phi. This may be called multiple - // times as new inputs are added to the Phi. - void process_phi_escape(PhiNode *phi, PhaseTransform *phase); + // Populate Connection Graph with Ideal nodes. + void record_for_escape_analysis(Node *n, PhaseTransform *phase); - // compute the escape state of an ideal node. - void record_escape_work(Node *n, PhaseTransform *phase); + // Build Connection Graph and set nodes escape state. + void build_connection_graph(Node *n, PhaseTransform *phase); // walk the connection graph starting at the node corresponding to "n" and // add the index of everything it could point to, to "ptset". 
This may cause @@ -241,8 +261,8 @@ // a pointsto edge is added if it is a JavaObject void add_edge_from_fields(uint adr, uint to_i, int offs); - // Add a deferred edge from node given by "from_i" to any field of adr_i whose offset - // matches "offset" + // Add a deferred edge from node given by "from_i" to any field + // of adr_i whose offset matches "offset" void add_deferred_edge_to_fields(uint from_i, uint adr, int offs); @@ -262,6 +282,8 @@ PhiNode *create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray &orig_phi_worklist, PhaseGVN *igvn, bool &new_created); PhiNode *split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray &orig_phi_worklist, PhaseGVN *igvn); Node *find_mem(Node *mem, int alias_idx, PhaseGVN *igvn); + Node *find_inst_mem(Node *mem, int alias_idx,GrowableArray &orig_phi_worklist, PhaseGVN *igvn); + // Propagate unique types created for unescaped allocated objects // through the graph void split_unique_types(GrowableArray &alloc_worklist); @@ -285,26 +307,24 @@ // Set the escape state of a node void set_escape_state(uint ni, PointsToNode::EscapeState es); - // bypass any casts and return the node they refer to - Node * skip_casts(Node *n); - // Get Compile object for current compilation. Compile *C() const { return _compile; } public: ConnectionGraph(Compile *C); - // record a Phi for later processing. 
- void record_for_escape_analysis(Node *n); - - // process a node and fill in its connection graph node - void record_escape(Node *n, PhaseTransform *phase); - - // All nodes have been recorded, compute the escape information + // Compute the escape information void compute_escape(); // escape state of a node PointsToNode::EscapeState escape_state(Node *n, PhaseTransform *phase); + // other information we have collected + bool is_scalar_replaceable(Node *n) { + if (_collecting) + return false; + PointsToNode ptn = _nodes->at_grow(n->_idx); + return ptn.escape_state() == PointsToNode::NoEscape && ptn._scalar_replaceable; + } bool hidden_alias(Node *n) { if (_collecting) diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/gcm.cpp --- a/src/share/vm/opto/gcm.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/gcm.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -448,9 +448,9 @@ ResourceArea *area = Thread::current()->resource_area(); Node_List worklist_mem(area); // prior memory state to store Node_List worklist_store(area); // possible-def to explore + Node_List worklist_visited(area); // visited mergemem nodes Node_List non_early_stores(area); // all relevant stores outside of early bool must_raise_LCA = false; - DEBUG_ONLY(VectorSet should_not_repeat(area)); #ifdef TRACK_PHI_INPUTS // %%% This extra checking fails because MergeMem nodes are not GVNed. @@ -479,8 +479,8 @@ Node* initial_mem = load->in(MemNode::Memory); worklist_store.push(initial_mem); + worklist_visited.push(initial_mem); worklist_mem.push(NULL); - DEBUG_ONLY(should_not_repeat.test_set(initial_mem->_idx)); while (worklist_store.size() > 0) { // Examine a nearby store to see if it might interfere with our load. Node* mem = worklist_mem.pop(); @@ -494,18 +494,20 @@ || op == Op_MergeMem // internal node of tree we are searching ) { mem = store; // It's not a possibly interfering store. 
+ if (store == initial_mem) + initial_mem = NULL; // only process initial memory once + for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { store = mem->fast_out(i); if (store->is_MergeMem()) { // Be sure we don't get into combinatorial problems. // (Allow phis to be repeated; they can merge two relevant states.) - uint i = worklist_store.size(); - for (; i > 0; i--) { - if (worklist_store.at(i-1) == store) break; + uint j = worklist_visited.size(); + for (; j > 0; j--) { + if (worklist_visited.at(j-1) == store) break; } - if (i > 0) continue; // already on work list; do not repeat - DEBUG_ONLY(int repeated = should_not_repeat.test_set(store->_idx)); - assert(!repeated, "do not walk merges twice"); + if (j > 0) continue; // already on work list; do not repeat + worklist_visited.push(store); } worklist_mem.push(mem); worklist_store.push(store); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/graphKit.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -857,6 +857,13 @@ for (j = 0; j < l; j++) call->set_req(p++, in_map->in(k+j)); + // Copy any scalar object fields. + k = in_jvms->scloff(); + l = in_jvms->scl_size(); + out_jvms->set_scloff(p); + for (j = 0; j < l; j++) + call->set_req(p++, in_map->in(k+j)); + // Finish the new jvms. out_jvms->set_endoff(p); @@ -864,6 +871,7 @@ assert(out_jvms->depth() == in_jvms->depth(), "depth must match"); assert(out_jvms->loc_size() == in_jvms->loc_size(), "size must match"); assert(out_jvms->mon_size() == in_jvms->mon_size(), "size must match"); + assert(out_jvms->scl_size() == in_jvms->scl_size(), "size must match"); assert(out_jvms->debug_size() == in_jvms->debug_size(), "size must match"); // Update the two tail pointers in parallel. 
@@ -1447,7 +1455,7 @@ //-------------------------array_element_address------------------------- Node* GraphKit::array_element_address(Node* ary, Node* idx, BasicType elembt, const TypeInt* sizetype) { - uint shift = exact_log2(type2aelembytes[elembt]); + uint shift = exact_log2(type2aelembytes(elembt)); uint header = arrayOopDesc::base_offset_in_bytes(elembt); // short-circuit a common case (saves lots of confusing waste motion) @@ -2808,7 +2816,7 @@ ciInstanceKlass* ik = oop_type->klass()->as_instance_klass(); for (int i = 0, len = ik->nof_nonstatic_fields(); i < len; i++) { ciField* field = ik->nonstatic_field_at(i); - if (field->offset() >= TrackedInitializationLimit) + if (field->offset() >= TrackedInitializationLimit * HeapWordSize) continue; // do not bother to track really large numbers of fields // Find (or create) the alias category for this field: int fieldidx = C->alias_type(field)->index(); @@ -2914,10 +2922,22 @@ const TypeOopPtr* oop_type = tklass->as_instance_type(); // Now generate allocation code + + // With escape analysis, the entire memory state is needed to be able to + // eliminate the allocation. If the allocations cannot be eliminated, this + // will be optimized to the raw slice when the allocation is expanded. + Node *mem; + if (C->do_escape_analysis()) { + mem = reset_memory(); + set_all_memory(mem); + } else { + mem = memory(Compile::AliasIdxRaw); + } + AllocateNode* alloc = new (C, AllocateNode::ParmLimit) AllocateNode(C, AllocateNode::alloc_type(), - control(), memory(Compile::AliasIdxRaw), i_o(), + control(), mem, i_o(), size, klass_node, initial_slow_test); @@ -3048,11 +3068,23 @@ } // Now generate allocation code + + // With escape analysis, the entire memory state is needed to be able to + // eliminate the allocation. If the allocations cannot be eliminated, this + // will be optimized to the raw slice when the allocation is expanded. 
+ Node *mem; + if (C->do_escape_analysis()) { + mem = reset_memory(); + set_all_memory(mem); + } else { + mem = memory(Compile::AliasIdxRaw); + } + // Create the AllocateArrayNode and its result projections AllocateArrayNode* alloc = new (C, AllocateArrayNode::ParmLimit) AllocateArrayNode(C, AllocateArrayNode::alloc_type(), - control(), memory(Compile::AliasIdxRaw), i_o(), + control(), mem, i_o(), size, klass_node, initial_slow_test, length); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/ifnode.cpp --- a/src/share/vm/opto/ifnode.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/ifnode.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -543,6 +543,159 @@ return NULL; // Dead loop? Or hit root? } + +//------------------------------filtered_int_type-------------------------------- +// Return a possibly more restrictive type for val based on condition control flow for an if +const TypeInt* IfNode::filtered_int_type(PhaseGVN* gvn, Node *val, Node* if_proj) { + assert(if_proj && + (if_proj->Opcode() == Op_IfTrue || if_proj->Opcode() == Op_IfFalse), "expecting an if projection"); + if (if_proj->in(0) && if_proj->in(0)->is_If()) { + IfNode* iff = if_proj->in(0)->as_If(); + if (iff->in(1) && iff->in(1)->is_Bool()) { + BoolNode* bol = iff->in(1)->as_Bool(); + if (bol->in(1) && bol->in(1)->is_Cmp()) { + const CmpNode* cmp = bol->in(1)->as_Cmp(); + if (cmp->in(1) == val) { + const TypeInt* cmp2_t = gvn->type(cmp->in(2))->isa_int(); + if (cmp2_t != NULL) { + jint lo = cmp2_t->_lo; + jint hi = cmp2_t->_hi; + BoolTest::mask msk = if_proj->Opcode() == Op_IfTrue ? 
bol->_test._test : bol->_test.negate(); + switch (msk) { + case BoolTest::ne: + // Can't refine type + return NULL; + case BoolTest::eq: + return cmp2_t; + case BoolTest::lt: + lo = TypeInt::INT->_lo; + if (hi - 1 < hi) { + hi = hi - 1; + } + break; + case BoolTest::le: + lo = TypeInt::INT->_lo; + break; + case BoolTest::gt: + if (lo + 1 > lo) { + lo = lo + 1; + } + hi = TypeInt::INT->_hi; + break; + case BoolTest::ge: + // lo unchanged + hi = TypeInt::INT->_hi; + break; + } + const TypeInt* rtn_t = TypeInt::make(lo, hi, cmp2_t->_widen); + return rtn_t; + } + } + } + } + } + return NULL; +} + +//------------------------------fold_compares---------------------------- +// See if a pair of CmpIs can be converted into a CmpU. In some cases +// the direction of this if is determined by the preciding if so it +// can be eliminate entirely. Given an if testing (CmpI n c) check +// for an immediately control dependent if that is testing (CmpI n c2) +// and has one projection leading to this if and the other projection +// leading to a region that merges one of this ifs control +// projections. 
+// +// If +// / | +// / | +// / | +// If | +// /\ | +// / \ | +// / \ | +// / Region +// +Node* IfNode::fold_compares(PhaseGVN* phase) { + if (!EliminateAutoBox || Opcode() != Op_If) return NULL; + + Node* this_cmp = in(1)->in(1); + if (this_cmp != NULL && this_cmp->Opcode() == Op_CmpI && + this_cmp->in(2)->is_Con() && this_cmp->in(2) != phase->C->top()) { + Node* ctrl = in(0); + BoolNode* this_bool = in(1)->as_Bool(); + Node* n = this_cmp->in(1); + int hi = this_cmp->in(2)->get_int(); + if (ctrl != NULL && ctrl->is_Proj() && ctrl->outcnt() == 1 && + ctrl->in(0)->is_If() && + ctrl->in(0)->outcnt() == 2 && + ctrl->in(0)->in(1)->is_Bool() && + ctrl->in(0)->in(1)->in(1)->Opcode() == Op_CmpI && + ctrl->in(0)->in(1)->in(1)->in(2)->is_Con() && + ctrl->in(0)->in(1)->in(1)->in(1) == n) { + IfNode* dom_iff = ctrl->in(0)->as_If(); + Node* otherproj = dom_iff->proj_out(!ctrl->as_Proj()->_con); + if (otherproj->outcnt() == 1 && otherproj->unique_out()->is_Region() && + this_bool->_test._test != BoolTest::ne && this_bool->_test._test != BoolTest::eq) { + // Identify which proj goes to the region and which continues on + RegionNode* region = otherproj->unique_out()->as_Region(); + Node* success = NULL; + Node* fail = NULL; + for (int i = 0; i < 2; i++) { + Node* proj = proj_out(i); + if (success == NULL && proj->outcnt() == 1 && proj->unique_out() == region) { + success = proj; + } else if (fail == NULL) { + fail = proj; + } else { + success = fail = NULL; + } + } + if (success != NULL && fail != NULL && !region->has_phi()) { + int lo = dom_iff->in(1)->in(1)->in(2)->get_int(); + BoolNode* dom_bool = dom_iff->in(1)->as_Bool(); + Node* dom_cmp = dom_bool->in(1); + const TypeInt* failtype = filtered_int_type(phase, n, ctrl); + if (failtype != NULL) { + const TypeInt* type2 = filtered_int_type(phase, n, fail); + if (type2 != NULL) { + failtype = failtype->join(type2)->is_int(); + } else { + failtype = NULL; + } + } + + if (failtype != NULL && + dom_bool->_test._test != BoolTest::ne 
&& dom_bool->_test._test != BoolTest::eq) { + int bound = failtype->_hi - failtype->_lo + 1; + if (failtype->_hi != max_jint && failtype->_lo != min_jint && bound > 1) { + // Merge the two compares into a single unsigned compare by building (CmpU (n - lo) hi) + BoolTest::mask cond = fail->as_Proj()->_con ? BoolTest::lt : BoolTest::ge; + Node* adjusted = phase->transform(new (phase->C, 3) SubINode(n, phase->intcon(failtype->_lo))); + Node* newcmp = phase->transform(new (phase->C, 3) CmpUNode(adjusted, phase->intcon(bound))); + Node* newbool = phase->transform(new (phase->C, 2) BoolNode(newcmp, cond)); + phase->hash_delete(dom_iff); + dom_iff->set_req(1, phase->intcon(ctrl->as_Proj()->_con)); + phase->is_IterGVN()->_worklist.push(dom_iff); + phase->hash_delete(this); + set_req(1, newbool); + return this; + } + if (failtype->_lo > failtype->_hi) { + // previous if determines the result of this if so + // replace Bool with constant + phase->hash_delete(this); + set_req(1, phase->intcon(success->as_Proj()->_con)); + return this; + } + } + } + } + } + } + return NULL; +} + //------------------------------remove_useless_bool---------------------------- // Check for people making a useless boolean: things like // if( (x < y ? true : false) ) { ... } @@ -744,6 +897,11 @@ // Normal equivalent-test check. if( !dom ) return NULL; // Dead loop? + Node* result = fold_compares(phase); + if (result != NULL) { + return result; + } + // Search up the dominator tree for an If with an identical test while( dom->Opcode() != op || // Not same opcode? dom->in(1) != in(1) || // Not same input 1? 
diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/library_call.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -2097,7 +2097,7 @@ int type_words = type2size[type]; // Cannot inline wide CAS on machines that don't support it natively - if (type2aelembytes[type] > BytesPerInt && !VM_Version::supports_cx8()) + if (type2aelembytes(type) > BytesPerInt && !VM_Version::supports_cx8()) return false; C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". @@ -3975,7 +3975,7 @@ // both indices are constants int s_offs = src_offset_inttype->get_con(); int d_offs = dest_offset_inttype->get_con(); - int element_size = type2aelembytes[t]; + int element_size = type2aelembytes(t); aligned = ((arrayOopDesc::base_offset_in_bytes(t) + s_offs * element_size) % HeapWordSize == 0) && ((arrayOopDesc::base_offset_in_bytes(t) + d_offs * element_size) % HeapWordSize == 0); if (s_offs >= d_offs) disjoint = true; @@ -4170,6 +4170,7 @@ && !_gvn.eqv_uncast(src, dest) && ((alloc = tightly_coupled_allocation(dest, slow_region)) != NULL) + && _gvn.find_int_con(alloc->in(AllocateNode::ALength), 1) > 0 && alloc->maybe_set_complete(&_gvn)) { // "You break it, you buy it." InitializeNode* init = alloc->initialization(); @@ -4389,7 +4390,7 @@ if (alloc != NULL && use_ReduceInitialCardMarks()) { // If we do not need card marks, copy using the jint or jlong stub. 
copy_type = LP64_ONLY(T_LONG) NOT_LP64(T_INT); - assert(type2aelembytes[basic_elem_type] == type2aelembytes[copy_type], + assert(type2aelembytes(basic_elem_type) == type2aelembytes(copy_type), "sizes agree"); } } @@ -4659,7 +4660,7 @@ Node* mem = memory(adr_type); // memory slice to operate on // scaling and rounding of indexes: - int scale = exact_log2(type2aelembytes[basic_elem_type]); + int scale = exact_log2(type2aelembytes(basic_elem_type)); int abase = arrayOopDesc::base_offset_in_bytes(basic_elem_type); int clear_low = (-1 << scale) & (BytesPerInt - 1); int bump_bit = (-1 << scale) & BytesPerInt; @@ -4753,7 +4754,7 @@ Node* dest, Node* dest_offset, Node* dest_size) { // See if there is an advantage from block transfer. - int scale = exact_log2(type2aelembytes[basic_elem_type]); + int scale = exact_log2(type2aelembytes(basic_elem_type)); if (scale >= LogBytesPerLong) return false; // it is already a block transfer diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/locknode.cpp --- a/src/share/vm/opto/locknode.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/locknode.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -36,7 +36,8 @@ uint BoxLockNode::size_of() const { return sizeof(*this); } -BoxLockNode::BoxLockNode( int slot ) : Node( Compile::current()->root() ), _slot(slot) { +BoxLockNode::BoxLockNode( int slot ) : Node( Compile::current()->root() ), + _slot(slot), _is_eliminated(false) { init_class_id(Class_BoxLock); init_flags(Flag_rematerialize); OptoReg::Name reg = OptoReg::stack2reg(_slot); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/locknode.hpp --- a/src/share/vm/opto/locknode.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/locknode.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -27,6 +27,7 @@ public: const int _slot; RegMask _inmask; + bool _is_eliminated; // indicates this lock was safely eliminated BoxLockNode( int lock ); virtual int Opcode() const; @@ -42,6 +43,10 @@ static OptoReg::Name stack_slot(Node* box_node); + bool 
is_eliminated() { return _is_eliminated; } + // mark lock as eliminated. + void set_eliminated() { _is_eliminated = true; } + #ifndef PRODUCT virtual void format( PhaseRegAlloc *, outputStream *st ) const; virtual void dump_spec(outputStream *st) const { st->print(" Lock %d",_slot); } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/loopTransform.cpp --- a/src/share/vm/opto/loopTransform.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/loopTransform.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1714,6 +1714,7 @@ // Gate unrolling, RCE and peeling efforts. if( !_child && // If not an inner loop, do not split !_irreducible && + _allow_optimizations && !tail()->is_top() ) { // Also ignore the occasional dead backedge if (!_has_call) { iteration_split_impl( phase, old_new ); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/loopnode.cpp --- a/src/share/vm/opto/loopnode.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/loopnode.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -651,7 +651,7 @@ while (if_cnt < if_limit) { if ((pred->Opcode() == Op_IfTrue || pred->Opcode() == Op_IfFalse)) { if_cnt++; - const TypeInt* if_t = filtered_type_at_if(val, pred); + const TypeInt* if_t = IfNode::filtered_int_type(&_igvn, val, pred); if (if_t != NULL) { if (rtn_t == NULL) { rtn_t = if_t; @@ -674,59 +674,6 @@ } -//------------------------------filtered_type_at_if-------------------------------- -// Return a possibly more restrictive type for val based on condition control flow for an if -const TypeInt* PhaseIdealLoop::filtered_type_at_if( Node* val, Node *if_proj) { - assert(if_proj && - (if_proj->Opcode() == Op_IfTrue || if_proj->Opcode() == Op_IfFalse), "expecting an if projection"); - if (if_proj->in(0) && if_proj->in(0)->is_If()) { - IfNode* iff = if_proj->in(0)->as_If(); - if (iff->in(1) && iff->in(1)->is_Bool()) { - BoolNode* bol = iff->in(1)->as_Bool(); - if (bol->in(1) && bol->in(1)->is_Cmp()) { - const CmpNode* cmp = bol->in(1)->as_Cmp(); - if (cmp->in(1) == val) 
{ - const TypeInt* cmp2_t = _igvn.type(cmp->in(2))->isa_int(); - if (cmp2_t != NULL) { - jint lo = cmp2_t->_lo; - jint hi = cmp2_t->_hi; - BoolTest::mask msk = if_proj->Opcode() == Op_IfTrue ? bol->_test._test : bol->_test.negate(); - switch (msk) { - case BoolTest::ne: - // Can't refine type - return NULL; - case BoolTest::eq: - return cmp2_t; - case BoolTest::lt: - lo = TypeInt::INT->_lo; - if (hi - 1 < hi) { - hi = hi - 1; - } - break; - case BoolTest::le: - lo = TypeInt::INT->_lo; - break; - case BoolTest::gt: - if (lo + 1 > lo) { - lo = lo + 1; - } - hi = TypeInt::INT->_hi; - break; - case BoolTest::ge: - // lo unchanged - hi = TypeInt::INT->_hi; - break; - } - const TypeInt* rtn_t = TypeInt::make(lo, hi, cmp2_t->_widen); - return rtn_t; - } - } - } - } - } - return NULL; -} - //------------------------------dump_spec-------------------------------------- // Dump special per-node info #ifndef PRODUCT @@ -1614,7 +1561,7 @@ // on just their loop-phi's for this pass of loop opts if( SplitIfBlocks && do_split_ifs ) { if (lpt->policy_range_check(this)) { - lpt->_rce_candidate = true; + lpt->_rce_candidate = 1; // = true } } } @@ -2198,7 +2145,7 @@ // as well? If so, then I found another entry into the loop. while( is_postvisited(l->_head) ) { // found irreducible - l->_irreducible = true; + l->_irreducible = 1; // = true l = l->_parent; _has_irreducible_loops = true; // Check for bad CFG here to prevent crash, and bailout of compile @@ -2252,6 +2199,12 @@ (iff->as_If()->_prob >= 0.01) ) innermost->_has_call = 1; } + } else if( n->is_Allocate() && n->as_Allocate()->_is_scalar_replaceable ) { + // Disable loop optimizations if the loop has a scalar replaceable + // allocation. This disabling may cause a potential performance lost + // if the allocation is not eliminated for some reason. 
+ innermost->_allow_optimizations = false; + innermost->_has_call = 1; // = true } } } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/loopnode.hpp --- a/src/share/vm/opto/loopnode.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/loopnode.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -289,13 +289,15 @@ _has_sfpt:1, // True if has non-call safepoint _rce_candidate:1; // True if candidate for range check elimination - Node_List* _required_safept; // A inner loop cannot delete these safepts; + Node_List* _required_safept; // A inner loop cannot delete these safepts; + bool _allow_optimizations; // Allow loop optimizations IdealLoopTree( PhaseIdealLoop* phase, Node *head, Node *tail ) : _parent(0), _next(0), _child(0), _head(head), _tail(tail), _phase(phase), _required_safept(NULL), + _allow_optimizations(true), _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0) { } @@ -850,7 +852,6 @@ const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); } // Helpers for filtered type const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl); - const TypeInt* filtered_type_at_if( Node* val, Node *if_proj); // Helper functions void register_new_node( Node *n, Node *blk ); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/loopopts.cpp --- a/src/share/vm/opto/loopopts.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/loopopts.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -29,10 +29,26 @@ //------------------------------split_thru_phi--------------------------------- // Split Node 'n' through merge point if there is enough win. 
Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) { + if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) { + // ConvI2L may have type information on it which is unsafe to push up + // so disable this for now + return NULL; + } int wins = 0; assert( !n->is_CFG(), "" ); assert( region->is_Region(), "" ); - Node *phi = new (C, region->req()) PhiNode( region, n->bottom_type() ); + + const Type* type = n->bottom_type(); + const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr(); + Node *phi; + if( t_oop != NULL && t_oop->is_instance_field() ) { + int iid = t_oop->instance_id(); + int index = C->get_alias_index(t_oop); + int offset = t_oop->offset(); + phi = new (C,region->req()) PhiNode(region, type, NULL, iid, index, offset); + } else { + phi = new (C,region->req()) PhiNode(region, type); + } uint old_unique = C->unique(); for( uint i = 1; i < region->req(); i++ ) { Node *x; @@ -435,9 +451,11 @@ // Check profitability int cost = 0; + int phis = 0; for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { Node *out = region->fast_out(i); if( !out->is_Phi() ) continue; // Ignore other control edges, etc + phis++; PhiNode* phi = out->as_Phi(); switch (phi->type()->basic_type()) { case T_LONG: @@ -489,6 +507,12 @@ } } if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo + Node* bol = iff->in(1); + assert( bol->Opcode() == Op_Bool, "" ); + int cmp_op = bol->in(1)->Opcode(); + // It is expensive to generate flags from a float compare. + // Avoid duplicated float compare. 
+ if( phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL; // -------------- // Now replace all Phis with CMOV's diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/macro.cpp --- a/src/share/vm/opto/macro.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/macro.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -54,15 +54,30 @@ uint new_dbg_start = newcall->tf()->domain()->cnt(); int jvms_adj = new_dbg_start - old_dbg_start; assert (new_dbg_start == newcall->req(), "argument count mismatch"); + + Dict* sosn_map = new Dict(cmpkey,hashkey); for (uint i = old_dbg_start; i < oldcall->req(); i++) { - newcall->add_req(oldcall->in(i)); + Node* old_in = oldcall->in(i); + // Clone old SafePointScalarObjectNodes, adjusting their field contents. + if (old_in->is_SafePointScalarObject()) { + SafePointScalarObjectNode* old_sosn = old_in->as_SafePointScalarObject(); + uint old_unique = C->unique(); + Node* new_in = old_sosn->clone(jvms_adj, sosn_map); + if (old_unique != C->unique()) { + new_in = transform_later(new_in); // Register new node. + } + old_in = new_in; + } + newcall->add_req(old_in); } + newcall->set_jvms(oldcall->jvms()); for (JVMState *jvms = newcall->jvms(); jvms != NULL; jvms = jvms->caller()) { jvms->set_map(newcall); jvms->set_locoff(jvms->locoff()+jvms_adj); jvms->set_stkoff(jvms->stkoff()+jvms_adj); jvms->set_monoff(jvms->monoff()+jvms_adj); + jvms->set_scloff(jvms->scloff()+jvms_adj); jvms->set_endoff(jvms->endoff()+jvms_adj); } } @@ -166,6 +181,622 @@ } +// Eliminate a card mark sequence. 
p2x is a ConvP2XNode +void PhaseMacroExpand::eliminate_card_mark(Node *p2x) { + assert(p2x->Opcode() == Op_CastP2X, "ConvP2XNode required"); + Node *shift = p2x->unique_out(); + Node *addp = shift->unique_out(); + for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) { + Node *st = addp->last_out(j); + assert(st->is_Store(), "store required"); + _igvn.replace_node(st, st->in(MemNode::Memory)); + } +} + +// Search for a memory operation for the specified memory slice. +static Node *scan_mem_chain(Node *mem, int alias_idx, int offset, Node *start_mem, Node *alloc) { + Node *orig_mem = mem; + Node *alloc_mem = alloc->in(TypeFunc::Memory); + while (true) { + if (mem == alloc_mem || mem == start_mem ) { + return mem; // hit one of our sentinals + } else if (mem->is_MergeMem()) { + mem = mem->as_MergeMem()->memory_at(alias_idx); + } else if (mem->is_Proj() && mem->as_Proj()->_con == TypeFunc::Memory) { + Node *in = mem->in(0); + // we can safely skip over safepoints, calls, locks and membars because we + // already know that the object is safe to eliminate. + if (in->is_Initialize() && in->as_Initialize()->allocation() == alloc) { + return in; + } else if (in->is_Call() || in->is_MemBar()) { + mem = in->in(TypeFunc::Memory); + } else { + assert(false, "unexpected projection"); + } + } else if (mem->is_Store()) { + const TypePtr* atype = mem->as_Store()->adr_type(); + int adr_idx = Compile::current()->get_alias_index(atype); + if (adr_idx == alias_idx) { + assert(atype->isa_oopptr(), "address type must be oopptr"); + int adr_offset = atype->offset(); + uint adr_iid = atype->is_oopptr()->instance_id(); + // Array elements references have the same alias_idx + // but different offset and different instance_id. 
+ if (adr_offset == offset && adr_iid == alloc->_idx) + return mem; + } else { + assert(adr_idx == Compile::AliasIdxRaw, "address must match or be raw"); + } + mem = mem->in(MemNode::Memory); + } else { + return mem; + } + if (mem == orig_mem) + return mem; + } +} + +// +// Given a Memory Phi, compute a value Phi containing the values from stores +// on the input paths. +// Note: this function is recursive, its depth is limied by the "level" argument +// Returns the computed Phi, or NULL if it cannot compute it. +Node *PhaseMacroExpand::value_from_mem_phi(Node *mem, BasicType ft, const Type *phi_type, const TypeOopPtr *adr_t, Node *alloc, int level) { + + if (level <= 0) { + return NULL; + } + int alias_idx = C->get_alias_index(adr_t); + int offset = adr_t->offset(); + int instance_id = adr_t->instance_id(); + + Node *start_mem = C->start()->proj_out(TypeFunc::Memory); + Node *alloc_mem = alloc->in(TypeFunc::Memory); + + uint length = mem->req(); + GrowableArray values(length, length, NULL); + + for (uint j = 1; j < length; j++) { + Node *in = mem->in(j); + if (in == NULL || in->is_top()) { + values.at_put(j, in); + } else { + Node *val = scan_mem_chain(in, alias_idx, offset, start_mem, alloc); + if (val == start_mem || val == alloc_mem) { + // hit a sentinel, return appropriate 0 value + values.at_put(j, _igvn.zerocon(ft)); + continue; + } + if (val->is_Initialize()) { + val = val->as_Initialize()->find_captured_store(offset, type2aelembytes(ft), &_igvn); + } + if (val == NULL) { + return NULL; // can't find a value on this path + } + if (val == mem) { + values.at_put(j, mem); + } else if (val->is_Store()) { + values.at_put(j, val->in(MemNode::ValueIn)); + } else if(val->is_Proj() && val->in(0) == alloc) { + values.at_put(j, _igvn.zerocon(ft)); + } else if (val->is_Phi()) { + // Check if an appropriate node already exists. 
+ Node* region = val->in(0); + Node* old_phi = NULL; + for (DUIterator_Fast kmax, k = region->fast_outs(kmax); k < kmax; k++) { + Node* phi = region->fast_out(k); + if (phi->is_Phi() && phi != val && + phi->as_Phi()->is_same_inst_field(phi_type, instance_id, alias_idx, offset)) { + old_phi = phi; + break; + } + } + if (old_phi == NULL) { + val = value_from_mem_phi(val, ft, phi_type, adr_t, alloc, level-1); + if (val == NULL) { + return NULL; + } + values.at_put(j, val); + } else { + values.at_put(j, old_phi); + } + } else { + return NULL; // unknown node on this path + } + } + } + // create a new Phi for the value + PhiNode *phi = new (C, length) PhiNode(mem->in(0), phi_type, NULL, instance_id, alias_idx, offset); + for (uint j = 1; j < length; j++) { + if (values.at(j) == mem) { + phi->init_req(j, phi); + } else { + phi->init_req(j, values.at(j)); + } + } + transform_later(phi); + return phi; +} + +// Search the last value stored into the object's field. +Node *PhaseMacroExpand::value_from_mem(Node *sfpt_mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, Node *alloc) { + assert(adr_t->is_instance_field(), "instance required"); + uint instance_id = adr_t->instance_id(); + assert(instance_id == alloc->_idx, "wrong allocation"); + + int alias_idx = C->get_alias_index(adr_t); + int offset = adr_t->offset(); + Node *start_mem = C->start()->proj_out(TypeFunc::Memory); + Node *alloc_ctrl = alloc->in(TypeFunc::Control); + Node *alloc_mem = alloc->in(TypeFunc::Memory); + VectorSet visited(Thread::current()->resource_area()); + + + bool done = sfpt_mem == alloc_mem; + Node *mem = sfpt_mem; + while (!done) { + if (visited.test_set(mem->_idx)) { + return NULL; // found a loop, give up + } + mem = scan_mem_chain(mem, alias_idx, offset, start_mem, alloc); + if (mem == start_mem || mem == alloc_mem) { + done = true; // hit a sentinel, return appropriate 0 value + } else if (mem->is_Initialize()) { + mem = mem->as_Initialize()->find_captured_store(offset, 
type2aelembytes(ft), &_igvn); + if (mem == NULL) { + done = true; // Something went wrong. + } else if (mem->is_Store()) { + const TypePtr* atype = mem->as_Store()->adr_type(); + assert(C->get_alias_index(atype) == Compile::AliasIdxRaw, "store is correct memory slice"); + done = true; + } + } else if (mem->is_Store()) { + const TypeOopPtr* atype = mem->as_Store()->adr_type()->isa_oopptr(); + assert(atype != NULL, "address type must be oopptr"); + assert(C->get_alias_index(atype) == alias_idx && + atype->is_instance_field() && atype->offset() == offset && + atype->instance_id() == instance_id, "store is correct memory slice"); + done = true; + } else if (mem->is_Phi()) { + // try to find a phi's unique input + Node *unique_input = NULL; + Node *top = C->top(); + for (uint i = 1; i < mem->req(); i++) { + Node *n = scan_mem_chain(mem->in(i), alias_idx, offset, start_mem, alloc); + if (n == NULL || n == top || n == mem) { + continue; + } else if (unique_input == NULL) { + unique_input = n; + } else if (unique_input != n) { + unique_input = top; + break; + } + } + if (unique_input != NULL && unique_input != top) { + mem = unique_input; + } else { + done = true; + } + } else { + assert(false, "unexpected node"); + } + } + if (mem != NULL) { + if (mem == start_mem || mem == alloc_mem) { + // hit a sentinel, return appropriate 0 value + return _igvn.zerocon(ft); + } else if (mem->is_Store()) { + return mem->in(MemNode::ValueIn); + } else if (mem->is_Phi()) { + // attempt to produce a Phi reflecting the values on the input paths of the Phi + Node * phi = value_from_mem_phi(mem, ft, ftype, adr_t, alloc, 8); + if (phi != NULL) { + return phi; + } + } + } + // Something went wrong. + return NULL; +} + +// Check the possibility of scalar replacement. +bool PhaseMacroExpand::can_eliminate_allocation(AllocateNode *alloc, GrowableArray & safepoints) { + // Scan the uses of the allocation to check for anything that would + // prevent us from eliminating it.
+ NOT_PRODUCT( const char* fail_eliminate = NULL; ) + DEBUG_ONLY( Node* disq_node = NULL; ) + bool can_eliminate = true; + + Node* res = alloc->result_cast(); + const TypeOopPtr* res_type = NULL; + if (res == NULL) { + // All users were eliminated. + } else if (!res->is_CheckCastPP()) { + alloc->_is_scalar_replaceable = false; // don't try again + NOT_PRODUCT(fail_eliminate = "Allocation does not have unique CheckCastPP";) + can_eliminate = false; + } else { + res_type = _igvn.type(res)->isa_oopptr(); + if (res_type == NULL) { + NOT_PRODUCT(fail_eliminate = "Neither instance or array allocation";) + can_eliminate = false; + } else if (res_type->isa_aryptr()) { + int length = alloc->in(AllocateNode::ALength)->find_int_con(-1); + if (length < 0) { + NOT_PRODUCT(fail_eliminate = "Array's size is not constant";) + can_eliminate = false; + } + } + } + + if (can_eliminate && res != NULL) { + for (DUIterator_Fast jmax, j = res->fast_outs(jmax); + j < jmax && can_eliminate; j++) { + Node* use = res->fast_out(j); + + if (use->is_AddP()) { + const TypePtr* addp_type = _igvn.type(use)->is_ptr(); + int offset = addp_type->offset(); + + if (offset == Type::OffsetTop || offset == Type::OffsetBot) { + NOT_PRODUCT(fail_eliminate = "Undefined field referrence";) + can_eliminate = false; + break; + } + for (DUIterator_Fast kmax, k = use->fast_outs(kmax); + k < kmax && can_eliminate; k++) { + Node* n = use->fast_out(k); + if (!n->is_Store() && n->Opcode() != Op_CastP2X) { + DEBUG_ONLY(disq_node = n;) + if (n->is_Load()) { + NOT_PRODUCT(fail_eliminate = "Field load";) + } else { + NOT_PRODUCT(fail_eliminate = "Not store field referrence";) + } + can_eliminate = false; + } + } + } else if (use->is_SafePoint()) { + SafePointNode* sfpt = use->as_SafePoint(); + if (sfpt->has_non_debug_use(res)) { + // Object is passed as argument. 
+ DEBUG_ONLY(disq_node = use;) + NOT_PRODUCT(fail_eliminate = "Object is passed as argument";) + can_eliminate = false; + } + Node* sfptMem = sfpt->memory(); + if (sfptMem == NULL || sfptMem->is_top()) { + DEBUG_ONLY(disq_node = use;) + NOT_PRODUCT(fail_eliminate = "NULL or TOP memory";) + can_eliminate = false; + } else { + safepoints.append_if_missing(sfpt); + } + } else if (use->Opcode() != Op_CastP2X) { // CastP2X is used by card mark + if (use->is_Phi()) { + if (use->outcnt() == 1 && use->unique_out()->Opcode() == Op_Return) { + NOT_PRODUCT(fail_eliminate = "Object is return value";) + } else { + NOT_PRODUCT(fail_eliminate = "Object is referenced by Phi";) + } + DEBUG_ONLY(disq_node = use;) + } else { + if (use->Opcode() == Op_Return) { + NOT_PRODUCT(fail_eliminate = "Object is return value";) + }else { + NOT_PRODUCT(fail_eliminate = "Object is referenced by node";) + } + DEBUG_ONLY(disq_node = use;) + } + can_eliminate = false; + } + } + } + +#ifndef PRODUCT + if (PrintEliminateAllocations) { + if (can_eliminate) { + tty->print("Scalar "); + if (res == NULL) + alloc->dump(); + else + res->dump(); + } else { + tty->print("NotScalar (%s)", fail_eliminate); + if (res == NULL) + alloc->dump(); + else + res->dump(); +#ifdef ASSERT + if (disq_node != NULL) { + tty->print(" >>>> "); + disq_node->dump(); + } +#endif /*ASSERT*/ + } + } +#endif + return can_eliminate; +} + +// Do scalar replacement. 
+bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray & safepoints) { + GrowableArray safepoints_done; + + ciKlass* klass = NULL; + ciInstanceKlass* iklass = NULL; + int nfields = 0; + int array_base; + int element_size; + BasicType basic_elem_type; + ciType* elem_type; + + Node* res = alloc->result_cast(); + const TypeOopPtr* res_type = NULL; + if (res != NULL) { // Could be NULL when there are no users + res_type = _igvn.type(res)->isa_oopptr(); + } + + if (res != NULL) { + klass = res_type->klass(); + if (res_type->isa_instptr()) { + // find the fields of the class which will be needed for safepoint debug information + assert(klass->is_instance_klass(), "must be an instance klass."); + iklass = klass->as_instance_klass(); + nfields = iklass->nof_nonstatic_fields(); + } else { + // find the array's elements which will be needed for safepoint debug information + nfields = alloc->in(AllocateNode::ALength)->find_int_con(-1); + assert(klass->is_array_klass() && nfields >= 0, "must be an array klass."); + elem_type = klass->as_array_klass()->element_type(); + basic_elem_type = elem_type->basic_type(); + array_base = arrayOopDesc::base_offset_in_bytes(basic_elem_type); + element_size = type2aelembytes(basic_elem_type); + } + } + // + // Process the safepoint uses + // + while (safepoints.length() > 0) { + SafePointNode* sfpt = safepoints.pop(); + Node* mem = sfpt->memory(); + uint first_ind = sfpt->req(); + SafePointScalarObjectNode* sobj = new (C, 1) SafePointScalarObjectNode(res_type, +#ifdef ASSERT + alloc, +#endif + first_ind, nfields); + sobj->init_req(0, sfpt->in(TypeFunc::Control)); + transform_later(sobj); + + // Scan object's fields adding an input to the safepoint for each field. 
+ for (int j = 0; j < nfields; j++) { + int offset; + ciField* field = NULL; + if (iklass != NULL) { + field = iklass->nonstatic_field_at(j); + offset = field->offset(); + elem_type = field->type(); + basic_elem_type = field->layout_type(); + } else { + offset = array_base + j * element_size; + } + + const Type *field_type; + // The next code is taken from Parse::do_get_xxx(). + if (basic_elem_type == T_OBJECT) { + if (!elem_type->is_loaded()) { + field_type = TypeInstPtr::BOTTOM; + } else if (field != NULL && field->is_constant()) { + // This can happen if the constant oop is non-perm. + ciObject* con = field->constant_value().as_object(); + // Do not "join" in the previous type; it doesn't add value, + // and may yield a vacuous result if the field is of interface type. + field_type = TypeOopPtr::make_from_constant(con)->isa_oopptr(); + assert(field_type != NULL, "field singleton type must be consistent"); + } else { + field_type = TypeOopPtr::make_from_klass(elem_type->as_klass()); + } + } else { + field_type = Type::get_const_basic_type(basic_elem_type); + } + + const TypeOopPtr *field_addr_type = res_type->add_offset(offset)->isa_oopptr(); + + Node *field_val = value_from_mem(mem, basic_elem_type, field_type, field_addr_type, alloc); + if (field_val == NULL) { + // we weren't able to find a value for this field, + // give up on eliminating this allocation + alloc->_is_scalar_replaceable = false; // don't try again + // remove any extra entries we added to the safepoint + uint last = sfpt->req() - 1; + for (int k = 0; k < j; k++) { + sfpt->del_req(last--); + } + // rollback processed safepoints + while (safepoints_done.length() > 0) { + SafePointNode* sfpt_done = safepoints_done.pop(); + // remove any extra entries we added to the safepoint + last = sfpt_done->req() - 1; + for (int k = 0; k < nfields; k++) { + sfpt_done->del_req(last--); + } + JVMState *jvms = sfpt_done->jvms(); + jvms->set_endoff(sfpt_done->req()); + // Now make a pass over the debug 
information replacing any references + // to SafePointScalarObjectNode with the allocated object. + int start = jvms->debug_start(); + int end = jvms->debug_end(); + for (int i = start; i < end; i++) { + if (sfpt_done->in(i)->is_SafePointScalarObject()) { + SafePointScalarObjectNode* scobj = sfpt_done->in(i)->as_SafePointScalarObject(); + if (scobj->first_index() == sfpt_done->req() && + scobj->n_fields() == (uint)nfields) { + assert(scobj->alloc() == alloc, "sanity"); + sfpt_done->set_req(i, res); + } + } + } + } +#ifndef PRODUCT + if (PrintEliminateAllocations) { + if (field != NULL) { + tty->print("=== At SafePoint node %d can't find value of Field: ", + sfpt->_idx); + field->print(); + int field_idx = C->get_alias_index(field_addr_type); + tty->print(" (alias_idx=%d)", field_idx); + } else { // Array's element + tty->print("=== At SafePoint node %d can't find value of array element [%d]", + sfpt->_idx, j); + } + tty->print(", which prevents elimination of: "); + if (res == NULL) + alloc->dump(); + else + res->dump(); + } +#endif + return false; + } + sfpt->add_req(field_val); + } + JVMState *jvms = sfpt->jvms(); + jvms->set_endoff(sfpt->req()); + // Now make a pass over the debug information replacing any references + // to the allocated object with "sobj" + int start = jvms->debug_start(); + int end = jvms->debug_end(); + for (int i = start; i < end; i++) { + if (sfpt->in(i) == res) { + sfpt->set_req(i, sobj); + } + } + safepoints_done.append_if_missing(sfpt); // keep it for rollback + } + return true; +} + +// Process users of eliminated allocation. 
+void PhaseMacroExpand::process_users_of_allocation(AllocateNode *alloc) { + Node* res = alloc->result_cast(); + if (res != NULL) { + for (DUIterator_Last jmin, j = res->last_outs(jmin); j >= jmin; ) { + Node *use = res->last_out(j); + uint oc1 = res->outcnt(); + + if (use->is_AddP()) { + for (DUIterator_Last kmin, k = use->last_outs(kmin); k >= kmin; ) { + Node *n = use->last_out(k); + uint oc2 = use->outcnt(); + if (n->is_Store()) { + _igvn.replace_node(n, n->in(MemNode::Memory)); + } else { + assert( n->Opcode() == Op_CastP2X, "CastP2X required"); + eliminate_card_mark(n); + } + k -= (oc2 - use->outcnt()); + } + } else { + assert( !use->is_SafePoint(), "safepoint uses must have been already elimiated"); + assert( use->Opcode() == Op_CastP2X, "CastP2X required"); + eliminate_card_mark(use); + } + j -= (oc1 - res->outcnt()); + } + assert(res->outcnt() == 0, "all uses of allocated objects must be deleted"); + _igvn.remove_dead_node(res); + } + + // + // Process other users of allocation's projections + // + if (_resproj != NULL && _resproj->outcnt() != 0) { + for (DUIterator_Last jmin, j = _resproj->last_outs(jmin); j >= jmin; ) { + Node *use = _resproj->last_out(j); + uint oc1 = _resproj->outcnt(); + if (use->is_Initialize()) { + // Eliminate Initialize node. 
+ InitializeNode *init = use->as_Initialize(); + assert(init->outcnt() <= 2, "only a control and memory projection expected"); + Node *ctrl_proj = init->proj_out(TypeFunc::Control); + if (ctrl_proj != NULL) { + assert(init->in(TypeFunc::Control) == _fallthroughcatchproj, "allocation control projection"); + _igvn.replace_node(ctrl_proj, _fallthroughcatchproj); + } + Node *mem_proj = init->proj_out(TypeFunc::Memory); + if (mem_proj != NULL) { + Node *mem = init->in(TypeFunc::Memory); +#ifdef ASSERT + if (mem->is_MergeMem()) { + assert(mem->in(TypeFunc::Memory) == _memproj_fallthrough, "allocation memory projection"); + } else { + assert(mem == _memproj_fallthrough, "allocation memory projection"); + } +#endif + _igvn.replace_node(mem_proj, mem); + } + } else if (use->is_AddP()) { + // raw memory addresses used only by the initialization + _igvn.hash_delete(use); + _igvn.subsume_node(use, C->top()); + } else { + assert(false, "only Initialize or AddP expected"); + } + j -= (oc1 - _resproj->outcnt()); + } + } + if (_fallthroughcatchproj != NULL) { + _igvn.replace_node(_fallthroughcatchproj, alloc->in(TypeFunc::Control)); + } + if (_memproj_fallthrough != NULL) { + _igvn.replace_node(_memproj_fallthrough, alloc->in(TypeFunc::Memory)); + } + if (_memproj_catchall != NULL) { + _igvn.replace_node(_memproj_catchall, C->top()); + } + if (_ioproj_fallthrough != NULL) { + _igvn.replace_node(_ioproj_fallthrough, alloc->in(TypeFunc::I_O)); + } + if (_ioproj_catchall != NULL) { + _igvn.replace_node(_ioproj_catchall, C->top()); + } + if (_catchallcatchproj != NULL) { + _igvn.replace_node(_catchallcatchproj, C->top()); + } +} + +bool PhaseMacroExpand::eliminate_allocate_node(AllocateNode *alloc) { + + if (!EliminateAllocations || !alloc->_is_scalar_replaceable) { + return false; + } + + extract_call_projections(alloc); + + GrowableArray safepoints; + if (!can_eliminate_allocation(alloc, safepoints)) { + return false; + } + + if (!scalar_replacement(alloc, safepoints)) { + return 
false; + } + + process_users_of_allocation(alloc); + +#ifndef PRODUCT +if (PrintEliminateAllocations) { + if (alloc->is_AllocateArray()) + tty->print_cr("++++ Eliminated: %d AllocateArray", alloc->_idx); + else + tty->print_cr("++++ Eliminated: %d Allocate", alloc->_idx); +} +#endif + + return true; +} + //---------------------------set_eden_pointers------------------------- void PhaseMacroExpand::set_eden_pointers(Node* &eden_top_adr, Node* &eden_end_adr) { @@ -270,6 +901,13 @@ Node* klass_node = alloc->in(AllocateNode::KlassNode); Node* initial_slow_test = alloc->in(AllocateNode::InitialTest); + // With escape analysis, the entire memory state was needed to be able to + // eliminate the allocation. Since the allocations cannot be eliminated, + // optimize it to the raw slice. + if (mem->is_MergeMem()) { + mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw); + } + Node* eden_top_adr; Node* eden_end_adr; set_eden_pointers(eden_top_adr, eden_end_adr); @@ -813,27 +1451,87 @@ // Note: The membar's associated with the lock/unlock are currently not // eliminated. This should be investigated as a future enhancement. // -void PhaseMacroExpand::eliminate_locking_node(AbstractLockNode *alock) { - Node* mem = alock->in(TypeFunc::Memory); +bool PhaseMacroExpand::eliminate_locking_node(AbstractLockNode *alock) { + + if (!alock->is_eliminated()) { + return false; + } + // Mark the box lock as eliminated if all correspondent locks are eliminated + // to construct correct debug info. 
+ BoxLockNode* box = alock->box_node()->as_BoxLock(); + if (!box->is_eliminated()) { + bool eliminate = true; + for (DUIterator_Fast imax, i = box->fast_outs(imax); i < imax; i++) { + Node *lck = box->fast_out(i); + if (lck->is_Lock() && !lck->as_AbstractLock()->is_eliminated()) { + eliminate = false; + break; + } + } + if (eliminate) + box->set_eliminated(); + } + + #ifndef PRODUCT + if (PrintEliminateLocks) { + if (alock->is_Lock()) { + tty->print_cr("++++ Eliminating: %d Lock", alock->_idx); + } else { + tty->print_cr("++++ Eliminating: %d Unlock", alock->_idx); + } + } + #endif + + Node* mem = alock->in(TypeFunc::Memory); + Node* ctrl = alock->in(TypeFunc::Control); + + extract_call_projections(alock); + // There are 2 projections from the lock. The lock node will + // be deleted when its last use is subsumed below. + assert(alock->outcnt() == 2 && + _fallthroughproj != NULL && + _memproj_fallthrough != NULL, + "Unexpected projections from Lock/Unlock"); + + Node* fallthroughproj = _fallthroughproj; + Node* memproj_fallthrough = _memproj_fallthrough; // The memory projection from a lock/unlock is RawMem // The input to a Lock is merged memory, so extract its RawMem input // (unless the MergeMem has been optimized away.) if (alock->is_Lock()) { - if (mem->is_MergeMem()) - mem = mem->as_MergeMem()->in(Compile::AliasIdxRaw); + // Search for MemBarAcquire node and delete it also. + MemBarNode* membar = fallthroughproj->unique_ctrl_out()->as_MemBar(); + assert(membar != NULL && membar->Opcode() == Op_MemBarAcquire, ""); + Node* ctrlproj = membar->proj_out(TypeFunc::Control); + Node* memproj = membar->proj_out(TypeFunc::Memory); + _igvn.hash_delete(ctrlproj); + _igvn.subsume_node(ctrlproj, fallthroughproj); + _igvn.hash_delete(memproj); + _igvn.subsume_node(memproj, memproj_fallthrough); } - extract_call_projections(alock); - // There are 2 projections from the lock. The lock node will - // be deleted when its last use is subsumed below.
- assert(alock->outcnt() == 2 && _fallthroughproj != NULL && - _memproj_fallthrough != NULL, "Unexpected projections from Lock/Unlock"); - _igvn.hash_delete(_fallthroughproj); - _igvn.subsume_node(_fallthroughproj, alock->in(TypeFunc::Control)); - _igvn.hash_delete(_memproj_fallthrough); - _igvn.subsume_node(_memproj_fallthrough, mem); - return; + // Search for MemBarRelease node and delete it also. + if (alock->is_Unlock() && ctrl != NULL && ctrl->is_Proj() && + ctrl->in(0)->is_MemBar()) { + MemBarNode* membar = ctrl->in(0)->as_MemBar(); + assert(membar->Opcode() == Op_MemBarRelease && + mem->is_Proj() && membar == mem->in(0), ""); + _igvn.hash_delete(fallthroughproj); + _igvn.subsume_node(fallthroughproj, ctrl); + _igvn.hash_delete(memproj_fallthrough); + _igvn.subsume_node(memproj_fallthrough, mem); + fallthroughproj = ctrl; + memproj_fallthrough = mem; + ctrl = membar->in(TypeFunc::Control); + mem = membar->in(TypeFunc::Memory); + } + + _igvn.hash_delete(fallthroughproj); + _igvn.subsume_node(fallthroughproj, ctrl); + _igvn.hash_delete(memproj_fallthrough); + _igvn.subsume_node(memproj_fallthrough, mem); + return true; } @@ -844,12 +1542,7 @@ Node* mem = lock->in(TypeFunc::Memory); Node* obj = lock->obj_node(); Node* box = lock->box_node(); - Node *flock = lock->fastlock_node(); - - if (lock->is_eliminated()) { - eliminate_locking_node(lock); - return; - } + Node* flock = lock->fastlock_node(); // Make the merge point Node *region = new (C, 3) RegionNode(3); @@ -898,17 +1591,11 @@ //------------------------------expand_unlock_node---------------------- void PhaseMacroExpand::expand_unlock_node(UnlockNode *unlock) { - Node *ctrl = unlock->in(TypeFunc::Control); + Node* ctrl = unlock->in(TypeFunc::Control); Node* mem = unlock->in(TypeFunc::Memory); Node* obj = unlock->obj_node(); Node* box = unlock->box_node(); - - if (unlock->is_eliminated()) { - eliminate_locking_node(unlock); - return; - } - // No need for a null check on unlock // Make the merge point @@
-958,14 +1645,41 @@ bool PhaseMacroExpand::expand_macro_nodes() { if (C->macro_count() == 0) return false; - // Make sure expansion will not cause node limit to be exceeded. Worst case is a - // macro node gets expanded into about 50 nodes. Allow 50% more for optimization + // attempt to eliminate allocations + bool progress = true; + while (progress) { + progress = false; + for (int i = C->macro_count(); i > 0; i--) { + Node * n = C->macro_node(i-1); + bool success = false; + debug_only(int old_macro_count = C->macro_count();); + switch (n->class_id()) { + case Node::Class_Allocate: + case Node::Class_AllocateArray: + success = eliminate_allocate_node(n->as_Allocate()); + break; + case Node::Class_Lock: + case Node::Class_Unlock: + success = eliminate_locking_node(n->as_AbstractLock()); + break; + default: + assert(false, "unknown node type in macro list"); + } + assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count"); + progress = progress || success; + } + } + // Make sure expansion will not cause node limit to be exceeded. + // Worst case is a macro node gets expanded into about 50 nodes. + // Allow 50% more for optimization. 
if (C->check_node_count(C->macro_count() * 75, "out of nodes before macro expansion" ) ) return true; + // expand "macro" nodes // nodes are removed from the macro list as they are processed while (C->macro_count() > 0) { - Node * n = C->macro_node(0); + int macro_count = C->macro_count(); + Node * n = C->macro_node(macro_count-1); assert(n->is_macro(), "only macro nodes expected here"); if (_igvn.type(n) == Type::TOP || n->in(0)->is_top() ) { // node is unreachable, so don't try to expand it @@ -988,6 +1702,7 @@ default: assert(false, "unknown node type in macro list"); } + assert(C->macro_count() < macro_count, "must have deleted a node from macro list"); if (C->failing()) return true; } _igvn.optimize(); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/macro.hpp --- a/src/share/vm/opto/macro.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/macro.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -78,7 +78,16 @@ Node* length, const TypeFunc* slow_call_type, address slow_call_address); - void eliminate_locking_node(AbstractLockNode *alock); + Node *value_from_mem(Node *mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, Node *alloc); + Node *value_from_mem_phi(Node *mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, Node *alloc, int level); + + bool eliminate_allocate_node(AllocateNode *alloc); + bool can_eliminate_allocation(AllocateNode *alloc, GrowableArray & safepoints); + bool scalar_replacement(AllocateNode *alloc, GrowableArray & safepoints_done); + void process_users_of_allocation(AllocateNode *alloc); + + void eliminate_card_mark(Node *cm); + bool eliminate_locking_node(AbstractLockNode *alock); void expand_lock_node(LockNode *lock); void expand_unlock_node(UnlockNode *unlock); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/matcher.cpp --- a/src/share/vm/opto/matcher.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/matcher.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1647,6 +1647,7 @@ case Op_Phi: // Treat Phis as 
shared roots case Op_Parm: case Op_Proj: // All handled specially during matching + case Op_SafePointScalarObject: set_shared(n); set_dontcare(n); break; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/memnode.cpp --- a/src/share/vm/opto/memnode.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/memnode.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -29,6 +29,8 @@ #include "incls/_precompiled.incl" #include "incls/_memnode.cpp.incl" +static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const TypePtr *tp, const TypePtr *adr_check, outputStream *st); + //============================================================================= uint MemNode::size_of() const { return sizeof(*this); } @@ -87,6 +89,112 @@ #endif +Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase) { + const TypeOopPtr *tinst = t_adr->isa_oopptr(); + if (tinst == NULL || !tinst->is_instance_field()) + return mchain; // don't try to optimize non-instance types + uint instance_id = tinst->instance_id(); + Node *prev = NULL; + Node *result = mchain; + while (prev != result) { + prev = result; + // skip over a call which does not affect this memory slice + if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) { + Node *proj_in = result->in(0); + if (proj_in->is_Call()) { + CallNode *call = proj_in->as_Call(); + if (!call->may_modify(t_adr, phase)) { + result = call->in(TypeFunc::Memory); + } + } else if (proj_in->is_Initialize()) { + AllocateNode* alloc = proj_in->as_Initialize()->allocation(); + // Stop if this is the initialization for the object instance which + // which contains this memory slice, otherwise skip over it. 
+ if (alloc != NULL && alloc->_idx != instance_id) { + result = proj_in->in(TypeFunc::Memory); + } + } else if (proj_in->is_MemBar()) { + result = proj_in->in(TypeFunc::Memory); + } + } else if (result->is_MergeMem()) { + result = step_through_mergemem(phase, result->as_MergeMem(), t_adr, NULL, tty); + } + } + return result; +} + +Node *MemNode::optimize_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase) { + const TypeOopPtr *t_oop = t_adr->isa_oopptr(); + bool is_instance = (t_oop != NULL) && t_oop->is_instance_field(); + PhaseIterGVN *igvn = phase->is_IterGVN(); + Node *result = mchain; + result = optimize_simple_memory_chain(result, t_adr, phase); + if (is_instance && igvn != NULL && result->is_Phi()) { + PhiNode *mphi = result->as_Phi(); + assert(mphi->bottom_type() == Type::MEMORY, "memory phi required"); + const TypePtr *t = mphi->adr_type(); + if (t == TypePtr::BOTTOM || t == TypeRawPtr::BOTTOM) { + // clone the Phi with our address type + result = mphi->split_out_instance(t_adr, igvn); + } else { + assert(phase->C->get_alias_index(t) == phase->C->get_alias_index(t_adr), "correct memory chain"); + } + } + return result; +} + +static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const TypePtr *tp, const TypePtr *adr_check, outputStream *st) { + uint alias_idx = phase->C->get_alias_index(tp); + Node *mem = mmem; +#ifdef ASSERT + { + // Check that current type is consistent with the alias index used during graph construction + assert(alias_idx >= Compile::AliasIdxRaw, "must not be a bad alias_idx"); + bool consistent = adr_check == NULL || adr_check->empty() || + phase->C->must_alias(adr_check, alias_idx ); + // Sometimes dead array references collapse to a[-1], a[-2], or a[-3] + if( !consistent && adr_check != NULL && !adr_check->empty() && + tp->isa_aryptr() && tp->offset() == Type::OffsetBot && + adr_check->isa_aryptr() && adr_check->offset() != Type::OffsetBot && + ( adr_check->offset() == 
arrayOopDesc::length_offset_in_bytes() || + adr_check->offset() == oopDesc::klass_offset_in_bytes() || + adr_check->offset() == oopDesc::mark_offset_in_bytes() ) ) { + // don't assert if it is dead code. + consistent = true; + } + if( !consistent ) { + st->print("alias_idx==%d, adr_check==", alias_idx); + if( adr_check == NULL ) { + st->print("NULL"); + } else { + adr_check->dump(); + } + st->cr(); + print_alias_types(); + assert(consistent, "adr_check must match alias idx"); + } + } +#endif + // TypeInstPtr::NOTNULL+any is an OOP with unknown offset - generally + // means an array I have not precisely typed yet. Do not do any + // alias stuff with it any time soon. + const TypeOopPtr *tinst = tp->isa_oopptr(); + if( tp->base() != Type::AnyPtr && + !(tinst && + tinst->klass()->is_java_lang_Object() && + tinst->offset() == Type::OffsetBot) ) { + // compress paths and change unreachable cycles to TOP + // If not, we can update the input infinitely along a MergeMem cycle + // Equivalent code in PhiNode::Ideal + Node* m = phase->transform(mmem); + // If transformed to a MergeMem, get the desired slice + // Otherwise the returned node represents memory for every slice + mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m; + // Update input if it is progress over what we have now + } + return mem; +} + //--------------------------Ideal_common--------------------------------------- // Look for degenerate control and memory inputs. Bypass MergeMem inputs. // Unhook non-raw memories from complete (macro-expanded) initializations. @@ -108,65 +216,19 @@ // Avoid independent memory operations Node* old_mem = mem; - if (mem->is_Proj() && mem->in(0)->is_Initialize()) { - InitializeNode* init = mem->in(0)->as_Initialize(); - if (init->is_complete()) { // i.e., after macro expansion - const TypePtr* tp = t_adr->is_ptr(); - uint alias_idx = phase->C->get_alias_index(tp); - // Free this slice from the init.
It was hooked, temporarily, - // by GraphKit::set_output_for_allocation. - if (alias_idx > Compile::AliasIdxRaw) { - mem = init->memory(alias_idx); - // ...but not with the raw-pointer slice. - } - } - } + // The code which unhooks non-raw memories from complete (macro-expanded) + // initializations was removed. After macro-expansion all stores caught + // by Initialize node became raw stores and there is no information + // which memory slices they modify. So it is unsafe to move any memory + // operation above these stores. Also in most cases hooked non-raw memories + // were already unhooked by using information from detect_ptr_independence() + // and find_previous_store(). if (mem->is_MergeMem()) { MergeMemNode* mmem = mem->as_MergeMem(); const TypePtr *tp = t_adr->is_ptr(); - uint alias_idx = phase->C->get_alias_index(tp); -#ifdef ASSERT - { - // Check that current type is consistent with the alias index used during graph construction - assert(alias_idx >= Compile::AliasIdxRaw, "must not be a bad alias_idx"); - const TypePtr *adr_t = adr_type(); - bool consistent = adr_t == NULL || adr_t->empty() || phase->C->must_alias(adr_t, alias_idx ); - // Sometimes dead array references collapse to a[-1], a[-2], or a[-3] - if( !consistent && adr_t != NULL && !adr_t->empty() && - tp->isa_aryptr() && tp->offset() == Type::OffsetBot && - adr_t->isa_aryptr() && adr_t->offset() != Type::OffsetBot && - ( adr_t->offset() == arrayOopDesc::length_offset_in_bytes() || - adr_t->offset() == oopDesc::klass_offset_in_bytes() || - adr_t->offset() == oopDesc::mark_offset_in_bytes() ) ) { - // don't assert if it is dead code.
- consistent = true; - } - if( !consistent ) { - tty->print("alias_idx==%d, adr_type()==", alias_idx); if( adr_t == NULL ) { tty->print("NULL"); } else { adr_t->dump(); } - tty->cr(); - print_alias_types(); - assert(consistent, "adr_type must match alias idx"); - } - } -#endif - // TypeInstPtr::NOTNULL+any is an OOP with unknown offset - generally - // means an array I have not precisely typed yet. Do not do any - // alias stuff with it any time soon. - const TypeInstPtr *tinst = tp->isa_instptr(); - if( tp->base() != Type::AnyPtr && - !(tinst && - tinst->klass()->is_java_lang_Object() && - tinst->offset() == Type::OffsetBot) ) { - // compress paths and change unreachable cycles to TOP - // If not, we can update the input infinitely along a MergeMem cycle - // Equivalent code in PhiNode::Ideal - Node* m = phase->transform(mmem); - // If tranformed to a MergeMem, get the desired slice - // Otherwise the returned node represents memory for every slice - mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m; - // Update input if it is progress over what we have now - } + + mem = step_through_mergemem(phase, mmem, tp, adr_type(), tty); } if (mem != old_mem) { @@ -179,36 +241,91 @@ } // Helper function for proving some simple control dominations. -// Attempt to prove that control input 'dom' dominates (or equals) 'sub'. +// Attempt to prove that all control inputs of 'dom' dominate 'sub'. // Already assumes that 'dom' is available at 'sub', and that 'sub' // is not a constant (dominated by the method's StartNode). // Used by MemNode::find_previous_store to prove that the // control input of a memory operation predates (dominates) // an allocation it wants to look past. 
-bool MemNode::detect_dominating_control(Node* dom, Node* sub) { - if (dom == NULL) return false; - if (dom->is_Proj()) dom = dom->in(0); - if (dom->is_Start()) return true; // anything inside the method - if (dom->is_Root()) return true; // dom 'controls' a constant - int cnt = 20; // detect cycle or too much effort - while (sub != NULL) { // walk 'sub' up the chain to 'dom' - if (--cnt < 0) return false; // in a cycle or too complex - if (sub == dom) return true; - if (sub->is_Start()) return false; - if (sub->is_Root()) return false; - Node* up = sub->in(0); - if (sub == up && sub->is_Region()) { - for (uint i = 1; i < sub->req(); i++) { - Node* in = sub->in(i); - if (in != NULL && !in->is_top() && in != sub) { - up = in; break; // take any path on the way up to 'dom' +bool MemNode::all_controls_dominate(Node* dom, Node* sub) { + if (dom == NULL || dom->is_top() || sub == NULL || sub->is_top()) + return false; // Conservative answer for dead code + + // Check 'dom'. + dom = dom->find_exact_control(dom); + if (dom == NULL || dom->is_top()) + return false; // Conservative answer for dead code + + if (dom->is_Start() || dom->is_Root() || dom == sub) + return true; + + // 'dom' dominates 'sub' if its control edge and control edges + // of all its inputs dominate or equal to sub's control edge. + + // Currently 'sub' is either Allocate, Initialize or Start nodes. + assert(sub->is_Allocate() || sub->is_Initialize() || sub->is_Start(), "expecting only these nodes"); + + // Get control edge of 'sub'. + sub = sub->find_exact_control(sub->in(0)); + if (sub == NULL || sub->is_top()) + return false; // Conservative answer for dead code + + assert(sub->is_CFG(), "expecting control"); + + if (sub == dom) + return true; + + if (sub->is_Start() || sub->is_Root()) + return false; + + { + // Check all control edges of 'dom'. 
+ + ResourceMark rm; + Arena* arena = Thread::current()->resource_area(); + Node_List nlist(arena); + Unique_Node_List dom_list(arena); + + dom_list.push(dom); + bool only_dominating_controls = false; + + for (uint next = 0; next < dom_list.size(); next++) { + Node* n = dom_list.at(next); + if (!n->is_CFG() && n->pinned()) { + // Check only own control edge for pinned non-control nodes. + n = n->find_exact_control(n->in(0)); + if (n == NULL || n->is_top()) + return false; // Conservative answer for dead code + assert(n->is_CFG(), "expecting control"); + } + if (n->is_Start() || n->is_Root()) { + only_dominating_controls = true; + } else if (n->is_CFG()) { + if (n->dominates(sub, nlist)) + only_dominating_controls = true; + else + return false; + } else { + // First, own control edge. + Node* m = n->find_exact_control(n->in(0)); + if (m == NULL) + continue; + if (m->is_top()) + return false; // Conservative answer for dead code + dom_list.push(m); + + // Now, the rest of edges. + uint cnt = n->req(); + for (uint i = 1; i < cnt; i++) { + m = n->find_exact_control(n->in(i)); + if (m == NULL || m->is_top()) + continue; + dom_list.push(m); } } } - if (sub == up) return false; // some kind of tight cycle - sub = up; + return only_dominating_controls; } - return false; } //---------------------detect_ptr_independence--------------------------------- @@ -229,9 +346,9 @@ return (a1 != a2); } else if (a1 != NULL) { // one allocation a1 // (Note: p2->is_Con implies p2->in(0)->is_Root, which dominates.) 
- return detect_dominating_control(p2->in(0), a1->in(0)); + return all_controls_dominate(p2, a1); } else { //(a2 != NULL) // one allocation a2 - return detect_dominating_control(p1->in(0), a2->in(0)); + return all_controls_dominate(p1, a2); } return false; } @@ -260,6 +377,8 @@ if (offset == Type::OffsetBot) return NULL; // cannot unalias unless there are precise offsets + const TypeOopPtr *addr_t = adr->bottom_type()->isa_oopptr(); + intptr_t size_in_bytes = memory_size(); Node* mem = in(MemNode::Memory); // start searching here... @@ -315,8 +434,7 @@ known_identical = true; else if (alloc != NULL) known_independent = true; - else if (ctrl != NULL && - detect_dominating_control(ctrl, st_alloc->in(0))) + else if (all_controls_dominate(this, st_alloc)) known_independent = true; if (known_independent) { @@ -339,6 +457,22 @@ return mem; // let caller handle steps (c), (d) } + } else if (addr_t != NULL && addr_t->is_instance_field()) { + // Can't use optimize_simple_memory_chain() since it needs PhaseGVN. 
+ if (mem->is_Proj() && mem->in(0)->is_Call()) { + CallNode *call = mem->in(0)->as_Call(); + if (!call->may_modify(addr_t, phase)) { + mem = call->in(TypeFunc::Memory); + continue; // (a) advance through independent call memory + } + } else if (mem->is_Proj() && mem->in(0)->is_MemBar()) { + mem = mem->in(0)->in(TypeFunc::Memory); + continue; // (a) advance through independent MemBar memory + } else if (mem->is_MergeMem()) { + int alias_idx = phase->C->get_alias_index(adr_type()); + mem = mem->as_MergeMem()->memory_at(alias_idx); + continue; // (a) advance through independent MergeMem memory + } } // Unless there is an explicit 'continue', we must bail out here, @@ -540,7 +674,10 @@ const Node* call = adr->in(0); if (call->is_CallStaticJava()) { const CallStaticJavaNode* call_java = call->as_CallStaticJava(); - assert(call_java && call_java->method() == NULL, "must be runtime call"); + const TypeTuple *r = call_java->tf()->range(); + assert(r->cnt() > TypeFunc::Parms, "must return value"); + const Type* ret_type = r->field_at(TypeFunc::Parms); + assert(ret_type && ret_type->isa_ptr(), "must return pointer"); // We further presume that this is one of // new_instance_Java, new_array_Java, or // the like, but do not assert for this. @@ -634,6 +771,46 @@ Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const { Node* ld_adr = in(MemNode::Address); + const TypeInstPtr* tp = phase->type(ld_adr)->isa_instptr(); + Compile::AliasType* atp = tp != NULL ? phase->C->alias_type(tp) : NULL; + if (EliminateAutoBox && atp != NULL && atp->index() >= Compile::AliasIdxRaw && + atp->field() != NULL && !atp->field()->is_volatile()) { + uint alias_idx = atp->index(); + bool final = atp->field()->is_final(); + Node* result = NULL; + Node* current = st; + // Skip through chains of MemBarNodes checking the MergeMems for + // new states for the slice of this load. Stop once any other + // kind of node is encountered. 
Loads from final memory can skip + // through any kind of MemBar but normal loads shouldn't skip + // through MemBarAcquire since that could allow them to move out of + // a synchronized region. + while (current->is_Proj()) { + int opc = current->in(0)->Opcode(); + if ((final && opc == Op_MemBarAcquire) || + opc == Op_MemBarRelease || opc == Op_MemBarCPUOrder) { + Node* mem = current->in(0)->in(TypeFunc::Memory); + if (mem->is_MergeMem()) { + MergeMemNode* merge = mem->as_MergeMem(); + Node* new_st = merge->memory_at(alias_idx); + if (new_st == merge->base_memory()) { + // Keep searching + current = merge->base_memory(); + continue; + } + // Save the new memory state for the slice and fall through + // to exit. + result = new_st; + } + } + break; + } + if (result != NULL) { + st = result; + } + } + + // Loop around twice in the case Load -> Initialize -> Store. // (See PhaseIterGVN::add_users_to_worklist, which knows about this case.) for (int trip = 0; trip <= 1; trip++) { @@ -698,6 +875,21 @@ return NULL; } +//----------------------is_instance_field_load_with_local_phi------------------ +bool LoadNode::is_instance_field_load_with_local_phi(Node* ctrl) { + if( in(MemNode::Memory)->is_Phi() && in(MemNode::Memory)->in(0) == ctrl && + in(MemNode::Address)->is_AddP() ) { + const TypeOopPtr* t_oop = in(MemNode::Address)->bottom_type()->isa_oopptr(); + // Only instances. + if( t_oop != NULL && t_oop->is_instance_field() && + t_oop->offset() != Type::OffsetBot && + t_oop->offset() != Type::OffsetTop) { + return true; + } + } + return false; +} + //------------------------------Identity--------------------------------------- // Loads are identity if previous store is to same address Node *LoadNode::Identity( PhaseTransform *phase ) { @@ -720,9 +912,190 @@ // usually runs first, producing the singleton type of the Con.)
return value; } + + // Search for an existing data phi which was generated before for the same + // instance's field to avoid infinite generation of phis in a loop. + Node *region = mem->in(0); + if (is_instance_field_load_with_local_phi(region)) { + const TypePtr *addr_t = in(MemNode::Address)->bottom_type()->isa_ptr(); + int this_index = phase->C->get_alias_index(addr_t); + int this_offset = addr_t->offset(); + int this_id = addr_t->is_oopptr()->instance_id(); + const Type* this_type = bottom_type(); + for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { + Node* phi = region->fast_out(i); + if (phi->is_Phi() && phi != mem && + phi->as_Phi()->is_same_inst_field(this_type, this_id, this_index, this_offset)) { + return phi; + } + } + } + return this; } + +// Returns true if the AliasType refers to the field that holds the +// cached box array. Currently only handles the IntegerCache case. +static bool is_autobox_cache(Compile::AliasType* atp) { + if (atp != NULL && atp->field() != NULL) { + ciField* field = atp->field(); + ciSymbol* klass = field->holder()->name(); + if (field->name() == ciSymbol::cache_field_name() && + field->holder()->uses_default_loader() && + klass == ciSymbol::java_lang_Integer_IntegerCache()) { + return true; + } + } + return false; +} + +// Fetch the base value in the autobox array +static bool fetch_autobox_base(Compile::AliasType* atp, int& cache_offset) { + if (atp != NULL && atp->field() != NULL) { + ciField* field = atp->field(); + ciSymbol* klass = field->holder()->name(); + if (field->name() == ciSymbol::cache_field_name() && + field->holder()->uses_default_loader() && + klass == ciSymbol::java_lang_Integer_IntegerCache()) { + assert(field->is_constant(), "what?"); + ciObjArray* array = field->constant_value().as_object()->as_obj_array(); + // Fetch the box object at the base of the array and get its value + ciInstance* box = array->obj_at(0)->as_instance(); + ciInstanceKlass* ik = box->klass()->as_instance_klass();
+ if (ik->nof_nonstatic_fields() == 1) { + // This should be true nonstatic_field_at requires calling + // nof_nonstatic_fields so check it anyway + ciConstant c = box->field_value(ik->nonstatic_field_at(0)); + cache_offset = c.as_int(); + } + return true; + } + } + return false; +} + +// Returns true if the AliasType refers to the value field of an +// autobox object. Currently only handles Integer. +static bool is_autobox_object(Compile::AliasType* atp) { + if (atp != NULL && atp->field() != NULL) { + ciField* field = atp->field(); + ciSymbol* klass = field->holder()->name(); + if (field->name() == ciSymbol::value_name() && + field->holder()->uses_default_loader() && + klass == ciSymbol::java_lang_Integer()) { + return true; + } + } + return false; +} + + +// We're loading from an object which has autobox behaviour. +// If this object is result of a valueOf call we'll have a phi +// merging a newly allocated object and a load from the cache. +// We want to replace this load with the original incoming +// argument to the valueOf call. +Node* LoadNode::eliminate_autobox(PhaseGVN* phase) { + Node* base = in(Address)->in(AddPNode::Base); + if (base->is_Phi() && base->req() == 3) { + AllocateNode* allocation = NULL; + int allocation_index = -1; + int load_index = -1; + for (uint i = 1; i < base->req(); i++) { + allocation = AllocateNode::Ideal_allocation(base->in(i), phase); + if (allocation != NULL) { + allocation_index = i; + load_index = 3 - allocation_index; + break; + } + } + LoadNode* load = NULL; + if (allocation != NULL && base->in(load_index)->is_Load()) { + load = base->in(load_index)->as_Load(); + } + if (load != NULL && in(Memory)->is_Phi() && in(Memory)->in(0) == base->in(0)) { + // Push the loads from the phi that comes from valueOf up + // through it to allow elimination of the loads and the recovery + // of the original value. 
+ Node* mem_phi = in(Memory); + Node* offset = in(Address)->in(AddPNode::Offset); + + Node* in1 = clone(); + Node* in1_addr = in1->in(Address)->clone(); + in1_addr->set_req(AddPNode::Base, base->in(allocation_index)); + in1_addr->set_req(AddPNode::Address, base->in(allocation_index)); + in1_addr->set_req(AddPNode::Offset, offset); + in1->set_req(0, base->in(allocation_index)); + in1->set_req(Address, in1_addr); + in1->set_req(Memory, mem_phi->in(allocation_index)); + + Node* in2 = clone(); + Node* in2_addr = in2->in(Address)->clone(); + in2_addr->set_req(AddPNode::Base, base->in(load_index)); + in2_addr->set_req(AddPNode::Address, base->in(load_index)); + in2_addr->set_req(AddPNode::Offset, offset); + in2->set_req(0, base->in(load_index)); + in2->set_req(Address, in2_addr); + in2->set_req(Memory, mem_phi->in(load_index)); + + in1_addr = phase->transform(in1_addr); + in1 = phase->transform(in1); + in2_addr = phase->transform(in2_addr); + in2 = phase->transform(in2); + + PhiNode* result = PhiNode::make_blank(base->in(0), this); + result->set_req(allocation_index, in1); + result->set_req(load_index, in2); + return result; + } + } else if (base->is_Load()) { + // Eliminate the load of Integer.value for integers from the cache + // array by deriving the value from the index into the array. + // Capture the offset of the load and then reverse the computation. 
+ Node* load_base = base->in(Address)->in(AddPNode::Base); + if (load_base != NULL) { + Compile::AliasType* atp = phase->C->alias_type(load_base->adr_type()); + intptr_t cache_offset; + int shift = -1; + Node* cache = NULL; + if (is_autobox_cache(atp)) { + shift = exact_log2(type2aelembytes(T_OBJECT)); + cache = AddPNode::Ideal_base_and_offset(load_base->in(Address), phase, cache_offset); + } + if (cache != NULL && base->in(Address)->is_AddP()) { + Node* elements[4]; + int count = base->in(Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements)); + int cache_low; + if (count > 0 && fetch_autobox_base(atp, cache_low)) { + int offset = arrayOopDesc::base_offset_in_bytes(memory_type()) - (cache_low << shift); + // Add up all the offsets making of the address of the load + Node* result = elements[0]; + for (int i = 1; i < count; i++) { + result = phase->transform(new (phase->C, 3) AddXNode(result, elements[i])); + } + // Remove the constant offset from the address and then + // remove the scaling of the offset to recover the original index. + result = phase->transform(new (phase->C, 3) AddXNode(result, phase->MakeConX(-offset))); + if (result->Opcode() == Op_LShiftX && result->in(2) == phase->intcon(shift)) { + // Peel the shift off directly but wrap it in a dummy node + // since Ideal can't return existing nodes + result = new (phase->C, 3) RShiftXNode(result->in(1), phase->intcon(0)); + } else { + result = new (phase->C, 3) RShiftXNode(result, phase->intcon(shift)); + } +#ifdef _LP64 + result = new (phase->C, 2) ConvL2INode(phase->transform(result)); +#endif + return result; + } + } + } + } + return NULL; +} + + //------------------------------Ideal------------------------------------------ // If the load is from Field memory and the pointer is non-null, we can // zero out the control input. 
@@ -749,12 +1122,145 @@ Node* base = AddPNode::Ideal_base_and_offset(address, phase, ignore); if (base != NULL && phase->type(base)->higher_equal(TypePtr::NOTNULL) - && detect_dominating_control(base->in(0), phase->C->start())) { + && all_controls_dominate(base, phase->C->start())) { // A method-invariant, non-null address (constant or 'this' argument). set_req(MemNode::Control, NULL); } } + if (EliminateAutoBox && can_reshape && in(Address)->is_AddP()) { + Node* base = in(Address)->in(AddPNode::Base); + if (base != NULL) { + Compile::AliasType* atp = phase->C->alias_type(adr_type()); + if (is_autobox_object(atp)) { + Node* result = eliminate_autobox(phase); + if (result != NULL) return result; + } + } + } + + Node* mem = in(MemNode::Memory); + const TypePtr *addr_t = phase->type(address)->isa_ptr(); + + if (addr_t != NULL) { + // try to optimize our memory input + Node* opt_mem = MemNode::optimize_memory_chain(mem, addr_t, phase); + if (opt_mem != mem) { + set_req(MemNode::Memory, opt_mem); + return this; + } + const TypeOopPtr *t_oop = addr_t->isa_oopptr(); + if (can_reshape && opt_mem->is_Phi() && + (t_oop != NULL) && t_oop->is_instance_field()) { + assert(t_oop->offset() != Type::OffsetBot && t_oop->offset() != Type::OffsetTop, ""); + Node *region = opt_mem->in(0); + uint cnt = opt_mem->req(); + for( uint i = 1; i < cnt; i++ ) { + Node *in = opt_mem->in(i); + if( in == NULL ) { + region = NULL; // Wait stable graph + break; + } + } + if (region != NULL) { + // Check for loop invariant. + if (cnt == 3) { + for( uint i = 1; i < cnt; i++ ) { + Node *in = opt_mem->in(i); + Node* m = MemNode::optimize_memory_chain(in, addr_t, phase); + if (m == opt_mem) { + set_req(MemNode::Memory, opt_mem->in(cnt - i)); // Skip this phi. + return this; + } + } + } + // Split through Phi (see original code in loopopts.cpp). 
+ assert(phase->C->have_alias_type(addr_t), "instance should have alias type"); + + // Do nothing here if Identity will find a value + // (to avoid infinite chain of value phis generation). + if ( !phase->eqv(this, this->Identity(phase)) ) + return NULL; + + const Type* this_type = this->bottom_type(); + int this_index = phase->C->get_alias_index(addr_t); + int this_offset = addr_t->offset(); + int this_iid = addr_t->is_oopptr()->instance_id(); + int wins = 0; + PhaseIterGVN *igvn = phase->is_IterGVN(); + Node *phi = new (igvn->C, region->req()) PhiNode(region, this_type, NULL, this_iid, this_index, this_offset); + for( uint i = 1; i < region->req(); i++ ) { + Node *x; + Node* the_clone = NULL; + if( region->in(i) == phase->C->top() ) { + x = phase->C->top(); // Dead path? Use a dead data op + } else { + x = this->clone(); // Else clone up the data op + the_clone = x; // Remember for possible deletion. + // Alter data node to use pre-phi inputs + if( this->in(0) == region ) { + x->set_req( 0, region->in(i) ); + } else { + x->set_req( 0, NULL ); + } + for( uint j = 1; j < this->req(); j++ ) { + Node *in = this->in(j); + if( in->is_Phi() && in->in(0) == region ) + x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone + } + } + // Check for a 'win' on some paths + const Type *t = x->Value(igvn); + + bool singleton = t->singleton(); + + // See comments in PhaseIdealLoop::split_thru_phi(). + if( singleton && t == Type::TOP ) { + singleton &= region->is_Loop() && (i != LoopNode::EntryControl); + } + + if( singleton ) { + wins++; + x = igvn->makecon(t); + } else { + // We now call Identity to try to simplify the cloned node. + // Note that some Identity methods call phase->type(this). + // Make sure that the type array is big enough for + // our new node, even though we may throw the node away. + // (This tweaking with igvn only works because x is a new node.) 
+ igvn->set_type(x, t); + Node *y = x->Identity(igvn); + if( y != x ) { + wins++; + x = y; + } else { + y = igvn->hash_find(x); + if( y ) { + wins++; + x = y; + } else { + // Else x is a new node we are keeping + // We do not need register_new_node_with_optimizer + // because set_type has already been called. + igvn->_worklist.push(x); + } + } + } + if (x != the_clone && the_clone != NULL) + igvn->remove_dead_node(the_clone); + phi->set_req(i, x); + } + if( wins > 0 ) { + // Record Phi + igvn->register_new_node_with_optimizer(phi); + return phi; + } else { + igvn->remove_dead_node(phi); + } + } + } + } + // Check for prior store with a different base or offset; make Load // independent. Skip through any number of them. Bail out if the stores // are in an endless dead cycle and report no progress. This is a key @@ -858,6 +1364,17 @@ // This can happen if a interface-typed array narrows to a class type. jt = _type; } + + if (EliminateAutoBox) { + // The pointers in the autobox arrays are always non-null + Node* base = in(Address)->in(AddPNode::Base); + if (base != NULL) { + Compile::AliasType* atp = phase->C->alias_type(base->adr_type()); + if (is_autobox_cache(atp)) { + return jt->join(TypePtr::NOTNULL)->is_ptr(); + } + } + } return jt; } } @@ -971,6 +1488,17 @@ return value->bottom_type(); } + const TypeOopPtr *tinst = tp->isa_oopptr(); + if (tinst != NULL && tinst->is_instance_field()) { + // If we have an instance type and our memory input is the + // program's initial memory state, there is no matching store, + // so just return a zero of the appropriate type + Node *mem = in(MemNode::Memory); + if (mem->is_Parm() && mem->in(0)->is_Start()) { + assert(mem->as_Parm()->_con == TypeFunc::Memory, "must be memory Parm"); + return Type::get_zero_type(_type->basic_type()); + } + } return _type; } @@ -1494,7 +2022,7 @@ const TypeOopPtr *adr_oop = phase->type(adr)->isa_oopptr(); if (adr_oop == NULL) return false; - if (!adr_oop->is_instance()) + if
(!adr_oop->is_instance_field()) return false; // if not a distinct instance, there may be aliases of the address for (DUIterator_Fast imax, i = adr->fast_outs(imax); i < imax; i++) { Node *use = adr->fast_out(i); @@ -1553,9 +2081,16 @@ //------------------------------Value----------------------------------------- const Type *StoreCMNode::Value( PhaseTransform *phase ) const { + // Either input is TOP ==> the result is TOP + const Type *t = phase->type( in(MemNode::Memory) ); + if( t == Type::TOP ) return Type::TOP; + t = phase->type( in(MemNode::Address) ); + if( t == Type::TOP ) return Type::TOP; + t = phase->type( in(MemNode::ValueIn) ); + if( t == Type::TOP ) return Type::TOP; // If extra input is TOP ==> the result is TOP - const Type *t1 = phase->type( in(MemNode::OopStore) ); - if( t1 == Type::TOP ) return Type::TOP; + t = phase->type( in(MemNode::OopStore) ); + if( t == Type::TOP ) return Type::TOP; return StoreNode::Value( phase ); } @@ -1596,7 +2131,7 @@ //------------------------------Identity--------------------------------------- // Clearing a zero length array does nothing Node *ClearArrayNode::Identity( PhaseTransform *phase ) { - return phase->type(in(2))->higher_equal(TypeInt::ZERO) ? in(1) : this; + return phase->type(in(2))->higher_equal(TypeX::ZERO) ? in(1) : this; } //------------------------------Idealize--------------------------------------- @@ -1669,6 +2204,11 @@ Node* start_offset, Node* end_offset, PhaseGVN* phase) { + if (start_offset == end_offset) { + // nothing to do + return mem; + } + Compile* C = phase->C; int unit = BytesPerLong; Node* zbase = start_offset; @@ -1694,6 +2234,11 @@ intptr_t start_offset, intptr_t end_offset, PhaseGVN* phase) { + if (start_offset == end_offset) { + // nothing to do + return mem; + } + Compile* C = phase->C; assert((end_offset % BytesPerInt) == 0, "odd end offset"); intptr_t done_offset = end_offset; @@ -1998,7 +2543,7 @@ // must have preceded the init, or else be equal to the init. 
// Even after loop optimizations (which might change control edges) // a store is never pinned *before* the availability of its inputs. - if (!MemNode::detect_dominating_control(ctl, this->in(0))) + if (!MemNode::all_controls_dominate(n, this)) return false; // failed to prove a good control } @@ -3019,7 +3564,7 @@ } } - assert(verify_sparse(), "please, no dups of base"); + assert(progress || verify_sparse(), "please, no dups of base"); return progress; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/memnode.hpp --- a/src/share/vm/opto/memnode.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/memnode.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -60,15 +60,17 @@ debug_only(_adr_type=at; adr_type();) } +public: // Helpers for the optimizer. Documented in memnode.cpp. static bool detect_ptr_independence(Node* p1, AllocateNode* a1, Node* p2, AllocateNode* a2, PhaseTransform* phase); static bool adr_phi_is_loop_invariant(Node* adr_phi, Node* cast); -public: + static Node *optimize_simple_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase); + static Node *optimize_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase); // This one should probably be a phase-specific function: - static bool detect_dominating_control(Node* dom, Node* sub); + static bool all_controls_dominate(Node* dom, Node* sub); // Is this Node a MemNode or some descendent? Default is YES. virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp ); @@ -97,7 +99,13 @@ // What is the type of the value in memory? (T_VOID mean "unspecified".) virtual BasicType memory_type() const = 0; - virtual int memory_size() const { return type2aelembytes[memory_type()]; } + virtual int memory_size() const { +#ifdef ASSERT + return type2aelembytes(memory_type(), true); +#else + return type2aelembytes(memory_type()); +#endif + } // Search through memory states which precede this node (load or store). 
// Look for an exact match for the address, with no intervening @@ -141,6 +149,9 @@ // zero out the control input. virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); + // Recover original value from boxed values + Node *eliminate_autobox(PhaseGVN *phase); + // Compute a new Type for this node. Basically we just do the pre-check, // then call the virtual add() to set the type. virtual const Type *Value( PhaseTransform *phase ) const; @@ -163,6 +174,9 @@ // Map a load opcode to its corresponding store opcode. virtual int store_Opcode() const = 0; + // Check if the load's memory input is a Phi node with the same control. + bool is_instance_field_load_with_local_phi(Node* ctrl); + #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/node.cpp --- a/src/share/vm/opto/node.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/node.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -812,8 +812,7 @@ Node* Node::uncast() const { // Should be inline: //return is_ConstraintCast() ? uncast_helper(this) : (Node*) this; - if (is_ConstraintCast() || - (is_Type() && req() == 2 && Opcode() == Op_CheckCastPP)) + if (is_ConstraintCast() || is_CheckCastPP()) return uncast_helper(this); else return (Node*) this; @@ -827,7 +826,7 @@ break; } else if (p->is_ConstraintCast()) { p = p->in(1); - } else if (p->is_Type() && p->Opcode() == Op_CheckCastPP) { + } else if (p->is_CheckCastPP()) { p = p->in(1); } else { break; @@ -1018,6 +1017,101 @@ return false; }; +//--------------------------find_exact_control--------------------------------- +// Skip Proj and CatchProj nodes chains. Check for Null and Top. 
+Node* Node::find_exact_control(Node* ctrl) { + if (ctrl == NULL && this->is_Region()) + ctrl = this->as_Region()->is_copy(); + + if (ctrl != NULL && ctrl->is_CatchProj()) { + if (ctrl->as_CatchProj()->_con == CatchProjNode::fall_through_index) + ctrl = ctrl->in(0); + if (ctrl != NULL && !ctrl->is_top()) + ctrl = ctrl->in(0); + } + + if (ctrl != NULL && ctrl->is_Proj()) + ctrl = ctrl->in(0); + + return ctrl; +} + +//--------------------------dominates------------------------------------------ +// Helper function for MemNode::all_controls_dominate(). +// Check if 'this' control node dominates or equal to 'sub' control node. +bool Node::dominates(Node* sub, Node_List &nlist) { + assert(this->is_CFG(), "expecting control"); + assert(sub != NULL && sub->is_CFG(), "expecting control"); + + Node* orig_sub = sub; + nlist.clear(); + bool this_dominates = false; + uint region_input = 0; + while (sub != NULL) { // walk 'sub' up the chain to 'this' + if (sub == this) { + if (nlist.size() == 0) { + // No Region nodes except loops were visited before and the EntryControl + // path was taken for loops: it did not walk in a cycle. + return true; + } else if (!this_dominates) { + // Region nodes were visited. Continue walk up to Start or Root + // to make sure that it did not walk in a cycle. + this_dominates = true; // first time meet + } else { + return false; // already met before: walk in a cycle + } + } + if (sub->is_Start() || sub->is_Root()) + return this_dominates; + + Node* up = sub->find_exact_control(sub->in(0)); + if (up == NULL || up->is_top()) + return false; // Conservative answer for dead code + + if (sub == up && sub->is_Loop()) { + up = sub->in(0); // in(LoopNode::EntryControl); + } else if (sub == up && sub->is_Region()) { + uint i = 1; + if (nlist.size() == 0) { + // No Region nodes (except Loops) were visited before. + // Take first valid path on the way up to 'this'. + } else if (nlist.at(nlist.size() - 1) == sub) { + // This Region node was just visited. 
Take other path. + i = region_input + 1; + nlist.pop(); + } else { + // Was this Region node visited before? + uint size = nlist.size(); + for (uint j = 0; j < size; j++) { + if (nlist.at(j) == sub) { + return false; // The Region node was visited before. Give up. + } + } + // The Region node was not visited before. + // Take first valid path on the way up to 'this'. + } + for (; i < sub->req(); i++) { + Node* in = sub->in(i); + if (in != NULL && !in->is_top() && in != sub) { + break; + } + } + if (i < sub->req()) { + nlist.push(sub); + up = sub->in(i); + region_input = i; + } + } + if (sub == up) + return false; // some kind of tight cycle + if (orig_sub == up) + return false; // walk in a cycle + + sub = up; + } + return false; +} + //------------------------------remove_dead_region----------------------------- // This control node is dead. Follow the subgraph below it making everything // using it dead as well. This will happen normally via the usual IterGVN @@ -1462,97 +1556,48 @@ } //------------------------------dump_nodes------------------------------------- - -// Helper class for dump_nodes. Wraps an old and new VectorSet. -class OldNewVectorSet : public StackObj { - Arena* _node_arena; - VectorSet _old_vset, _new_vset; - VectorSet* select(Node* n) { - return _node_arena->contains(n) ? 
&_new_vset : &_old_vset; - } - public: - OldNewVectorSet(Arena* node_arena, ResourceArea* area) : - _node_arena(node_arena), - _old_vset(area), _new_vset(area) {} - - void set(Node* n) { select(n)->set(n->_idx); } - bool test_set(Node* n) { return select(n)->test_set(n->_idx) != 0; } - bool test(Node* n) { return select(n)->test(n->_idx) != 0; } - void del(Node* n) { (*select(n)) >>= n->_idx; } -}; - - static void dump_nodes(const Node* start, int d, bool only_ctrl) { Node* s = (Node*)start; // remove const if (NotANode(s)) return; + uint depth = (uint)ABS(d); + int direction = d; Compile* C = Compile::current(); - ResourceArea *area = Thread::current()->resource_area(); - Node_Stack stack(area, MIN2((uint)ABS(d), C->unique() >> 1)); - OldNewVectorSet visited(C->node_arena(), area); - OldNewVectorSet on_stack(C->node_arena(), area); - - visited.set(s); - on_stack.set(s); - stack.push(s, 0); - if (d < 0) s->dump(); - - // Do a depth first walk over edges - while (stack.is_nonempty()) { - Node* tp = stack.node(); - uint idx = stack.index(); - uint limit = d > 0 ? tp->len() : tp->outcnt(); - if (idx >= limit) { - // no more arcs to visit - if (d > 0) tp->dump(); - on_stack.del(tp); - stack.pop(); - } else { - // process the "idx"th arc - stack.set_index(idx + 1); - Node* n = d > 0 ? 
tp->in(idx) : tp->raw_out(idx); - - if (NotANode(n)) continue; - // do not recurse through top or the root (would reach unrelated stuff) - if (n->is_Root() || n->is_top()) continue; - if (only_ctrl && !n->is_CFG()) continue; + GrowableArray nstack(C->unique()); - if (!visited.test_set(n)) { // forward arc - // Limit depth - if (stack.size() < (uint)ABS(d)) { - if (d < 0) n->dump(); - stack.push(n, 0); - on_stack.set(n); - } - } else { // back or cross arc - if (on_stack.test(n)) { // back arc - // print loop if there are no phis or regions in the mix - bool found_loop_breaker = false; - int k; - for (k = stack.size() - 1; k >= 0; k--) { - Node* m = stack.node_at(k); - if (m->is_Phi() || m->is_Region() || m->is_Root() || m->is_Start()) { - found_loop_breaker = true; - break; - } - if (m == n) // Found loop head - break; - } - assert(k >= 0, "n must be on stack"); + nstack.append(s); + int begin = 0; + int end = 0; + for(uint i = 0; i < depth; i++) { + end = nstack.length(); + for(int j = begin; j < end; j++) { + Node* tp = nstack.at(j); + uint limit = direction > 0 ? tp->len() : tp->outcnt(); + for(uint k = 0; k < limit; k++) { + Node* n = direction > 0 ? tp->in(k) : tp->raw_out(k); - if (!found_loop_breaker) { - tty->print("# %s LOOP FOUND:", only_ctrl ? "CONTROL" : "DATA"); - for (int i = stack.size() - 1; i >= k; i--) { - Node* m = stack.node_at(i); - bool mnew = C->node_arena()->contains(m); - tty->print(" %s%d:%s", (mnew? "": "o"), m->_idx, m->Name()); - if (i != 0) tty->print(d > 0? 
" <-": " ->"); - } - tty->cr(); - } + if (NotANode(n)) continue; + // do not recurse through top or the root (would reach unrelated stuff) + if (n->is_Root() || n->is_top()) continue; + if (only_ctrl && !n->is_CFG()) continue; + + bool on_stack = nstack.contains(n); + if (!on_stack) { + nstack.append(n); } } } + begin = end; + } + end = nstack.length(); + if (direction > 0) { + for(int j = end-1; j >= 0; j--) { + nstack.at(j)->dump(); + } + } else { + for(int j = 0; j < end; j++) { + nstack.at(j)->dump(); + } } } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/node.hpp --- a/src/share/vm/opto/node.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/node.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -91,6 +91,7 @@ class Node_Stack; class NullCheckNode; class OopMap; +class ParmNode; class PCTableNode; class PhaseCCP; class PhaseGVN; @@ -105,6 +106,7 @@ class RegionNode; class RootNode; class SafePointNode; +class SafePointScalarObjectNode; class StartNode; class State; class StoreNode; @@ -557,6 +559,7 @@ DEFINE_CLASS_ID(JumpProj, Proj, 1) DEFINE_CLASS_ID(IfTrue, Proj, 2) DEFINE_CLASS_ID(IfFalse, Proj, 3) + DEFINE_CLASS_ID(Parm, Proj, 4) DEFINE_CLASS_ID(Region, Node, 3) DEFINE_CLASS_ID(Loop, Region, 0) @@ -573,6 +576,7 @@ DEFINE_CLASS_ID(ConstraintCast, Type, 1) DEFINE_CLASS_ID(CheckCastPP, Type, 2) DEFINE_CLASS_ID(CMove, Type, 3) + DEFINE_CLASS_ID(SafePointScalarObject, Type, 4) DEFINE_CLASS_ID(Mem, Node, 6) DEFINE_CLASS_ID(Load, Mem, 0) @@ -712,12 +716,14 @@ DEFINE_CLASS_QUERY(Mul) DEFINE_CLASS_QUERY(Multi) DEFINE_CLASS_QUERY(MultiBranch) + DEFINE_CLASS_QUERY(Parm) DEFINE_CLASS_QUERY(PCTable) DEFINE_CLASS_QUERY(Phi) DEFINE_CLASS_QUERY(Proj) DEFINE_CLASS_QUERY(Region) DEFINE_CLASS_QUERY(Root) DEFINE_CLASS_QUERY(SafePoint) + DEFINE_CLASS_QUERY(SafePointScalarObject) DEFINE_CLASS_QUERY(Start) DEFINE_CLASS_QUERY(Store) DEFINE_CLASS_QUERY(Sub) @@ -811,6 +817,12 @@ // for the transformations to happen. 
bool has_special_unique_user() const; + // Skip Proj and CatchProj nodes chains. Check for Null and Top. + Node* find_exact_control(Node* ctrl); + + // Check if 'this' node dominates or equal to 'sub'. + bool dominates(Node* sub, Node_List &nlist); + protected: bool remove_dead_region(PhaseGVN *phase, bool can_reshape); public: @@ -1322,7 +1334,6 @@ // Inline definition of Compile::record_for_igvn must be deferred to this point. inline void Compile::record_for_igvn(Node* n) { _for_igvn->push(n); - record_for_escape_analysis(n); } //------------------------------Node_Stack------------------------------------- @@ -1381,7 +1392,7 @@ _inode_top->indx = i; } uint size_max() const { return (uint)pointer_delta(_inode_max, _inodes, sizeof(INode)); } // Max size - uint size() const { return (uint)pointer_delta(_inode_top, _inodes, sizeof(INode)) + 1; } // Current size + uint size() const { return (uint)pointer_delta((_inode_top+1), _inodes, sizeof(INode)); } // Current size bool is_nonempty() const { return (_inode_top >= _inodes); } bool is_empty() const { return (_inode_top < _inodes); } void clear() { _inode_top = _inodes - 1; } // retain storage diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/output.cpp --- a/src/share/vm/opto/output.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/output.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -561,7 +561,30 @@ : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum))); } -void Compile::FillLocArray( int idx, Node *local, GrowableArray *array ) { + +ObjectValue* +Compile::sv_for_node_id(GrowableArray *objs, int id) { + for (int i = 0; i < objs->length(); i++) { + assert(objs->at(i)->is_object(), "corrupt object cache"); + ObjectValue* sv = (ObjectValue*) objs->at(i); + if (sv->id() == id) { + return sv; + } + } + // Otherwise.. 
+ return NULL; +} + +void Compile::set_sv_for_object_node(GrowableArray *objs, + ObjectValue* sv ) { + assert(sv_for_node_id(objs, sv->id()) == NULL, "Precondition"); + objs->append(sv); +} + + +void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local, + GrowableArray *array, + GrowableArray *objs ) { assert( local, "use _top instead of null" ); if (array->length() != idx) { assert(array->length() == idx + 1, "Unexpected array count"); @@ -578,6 +601,29 @@ } const Type *t = local->bottom_type(); + // Is it a safepoint scalar object node? + if (local->is_SafePointScalarObject()) { + SafePointScalarObjectNode* spobj = local->as_SafePointScalarObject(); + + ObjectValue* sv = Compile::sv_for_node_id(objs, spobj->_idx); + if (sv == NULL) { + ciKlass* cik = t->is_oopptr()->klass(); + assert(cik->is_instance_klass() || + cik->is_array_klass(), "Not supported allocation."); + sv = new ObjectValue(spobj->_idx, + new ConstantOopWriteValue(cik->encoding())); + Compile::set_sv_for_object_node(objs, sv); + + uint first_ind = spobj->first_index(); + for (uint i = 0; i < spobj->n_fields(); i++) { + Node* fld_node = sfpt->in(first_ind+i); + (void)FillLocArray(sv->field_values()->length(), sfpt, fld_node, sv->field_values(), objs); + } + } + array->append(sv); + return; + } + // Grab the register number for the local OptoReg::Name regnum = _regalloc->get_reg_first(local); if( OptoReg::is_valid(regnum) ) {// Got a register/stack? @@ -755,6 +801,11 @@ JVMState* youngest_jvms = sfn->jvms(); int max_depth = youngest_jvms->depth(); + // Allocate the object pool for scalar-replaced objects -- the map from + // small-integer keys (which can be recorded in the local and ostack + // arrays) to descriptions of the object state. + GrowableArray *objs = new GrowableArray(); + // Visit scopes from oldest to youngest. 
for (int depth = 1; depth <= max_depth; depth++) { JVMState* jvms = youngest_jvms->of_depth(depth); @@ -773,13 +824,13 @@ // Insert locals into the locarray GrowableArray *locarray = new GrowableArray(num_locs); for( idx = 0; idx < num_locs; idx++ ) { - FillLocArray( idx, sfn->local(jvms, idx), locarray ); + FillLocArray( idx, sfn, sfn->local(jvms, idx), locarray, objs ); } // Insert expression stack entries into the exparray GrowableArray *exparray = new GrowableArray(num_exps); for( idx = 0; idx < num_exps; idx++ ) { - FillLocArray( idx, sfn->stack(jvms, idx), exparray ); + FillLocArray( idx, sfn, sfn->stack(jvms, idx), exparray, objs ); } // Add in mappings of the monitors @@ -803,7 +854,27 @@ // Create ScopeValue for object ScopeValue *scval = NULL; - if( !obj_node->is_Con() ) { + + if( obj_node->is_SafePointScalarObject() ) { + SafePointScalarObjectNode* spobj = obj_node->as_SafePointScalarObject(); + scval = Compile::sv_for_node_id(objs, spobj->_idx); + if (scval == NULL) { + const Type *t = obj_node->bottom_type(); + ciKlass* cik = t->is_oopptr()->klass(); + assert(cik->is_instance_klass() || + cik->is_array_klass(), "Not supported allocation."); + ObjectValue* sv = new ObjectValue(spobj->_idx, + new ConstantOopWriteValue(cik->encoding())); + Compile::set_sv_for_object_node(objs, sv); + + uint first_ind = spobj->first_index(); + for (uint i = 0; i < spobj->n_fields(); i++) { + Node* fld_node = sfn->in(first_ind+i); + (void)FillLocArray(sv->field_values()->length(), sfn, fld_node, sv->field_values(), objs); + } + scval = sv; + } + } else if( !obj_node->is_Con() ) { OptoReg::Name obj_reg = _regalloc->get_reg_first(obj_node); scval = new_loc_value( _regalloc, obj_reg, Location::oop ); } else { @@ -811,9 +882,13 @@ } OptoReg::Name box_reg = BoxLockNode::stack_slot(box_node); - monarray->append(new MonitorValue(scval, Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg)))); + Location basic_lock = 
Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg)); + monarray->append(new MonitorValue(scval, basic_lock, box_node->as_BoxLock()->is_eliminated())); } + // We dump the object pool first, since deoptimization reads it in first. + debug_info()->dump_object_pool(objs); + // Build first class objects to pass to scope DebugToken *locvals = debug_info()->create_scope_values(locarray); DebugToken *expvals = debug_info()->create_scope_values(exparray); @@ -823,6 +898,7 @@ ciMethod* scope_method = method ? method : _method; // Describe the scope here assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI"); + // Now we can describe the scope. debug_info()->describe_scope(safepoint_pc_offset,scope_method,jvms->bci(),locvals,expvals,monvals); } // End jvms loop @@ -921,11 +997,8 @@ // blown the code cache size. C->record_failure("excessive request to CodeCache"); } else { - UseInterpreter = true; - UseCompiler = false; - AlwaysCompileLoopMethods = false; + // Let CompilerBroker disable further compilations. C->record_failure("CodeCache is full"); - warning("CodeCache is full. 
Compiling has been disabled"); } } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/parse.hpp --- a/src/share/vm/opto/parse.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/parse.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -54,9 +54,9 @@ InlineTree *build_inline_tree_for_callee(ciMethod* callee_method, JVMState* caller_jvms, int caller_bci); - const char* try_to_inline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result); - const char* shouldInline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const; - const char* shouldNotInline(ciMethod* callee_method, WarmCallInfo* wci_result) const; + const char* try_to_inline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result); + const char* shouldInline(ciMethod* callee_method, ciMethod* caller_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const; + const char* shouldNotInline(ciMethod* callee_method, ciMethod* caller_method, WarmCallInfo* wci_result) const; void print_inlining(ciMethod *callee_method, int caller_bci, const char *failure_msg) const PRODUCT_RETURN; InlineTree *caller_tree() const { return _caller_tree; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/parse1.cpp --- a/src/share/vm/opto/parse1.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/parse1.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1836,7 +1836,7 @@ PhiNode* phi = PhiNode::make(region, o, t); gvn().set_type(phi, t); - if (DoEscapeAnalysis) record_for_igvn(phi); + if (C->do_escape_analysis()) record_for_igvn(phi); map->set_req(idx, phi); return phi; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/parse2.cpp --- a/src/share/vm/opto/parse2.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/parse2.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -885,6 +885,9 @@ void Parse::do_ifnull(BoolTest::mask btest) { int target_bci = iter().get_dest(); + Block* 
branch_block = successor_for_bci(target_bci); + Block* next_block = successor_for_bci(iter().next_bci()); + float cnt; float prob = branch_prediction(cnt, btest, target_bci); if (prob == PROB_UNKNOWN) { @@ -902,13 +905,16 @@ uncommon_trap(Deoptimization::Reason_unreached, Deoptimization::Action_reinterpret, NULL, "cold"); + if (EliminateAutoBox) { + // Mark the successor blocks as parsed + branch_block->next_path_num(); + next_block->next_path_num(); + } return; } // If this is a backwards branch in the bytecodes, add Safepoint maybe_add_safepoint(target_bci); - Block* branch_block = successor_for_bci(target_bci); - Block* next_block = successor_for_bci(iter().next_bci()); explicit_null_checks_inserted++; Node* a = null(); @@ -935,6 +941,10 @@ if (stopped()) { // Path is dead? explicit_null_checks_elided++; + if (EliminateAutoBox) { + // Mark the successor block as parsed + branch_block->next_path_num(); + } } else { // Path is live. // Update method data profile_taken_branch(target_bci); @@ -950,6 +960,10 @@ if (stopped()) { // Path is dead? explicit_null_checks_elided++; + if (EliminateAutoBox) { + // Mark the successor block as parsed + next_block->next_path_num(); + } } else { // Path is live. 
// Update method data profile_not_taken_branch(); @@ -962,6 +976,9 @@ void Parse::do_if(BoolTest::mask btest, Node* c) { int target_bci = iter().get_dest(); + Block* branch_block = successor_for_bci(target_bci); + Block* next_block = successor_for_bci(iter().next_bci()); + float cnt; float prob = branch_prediction(cnt, btest, target_bci); float untaken_prob = 1.0 - prob; @@ -980,6 +997,11 @@ uncommon_trap(Deoptimization::Reason_unreached, Deoptimization::Action_reinterpret, NULL, "cold"); + if (EliminateAutoBox) { + // Mark the successor blocks as parsed + branch_block->next_path_num(); + next_block->next_path_num(); + } return; } @@ -1000,10 +1022,27 @@ Node* tst = _gvn.transform(tst0); BoolTest::mask taken_btest = BoolTest::illegal; BoolTest::mask untaken_btest = BoolTest::illegal; - if (btest == BoolTest::ne) { - // For now, these are the only cases of btest that matter. (More later.) - taken_btest = taken_if_true ? btest : BoolTest::eq; - untaken_btest = taken_if_true ? BoolTest::eq : btest; + + if (tst->is_Bool()) { + // Refresh c from the transformed bool node, since it may be + // simpler than the original c. Also re-canonicalize btest. + // This wins when (Bool ne (Conv2B p) 0) => (Bool ne (CmpP p NULL)). + // That can arise from statements like: if (x instanceof C) ... + if (tst != tst0) { + // Canonicalize one more time since transform can change it. + btest = tst->as_Bool()->_test._test; + if (!BoolTest(btest).is_canonical()) { + // Reverse edges one more time... + tst = _gvn.transform( tst->as_Bool()->negate(&_gvn) ); + btest = tst->as_Bool()->_test._test; + assert(BoolTest(btest).is_canonical(), "sanity"); + taken_if_true = !taken_if_true; + } + c = tst->in(1); + } + BoolTest::mask neg_btest = BoolTest(btest).negate(); + taken_btest = taken_if_true ? btest : neg_btest; + untaken_btest = taken_if_true ? 
neg_btest : btest; } // Generate real control flow @@ -1018,15 +1057,17 @@ untaken_branch = tmp; } - Block* branch_block = successor_for_bci(target_bci); - Block* next_block = successor_for_bci(iter().next_bci()); - // Branch is taken: { PreserveJVMState pjvms(this); taken_branch = _gvn.transform(taken_branch); set_control(taken_branch); - if (!stopped()) { + if (stopped()) { + if (EliminateAutoBox) { + // Mark the successor block as parsed + branch_block->next_path_num(); + } + } else { // Update method data profile_taken_branch(target_bci); adjust_map_after_if(taken_btest, c, prob, branch_block, next_block); @@ -1039,7 +1080,12 @@ set_control(untaken_branch); // Branch not taken. - if (!stopped()) { + if (stopped()) { + if (EliminateAutoBox) { + // Mark the successor block as parsed + next_block->next_path_num(); + } + } else { // Update method data profile_not_taken_branch(); adjust_map_after_if(untaken_btest, c, untaken_prob, diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/phaseX.cpp --- a/src/share/vm/opto/phaseX.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/phaseX.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -587,11 +587,6 @@ Node_Notes* loc = C->locate_node_notes(nna, x->_idx, true); loc->clear(); // do not put debug info on constants } - // Collect points-to information for escape analysys - ConnectionGraph *cgr = C->congraph(); - if (cgr != NULL) { - cgr->record_escape(x, this); - } } else { x->destruct(); // Hit, destroy duplicate constant x = k; // use existing constant @@ -648,79 +643,9 @@ //============================================================================= //------------------------------transform-------------------------------------- // Return a node which computes the same function as this node, but in a -// faster or cheaper fashion. The Node passed in here must have no other -// pointers to it, as its storage will be reclaimed if the Node can be -// optimized away. +// faster or cheaper fashion. 
Node *PhaseGVN::transform( Node *n ) { - NOT_PRODUCT( set_transforms(); ) - - // Apply the Ideal call in a loop until it no longer applies - Node *k = n; - NOT_PRODUCT( uint loop_count = 0; ) - while( 1 ) { - Node *i = k->Ideal(this, /*can_reshape=*/false); - if( !i ) break; - assert( i->_idx >= k->_idx, "Idealize should return new nodes, use Identity to return old nodes" ); - // Can never reclaim storage for Ideal calls, because the Ideal call - // returns a new Node, bumping the High Water Mark and our old Node - // is caught behind the new one. - //if( k != i ) { - //k->destruct(); // Reclaim storage for recent node - k = i; - //} - assert(loop_count++ < K, "infinite loop in PhaseGVN::transform"); - } - NOT_PRODUCT( if( loop_count != 0 ) { set_progress(); } ) - - // If brand new node, make space in type array. - ensure_type_or_null(k); - - // Cache result of Value call since it can be expensive - // (abstract interpretation of node 'k' using phase->_types[ inputs ]) - const Type *t = k->Value(this); // Get runtime Value set - assert(t != NULL, "value sanity"); - if (type_or_null(k) != t) { -#ifndef PRODUCT - // Do not record transformation or value construction on first visit - if (type_or_null(k) == NULL) { - inc_new_values(); - set_progress(); - } -#endif - set_type(k, t); - // If k is a TypeNode, capture any more-precise type permanently into Node - k->raise_bottom_type(t); - } - - if( t->singleton() && !k->is_Con() ) { - //k->destruct(); // Reclaim storage for recent node - NOT_PRODUCT( set_progress(); ) - return makecon(t); // Turn into a constant - } - - // Now check for Identities - Node *i = k->Identity(this); // Look for a nearby replacement - if( i != k ) { // Found? Return replacement! - //k->destruct(); // Reclaim storage for recent node - NOT_PRODUCT( set_progress(); ) - return i; - } - - // Try Global Value Numbering - i = hash_find_insert(k); // Found older value when i != NULL - if( i && i != k ) { // Hit? 
Return the old guy - NOT_PRODUCT( set_progress(); ) - return i; - } - - // Collect points-to information for escape analysys - ConnectionGraph *cgr = C->congraph(); - if (cgr != NULL) { - cgr->record_escape(k, this); - } - - // Return Idealized original - return k; + return transform_no_reclaim(n); } //------------------------------transform-------------------------------------- @@ -1309,7 +1234,7 @@ uint use_op = use->Opcode(); // If changed Cast input, check Phi users for simple cycles - if( use->is_ConstraintCast() || use->Opcode() == Op_CheckCastPP ) { + if( use->is_ConstraintCast() || use->is_CheckCastPP() ) { for (DUIterator_Fast i2max, i2 = use->fast_outs(i2max); i2 < i2max; i2++) { Node* u = use->fast_out(i2); if (u->is_Phi()) diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/phaseX.hpp --- a/src/share/vm/opto/phaseX.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/phaseX.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -439,6 +439,13 @@ void add_users_to_worklist0( Node *n ); void add_users_to_worklist ( Node *n ); + // Replace old node with new one. + void replace_node( Node *old, Node *nn ) { + add_users_to_worklist(old); + hash_delete(old); + subsume_node(old, nn); + } + #ifndef PRODUCT protected: // Sub-quadratic implementation of VerifyIterativeGVN. diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/postaloc.cpp --- a/src/share/vm/opto/postaloc.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/postaloc.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -253,7 +253,8 @@ // nodes can represent the same constant so the type and rule of the // MachNode must be checked to ensure equivalence. 
// -bool PhaseChaitin::eliminate_copy_of_constant(Node* val, Block *current_block, +bool PhaseChaitin::eliminate_copy_of_constant(Node* val, Node* n, + Block *current_block, Node_List& value, Node_List& regnd, OptoReg::Name nreg, OptoReg::Name nreg2) { if (value[nreg] != val && val->is_Con() && @@ -269,12 +270,12 @@ // Since they are equivalent the second one if redundant and can // be removed. // - // val will be replaced with the old value but val might have + // n will be replaced with the old value but n might have // kills projections associated with it so remove them now so that // yank_if_dead will be able to elminate the copy once the uses // have been transferred to the old[value]. - for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) { - Node* use = val->fast_out(i); + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node* use = n->fast_out(i); if (use->is_Proj() && use->outcnt() == 0) { // Kill projections have no users and one input use->set_req(0, C->top()); @@ -521,7 +522,7 @@ // then 'n' is a useless copy. Do not update the register->node // mapping so 'n' will go dead. 
if( value[nreg] != val ) { - if (eliminate_copy_of_constant(val, b, value, regnd, nreg, OptoReg::Bad)) { + if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, OptoReg::Bad)) { n->replace_by(regnd[nreg]); j -= yank_if_dead(n,b,&value,®nd); } else { @@ -549,7 +550,7 @@ nreg_lo = tmp.find_first_elem(); } if( value[nreg] != val || value[nreg_lo] != val ) { - if (eliminate_copy_of_constant(n, b, value, regnd, nreg, nreg_lo)) { + if (eliminate_copy_of_constant(val, n, b, value, regnd, nreg, nreg_lo)) { n->replace_by(regnd[nreg]); j -= yank_if_dead(n,b,&value,®nd); } else { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/subnode.cpp --- a/src/share/vm/opto/subnode.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/subnode.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -614,6 +614,13 @@ const TypeOopPtr* p0 = r0->isa_oopptr(); const TypeOopPtr* p1 = r1->isa_oopptr(); if (p0 && p1) { + Node* in1 = in(1)->uncast(); + Node* in2 = in(2)->uncast(); + AllocateNode* alloc1 = AllocateNode::Ideal_allocation(in1, NULL); + AllocateNode* alloc2 = AllocateNode::Ideal_allocation(in2, NULL); + if (MemNode::detect_ptr_independence(in1, alloc1, in2, alloc2, NULL)) { + return TypeInt::CC_GT; // different pointers + } ciKlass* klass0 = p0->klass(); bool xklass0 = p0->klass_is_exact(); ciKlass* klass1 = p1->klass(); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/superword.cpp --- a/src/share/vm/opto/superword.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/superword.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -159,7 +159,8 @@ Node_List memops; for (int i = 0; i < _block.length(); i++) { Node* n = _block.at(i); - if (n->is_Mem() && in_bb(n)) { + if (n->is_Mem() && in_bb(n) && + is_java_primitive(n->as_Mem()->memory_type())) { int align = memory_alignment(n->as_Mem(), 0); if (align != bottom_align) { memops.push(n); @@ -182,8 +183,8 @@ #ifndef PRODUCT if (TraceSuperWord) - tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d", - offset, iv_adjustment, 
align_to_ref_p.memory_size()); + tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d scale = %d iv_stride = %d", + offset, iv_adjustment, align_to_ref_p.memory_size(), align_to_ref_p.scale_in_bytes(), iv_stride()); #endif // Set alignment relative to "align_to_ref" @@ -570,7 +571,7 @@ int SuperWord::data_size(Node* s) { const Type* t = velt_type(s); BasicType bt = t->array_element_basic_type(); - int bsize = type2aelembytes[bt]; + int bsize = type2aelembytes(bt); assert(bsize != 0, "valid size"); return bsize; } @@ -1542,7 +1543,7 @@ Node *pre_opaq1 = pre_end->limit(); assert(pre_opaq1->Opcode() == Op_Opaque1, ""); Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1; - Node *pre_limit = pre_opaq->in(1); + Node *lim0 = pre_opaq->in(1); // Where we put new limit calculations Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl); @@ -1554,64 +1555,116 @@ SWPointer align_to_ref_p(align_to_ref, this); - // Let l0 == original pre_limit, l == new pre_limit, V == v_align + // Given: + // lim0 == original pre loop limit + // V == v_align (power of 2) + // invar == extra invariant piece of the address expression + // e == k [ +/- invar ] + // + // When reassociating expressions involving '%' the basic rules are: + // (a - b) % k == 0 => a % k == b % k + // and: + // (a + b) % k == 0 => a % k == (k - b) % k + // + // For stride > 0 && scale > 0, + // Derive the new pre-loop limit "lim" such that the two constraints: + // (1) lim = lim0 + N (where N is some positive integer < V) + // (2) (e + lim) % V == 0 + // are true. 
+ // + // Substituting (1) into (2), + // (e + lim0 + N) % V == 0 + // solve for N: + // N = (V - (e + lim0)) % V + // substitute back into (1), so that new limit + // lim = lim0 + (V - (e + lim0)) % V // - // For stride > 0 - // Need l such that l > l0 && (l+k)%V == 0 - // Find n such that l = (l0 + n) - // (l0 + n + k) % V == 0 - // n = [V - (l0 + k)%V]%V - // new limit = l0 + [V - (l0 + k)%V]%V - // For stride < 0 - // Need l such that l < l0 && (l+k)%V == 0 - // Find n such that l = (l0 - n) - // (l0 - n + k) % V == 0 - // n = (l0 + k)%V - // new limit = l0 - (l0 + k)%V + // For stride > 0 && scale < 0 + // Constraints: + // lim = lim0 + N + // (e - lim) % V == 0 + // Solving for lim: + // (e - lim0 - N) % V == 0 + // N = (e - lim0) % V + // lim = lim0 + (e - lim0) % V + // + // For stride < 0 && scale > 0 + // Constraints: + // lim = lim0 - N + // (e + lim) % V == 0 + // Solving for lim: + // (e + lim0 - N) % V == 0 + // N = (e + lim0) % V + // lim = lim0 - (e + lim0) % V + // + // For stride < 0 && scale < 0 + // Constraints: + // lim = lim0 - N + // (e - lim) % V == 0 + // Solving for lim: + // (e - lim0 + N) % V == 0 + // N = (V - (e - lim0)) % V + // lim = lim0 - (V - (e - lim0)) % V + int stride = iv_stride(); + int scale = align_to_ref_p.scale_in_bytes(); int elt_size = align_to_ref_p.memory_size(); int v_align = vector_width_in_bytes() / elt_size; int k = align_to_ref_p.offset_in_bytes() / elt_size; Node *kn = _igvn.intcon(k); - Node *limk = new (_phase->C, 3) AddINode(pre_limit, kn); - _phase->_igvn.register_new_node_with_optimizer(limk); - _phase->set_ctrl(limk, pre_ctrl); + + Node *e = kn; if (align_to_ref_p.invar() != NULL) { + // incorporate any extra invariant piece producing k +/- invar >>> log2(elt) Node* log2_elt = _igvn.intcon(exact_log2(elt_size)); Node* aref = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt); _phase->_igvn.register_new_node_with_optimizer(aref); _phase->set_ctrl(aref, pre_ctrl); - if 
(!align_to_ref_p.negate_invar()) { - limk = new (_phase->C, 3) AddINode(limk, aref); + if (align_to_ref_p.negate_invar()) { + e = new (_phase->C, 3) SubINode(e, aref); } else { - limk = new (_phase->C, 3) SubINode(limk, aref); + e = new (_phase->C, 3) AddINode(e, aref); } - _phase->_igvn.register_new_node_with_optimizer(limk); - _phase->set_ctrl(limk, pre_ctrl); + _phase->_igvn.register_new_node_with_optimizer(e); + _phase->set_ctrl(e, pre_ctrl); } - Node* va_msk = _igvn.intcon(v_align - 1); - Node* n = new (_phase->C, 3) AndINode(limk, va_msk); - _phase->_igvn.register_new_node_with_optimizer(n); - _phase->set_ctrl(n, pre_ctrl); - Node* newlim; - if (iv_stride() > 0) { + + // compute e +/- lim0 + if (scale < 0) { + e = new (_phase->C, 3) SubINode(e, lim0); + } else { + e = new (_phase->C, 3) AddINode(e, lim0); + } + _phase->_igvn.register_new_node_with_optimizer(e); + _phase->set_ctrl(e, pre_ctrl); + + if (stride * scale > 0) { + // compute V - (e +/- lim0) Node* va = _igvn.intcon(v_align); - Node* adj = new (_phase->C, 3) SubINode(va, n); - _phase->_igvn.register_new_node_with_optimizer(adj); - _phase->set_ctrl(adj, pre_ctrl); - Node* adj2 = new (_phase->C, 3) AndINode(adj, va_msk); - _phase->_igvn.register_new_node_with_optimizer(adj2); - _phase->set_ctrl(adj2, pre_ctrl); - newlim = new (_phase->C, 3) AddINode(pre_limit, adj2); + e = new (_phase->C, 3) SubINode(va, e); + _phase->_igvn.register_new_node_with_optimizer(e); + _phase->set_ctrl(e, pre_ctrl); + } + // compute N = (exp) % V + Node* va_msk = _igvn.intcon(v_align - 1); + Node* N = new (_phase->C, 3) AndINode(e, va_msk); + _phase->_igvn.register_new_node_with_optimizer(N); + _phase->set_ctrl(N, pre_ctrl); + + // substitute back into (1), so that new limit + // lim = lim0 + N + Node* lim; + if (stride < 0) { + lim = new (_phase->C, 3) SubINode(lim0, N); } else { - newlim = new (_phase->C, 3) SubINode(pre_limit, n); + lim = new (_phase->C, 3) AddINode(lim0, N); } - 
_phase->_igvn.register_new_node_with_optimizer(newlim); - _phase->set_ctrl(newlim, pre_ctrl); + _phase->_igvn.register_new_node_with_optimizer(lim); + _phase->set_ctrl(lim, pre_ctrl); Node* constrained = - (iv_stride() > 0) ? (Node*) new (_phase->C,3) MinINode(newlim, orig_limit) - : (Node*) new (_phase->C,3) MaxINode(newlim, orig_limit); + (stride > 0) ? (Node*) new (_phase->C,3) MinINode(lim, orig_limit) + : (Node*) new (_phase->C,3) MaxINode(lim, orig_limit); _phase->_igvn.register_new_node_with_optimizer(constrained); _phase->set_ctrl(constrained, pre_ctrl); _igvn.hash_delete(pre_opaq); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/type.cpp --- a/src/share/vm/opto/type.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/type.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -3164,7 +3164,7 @@ case TopPTR: // Compute new klass on demand, do not use tap->_klass xk = (tap->_klass_is_exact | this->_klass_is_exact); - return make( ptr, const_oop(), tary, lazy_klass, xk, off ); + return make( ptr, const_oop(), tary, lazy_klass, xk, off, iid ); case Constant: { ciObject* o = const_oop(); if( _ptr == Constant ) { @@ -3176,7 +3176,7 @@ o = tap->const_oop(); } xk = true; - return TypeAryPtr::make( ptr, o, tary, tap->_klass, xk, off ); + return TypeAryPtr::make( ptr, o, tary, tap->_klass, xk, off, iid ); } case NotNull: case BotPTR: @@ -3263,14 +3263,21 @@ break; } - st->print("*"); + if( _offset != 0 ) { + int header_size = objArrayOopDesc::header_size() * wordSize; + if( _offset == OffsetTop ) st->print("+undefined"); + else if( _offset == OffsetBot ) st->print("+any"); + else if( _offset < header_size ) st->print("+%d", _offset); + else { + BasicType basic_elem_type = elem()->basic_type(); + int array_base = arrayOopDesc::base_offset_in_bytes(basic_elem_type); + int elem_size = type2aelembytes(basic_elem_type); + st->print("[%d]", (_offset - array_base)/elem_size); + } + } + st->print(" *"); if (_instance_id != UNKNOWN_INSTANCE) st->print(",iid=%d",_instance_id); - 
if( !_offset ) return; - if( _offset == OffsetTop ) st->print("+undefined"); - else if( _offset == OffsetBot ) st->print("+any"); - else if( _offset < 12 ) st->print("+%d",_offset); - else st->print("[%d]", (_offset-12)/4 ); } #endif diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/type.hpp --- a/src/share/vm/opto/type.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/type.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -686,6 +686,7 @@ bool klass_is_exact() const { return _klass_is_exact; } bool is_instance() const { return _instance_id != UNKNOWN_INSTANCE; } uint instance_id() const { return _instance_id; } + bool is_instance_field() const { return _instance_id != UNKNOWN_INSTANCE && _offset >= 0; } virtual intptr_t get_con() const; @@ -1070,6 +1071,7 @@ #define LShiftXNode LShiftLNode // For object size computation: #define AddXNode AddLNode +#define RShiftXNode RShiftLNode // For card marks and hashcodes #define URShiftXNode URShiftLNode // Opcodes @@ -1108,6 +1110,7 @@ #define LShiftXNode LShiftINode // For object size computation: #define AddXNode AddINode +#define RShiftXNode RShiftINode // For card marks and hashcodes #define URShiftXNode URShiftINode // Opcodes diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/vectornode.cpp --- a/src/share/vm/opto/vectornode.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/vectornode.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -135,7 +135,7 @@ int mid = lo + ct/2; Node* n1 = ct == 2 ? in(lo) : binaryTreePack(C, lo, mid); Node* n2 = ct == 2 ? 
in(lo+1) : binaryTreePack(C, mid, hi ); - int rslt_bsize = ct * type2aelembytes[elt_basic_type()]; + int rslt_bsize = ct * type2aelembytes(elt_basic_type()); if (bottom_type()->is_floatingpoint()) { switch (rslt_bsize) { case 8: return new (C, 3) PackFNode(n1, n2); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/opto/vectornode.hpp --- a/src/share/vm/opto/vectornode.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/opto/vectornode.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -48,7 +48,7 @@ uint length() const { return _length; } // Vector length static uint max_vlen(BasicType bt) { // max vector length - return (uint)(Matcher::vector_width_in_bytes() / type2aelembytes[bt]); + return (uint)(Matcher::vector_width_in_bytes() / type2aelembytes(bt)); } // Element and vector type @@ -392,7 +392,7 @@ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); } virtual BasicType memory_type() const { return T_VOID; } - virtual int memory_size() const { return length()*type2aelembytes[elt_basic_type()]; } + virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); } // Vector opcode from scalar opcode static int opcode(int sopc, uint vlen); @@ -620,7 +620,7 @@ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); } virtual BasicType memory_type() const { return T_VOID; } - virtual int memory_size() const { return length()*type2aelembytes[elt_basic_type()]; } + virtual int memory_size() const { return length()*type2aelembytes(elt_basic_type()); } // Vector opcode from scalar opcode static int opcode(int sopc, uint vlen); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/prims/jvmtiRedefineClassesTrace.hpp --- a/src/share/vm/prims/jvmtiRedefineClassesTrace.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/prims/jvmtiRedefineClassesTrace.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -64,7 +64,7 @@ // 0x01000000 | 16777216 - impl details: nmethod evolution info // 0x02000000 | 33554432 - impl details: annotation updates // 
0x04000000 | 67108864 - impl details: StackMapTable updates -// 0x08000000 | 134217728 - unused +// 0x08000000 | 134217728 - impl details: OopMapCache updates // 0x10000000 | 268435456 - unused // 0x20000000 | 536870912 - unused // 0x40000000 | 1073741824 - unused diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/arguments.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -924,10 +924,18 @@ void Arguments::set_parnew_gc_flags() { assert(!UseSerialGC && !UseParallelGC, "control point invariant"); + // Turn off AdaptiveSizePolicy by default for parnew until it is + // complete. + if (UseParNewGC && + FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) { + FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false); + } + if (FLAG_IS_DEFAULT(UseParNewGC) && ParallelGCThreads > 1) { FLAG_SET_DEFAULT(UseParNewGC, true); } else if (UseParNewGC && ParallelGCThreads == 0) { - FLAG_SET_DEFAULT(ParallelGCThreads, nof_parallel_gc_threads()); + FLAG_SET_DEFAULT(ParallelGCThreads, + Abstract_VM_Version::parallel_worker_threads()); if (FLAG_IS_DEFAULT(ParallelGCThreads) && ParallelGCThreads == 1) { FLAG_SET_DEFAULT(UseParNewGC, false); } @@ -956,25 +964,6 @@ } } -// CAUTION: this code is currently shared by UseParallelGC, UseParNewGC and -// UseconcMarkSweepGC. Further tuning of individual collectors might -// dictate refinement on a per-collector basis. -int Arguments::nof_parallel_gc_threads() { - if (FLAG_IS_DEFAULT(ParallelGCThreads)) { - // For very large machines, there are diminishing returns - // for large numbers of worker threads. Instead of - // hogging the whole system, use 5/8ths of a worker for every - // processor after the first 8. For example, on a 72 cpu - // machine use 8 + (72 - 8) * (5/8) == 48 worker threads. - // This is just a start and needs further tuning and study in - // Tiger. - int ncpus = os::active_processor_count(); - return (ncpus <= 8) ? 
ncpus : 3 + ((ncpus * 5) / 8); - } else { - return ParallelGCThreads; - } -} - // Adjust some sizes to suit CMS and/or ParNew needs; these work well on // sparc/solaris for certain applications, but would gain from // further optimization and tuning efforts, and would almost @@ -984,26 +973,24 @@ return; } + assert(UseConcMarkSweepGC, "CMS is expected to be on here"); + // If we are using CMS, we prefer to UseParNewGC, // unless explicitly forbidden. - if (UseConcMarkSweepGC && !UseParNewGC && FLAG_IS_DEFAULT(UseParNewGC)) { - FLAG_SET_DEFAULT(UseParNewGC, true); + if (!UseParNewGC && FLAG_IS_DEFAULT(UseParNewGC)) { + FLAG_SET_ERGO(bool, UseParNewGC, true); } // Turn off AdaptiveSizePolicy by default for cms until it is - // complete. Also turn it off in general if the - // parnew collector has been selected. - if ((UseConcMarkSweepGC || UseParNewGC) && - FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) { + // complete. + if (FLAG_IS_DEFAULT(UseAdaptiveSizePolicy)) { FLAG_SET_DEFAULT(UseAdaptiveSizePolicy, false); } // In either case, adjust ParallelGCThreads and/or UseParNewGC // as needed. 
- set_parnew_gc_flags(); - - if (!UseConcMarkSweepGC) { - return; + if (UseParNewGC) { + set_parnew_gc_flags(); } // Now make adjustments for CMS @@ -1013,7 +1000,7 @@ intx tenuring_default; if (CMSUseOldDefaults) { // old defaults: "old" as of 6.0 if FLAG_IS_DEFAULT(CMSYoungGenPerWorker) { - FLAG_SET_DEFAULT(CMSYoungGenPerWorker, 4*M); + FLAG_SET_ERGO(intx, CMSYoungGenPerWorker, 4*M); } young_gen_per_worker = 4*M; new_ratio = (intx)15; @@ -1038,16 +1025,20 @@ // for "short" pauses ~ 4M*ParallelGCThreads if (FLAG_IS_DEFAULT(MaxNewSize)) { // MaxNewSize not set at command-line if (!FLAG_IS_DEFAULT(NewSize)) { // NewSize explicitly set at command-line - FLAG_SET_DEFAULT(MaxNewSize, MAX2(NewSize, preferred_max_new_size)); + FLAG_SET_ERGO(uintx, MaxNewSize, MAX2(NewSize, preferred_max_new_size)); } else { - FLAG_SET_DEFAULT(MaxNewSize, preferred_max_new_size); + FLAG_SET_ERGO(uintx, MaxNewSize, preferred_max_new_size); } + if(PrintGCDetails && Verbose) { + // Too early to use gclog_or_tty + tty->print_cr("Ergo set MaxNewSize: " SIZE_FORMAT, MaxNewSize); + } } // Unless explicitly requested otherwise, prefer a large // Old to Young gen size so as to shift the collection load // to the old generation concurrent collector if (FLAG_IS_DEFAULT(NewRatio)) { - FLAG_SET_DEFAULT(NewRatio, MAX2(NewRatio, new_ratio)); + FLAG_SET_ERGO(intx, NewRatio, MAX2(NewRatio, new_ratio)); size_t min_new = align_size_up(ScaleForWordSize(min_new_default), os::vm_page_size()); size_t prev_initial_size = initial_heap_size(); @@ -1065,19 +1056,34 @@ size_t max_heap = align_size_down(MaxHeapSize, CardTableRS::ct_max_alignment_constraint()); + if(PrintGCDetails && Verbose) { + // Too early to use gclog_or_tty + tty->print_cr("CMS set min_heap_size: " SIZE_FORMAT + " initial_heap_size: " SIZE_FORMAT + " max_heap: " SIZE_FORMAT, + min_heap_size(), initial_heap_size(), max_heap); + } if (max_heap > min_new) { // Unless explicitly requested otherwise, make young gen // at least min_new, and at most 
preferred_max_new_size. if (FLAG_IS_DEFAULT(NewSize)) { - FLAG_SET_DEFAULT(NewSize, MAX2(NewSize, min_new)); - FLAG_SET_DEFAULT(NewSize, MIN2(preferred_max_new_size, NewSize)); + FLAG_SET_ERGO(uintx, NewSize, MAX2(NewSize, min_new)); + FLAG_SET_ERGO(uintx, NewSize, MIN2(preferred_max_new_size, NewSize)); + if(PrintGCDetails && Verbose) { + // Too early to use gclog_or_tty + tty->print_cr("Ergo set NewSize: " SIZE_FORMAT, NewSize); + } } // Unless explicitly requested otherwise, size old gen // so that it's at least 3X of NewSize to begin with; // later NewRatio will decide how it grows; see above. if (FLAG_IS_DEFAULT(OldSize)) { if (max_heap > NewSize) { - FLAG_SET_DEFAULT(OldSize, MIN2(3*NewSize, max_heap - NewSize)); + FLAG_SET_ERGO(uintx, OldSize, MIN2(3*NewSize, max_heap - NewSize)); + if(PrintGCDetails && Verbose) { + // Too early to use gclog_or_tty + tty->print_cr("Ergo set OldSize: " SIZE_FORMAT, OldSize); + } } } } @@ -1086,14 +1092,14 @@ // promote all objects surviving "tenuring_default" scavenges. if (FLAG_IS_DEFAULT(MaxTenuringThreshold) && FLAG_IS_DEFAULT(SurvivorRatio)) { - FLAG_SET_DEFAULT(MaxTenuringThreshold, tenuring_default); + FLAG_SET_ERGO(intx, MaxTenuringThreshold, tenuring_default); } // If we decided above (or user explicitly requested) // `promote all' (via MaxTenuringThreshold := 0), // prefer minuscule survivor spaces so as not to waste // space for (non-existent) survivors if (FLAG_IS_DEFAULT(SurvivorRatio) && MaxTenuringThreshold == 0) { - FLAG_SET_DEFAULT(SurvivorRatio, MAX2((intx)1024, SurvivorRatio)); + FLAG_SET_ERGO(intx, SurvivorRatio, MAX2((intx)1024, SurvivorRatio)); } // If OldPLABSize is set and CMSParPromoteBlocksToClaim is not, // set CMSParPromoteBlocksToClaim equal to OldPLABSize. @@ -1102,7 +1108,11 @@ // See CR 6362902. 
if (!FLAG_IS_DEFAULT(OldPLABSize)) { if (FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim)) { - FLAG_SET_CMDLINE(uintx, CMSParPromoteBlocksToClaim, OldPLABSize); + // OldPLABSize is not the default value but CMSParPromoteBlocksToClaim + // is. In this situation let CMSParPromoteBlocksToClaim follow + // the value (either from the command line or ergonomics) of + // OldPLABSize. Following OldPLABSize is an ergonomics decision. + FLAG_SET_ERGO(uintx, CMSParPromoteBlocksToClaim, OldPLABSize); } else { // OldPLABSize and CMSParPromoteBlocksToClaim are both set. @@ -1147,17 +1157,11 @@ FLAG_IS_DEFAULT(UseParallelGC)) { if (should_auto_select_low_pause_collector()) { FLAG_SET_ERGO(bool, UseConcMarkSweepGC, true); - set_cms_and_parnew_gc_flags(); } else { FLAG_SET_ERGO(bool, UseParallelGC, true); } no_shared_spaces(); } - - // This is here because the parallel collector could - // have been selected so this initialization should - // still be done. - set_parallel_gc_flags(); } } @@ -1170,6 +1174,9 @@ // If no heap maximum was requested explicitly, use some reasonable fraction // of the physical memory, up to a maximum of 1GB. if (UseParallelGC) { + FLAG_SET_ERGO(uintx, ParallelGCThreads, + Abstract_VM_Version::parallel_worker_threads()); + if (FLAG_IS_DEFAULT(MaxHeapSize)) { const uint64_t reasonable_fraction = os::physical_memory() / DefaultMaxRAMFraction; @@ -1227,12 +1234,13 @@ if (UseParallelOldGC) { // Par compact uses lower default values since they are treated as - // minimums. + // minimums. These are different defaults because of the different + // interpretation and are not ergonomically set. 
if (FLAG_IS_DEFAULT(MarkSweepDeadRatio)) { - MarkSweepDeadRatio = 1; + FLAG_SET_DEFAULT(MarkSweepDeadRatio, 1); } if (FLAG_IS_DEFAULT(PermMarkSweepDeadRatio)) { - PermMarkSweepDeadRatio = 5; + FLAG_SET_DEFAULT(PermMarkSweepDeadRatio, 5); } } } @@ -1254,13 +1262,30 @@ // Aggressive optimization flags -XX:+AggressiveOpts void Arguments::set_aggressive_opts_flags() { +#ifdef COMPILER2 + if (AggressiveOpts || !FLAG_IS_DEFAULT(AutoBoxCacheMax)) { + if (FLAG_IS_DEFAULT(EliminateAutoBox)) { + FLAG_SET_DEFAULT(EliminateAutoBox, true); + } + if (FLAG_IS_DEFAULT(AutoBoxCacheMax)) { + FLAG_SET_DEFAULT(AutoBoxCacheMax, 20000); + } + + // Feed the cache size setting into the JDK + char buffer[1024]; + sprintf(buffer, "java.lang.Integer.IntegerCache.high=%d", AutoBoxCacheMax); + add_property(buffer); + } + if (AggressiveOpts && FLAG_IS_DEFAULT(DoEscapeAnalysis)) { + FLAG_SET_DEFAULT(DoEscapeAnalysis, true); + } +#endif + if (AggressiveOpts) { -NOT_WINDOWS( - // No measured benefit on Windows - if (FLAG_IS_DEFAULT(CacheTimeMillis)) { - FLAG_SET_DEFAULT(CacheTimeMillis, true); - } -) +// Sample flag setting code +// if (FLAG_IS_DEFAULT(EliminateZeroing)) { +// FLAG_SET_DEFAULT(EliminateZeroing, true); +// } } } @@ -1312,6 +1337,31 @@ UseParallelOldGC)); } +// Check consistency of GC selection +bool Arguments::check_gc_consistency() { + bool status = true; + // Ensure that the user has not selected conflicting sets + // of collectors. [Note: this check is merely a user convenience; + // collectors over-ride each other so that only a non-conflicting + // set is selected; however what the user gets is not what they + // may have expected from the combination they asked for. It's + // better to reduce user confusion by not allowing them to + // select conflicting combinations. 
+ uint i = 0; + if (UseSerialGC) i++; + if (UseConcMarkSweepGC || UseParNewGC) i++; + if (UseParallelGC || UseParallelOldGC) i++; + if (i > 1) { + jio_fprintf(defaultStream::error_stream(), + "Conflicting collector combinations in option list; " + "please refer to the release notes for the combinations " + "allowed\n"); + status = false; + } + + return status; +} + // Check the consistency of vm_init_args bool Arguments::check_vm_args_consistency() { // Method for adding checks for flag consistency. @@ -1354,14 +1404,14 @@ status = false; } - status &= verify_percentage(MaxLiveObjectEvacuationRatio, + status = status && verify_percentage(MaxLiveObjectEvacuationRatio, "MaxLiveObjectEvacuationRatio"); - status &= verify_percentage(AdaptiveSizePolicyWeight, + status = status && verify_percentage(AdaptiveSizePolicyWeight, "AdaptiveSizePolicyWeight"); - status &= verify_percentage(AdaptivePermSizeWeight, "AdaptivePermSizeWeight"); - status &= verify_percentage(ThresholdTolerance, "ThresholdTolerance"); - status &= verify_percentage(MinHeapFreeRatio, "MinHeapFreeRatio"); - status &= verify_percentage(MaxHeapFreeRatio, "MaxHeapFreeRatio"); + status = status && verify_percentage(AdaptivePermSizeWeight, "AdaptivePermSizeWeight"); + status = status && verify_percentage(ThresholdTolerance, "ThresholdTolerance"); + status = status && verify_percentage(MinHeapFreeRatio, "MinHeapFreeRatio"); + status = status && verify_percentage(MaxHeapFreeRatio, "MaxHeapFreeRatio"); if (MinHeapFreeRatio > MaxHeapFreeRatio) { jio_fprintf(defaultStream::error_stream(), @@ -1377,14 +1427,14 @@ MarkSweepAlwaysCompactCount = 1; // Move objects every gc. 
} - status &= verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit"); - status &= verify_percentage(GCTimeLimit, "GCTimeLimit"); + status = status && verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit"); + status = status && verify_percentage(GCTimeLimit, "GCTimeLimit"); if (GCTimeLimit == 100) { // Turn off gc-overhead-limit-exceeded checks FLAG_SET_DEFAULT(UseGCOverheadLimit, false); } - status &= verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit"); + status = status && verify_percentage(GCHeapFreeLimit, "GCHeapFreeLimit"); // Check user specified sharing option conflict with Parallel GC bool cannot_share = (UseConcMarkSweepGC || UseParallelGC || @@ -1402,24 +1452,7 @@ } } - // Ensure that the user has not selected conflicting sets - // of collectors. [Note: this check is merely a user convenience; - // collectors over-ride each other so that only a non-conflicting - // set is selected; however what the user gets is not what they - // may have expected from the combination they asked for. It's - // better to reduce user confusion by not allowing them to - // select conflicting combinations. 
- uint i = 0; - if (UseSerialGC) i++; - if (UseConcMarkSweepGC || UseParNewGC) i++; - if (UseParallelGC || UseParallelOldGC) i++; - if (i > 1) { - jio_fprintf(defaultStream::error_stream(), - "Conflicting collector combinations in option list; " - "please refer to the release notes for the combinations " - "allowed\n"); - status = false; - } + status = status && check_gc_consistency(); if (_has_alloc_profile) { if (UseParallelGC || UseParallelOldGC) { @@ -1451,15 +1484,15 @@ "allocation buffers\n(-XX:+UseTLAB).\n"); status = false; } else { - status &= verify_percentage(CMSIncrementalDutyCycle, + status = status && verify_percentage(CMSIncrementalDutyCycle, "CMSIncrementalDutyCycle"); - status &= verify_percentage(CMSIncrementalDutyCycleMin, + status = status && verify_percentage(CMSIncrementalDutyCycleMin, "CMSIncrementalDutyCycleMin"); - status &= verify_percentage(CMSIncrementalSafetyFactor, + status = status && verify_percentage(CMSIncrementalSafetyFactor, "CMSIncrementalSafetyFactor"); - status &= verify_percentage(CMSIncrementalOffset, + status = status && verify_percentage(CMSIncrementalOffset, "CMSIncrementalOffset"); - status &= verify_percentage(CMSExpAvgFactor, + status = status && verify_percentage(CMSExpAvgFactor, "CMSExpAvgFactor"); // If it was not set on the command line, set // CMSInitiatingOccupancyFraction to 1 so icms can initiate cycles early. @@ -2064,7 +2097,8 @@ // Enable parallel GC and adaptive generation sizing FLAG_SET_CMDLINE(bool, UseParallelGC, true); - FLAG_SET_DEFAULT(ParallelGCThreads, nof_parallel_gc_threads()); + FLAG_SET_DEFAULT(ParallelGCThreads, + Abstract_VM_Version::parallel_worker_threads()); // Encourage steady state memory management FLAG_SET_CMDLINE(uintx, ThresholdTolerance, 100); @@ -2451,15 +2485,25 @@ no_shared_spaces(); #endif // KERNEL - // Set some flags for ParallelGC if needed. - set_parallel_gc_flags(); - - // Set some flags for CMS and/or ParNew collectors, as needed. 
- set_cms_and_parnew_gc_flags(); - // Set flags based on ergonomics. set_ergonomics_flags(); + // Check the GC selections again. + if (!check_gc_consistency()) { + return JNI_EINVAL; + } + + if (UseParallelGC || UseParallelOldGC) { + // Set some flags for ParallelGC if needed. + set_parallel_gc_flags(); + } else if (UseConcMarkSweepGC) { + // Set some flags for CMS + set_cms_and_parnew_gc_flags(); + } else if (UseParNewGC) { + // Set some flags for ParNew + set_parnew_gc_flags(); + } + #ifdef SERIALGC assert(verify_serial_gc_flags(), "SerialGC unset"); #endif // SERIALGC @@ -2479,6 +2523,12 @@ CommandLineFlags::printSetFlags(); } +#ifdef ASSERT + if (PrintFlagsFinal) { + CommandLineFlags::printFlags(); + } +#endif + return JNI_OK; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/arguments.hpp --- a/src/share/vm/runtime/arguments.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/arguments.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -291,8 +291,6 @@ static bool _CIDynamicCompilePriority; static intx _Tier2CompileThreshold; - // GC processing - static int nof_parallel_gc_threads(); // CMS/ParNew garbage collectors static void set_parnew_gc_flags(); static void set_cms_and_parnew_gc_flags(); @@ -385,6 +383,8 @@ public: // Parses the arguments static jint parse(const JavaVMInitArgs* args); + // Check for consistency in the selection of the garbage collector. + static bool check_gc_consistency(); // Check consistecy or otherwise of VM argument settings static bool check_vm_args_consistency(); // Used by os_solaris diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/biasedLocking.cpp --- a/src/share/vm/runtime/biasedLocking.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/biasedLocking.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1,4 +1,3 @@ - /* * Copyright 2005-2007 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
@@ -37,9 +36,14 @@ } class VM_EnableBiasedLocking: public VM_Operation { + private: + bool _is_cheap_allocated; public: - VM_EnableBiasedLocking() {} - VMOp_Type type() const { return VMOp_EnableBiasedLocking; } + VM_EnableBiasedLocking(bool is_cheap_allocated) { _is_cheap_allocated = is_cheap_allocated; } + VMOp_Type type() const { return VMOp_EnableBiasedLocking; } + Mode evaluation_mode() const { return _is_cheap_allocated ? _async_safepoint : _safepoint; } + bool is_cheap_allocated() const { return _is_cheap_allocated; } + void doit() { // Iterate the system dictionary enabling biased locking for all // currently loaded classes @@ -62,8 +66,10 @@ EnableBiasedLockingTask(size_t interval_time) : PeriodicTask(interval_time) {} virtual void task() { - VM_EnableBiasedLocking op; - VMThread::execute(&op); + // Use async VM operation to avoid blocking the Watcher thread. + // VM Thread will free C heap storage. + VM_EnableBiasedLocking *op = new VM_EnableBiasedLocking(true); + VMThread::execute(op); // Reclaim our storage and disenroll ourself delete this; @@ -84,7 +90,7 @@ EnableBiasedLockingTask* task = new EnableBiasedLockingTask(BiasedLockingStartupDelay); task->enroll(); } else { - VM_EnableBiasedLocking op; + VM_EnableBiasedLocking op(false); VMThread::execute(&op); } } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/deoptimization.cpp --- a/src/share/vm/runtime/deoptimization.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/deoptimization.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -141,41 +141,53 @@ #ifdef COMPILER2 // Reallocate the non-escaping objects and restore their fields. Then // relock objects if synchronization on them was eliminated. 
- if (DoEscapeAnalysis && EliminateAllocations) { - GrowableArray* objects = chunk->at(0)->scope()->objects(); - bool reallocated = false; - if (objects != NULL) { - JRT_BLOCK - reallocated = realloc_objects(thread, &deoptee, objects, THREAD); - JRT_END - } - if (reallocated) { - reassign_fields(&deoptee, &map, objects); -#ifndef PRODUCT - if (TraceDeoptimization) { - ttyLocker ttyl; - tty->print_cr("REALLOC OBJECTS in thread " INTPTR_FORMAT, thread); - print_objects(objects); + if (DoEscapeAnalysis) { + if (EliminateAllocations) { + assert (chunk->at(0)->scope() != NULL,"expect only compiled java frames"); + GrowableArray* objects = chunk->at(0)->scope()->objects(); + bool reallocated = false; + if (objects != NULL) { + JRT_BLOCK + reallocated = realloc_objects(thread, &deoptee, objects, THREAD); + JRT_END } -#endif - } - for (int i = 0; i < chunk->length(); i++) { - GrowableArray* monitors = chunk->at(i)->scope()->monitors(); - if (monitors != NULL) { - relock_objects(&deoptee, &map, monitors); + if (reallocated) { + reassign_fields(&deoptee, &map, objects); #ifndef PRODUCT if (TraceDeoptimization) { ttyLocker ttyl; - tty->print_cr("RELOCK OBJECTS in thread " INTPTR_FORMAT, thread); - for (int j = 0; i < monitors->length(); i++) { - MonitorValue* mv = monitors->at(i); - if (mv->eliminated()) { - StackValue* owner = StackValue::create_stack_value(&deoptee, &map, mv->owner()); - tty->print_cr(" object <" INTPTR_FORMAT "> locked", owner->get_obj()()); + tty->print_cr("REALLOC OBJECTS in thread " INTPTR_FORMAT, thread); + print_objects(objects); + } +#endif + } + } + if (EliminateLocks) { +#ifndef PRODUCT + bool first = true; +#endif + for (int i = 0; i < chunk->length(); i++) { + compiledVFrame* cvf = chunk->at(i); + assert (cvf->scope() != NULL,"expect only compiled java frames"); + GrowableArray* monitors = cvf->monitors(); + if (monitors->is_nonempty()) { + relock_objects(monitors, thread); +#ifndef PRODUCT + if (TraceDeoptimization) { + ttyLocker ttyl; + for 
(int j = 0; j < monitors->length(); j++) { + MonitorInfo* mi = monitors->at(j); + if (mi->eliminated()) { + if (first) { + first = false; + tty->print_cr("RELOCK OBJECTS in thread " INTPTR_FORMAT, thread); + } + tty->print_cr(" object <" INTPTR_FORMAT "> locked", mi->owner()); + } } } +#endif } -#endif } } } @@ -656,6 +668,7 @@ void do_field(fieldDescriptor* fd) { + intptr_t val; StackValue* value = StackValue::create_stack_value(_fr, _reg_map, _sv->field_at(i())); int offset = fd->offset(); @@ -669,24 +682,36 @@ assert(value->type() == T_INT, "Agreement."); StackValue* low = StackValue::create_stack_value(_fr, _reg_map, _sv->field_at(++_i)); +#ifdef _LP64 + jlong res = (jlong)low->get_int(); +#else +#ifdef SPARC + // For SPARC we have to swap high and low words. + jlong res = jlong_from((jint)low->get_int(), (jint)value->get_int()); +#else jlong res = jlong_from((jint)value->get_int(), (jint)low->get_int()); +#endif //SPARC +#endif _obj->long_field_put(offset, res); break; } - + // Have to cast to INT (32 bits) pointer to avoid little/big-endian problem. case T_INT: case T_FLOAT: // 4 bytes. 
assert(value->type() == T_INT, "Agreement."); - _obj->int_field_put(offset, (jint)value->get_int()); + val = value->get_int(); + _obj->int_field_put(offset, (jint)*((jint*)&val)); break; case T_SHORT: case T_CHAR: // 2 bytes assert(value->type() == T_INT, "Agreement."); - _obj->short_field_put(offset, (jshort)value->get_int()); + val = value->get_int(); + _obj->short_field_put(offset, (jshort)*((jint*)&val)); break; - case T_BOOLEAN: // 1 byte + case T_BOOLEAN: case T_BYTE: // 1 byte assert(value->type() == T_INT, "Agreement."); - _obj->bool_field_put(offset, (jboolean)value->get_int()); + val = value->get_int(); + _obj->bool_field_put(offset, (jboolean)*((jint*)&val)); break; default: @@ -698,25 +723,49 @@ // restore elements of an eliminated type array void Deoptimization::reassign_type_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, typeArrayOop obj, BasicType type) { - StackValue* low; - jlong lval; int index = 0; + intptr_t val; for (int i = 0; i < sv->field_size(); i++) { StackValue* value = StackValue::create_stack_value(fr, reg_map, sv->field_at(i)); switch(type) { - case T_BOOLEAN: obj->bool_at_put (index, (jboolean) value->get_int()); break; - case T_BYTE: obj->byte_at_put (index, (jbyte) value->get_int()); break; - case T_CHAR: obj->char_at_put (index, (jchar) value->get_int()); break; - case T_SHORT: obj->short_at_put(index, (jshort) value->get_int()); break; - case T_INT: obj->int_at_put (index, (jint) value->get_int()); break; - case T_FLOAT: obj->float_at_put(index, (jfloat) value->get_int()); break; - case T_LONG: - case T_DOUBLE: - low = StackValue::create_stack_value(fr, reg_map, sv->field_at(++i)); - lval = jlong_from((jint)value->get_int(), (jint)low->get_int()); - sv->value()->long_field_put(index, lval); - break; + case T_LONG: case T_DOUBLE: { + assert(value->type() == T_INT, "Agreement."); + StackValue* low = + StackValue::create_stack_value(fr, reg_map, sv->field_at(++i)); +#ifdef _LP64 + jlong res = (jlong)low->get_int(); 
+#else +#ifdef SPARC + // For SPARC we have to swap high and low words. + jlong res = jlong_from((jint)low->get_int(), (jint)value->get_int()); +#else + jlong res = jlong_from((jint)value->get_int(), (jint)low->get_int()); +#endif //SPARC +#endif + obj->long_at_put(index, res); + break; + } + + // Have to cast to INT (32 bits) pointer to avoid little/big-endian problem. + case T_INT: case T_FLOAT: // 4 bytes. + assert(value->type() == T_INT, "Agreement."); + val = value->get_int(); + obj->int_at_put(index, (jint)*((jint*)&val)); + break; + + case T_SHORT: case T_CHAR: // 2 bytes + assert(value->type() == T_INT, "Agreement."); + val = value->get_int(); + obj->short_at_put(index, (jshort)*((jint*)&val)); + break; + + case T_BOOLEAN: case T_BYTE: // 1 byte + assert(value->type() == T_INT, "Agreement."); + val = value->get_int(); + obj->bool_at_put(index, (jboolean)*((jint*)&val)); + break; + default: ShouldNotReachHere(); } @@ -758,18 +807,27 @@ // relock objects for which synchronization was eliminated -void Deoptimization::relock_objects(frame* fr, RegisterMap* reg_map, GrowableArray* monitors) { +void Deoptimization::relock_objects(GrowableArray* monitors, JavaThread* thread) { for (int i = 0; i < monitors->length(); i++) { - MonitorValue* mv = monitors->at(i); - StackValue* owner = StackValue::create_stack_value(fr, reg_map, mv->owner()); - if (mv->eliminated()) { - Handle obj = owner->get_obj(); - assert(obj.not_null(), "reallocation was missed"); - BasicLock* lock = StackValue::resolve_monitor_lock(fr, mv->basic_lock()); - lock->set_displaced_header(obj->mark()); - obj->set_mark((markOop) lock); + MonitorInfo* mon_info = monitors->at(i); + if (mon_info->eliminated()) { + assert(mon_info->owner() != NULL, "reallocation was missed"); + Handle obj = Handle(mon_info->owner()); + markOop mark = obj->mark(); + if (UseBiasedLocking && mark->has_bias_pattern()) { + // New allocated objects may have the mark set to anonymously biased. 
+ // Also the deoptimized method may have called methods with synchronization + // where the thread-local object is bias locked to the current thread. + assert(mark->is_biased_anonymously() || + mark->biased_locker() == thread, "should be locked to current thread"); + // Reset mark word to unbiased prototype. + markOop unbiased_prototype = markOopDesc::prototype()->set_age(mark->age()); + obj->set_mark(unbiased_prototype); + } + BasicLock* lock = mon_info->lock(); + ObjectSynchronizer::slow_enter(obj, lock, thread); } - assert(owner->get_obj()->is_locked(), "object must be locked now"); + assert(mon_info->owner()->is_locked(), "object must be locked now"); } } @@ -875,7 +933,7 @@ GrowableArray* monitors = cvf->monitors(); for (int i = 0; i < monitors->length(); i++) { MonitorInfo* mon_info = monitors->at(i); - if (mon_info->owner() != NULL) { + if (mon_info->owner() != NULL && !mon_info->eliminated()) { objects_to_revoke->append(Handle(mon_info->owner())); } } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/deoptimization.hpp --- a/src/share/vm/runtime/deoptimization.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/deoptimization.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -105,7 +105,7 @@ static void reassign_type_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, typeArrayOop obj, BasicType type); static void reassign_object_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, objArrayOop obj); static void reassign_fields(frame* fr, RegisterMap* reg_map, GrowableArray* objects); - static void relock_objects(frame* fr, RegisterMap* reg_map, GrowableArray* monitors); + static void relock_objects(GrowableArray* monitors, JavaThread* thread); NOT_PRODUCT(static void print_objects(GrowableArray* objects);) #endif // COMPILER2 diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/globals.cpp --- a/src/share/vm/runtime/globals.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/globals.cpp Tue Apr 22 15:36:18 2008 
-0700 @@ -205,6 +205,18 @@ return (f->origin == DEFAULT); } +bool CommandLineFlagsEx::is_ergo(CommandLineFlag flag) { + assert((size_t)flag < Flag::numFlags, "bad command line flag index"); + Flag* f = &Flag::flags[flag]; + return (f->origin == ERGONOMIC); +} + +bool CommandLineFlagsEx::is_cmdline(CommandLineFlag flag) { + assert((size_t)flag < Flag::numFlags, "bad command line flag index"); + Flag* f = &Flag::flags[flag]; + return (f->origin == COMMAND_LINE); +} + bool CommandLineFlags::wasSetOnCmdline(const char* name, bool* value) { Flag* result = Flag::find_flag((char*)name, strlen(name)); if (result == NULL) return false; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/globals.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -344,12 +344,6 @@ product(bool, ForceTimeHighResolution, false, \ "Using high time resolution(For Win32 only)") \ \ - product(bool, CacheTimeMillis, false, \ - "Cache os::javaTimeMillis with CacheTimeMillisGranularity") \ - \ - diagnostic(uintx, CacheTimeMillisGranularity, 50, \ - "Granularity for CacheTimeMillis") \ - \ develop(bool, TraceItables, false, \ "Trace initialization and use of itables") \ \ @@ -586,7 +580,7 @@ develop(bool, ZapJNIHandleArea, trueInDebug, \ "Zap freed JNI handle space with 0xFEFEFEFE") \ \ - develop(bool, ZapUnusedHeapArea, trueInDebug, \ + develop(bool, ZapUnusedHeapArea, false, \ "Zap unused heap space with 0xBAADBABE") \ \ develop(bool, PrintVMMessages, true, \ @@ -949,6 +943,12 @@ product(bool, UseXmmRegToRegMoveAll, false, \ "Copy all XMM register bits when moving value between registers") \ \ + product(bool, UseXmmI2D, false, \ + "Use SSE2 CVTDQ2PD instruction to convert Integer to Double") \ + \ + product(bool, UseXmmI2F, false, \ + "Use SSE2 CVTDQ2PS instruction to convert Integer to Float") \ + \ product(intx, FieldsAllocationStyle, 1, \ "0 - type based with oops first, 1 - with oops last") \ \ 
@@ -1794,6 +1794,9 @@ "number of times a GC thread (minus the coordinator) " \ "will sleep while yielding before giving up and resuming GC") \ \ + notproduct(bool, PrintFlagsFinal, false, \ + "Print all command line flags after argument processing") \ + \ /* gc tracing */ \ manageable(bool, PrintGC, false, \ "Print message at garbage collect") \ diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/globals_extension.hpp --- a/src/share/vm/runtime/globals_extension.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/globals_extension.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -154,6 +154,8 @@ } CommandLineFlagWithType; #define FLAG_IS_DEFAULT(name) (CommandLineFlagsEx::is_default(FLAG_MEMBER(name))) +#define FLAG_IS_ERGO(name) (CommandLineFlagsEx::is_ergo(FLAG_MEMBER(name))) +#define FLAG_IS_CMDLINE(name) (CommandLineFlagsEx::is_cmdline(FLAG_MEMBER(name))) #define FLAG_SET_DEFAULT(name, value) ((name) = (value)) @@ -171,4 +173,6 @@ static void ccstrAtPut(CommandLineFlagWithType flag, ccstr value, FlagValueOrigin origin); static bool is_default(CommandLineFlag flag); + static bool is_ergo(CommandLineFlag flag); + static bool is_cmdline(CommandLineFlag flag); }; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/java.cpp --- a/src/share/vm/runtime/java.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/java.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -390,11 +390,6 @@ StatSampler::disengage(); StatSampler::destroy(); - // shut down the TimeMillisUpdateTask - if (CacheTimeMillis) { - TimeMillisUpdateTask::disengage(); - } - #ifndef SERIALGC // stop CMS threads if (UseConcMarkSweepGC) { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/mutex.cpp --- a/src/share/vm/runtime/mutex.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/mutex.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1119,10 +1119,15 @@ assert ((UNS(_owner)|UNS(_LockWord.FullWord)|UNS(_EntryList)|UNS(_WaitSet)|UNS(_OnDeck)) == 0, "") ; } -void Monitor::ClearMonitor (Monitor * 
m) { +void Monitor::ClearMonitor (Monitor * m, const char *name) { m->_owner = NULL ; m->_snuck = false ; - m->_name = "UNKNOWN" ; + if (name == NULL) { + strcpy(m->_name, "UNKNOWN") ; + } else { + strncpy(m->_name, name, MONITOR_NAME_LEN - 1); + m->_name[MONITOR_NAME_LEN - 1] = '\0'; + } m->_LockWord.FullWord = 0 ; m->_EntryList = NULL ; m->_OnDeck = NULL ; @@ -1133,7 +1138,7 @@ Monitor::Monitor() { ClearMonitor(this); } Monitor::Monitor (int Rank, const char * name, bool allow_vm_block) { - ClearMonitor (this) ; + ClearMonitor (this, name) ; #ifdef ASSERT _allow_vm_block = allow_vm_block; _rank = Rank ; @@ -1145,7 +1150,7 @@ } Mutex::Mutex (int Rank, const char * name, bool allow_vm_block) { - ClearMonitor ((Monitor *) this) ; + ClearMonitor ((Monitor *) this, name) ; #ifdef ASSERT _allow_vm_block = allow_vm_block; _rank = Rank ; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/mutex.hpp --- a/src/share/vm/runtime/mutex.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/mutex.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -82,6 +82,9 @@ // *in that order*. If their implementations change such that these // assumptions are violated, a whole lot of code will break. +// The default length of monitor name is choosen to be 64 to avoid false sharing. +static const int MONITOR_NAME_LEN = 64; + class Monitor : public CHeapObj { public: @@ -126,9 +129,8 @@ volatile intptr_t _WaitLock [1] ; // Protects _WaitSet ParkEvent * volatile _WaitSet ; // LL of ParkEvents volatile bool _snuck; // Used for sneaky locking (evil). - const char * _name; // Name of mutex int NotifyCount ; // diagnostic assist - double pad [8] ; // avoid false sharing + char _name[MONITOR_NAME_LEN]; // Name of mutex // Debugging fields for naming, deadlock detection, etc. 
(some only used in debug mode) #ifndef PRODUCT @@ -170,7 +172,7 @@ int ILocked () ; protected: - static void ClearMonitor (Monitor * m) ; + static void ClearMonitor (Monitor * m, const char* name = NULL) ; Monitor() ; public: diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/mutexLocker.cpp --- a/src/share/vm/runtime/mutexLocker.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/mutexLocker.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -188,10 +188,6 @@ def(Safepoint_lock , Monitor, safepoint, true ); // locks SnippetCache_lock/Threads_lock - if (!UseMembar) { - def(SerializePage_lock , Monitor, leaf, true ); - } - def(Threads_lock , Monitor, barrier, true ); def(VMOperationQueue_lock , Monitor, nonleaf, true ); // VM_thread allowed to block on these diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/mutexLocker.hpp --- a/src/share/vm/runtime/mutexLocker.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/mutexLocker.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -52,7 +52,6 @@ extern Monitor* VMOperationQueue_lock; // a lock on queue of vm_operations waiting to execute extern Monitor* VMOperationRequest_lock; // a lock on Threads waiting for a vm_operation to terminate extern Monitor* Safepoint_lock; // a lock used by the safepoint abstraction -extern Monitor* SerializePage_lock; // a lock used when VMThread changing serialize memory page permission during safepoint extern Monitor* Threads_lock; // a lock on the Threads table of active Java threads // (also used by Safepoints too to block threads creation/destruction) extern Monitor* CGC_lock; // used for coordination between diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/os.cpp --- a/src/share/vm/runtime/os.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/os.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -33,9 +33,6 @@ uintptr_t os::_serialize_page_mask = 0; long os::_rand_seed = 1; int os::_processor_count = 0; -volatile jlong os::_global_time = 0; -volatile int 
os::_global_time_lock = 0; -bool os::_use_global_time = false; size_t os::_page_sizes[os::page_sizes_max]; #ifndef PRODUCT @@ -44,74 +41,6 @@ int os::num_frees = 0; // # of calls to free #endif -// Atomic read of a jlong is assured by a seqlock; see update_global_time() -jlong os::read_global_time() { -#ifdef _LP64 - return _global_time; -#else - volatile int lock; - volatile jlong current_time; - int ctr = 0; - - for (;;) { - lock = _global_time_lock; - - // spin while locked - while ((lock & 0x1) != 0) { - ++ctr; - if ((ctr & 0xFFF) == 0) { - // Guarantee writer progress. Can't use yield; yield is advisory - // and has almost no effect on some platforms. Don't need a state - // transition - the park call will return promptly. - assert(Thread::current() != NULL, "TLS not initialized"); - assert(Thread::current()->_ParkEvent != NULL, "sync not initialized"); - Thread::current()->_ParkEvent->park(1); - } - lock = _global_time_lock; - } - - OrderAccess::loadload(); - current_time = _global_time; - OrderAccess::loadload(); - - // ratify seqlock value - if (lock == _global_time_lock) { - return current_time; - } - } -#endif -} - -// -// NOTE - Assumes only one writer thread! -// -// We use a seqlock to guarantee that jlong _global_time is updated -// atomically on 32-bit platforms. A locked value is indicated by -// the lock variable LSB == 1. Readers will initially read the lock -// value, spinning until the LSB == 0. They then speculatively read -// the global time value, then re-read the lock value to ensure that -// it hasn't changed. If the lock value has changed, the entire read -// sequence is retried. -// -// Writers simply set the LSB = 1 (i.e. increment the variable), -// update the global time, then release the lock and bump the version -// number (i.e. increment the variable again.) In this case we don't -// even need a CAS since we ensure there's only one writer. 
-// -void os::update_global_time() { -#ifdef _LP64 - _global_time = timeofday(); -#else - assert((_global_time_lock & 0x1) == 0, "multiple writers?"); - jlong current_time = timeofday(); - _global_time_lock++; // lock - OrderAccess::storestore(); - _global_time = current_time; - OrderAccess::storestore(); - _global_time_lock++; // unlock -#endif -} - // Fill in buffer with current local time as an ISO-8601 string. // E.g., yyyy-mm-ddThh:mm:ss-zzzz. // Returns buffer, or NULL if it failed. @@ -138,7 +67,7 @@ return NULL; } // Get the current time - jlong milliseconds_since_19700101 = timeofday(); + jlong milliseconds_since_19700101 = javaTimeMillis(); const int milliseconds_per_microsecond = 1000; const time_t seconds_since_19700101 = milliseconds_since_19700101 / milliseconds_per_microsecond; @@ -956,7 +885,6 @@ return true; } - void os::set_memory_serialize_page(address page) { int count = log2_intptr(sizeof(class JavaThread)) - log2_intptr(64); _mem_serialize_page = (volatile int32_t *)page; @@ -967,6 +895,8 @@ set_serialize_page_mask((uintptr_t)(vm_page_size() - sizeof(int32_t))); } +static volatile intptr_t SerializePageLock = 0; + // This method is called from signal handler when SIGSEGV occurs while the current // thread tries to store to the "read-only" memory serialize page during state // transition. @@ -974,15 +904,14 @@ if (TraceSafepoint) { tty->print_cr("Block until the serialize page permission restored"); } - // When VMThread is holding the SerializePage_lock during modifying the + // When VMThread is holding the SerializePageLock during modifying the // access permission of the memory serialize page, the following call // will block until the permission of that page is restored to rw. // Generally, it is unsafe to manipulate locks in signal handlers, but in // this case, it's OK as the signal is synchronous and we know precisely when - // it can occur. 
SerializePage_lock is a transiently-held leaf lock, so - // lock_without_safepoint_check should be safe. - SerializePage_lock->lock_without_safepoint_check(); - SerializePage_lock->unlock(); + // it can occur. + Thread::muxAcquire(&SerializePageLock, "set_memory_serialize_page"); + Thread::muxRelease(&SerializePageLock); } // Serialize all thread state variables @@ -990,14 +919,12 @@ // On some platforms such as Solaris & Linux, the time duration of the page // permission restoration is observed to be much longer than expected due to // scheduler starvation problem etc. To avoid the long synchronization - // time and expensive page trap spinning, 'SerializePage_lock' is used to block - // the mutator thread if such case is encountered. Since this method is always - // called by VMThread during safepoint, lock_without_safepoint_check is used - // instead. See bug 6546278. - SerializePage_lock->lock_without_safepoint_check(); + // time and expensive page trap spinning, 'SerializePageLock' is used to block + // the mutator thread if such case is encountered. See bug 6546278 for details. 
+ Thread::muxAcquire(&SerializePageLock, "serialize_thread_states"); os::protect_memory( (char *)os::get_memory_serialize_page(), os::vm_page_size() ); os::unguard_memory( (char *)os::get_memory_serialize_page(), os::vm_page_size() ); - SerializePage_lock->unlock(); + Thread::muxRelease(&SerializePageLock); } // Returns true if the current stack pointer is above the stack shadow diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/os.hpp --- a/src/share/vm/runtime/os.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/os.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -66,9 +66,6 @@ static address _polling_page; static volatile int32_t * _mem_serialize_page; static uintptr_t _serialize_page_mask; - static volatile jlong _global_time; - static volatile int _global_time_lock; - static bool _use_global_time; static size_t _page_sizes[page_sizes_max]; static void init_page_sizes(size_t default_page_size) { @@ -88,11 +85,6 @@ static bool getenv(const char* name, char* buffer, int len); static bool have_special_privileges(); - static jlong timeofday(); - static void enable_global_time() { _use_global_time = true; } - static void disable_global_time() { _use_global_time = false; } - static jlong read_global_time(); - static void update_global_time(); static jlong javaTimeMillis(); static jlong javaTimeNanos(); static void javaTimeNanos_info(jvmtiTimerInfo *info_ptr); @@ -236,6 +228,7 @@ static bool large_page_init(); static size_t large_page_size(); static bool can_commit_large_page_memory(); + static bool can_execute_large_page_memory(); // OS interface to polling page static address get_polling_page() { return _polling_page; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/reflection.cpp --- a/src/share/vm/runtime/reflection.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/reflection.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1548,10 +1548,11 @@ } instanceKlassHandle klass(THREAD, java_lang_Class::as_klassOop(mirror)); - if 
(!klass->methods()->is_within_bounds(slot)) { + methodOop m = klass->method_with_idnum(slot); + if (m == NULL) { THROW_MSG_0(vmSymbols::java_lang_InternalError(), "invoke"); } - methodHandle method(THREAD, methodOop(klass->methods()->obj_at(slot))); + methodHandle method(THREAD, m); return invoke(klass, method, receiver, override, ptypes, rtype, args, true, THREAD); } @@ -1564,10 +1565,11 @@ objArrayHandle ptypes(THREAD, objArrayOop(java_lang_reflect_Constructor::parameter_types(constructor_mirror))); instanceKlassHandle klass(THREAD, java_lang_Class::as_klassOop(mirror)); - if (!klass->methods()->is_within_bounds(slot)) { + methodOop m = klass->method_with_idnum(slot); + if (m == NULL) { THROW_MSG_0(vmSymbols::java_lang_InternalError(), "invoke"); } - methodHandle method(THREAD, methodOop(klass->methods()->obj_at(slot))); + methodHandle method(THREAD, m); assert(method->name() == vmSymbols::object_initializer_name(), "invalid constructor"); // Make sure klass gets initialize diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/sharedRuntime.cpp --- a/src/share/vm/runtime/sharedRuntime.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/sharedRuntime.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -467,6 +467,11 @@ throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_AbstractMethodError()); JRT_END +JRT_ENTRY(void, SharedRuntime::throw_IncompatibleClassChangeError(JavaThread* thread)) + // These errors occur only at call sites + throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_IncompatibleClassChangeError(), "vtable stub"); +JRT_END + JRT_ENTRY(void, SharedRuntime::throw_ArithmeticException(JavaThread* thread)) throw_and_post_jvmti_exception(thread, vmSymbols::java_lang_ArithmeticException(), "/ by zero"); JRT_END @@ -1481,11 +1486,9 @@ const char* desc = " cannot be cast to "; size_t msglen = strlen(objName) + strlen(desc) + strlen(targetKlassName) + 1; - char* message = NEW_C_HEAP_ARRAY(char, msglen); + char* message = 
NEW_RESOURCE_ARRAY(char, msglen); if (NULL == message) { - // out of memory - can't use a detailed message. Since caller is - // using a resource mark to free memory, returning this should be - // safe (caller won't explicitly delete it). + // Shouldn't happen, but don't cause even more problems if it does message = const_cast(objName); } else { jio_snprintf(message, msglen, "%s%s%s", objName, desc, targetKlassName); @@ -1834,7 +1837,25 @@ regs); B = BufferBlob::create(AdapterHandlerEntry::name, &buffer); - if (B == NULL) return -2; // Out of CodeCache space + if (B == NULL) { + // CodeCache is full, disable compilation + // Ought to log this but compile log is only per compile thread + // and we're some non descript Java thread. + UseInterpreter = true; + if (UseCompiler || AlwaysCompileLoopMethods ) { +#ifndef PRODUCT + warning("CodeCache is full. Compiler has been disabled"); + if (CompileTheWorld || ExitOnFullCodeCache) { + before_exit(JavaThread::current()); + exit_globals(); // will delete tty + vm_direct_exit(CompileTheWorld ? 
0 : 1); + } +#endif + UseCompiler = false; + AlwaysCompileLoopMethods = false; + } + return 0; // Out of CodeCache space (_handlers[0] == NULL) + } entry->relocate(B->instructions_begin()); #ifndef PRODUCT // debugging suppport diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/sharedRuntime.hpp --- a/src/share/vm/runtime/sharedRuntime.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/sharedRuntime.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -104,6 +104,7 @@ STACK_OVERFLOW }; static void throw_AbstractMethodError(JavaThread* thread); + static void throw_IncompatibleClassChangeError(JavaThread* thread); static void throw_ArithmeticException(JavaThread* thread); static void throw_NullPointerException(JavaThread* thread); static void throw_NullPointerException_at_call(JavaThread* thread); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/stubRoutines.cpp --- a/src/share/vm/runtime/stubRoutines.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/stubRoutines.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -40,6 +40,7 @@ address StubRoutines::_catch_exception_entry = NULL; address StubRoutines::_forward_exception_entry = NULL; address StubRoutines::_throw_AbstractMethodError_entry = NULL; +address StubRoutines::_throw_IncompatibleClassChangeError_entry = NULL; address StubRoutines::_throw_ArithmeticException_entry = NULL; address StubRoutines::_throw_NullPointerException_entry = NULL; address StubRoutines::_throw_NullPointerException_at_call_entry = NULL; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/stubRoutines.hpp --- a/src/share/vm/runtime/stubRoutines.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/stubRoutines.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -84,6 +84,7 @@ static address _forward_exception_entry; static address _catch_exception_entry; static address _throw_AbstractMethodError_entry; + static address _throw_IncompatibleClassChangeError_entry; static address _throw_ArithmeticException_entry; static address 
_throw_NullPointerException_entry; static address _throw_NullPointerException_at_call_entry; @@ -184,6 +185,7 @@ static address forward_exception_entry() { return _forward_exception_entry; } // Implicit exceptions static address throw_AbstractMethodError_entry() { return _throw_AbstractMethodError_entry; } + static address throw_IncompatibleClassChangeError_entry(){ return _throw_IncompatibleClassChangeError_entry; } static address throw_ArithmeticException_entry() { return _throw_ArithmeticException_entry; } static address throw_NullPointerException_entry() { return _throw_NullPointerException_entry; } static address throw_NullPointerException_at_call_entry(){ return _throw_NullPointerException_at_call_entry; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/task.cpp --- a/src/share/vm/runtime/task.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/task.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -107,25 +107,3 @@ _tasks[index] = _tasks[index+1]; } } - -TimeMillisUpdateTask* TimeMillisUpdateTask::_task = NULL; - -void TimeMillisUpdateTask::task() { - os::update_global_time(); -} - -void TimeMillisUpdateTask::engage() { - assert(_task == NULL, "init twice?"); - os::update_global_time(); // initial update - os::enable_global_time(); - _task = new TimeMillisUpdateTask(CacheTimeMillisGranularity); - _task->enroll(); -} - -void TimeMillisUpdateTask::disengage() { - assert(_task != NULL, "uninit twice?"); - os::disable_global_time(); - _task->disenroll(); - delete _task; - _task = NULL; -} diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/task.hpp --- a/src/share/vm/runtime/task.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/task.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -113,13 +113,3 @@ // The task to perform at each period virtual void task() = 0; }; - -class TimeMillisUpdateTask : public PeriodicTask { - private: - static TimeMillisUpdateTask* _task; - public: - TimeMillisUpdateTask(int interval) : PeriodicTask(interval) {} - void 
task(); - static void engage(); - static void disengage(); -}; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/thread.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -1317,10 +1317,6 @@ ThreadSafepointState::destroy(this); if (_thread_profiler != NULL) delete _thread_profiler; if (_thread_stat != NULL) delete _thread_stat; - - if (jvmti_thread_state() != NULL) { - JvmtiExport::cleanup_thread(this); - } } @@ -1571,6 +1567,10 @@ tlab().make_parsable(true); // retire TLAB } + if (jvmti_thread_state() != NULL) { + JvmtiExport::cleanup_thread(this); + } + // Remove from list of active threads list, and notify VM thread if we are the last non-daemon thread Threads::remove(this); } @@ -2925,6 +2925,25 @@ warning("java.lang.String not initialized"); } + if (AggressiveOpts) { + // Forcibly initialize java/util/HashMap and mutate the private + // static final "frontCacheEnabled" field before we start creating instances +#ifdef ASSERT + klassOop tmp_k = SystemDictionary::find(vmSymbolHandles::java_util_HashMap(), Handle(), Handle(), CHECK_0); + assert(tmp_k == NULL, "java/util/HashMap should not be loaded yet"); +#endif + klassOop k_o = SystemDictionary::resolve_or_null(vmSymbolHandles::java_util_HashMap(), Handle(), Handle(), CHECK_0); + KlassHandle k = KlassHandle(THREAD, k_o); + guarantee(k.not_null(), "Must find java/util/HashMap"); + instanceKlassHandle ik = instanceKlassHandle(THREAD, k()); + ik->initialize(CHECK_0); + fieldDescriptor fd; + // Possible we might not find this field; if so, don't break + if (ik->find_local_field(vmSymbols::frontCacheEnabled_name(), vmSymbols::bool_signature(), &fd)) { + k()->bool_field_put(fd.offset(), true); + } + } + // Initialize java_lang.System (needed before creating the thread) if (InitializeJavaLangSystem) { initialize_class(vmSymbolHandles::java_lang_System(), CHECK_0); @@ -3066,7 +3085,6 @@ if (MemProfiling) 
MemProfiler::engage(); StatSampler::engage(); if (CheckJNICalls) JniPeriodicChecker::engage(); - if (CacheTimeMillis) TimeMillisUpdateTask::engage(); BiasedLocking::init(); diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/vframe.cpp --- a/src/share/vm/runtime/vframe.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/vframe.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -206,7 +206,7 @@ for (BasicObjectLock* current = (fr().previous_monitor_in_interpreter_frame(fr().interpreter_frame_monitor_begin())); current >= fr().interpreter_frame_monitor_end(); current = fr().previous_monitor_in_interpreter_frame(current)) { - result->push(new MonitorInfo(current->obj(), current->lock())); + result->push(new MonitorInfo(current->obj(), current->lock(), false)); } return result; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/vframe.hpp --- a/src/share/vm/runtime/vframe.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/vframe.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -230,15 +230,18 @@ private: oop _owner; // the object owning the monitor BasicLock* _lock; + bool _eliminated; public: // Constructor - MonitorInfo(oop owner, BasicLock* lock) { + MonitorInfo(oop owner, BasicLock* lock, bool eliminated) { _owner = owner; _lock = lock; + _eliminated = eliminated; } // Accessors oop owner() const { return _owner; } BasicLock* lock() const { return _lock; } + bool eliminated() const { return _eliminated; } }; class vframeStreamCommon : StackObj { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/vframe_hp.cpp --- a/src/share/vm/runtime/vframe_hp.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/vframe_hp.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -190,7 +190,7 @@ // Casting away const frame& fr = (frame&) _fr; MonitorInfo* info = new MonitorInfo(fr.compiled_synchronized_native_monitor_owner(nm), - fr.compiled_synchronized_native_monitor(nm)); + fr.compiled_synchronized_native_monitor(nm), false); monitors->push(info); return monitors; } @@ 
-202,7 +202,7 @@ for (int index = 0; index < monitors->length(); index++) { MonitorValue* mv = monitors->at(index); StackValue *owner_sv = create_stack_value(mv->owner()); // it is an oop - result->push(new MonitorInfo(owner_sv->get_obj()(), resolve_monitor_lock(mv->basic_lock()))); + result->push(new MonitorInfo(owner_sv->get_obj()(), resolve_monitor_lock(mv->basic_lock()), mv->eliminated())); } return result; } diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/vm_version.cpp --- a/src/share/vm/runtime/vm_version.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/vm_version.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -52,6 +52,8 @@ int Abstract_VM_Version::_vm_minor_version = 0; int Abstract_VM_Version::_vm_build_number = 0; bool Abstract_VM_Version::_initialized = false; +int Abstract_VM_Version::_parallel_worker_threads = 0; +bool Abstract_VM_Version::_parallel_worker_threads_initialized = false; void Abstract_VM_Version::initialize() { if (_initialized) { @@ -210,3 +212,43 @@ } #endif } + +unsigned int Abstract_VM_Version::nof_parallel_worker_threads( + unsigned int num, + unsigned int den, + unsigned int switch_pt) { + if (FLAG_IS_DEFAULT(ParallelGCThreads)) { + assert(ParallelGCThreads == 0, "Default ParallelGCThreads is not 0"); + // For very large machines, there are diminishing returns + // for large numbers of worker threads. Instead of + // hogging the whole system, use a fraction of the workers for every + // processor after the first 8. For example, on a 72 cpu machine + // and a chosen fraction of 5/8 + // use 8 + (72 - 8) * (5/8) == 48 worker threads. + unsigned int ncpus = (unsigned int) os::active_processor_count(); + return (ncpus <= switch_pt) ? 
+ ncpus : + (switch_pt + ((ncpus - switch_pt) * num) / den); + } else { + return ParallelGCThreads; + } +} + +unsigned int Abstract_VM_Version::calc_parallel_worker_threads() { + return nof_parallel_worker_threads(5, 8, 8); +} + + +// Does not set the _initialized flag since it is +// a global flag. +unsigned int Abstract_VM_Version::parallel_worker_threads() { + if (!_parallel_worker_threads_initialized) { + if (FLAG_IS_DEFAULT(ParallelGCThreads)) { + _parallel_worker_threads = VM_Version::calc_parallel_worker_threads(); + } else { + _parallel_worker_threads = ParallelGCThreads; + } + _parallel_worker_threads_initialized = true; + } + return _parallel_worker_threads; +} diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/runtime/vm_version.hpp --- a/src/share/vm/runtime/vm_version.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/runtime/vm_version.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -36,6 +36,12 @@ static int _vm_minor_version; static int _vm_build_number; static bool _initialized; + static int _parallel_worker_threads; + static bool _parallel_worker_threads_initialized; + + static unsigned int nof_parallel_worker_threads(unsigned int num, + unsigned int dem, + unsigned int switch_pt); public: static void initialize(); @@ -69,4 +75,13 @@ // subclasses should define new versions to hide this one as needed. Note // that the O/S may support more sizes, but at most this many are used. static uint page_size_count() { return 2; } + + // Returns the number of parallel threads to be used for VM + // work. If that number has not been calculated, do so and + // save it. Returns ParallelGCThreads if it is set on the + // command line. + static unsigned int parallel_worker_threads(); + // Calculates and returns the number of parallel threads. May + // be VM version specific. 
+ static unsigned int calc_parallel_worker_threads(); }; diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/services/heapDumper.cpp --- a/src/share/vm/services/heapDumper.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/services/heapDumper.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -997,7 +997,7 @@ } // If the byte ordering is big endian then we can copy most types directly - int length_in_bytes = array->length() * type2aelembytes[type]; + int length_in_bytes = array->length() * type2aelembytes(type); assert(length_in_bytes > 0, "nothing to copy"); switch (type) { diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/utilities/globalDefinitions.cpp --- a/src/share/vm/utilities/globalDefinitions.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/utilities/globalDefinitions.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -214,7 +214,7 @@ }; -int type2aelembytes[T_CONFLICT+1] = { +int _type2aelembytes[T_CONFLICT+1] = { 0, // 0 0, // 1 0, // 2 @@ -230,10 +230,16 @@ T_OBJECT_aelem_bytes, // T_OBJECT = 12, T_ARRAY_aelem_bytes, // T_ARRAY = 13, 0, // T_VOID = 14, - T_INT_aelem_bytes, // T_ADDRESS = 15, + T_OBJECT_aelem_bytes, // T_ADDRESS = 15, 0 // T_CONFLICT = 16, }; +#ifdef ASSERT +int type2aelembytes(BasicType t, bool allow_address) { + assert(allow_address || t != T_ADDRESS, " "); + return _type2aelembytes[t]; +} +#endif // Support for 64-bit integer arithmetic diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/utilities/globalDefinitions.hpp --- a/src/share/vm/utilities/globalDefinitions.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/utilities/globalDefinitions.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -392,6 +392,10 @@ T_ILLEGAL = 99 }; +inline bool is_java_primitive(BasicType t) { + return T_BOOLEAN <= t && t <= T_LONG; +} + // Convert a char from a classfile signature to a BasicType inline BasicType char2type(char c) { switch( c ) { @@ -464,7 +468,12 @@ T_VOID_aelem_bytes = 0 }; -extern int type2aelembytes[T_CONFLICT+1]; // maps a BasicType to nof bytes used by its array 
element +extern int _type2aelembytes[T_CONFLICT+1]; // maps a BasicType to nof bytes used by its array element +#ifdef ASSERT +extern int type2aelembytes(BasicType t, bool allow_address = false); // asserts +#else +inline int type2aelembytes(BasicType t) { return _type2aelembytes[t]; } +#endif // JavaValue serves as a container for arbitrary Java values. diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/utilities/globalDefinitions_sparcWorks.hpp --- a/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp Tue Apr 22 15:36:18 2008 -0700 @@ -37,23 +37,45 @@ # include # include # include +#ifdef SOLARIS # include +#endif # include +#ifdef LINUX +#ifndef FP_PZERO + // Linux doesn't have positive/negative zero + #define FP_PZERO FP_ZERO +#endif +#ifndef fpclass + #define fpclass fpclassify +#endif +#endif # include # include # include # include +#ifdef SOLARIS # include +#endif # include # include +#ifdef SOLARIS # include # include # include # include # include +#endif # ifdef SOLARIS_MUTATOR_LIBTHREAD # include # endif +#ifdef LINUX +# include +# include +# include +# include +#endif + // 4810578: varargs unsafe on 32-bit integer/64-bit pointer architectures // When __cplusplus is defined, NULL is defined as 0 (32-bit constant) in @@ -68,6 +90,11 @@ // pointer when it extracts the argument, then we have a problem. // // Solution: For 64-bit architectures, redefine NULL as 64-bit constant 0. +// +// Note: this fix doesn't work well on Linux because NULL will be overwritten +// whenever a system header file is included. Linux handles NULL correctly +// through a special type '__null'. +#ifdef SOLARIS #ifdef _LP64 #undef NULL #define NULL 0L @@ -76,13 +103,25 @@ #define NULL 0 #endif #endif +#endif // NULL vs NULL_WORD: // On Linux NULL is defined as a special type '__null'. Assigning __null to // integer variable will cause gcc warning. 
Use NULL_WORD in places where a -// pointer is stored as integer value. -#define NULL_WORD NULL +// pointer is stored as integer value. On some platforms, sizeof(intptr_t) > +// sizeof(void*), so here we want something which is integer type, but has the +// same size as a pointer. +#ifdef LINUX + #ifdef _LP64 + #define NULL_WORD 0L + #else + #define NULL_WORD 0 + #endif +#else + #define NULL_WORD NULL +#endif +#ifndef LINUX // Compiler-specific primitive types typedef unsigned short uint16_t; #ifndef _UINT32_T @@ -100,6 +139,7 @@ // If this gets an error, figure out a symbol XXX that implies the // prior definition of intptr_t, and add "&& !defined(XXX)" above. #endif +#endif // Additional Java basic types @@ -128,7 +168,7 @@ const jlong min_jlong = CONST64(0x8000000000000000); const jlong max_jlong = CONST64(0x7fffffffffffffff); - +#ifdef SOLARIS //---------------------------------------------------------------------------------------------------- // ANSI C++ fixes // NOTE:In the ANSI committee's continuing attempt to make each version @@ -162,7 +202,7 @@ typedef int (*int_fnP_cond_tP_i_vP)(cond_t *cv, int scope, void *arg); typedef int (*int_fnP_cond_tP)(cond_t *cv); }; - +#endif //---------------------------------------------------------------------------------------------------- // Debugging @@ -173,7 +213,7 @@ #define BREAKPOINT ::breakpoint() // checking for nanness - +#ifdef SOLARIS #ifdef SPARC inline int g_isnan(float f) { return isnanf(f); } #else @@ -182,6 +222,12 @@ #endif inline int g_isnan(double f) { return isnand(f); } +#elif LINUX +inline int g_isnan(float f) { return isnanf(f); } +inline int g_isnan(double f) { return isnan(f); } +#else +#error "missing platform-specific definition here" +#endif // Checking for finiteness @@ -195,9 +241,11 @@ // Misc +// NOTE: This one leads to an infinite recursion on Linux +#ifndef LINUX int local_vsnprintf(char* buf, size_t count, const char* fmt, va_list argptr); #define vsnprintf local_vsnprintf - +#endif // 
Portability macros #define PRAGMA_INTERFACE diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/utilities/ostream.cpp --- a/src/share/vm/utilities/ostream.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/utilities/ostream.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -829,7 +829,7 @@ server.sin_port = htons(port); server.sin_addr.s_addr = inet_addr(ip); - if (server.sin_addr.s_addr == (unsigned long)-1) { + if (server.sin_addr.s_addr == (uint32_t)-1) { #ifdef _WINDOWS struct hostent* host = hpi::get_host_by_name((char*)ip); #else diff -r 8b0b3490194f -r ad0b851458ff src/share/vm/utilities/vmError.cpp --- a/src/share/vm/utilities/vmError.cpp Wed Apr 09 11:18:58 2008 -0700 +++ b/src/share/vm/utilities/vmError.cpp Tue Apr 22 15:36:18 2008 -0700 @@ -170,7 +170,8 @@ out->print_raw_cr(Arguments::java_vendor_url_bug()); // If the crash is in native code, encourage user to submit a bug to the // provider of that code. - if (thread && thread->is_Java_thread()) { + if (thread && thread->is_Java_thread() && + !thread->is_hidden_from_external_view()) { JavaThread* jt = (JavaThread*)thread; if (jt->thread_state() == _thread_in_native) { out->print_cr("# The crash happened outside the Java Virtual Machine in native code.\n# See problematic frame for where to report the bug."); @@ -249,10 +250,10 @@ BEGIN - STEP(10, "(printing unexpected error message)") + STEP(10, "(printing fatal error message)") st->print_cr("#"); - st->print_cr("# An unexpected error has been detected by Java Runtime Environment:"); + st->print_cr("# A fatal error has been detected by the Java Runtime Environment:"); STEP(15, "(printing type of error)") diff -r 8b0b3490194f -r ad0b851458ff test/compiler/6659207/Test.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/6659207/Test.java Tue Apr 22 15:36:18 2008 -0700 @@ -0,0 +1,60 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. 
+ * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + */ + +/* + * @test + * @bug 6659207 + * @summary access violation in CompilerThread0 + */ + +public class Test { + static int[] array = new int[12]; + + static int index(int i) { + if (i == 0) return 0; + for (int n = 0; n < array.length; n++) + if (i < array[n]) return n; + return -1; + } + + static int test(int i) { + int result = 0; + i = index(i); + if (i >= 0) + if (array[i] != 0) + result++; + + if (i != -1) + array[i]++; + + return result; + } + + public static void main(String[] args) { + int total = 0; + for (int i = 0; i < 100000; i++) { + total += test(10); + } + System.out.println(total); + } +} diff -r 8b0b3490194f -r ad0b851458ff test/compiler/6661247/Test.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/6661247/Test.java Tue Apr 22 15:36:18 2008 -0700 @@ -0,0 +1,155 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + */ + +/* + * @test + * @bug 6661247 + * @summary Internal bug in 32-bit HotSpot optimizer while bit manipulations + */ + +import java.util.Random; +import java.nio.*; + +// This isn't a completely reliable test for 6661247 since the results +// depend on what the local schedule looks like but it does reproduce +// the issue in current builds. + +public class Test { + + public static void test(boolean[] src, int srcPos, LongBuffer dest, long destPos, int count) { + int countStart = (destPos & 63) == 0 ? 
0 : 64 - (int)(destPos & 63); + if (countStart > count) + countStart = count; + for (int srcPosMax = srcPos + countStart; srcPos < srcPosMax; srcPos++, destPos++) { + if (src[srcPos]) + dest.put((int)(destPos >>> 6), dest.get((int)(destPos >>> 6)) | 1L << (destPos & 63)); + else + dest.put((int)(destPos >>> 6), dest.get((int)(destPos >>> 6)) & ~(1L << (destPos & 63))); + } + count -= countStart; + int cnt = count >>> 6; + for (int k = (int)(destPos >>> 6), kMax = k + cnt; k < kMax; k++) { + int low = (src[srcPos] ? 1 : 0) + | (src[srcPos + 1] ? 1 << 1 : 0) + | (src[srcPos + 2] ? 1 << 2 : 0) + | (src[srcPos + 3] ? 1 << 3 : 0) + | (src[srcPos + 4] ? 1 << 4 : 0) + | (src[srcPos + 5] ? 1 << 5 : 0) + | (src[srcPos + 6] ? 1 << 6 : 0) + | (src[srcPos + 7] ? 1 << 7 : 0) + | (src[srcPos + 8] ? 1 << 8 : 0) + | (src[srcPos + 9] ? 1 << 9 : 0) + | (src[srcPos + 10] ? 1 << 10 : 0) + | (src[srcPos + 11] ? 1 << 11 : 0) + | (src[srcPos + 12] ? 1 << 12 : 0) + | (src[srcPos + 13] ? 1 << 13 : 0) + | (src[srcPos + 14] ? 1 << 14 : 0) + | (src[srcPos + 15] ? 1 << 15 : 0) + | (src[srcPos + 16] ? 1 << 16 : 0) + | (src[srcPos + 17] ? 1 << 17 : 0) + | (src[srcPos + 18] ? 1 << 18 : 0) + | (src[srcPos + 19] ? 1 << 19 : 0) + | (src[srcPos + 20] ? 1 << 20 : 0) + | (src[srcPos + 21] ? 1 << 21 : 0) + | (src[srcPos + 22] ? 1 << 22 : 0) + | (src[srcPos + 23] ? 1 << 23 : 0) + | (src[srcPos + 24] ? 1 << 24 : 0) + | (src[srcPos + 25] ? 1 << 25 : 0) + | (src[srcPos + 26] ? 1 << 26 : 0) + | (src[srcPos + 27] ? 1 << 27 : 0) + | (src[srcPos + 28] ? 1 << 28 : 0) + | (src[srcPos + 29] ? 1 << 29 : 0) + | (src[srcPos + 30] ? 1 << 30 : 0) + | (src[srcPos + 31] ? 1 << 31 : 0) + ; + srcPos += 32; + int high = (src[srcPos] ? 1 : 0) // PROBLEM! + | (src[srcPos + 1] ? 1 << 1 : 0) + | (src[srcPos + 2] ? 1 << 2 : 0) + | (src[srcPos + 3] ? 1 << 3 : 0) + | (src[srcPos + 4] ? 1 << 4 : 0) + | (src[srcPos + 5] ? 1 << 5 : 0) + | (src[srcPos + 6] ? 1 << 6 : 0) + | (src[srcPos + 7] ? 1 << 7 : 0) + | (src[srcPos + 8] ? 
1 << 8 : 0) + | (src[srcPos + 9] ? 1 << 9 : 0) + | (src[srcPos + 10] ? 1 << 10 : 0) + | (src[srcPos + 11] ? 1 << 11 : 0) + | (src[srcPos + 12] ? 1 << 12 : 0) + | (src[srcPos + 13] ? 1 << 13 : 0) + | (src[srcPos + 14] ? 1 << 14 : 0) + | (src[srcPos + 15] ? 1 << 15 : 0) + | (src[srcPos + 16] ? 1 << 16 : 0) + | (src[srcPos + 17] ? 1 << 17 : 0) + | (src[srcPos + 18] ? 1 << 18 : 0) + | (src[srcPos + 19] ? 1 << 19 : 0) + | (src[srcPos + 20] ? 1 << 20 : 0) + | (src[srcPos + 21] ? 1 << 21 : 0) + | (src[srcPos + 22] ? 1 << 22 : 0) + | (src[srcPos + 23] ? 1 << 23 : 0) + | (src[srcPos + 24] ? 1 << 24 : 0) + | (src[srcPos + 25] ? 1 << 25 : 0) + | (src[srcPos + 26] ? 1 << 26 : 0) + | (src[srcPos + 27] ? 1 << 27 : 0) + | (src[srcPos + 28] ? 1 << 28 : 0) + | (src[srcPos + 29] ? 1 << 29 : 0) + | (src[srcPos + 30] ? 1 << 30 : 0) + | (src[srcPos + 31] ? 1 << 31 : 0) + ; + srcPos += 32; + dest.put(k, ((long)low & 0xFFFFFFFFL) | (((long)high) << 32)); + destPos += 64; + } + int countFinish = count & 63; + for (int srcPosMax = srcPos + countFinish; srcPos < srcPosMax; srcPos++, destPos++) { + if (src[srcPos]) + dest.put((int)(destPos >>> 6), dest.get((int)(destPos >>> 6)) | 1L << (destPos & 63)); + else + dest.put((int)(destPos >>> 6), dest.get((int)(destPos >>> 6)) & ~(1L << (destPos & 63))); + } + } + public static void main(String[] args) { + Random r = new Random(); + int entries = 1000; + boolean[] src = new boolean[entries * 64]; + long[] dest = new long[entries]; + long[] result = new long[entries]; + + for (int c = 0; c < 2000; c++) { + for (int i = 0; i < entries; i++) { + long l = r.nextLong(); + for (int bit = 0; bit < 64; bit++) { + src[i * 64 + bit] = (l & (1L << bit)) != 0; + } + dest[i] = 0; + result[i] = l; + } + test(src, 0, LongBuffer.wrap(dest, 0, dest.length), 0, src.length); + for (int i = 0; i < entries; i++) { + if (dest[i] != result[i]) { + throw new InternalError(i + ": " + Long.toHexString(dest[i]) + " != " + Long.toHexString(result[i])); + } + } + } + } +} 
diff -r 8b0b3490194f -r ad0b851458ff test/compiler/6663621/IVTest.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/6663621/IVTest.java Tue Apr 22 15:36:18 2008 -0700 @@ -0,0 +1,116 @@ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + */ + +/** + * @test + * @bug 6663621 + * @summary JVM crashes while trying to execute api/java_security/Signature/SignatureTests.html#initSign tests. + */ + +public class IVTest { + static int paddedSize; + + static void padV15(byte[] padded) { + int psSize = padded.length; + int k = 0; + while (psSize-- > 0) { + padded[k++] = (byte)0xff; + } + } + + static void padV15_2(int paddedSize) { + byte[] padded = new byte[paddedSize]; + int psSize = padded.length; + int k = 0; + while (psSize-- > 0) { + padded[k++] = (byte)0xff; + } + } + + static void padV15_3() { + byte[] padded = new byte[paddedSize]; + int psSize = padded.length; + int k = 0; + while (psSize-- > 0) { + padded[k++] = (byte)0xff; + } + } + + static void padV15_4() { + byte[] padded = new byte[paddedSize]; + int psSize = padded.length; + for (int k = 0;psSize > 0; psSize--) { + int i = padded.length - psSize; + padded[i] = (byte)0xff; + } + } + + static void padV15_5() { + byte[] padded = new byte[paddedSize]; + int psSize = padded.length; + int k = psSize - 1; + for (int i = 0; i < psSize; i++) { + padded[k--] = (byte)0xff; + } + } + + public static void main(String argv[]) { + int bounds = 1024; + int lim = 500000; + long start = System.currentTimeMillis(); + for (int j = 0; j < lim; j++) { + paddedSize = j % bounds; + padV15(new byte[paddedSize]); + } + long end = System.currentTimeMillis(); + System.out.println(end - start); + start = System.currentTimeMillis(); + for (int j = 0; j < lim; j++) { + paddedSize = j % bounds; + padV15_2(paddedSize); + } + end = System.currentTimeMillis(); + 
System.out.println(end - start); + start = System.currentTimeMillis(); + for (int j = 0; j < lim; j++) { + paddedSize = j % bounds; + padV15_3(); + } + end = System.currentTimeMillis(); + System.out.println(end - start); + start = System.currentTimeMillis(); + for (int j = 0; j < lim; j++) { + paddedSize = j % bounds; + padV15_4(); + } + end = System.currentTimeMillis(); + System.out.println(end - start); + start = System.currentTimeMillis(); + for (int j = 0; j < lim; j++) { + paddedSize = j % bounds; + padV15_5(); + } + end = System.currentTimeMillis(); + System.out.println(end - start); + } +}