# HG changeset patch # User trims # Date 1250211545 25200 # Node ID 16314a31b9612d4f25bbc4bbe11298cbd4147fb7 # Parent f753dffae23e928571e2ad4b379ca3128eb2eff6# Parent 10d8c0d0d60e5e3f2deed0f2dea33b5bab8e98f9 Merge diff -r f753dffae23e -r 16314a31b961 agent/make/saenv.sh --- a/agent/make/saenv.sh Thu Aug 13 17:47:43 2009 -0700 +++ b/agent/make/saenv.sh Thu Aug 13 17:59:05 2009 -0700 @@ -48,6 +48,8 @@ CPU=i386 fi else + LD_AUDIT_32=$STARTDIR/../src/os/solaris/proc/`uname -p`/libsaproc_audit.so + export LD_AUDIT_32 SA_LIBPATH=$STARTDIR/../src/os/solaris/proc/`uname -p`:$STARTDIR/solaris/`uname -p` OPTIONS="-Dsa.library.path=$SA_LIBPATH -Dsun.jvm.hotspot.debugger.useProcDebugger" CPU=sparc diff -r f753dffae23e -r 16314a31b961 agent/make/saenv64.sh --- a/agent/make/saenv64.sh Thu Aug 13 17:47:43 2009 -0700 +++ b/agent/make/saenv64.sh Thu Aug 13 17:59:05 2009 -0700 @@ -43,6 +43,8 @@ fi fi +LD_AUDIT_64=$STARTDIR/../src/os/solaris/proc/$CPU/libsaproc_audit.so +export LD_AUDIT_64 SA_LIBPATH=$STARTDIR/../src/os/solaris/proc/$CPU:$STARTDIR/solaris/$CPU OPTIONS="-Dsa.library.path=$SA_LIBPATH -Dsun.jvm.hotspot.debugger.useProcDebugger" diff -r f753dffae23e -r 16314a31b961 agent/src/os/solaris/proc/Makefile --- a/agent/src/os/solaris/proc/Makefile Thu Aug 13 17:47:43 2009 -0700 +++ b/agent/src/os/solaris/proc/Makefile Thu Aug 13 17:59:05 2009 -0700 @@ -56,24 +56,28 @@ @javah -classpath $(CLASSES_DIR) -jni sun.jvm.hotspot.debugger.proc.ProcDebuggerLocal CC -G -KPIC -I${JAVA_HOME}/include -I${JAVA_HOME}/include/solaris saproc.cpp \ -M mapfile -o $@/libsaproc.so -ldemangle + CC -o $@/libsaproc_audit.so -G -Kpic -z defs saproc_audit.cpp -lmapmalloc -ldl -lc amd64:: javahomecheck $(MKDIRS) $@ @javah -classpath $(CLASSES_DIR) -jni sun.jvm.hotspot.debugger.proc.ProcDebuggerLocal CC -G -KPIC -xarch=amd64 -I${JAVA_HOME}/include -I${JAVA_HOME}/include/solaris saproc.cpp \ -M mapfile -o $@/libsaproc.so -ldemangle + CC -xarch=amd64 -o $@/libsaproc_audit.so -G -Kpic -z defs saproc_audit.cpp 
-lmapmalloc -ldl -lc sparc:: javahomecheck $(MKDIRS) $@ @javah -classpath $(CLASSES_DIR) -jni sun.jvm.hotspot.debugger.proc.ProcDebuggerLocal CC -G -KPIC -xarch=v8 -I${JAVA_HOME}/include -I${JAVA_HOME}/include/solaris saproc.cpp \ -M mapfile -o $@/libsaproc.so -ldemangle + CC -xarch=v8 -o $@/libsaproc_audit.so -G -Kpic -z defs saproc_audit.cpp -lmapmalloc -ldl -lc sparcv9:: javahomecheck $(MKDIRS) $@ @javah -classpath $(CLASSES_DIR) -jni sun.jvm.hotspot.debugger.proc.ProcDebuggerLocal CC -G -KPIC -xarch=v9 -I${JAVA_HOME}/include -I${JAVA_HOME}/include/solaris saproc.cpp \ -M mapfile -o $@/libsaproc.so -ldemangle + CC -xarch=v9 -o $@/libsaproc_audit.so -G -Kpic -z defs saproc_audit.cpp -lmapmalloc -ldl -lc clean:: $(RM) -rf sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal.h diff -r f753dffae23e -r 16314a31b961 agent/src/os/solaris/proc/mapfile --- a/agent/src/os/solaris/proc/mapfile Thu Aug 13 17:47:43 2009 -0700 +++ b/agent/src/os/solaris/proc/mapfile Thu Aug 13 17:59:05 2009 -0700 @@ -45,6 +45,8 @@ Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_resume0; Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_suspend0; Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_writeBytesToProcess0; + # this is needed by saproc_audit.cpp to redirect opens in libproc.so + libsaproc_open; local: *; }; diff -r f753dffae23e -r 16314a31b961 agent/src/os/solaris/proc/saproc.cpp --- a/agent/src/os/solaris/proc/saproc.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/agent/src/os/solaris/proc/saproc.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -214,49 +214,58 @@ } } -static int find_file_hook(const char * name, int elf_checksum) { - init_alt_root(); - - if (_libsaproc_debug) { - printf("libsaproc DEBUG: find_file_hook %s 0x%x\n", name, elf_checksum); - } +// This function is a complete substitute for the open system call +// since it's also used to override open calls from libproc to +// implement a pathmap style facility for the SA.
If libproc +// starts using other interfaces then this might have to be extended to +// cover other calls. +extern "C" int libsaproc_open(const char * name, int oflag, ...) { + if (oflag == O_RDONLY) { + init_alt_root(); - if (alt_root_len > 0) { - int fd = -1; - char alt_path[PATH_MAX+1]; - - strcpy(alt_path, alt_root); - strcat(alt_path, name); - fd = open(alt_path, O_RDONLY); - if (fd >= 0) { - if (_libsaproc_debug) { - printf("libsaproc DEBUG: find_file_hook substituted %s\n", alt_path); - } - return fd; + if (_libsaproc_debug) { + printf("libsaproc DEBUG: libsaproc_open %s\n", name); } - if (strrchr(name, '/')) { + if (alt_root_len > 0) { + int fd = -1; + char alt_path[PATH_MAX+1]; + strcpy(alt_path, alt_root); - strcat(alt_path, strrchr(name, '/')); + strcat(alt_path, name); fd = open(alt_path, O_RDONLY); if (fd >= 0) { if (_libsaproc_debug) { - printf("libsaproc DEBUG: find_file_hook substituted %s\n", alt_path); + printf("libsaproc DEBUG: libsaproc_open substituted %s\n", alt_path); } return fd; } + + if (strrchr(name, '/')) { + strcpy(alt_path, alt_root); + strcat(alt_path, strrchr(name, '/')); + fd = open(alt_path, O_RDONLY); + if (fd >= 0) { + if (_libsaproc_debug) { + printf("libsaproc DEBUG: libsaproc_open substituted %s\n", alt_path); + } + return fd; + } + } } } - return -1; + + { + mode_t mode; + va_list ap; + va_start(ap, oflag); + mode = va_arg(ap, mode_t); + va_end(ap); + + return open(name, oflag, mode); + } } -static int pathmap_open(const char* name) { - int fd = open(name, O_RDONLY); - if (fd < 0) { - fd = find_file_hook(name, 0); - } - return fd; -} static void * pathmap_dlopen(const char * name, int mode) { init_alt_root(); @@ -608,7 +617,7 @@ print_debug("looking for %s\n", classes_jsa); // open the classes[_g].jsa - int fd = pathmap_open(classes_jsa); + int fd = libsaproc_open(classes_jsa, O_RDONLY); if (fd < 0) { char errMsg[ERR_MSG_SIZE]; sprintf(errMsg, "can't open shared archive file %s", classes_jsa); @@ -1209,8 +1218,6 @@ return res; }
-typedef int (*find_file_hook_t)(const char *, int elf_checksum); - /* * Class: sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal * Method: initIDs @@ -1230,16 +1237,6 @@ if (libproc_handle == 0) THROW_NEW_DEBUGGER_EXCEPTION("can't load libproc.so, if you are using Solaris 5.7 or below, copy libproc.so from 5.8!"); - // If possible, set shared object find file hook. - void (*set_hook)(find_file_hook_t) = (void(*)(find_file_hook_t))dlsym(libproc_handle, "Pset_find_file_hook"); - if (set_hook) { - // we found find file hook symbol, set up our hook function. - set_hook(find_file_hook); - } else if (getenv(SA_ALTROOT)) { - printf("libsaproc WARNING: %s set, but can't set file hook. " \ - "Did you use right version of libproc.so?\n", SA_ALTROOT); - } - p_ps_prochandle_ID = env->GetFieldID(clazz, "p_ps_prochandle", "J"); CHECK_EXCEPTION; diff -r f753dffae23e -r 16314a31b961 agent/src/os/solaris/proc/saproc_audit.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/agent/src/os/solaris/proc/saproc_audit.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -0,0 +1,98 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// This class sets up an interposer on open calls from libproc.so to +// support a pathmap facility in the SA. + +static uintptr_t* libproc_cookie; +static uintptr_t* libc_cookie; +static uintptr_t* libsaproc_cookie; + + +uint_t +la_version(uint_t version) +{ + return (LAV_CURRENT); +} + + +uint_t +la_objopen(Link_map * lmp, Lmid_t lmid, uintptr_t * cookie) +{ + if (strstr(lmp->l_name, "/libproc.so") != NULL) { + libproc_cookie = cookie; + return LA_FLG_BINDFROM; + } + if (strstr(lmp->l_name, "/libc.so") != NULL) { + libc_cookie = cookie; + return LA_FLG_BINDTO; + } + if (strstr(lmp->l_name, "/libsaproc.so") != NULL) { + libsaproc_cookie = cookie; + return LA_FLG_BINDTO | LA_FLG_BINDFROM; + } + return 0; +} + + +#if defined(_LP64) +uintptr_t +la_symbind64(Elf64_Sym *symp, uint_t symndx, uintptr_t *refcook, + uintptr_t *defcook, uint_t *sb_flags, const char *sym_name) +#else +uintptr_t +la_symbind32(Elf32_Sym *symp, uint_t symndx, uintptr_t *refcook, + uintptr_t *defcook, uint_t *sb_flags) +#endif +{ +#if !defined(_LP64) + const char *sym_name = (const char *)symp->st_name; +#endif + if (strcmp(sym_name, "open") == 0 && refcook == libproc_cookie) { + // redirect all open calls from libproc.so through libsaproc_open which will + // try the alternate library locations first. 
+ void* handle = dlmopen(LM_ID_BASE, "libsaproc.so", RTLD_NOLOAD); + if (handle == NULL) { + fprintf(stderr, "libsaproc_audit.so: didn't find libsaproc.so during linking\n"); + } else { + uintptr_t libsaproc_open = (uintptr_t)dlsym(handle, "libsaproc_open"); + if (libsaproc_open == 0) { + fprintf(stderr, "libsaproc_audit.so: didn't find libsaproc_open during linking\n"); + } else { + return libsaproc_open; + } + } + } + return symp->st_value; +} diff -r f753dffae23e -r 16314a31b961 agent/src/share/classes/sun/jvm/hotspot/code/DebugInfoReadStream.java --- a/agent/src/share/classes/sun/jvm/hotspot/code/DebugInfoReadStream.java Thu Aug 13 17:47:43 2009 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/code/DebugInfoReadStream.java Thu Aug 13 17:59:05 2009 -0700 @@ -81,8 +81,4 @@ Assert.that(false, "should not reach here"); return null; } - - public int readBCI() { - return readInt() + InvocationEntryBCI; - } } diff -r f753dffae23e -r 16314a31b961 agent/src/share/classes/sun/jvm/hotspot/code/PCDesc.java --- a/agent/src/share/classes/sun/jvm/hotspot/code/PCDesc.java Thu Aug 13 17:47:43 2009 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/code/PCDesc.java Thu Aug 13 17:59:05 2009 -0700 @@ -82,6 +82,7 @@ tty.print(" "); sd.getMethod().printValueOn(tty); tty.print(" @" + sd.getBCI()); + tty.print(" reexecute=" + sd.getReexecute()); tty.println(); } } diff -r f753dffae23e -r 16314a31b961 agent/src/share/classes/sun/jvm/hotspot/code/ScopeDesc.java --- a/agent/src/share/classes/sun/jvm/hotspot/code/ScopeDesc.java Thu Aug 13 17:47:43 2009 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/code/ScopeDesc.java Thu Aug 13 17:59:05 2009 -0700 @@ -41,6 +41,7 @@ private NMethod code; private Method method; private int bci; + private boolean reexecute; /** Decoding offsets */ private int decodeOffset; private int senderDecodeOffset; @@ -61,7 +62,7 @@ senderDecodeOffset = stream.readInt(); method = (Method) VM.getVM().getObjectHeap().newOop(stream.readOopHandle()); - bci = 
stream.readBCI(); + setBCIAndReexecute(stream.readInt()); // Decode offsets for body and sender localsDecodeOffset = stream.readInt(); expressionsDecodeOffset = stream.readInt(); @@ -78,7 +79,7 @@ senderDecodeOffset = stream.readInt(); method = (Method) VM.getVM().getObjectHeap().newOop(stream.readOopHandle()); - bci = stream.readBCI(); + setBCIAndReexecute(stream.readInt()); // Decode offsets for body and sender localsDecodeOffset = stream.readInt(); expressionsDecodeOffset = stream.readInt(); @@ -88,6 +89,7 @@ public NMethod getNMethod() { return code; } public Method getMethod() { return method; } public int getBCI() { return bci; } + public boolean getReexecute() {return reexecute;} /** Returns a List<ScopeValue> */ public List getLocals() { @@ -150,6 +152,7 @@ tty.print("ScopeDesc for "); method.printValueOn(tty); tty.println(" @bci " + bci); + tty.println(" reexecute: " + reexecute); } // FIXME: add more accessors @@ -157,6 +160,11 @@ //-------------------------------------------------------------------------------- // Internals only below this point // + private void setBCIAndReexecute(int combination) { + int InvocationEntryBci = VM.getVM().getInvocationEntryBCI(); + bci = (combination >> 1) + InvocationEntryBci; + reexecute = (combination & 1)==1 ? true : false; + } private DebugInfoReadStream streamAt(int decodeOffset) { return new DebugInfoReadStream(code, decodeOffset, objects); diff -r f753dffae23e -r 16314a31b961 agent/src/share/classes/sun/jvm/hotspot/memory/CompactibleFreeListSpace.java --- a/agent/src/share/classes/sun/jvm/hotspot/memory/CompactibleFreeListSpace.java Thu Aug 13 17:47:43 2009 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/memory/CompactibleFreeListSpace.java Thu Aug 13 17:59:05 2009 -0700 @@ -176,19 +176,6 @@ for (; cur.lessThan(limit);) { Address klassOop = cur.getAddressAt(addressSize); - // FIXME: need to do a better job here. - // can I use bitMap here? 
- if (klassOop == null) { - //Find the object size using Printezis bits and skip over - System.err.println("Finding object size using Printezis bits and skipping over..."); - long size = collector().blockSizeUsingPrintezisBits(cur); - if (size == -1) { - System.err.println("Printezis bits not set..."); - break; - } - cur = cur.addOffsetTo(adjustObjectSizeInBytes(size)); - } - if (FreeChunk.indicatesFreeChunk(cur)) { if (! cur.equals(regionStart)) { res.add(new MemRegion(regionStart, cur)); @@ -200,12 +187,21 @@ } // note that fc.size() gives chunk size in heap words cur = cur.addOffsetTo(chunkSize * addressSize); - System.err.println("Free chunk in CMS heap, size="+chunkSize * addressSize); regionStart = cur; } else if (klassOop != null) { Oop obj = heap.newOop(cur.addOffsetToAsOopHandle(0)); long objectSize = obj.getObjectSize(); cur = cur.addOffsetTo(adjustObjectSizeInBytes(objectSize)); + } else { + // FIXME: need to do a better job here. + // can I use bitMap here? + //Find the object size using Printezis bits and skip over + long size = collector().blockSizeUsingPrintezisBits(cur); + if (size == -1) { + System.err.println("Printezis bits not set..."); + break; + } + cur = cur.addOffsetTo(adjustObjectSizeInBytes(size)); } } return res; diff -r f753dffae23e -r 16314a31b961 agent/src/share/classes/sun/jvm/hotspot/memory/FreeChunk.java --- a/agent/src/share/classes/sun/jvm/hotspot/memory/FreeChunk.java Thu Aug 13 17:47:43 2009 -0700 +++ b/agent/src/share/classes/sun/jvm/hotspot/memory/FreeChunk.java Thu Aug 13 17:59:05 2009 -0700 @@ -63,7 +63,7 @@ public long size() { if (VM.getVM().isCompressedOopsEnabled()) { - Mark mark = new Mark(sizeField.getValue(addr)); + Mark mark = new Mark(addr.addOffsetTo(sizeField.getOffset())); return mark.getSize(); } else { Address size = sizeField.getValue(addr); @@ -83,7 +83,7 @@ public boolean isFree() { if (VM.getVM().isCompressedOopsEnabled()) { - Mark mark = new Mark(sizeField.getValue(addr)); + Mark mark = new 
Mark(addr.addOffsetTo(sizeField.getOffset())); return mark.isCmsFreeChunk(); } else { Address prev = prevField.getValue(addr); diff -r f753dffae23e -r 16314a31b961 make/jprt.properties --- a/make/jprt.properties Thu Aug 13 17:47:43 2009 -0700 +++ b/make/jprt.properties Thu Aug 13 17:59:05 2009 -0700 @@ -306,7 +306,6 @@ ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParallelGC, \ ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParNewGC, \ ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_CMS, \ - ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_G1, \ ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCBasher_ParOldGC, \ ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_default, \ ${jprt.my.windows.x64}-{product|fastdebug}-c2-GCOld_SerialGC, \ diff -r f753dffae23e -r 16314a31b961 src/cpu/x86/vm/assembler_x86.cpp --- a/src/cpu/x86/vm/assembler_x86.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/cpu/x86/vm/assembler_x86.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -8335,15 +8335,13 @@ // Cannot assert, unverified entry point counts instructions (see .ad file) // vtableStubs also counts instructions in pd_code_size_limit. // Also do not verify_oop as this is called by verify_oop. - if (Universe::narrow_oop_base() == NULL) { - if (Universe::narrow_oop_shift() != 0) { - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - shlq(r, LogMinObjAlignmentInBytes); - } + if (Universe::narrow_oop_shift() != 0) { + assert (Address::times_8 == LogMinObjAlignmentInBytes && + Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong"); + // Don't use Shift since it modifies flags. 
+ leaq(r, Address(r12_heapbase, r, Address::times_8, 0)); } else { - assert (Address::times_8 == LogMinObjAlignmentInBytes && - Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong"); - leaq(r, Address(r12_heapbase, r, Address::times_8, 0)); + assert (Universe::narrow_oop_base() == NULL, "sanity"); } } @@ -8358,6 +8356,7 @@ Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong"); leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); } else if (dst != src) { + assert (Universe::narrow_oop_base() == NULL, "sanity"); movq(dst, src); } } diff -r f753dffae23e -r 16314a31b961 src/os/solaris/vm/os_solaris.cpp --- a/src/os/solaris/vm/os_solaris.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/os/solaris/vm/os_solaris.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -1643,7 +1643,8 @@ inline hrtime_t getTimeNanos() { if (VM_Version::supports_cx8()) { const hrtime_t now = gethrtime(); - const hrtime_t prev = max_hrtime; + // Use atomic long load since 32-bit x86 uses 2 registers to keep long. 
+ const hrtime_t prev = Atomic::load((volatile jlong*)&max_hrtime); if (now <= prev) return prev; // same or retrograde time; const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev); assert(obsv >= prev, "invariant"); // Monotonicity diff -r f753dffae23e -r 16314a31b961 src/os/windows/vm/os_windows.cpp --- a/src/os/windows/vm/os_windows.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/os/windows/vm/os_windows.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -616,12 +616,13 @@ } julong os::win32::available_memory() { - // FIXME: GlobalMemoryStatus() may return incorrect value if total memory - // is larger than 4GB - MEMORYSTATUS ms; - GlobalMemoryStatus(&ms); - - return (julong)ms.dwAvailPhys; + // Use GlobalMemoryStatusEx() because GlobalMemoryStatus() may return incorrect + // value if total memory is larger than 4GB + MEMORYSTATUSEX ms; + ms.dwLength = sizeof(ms); + GlobalMemoryStatusEx(&ms); + + return (julong)ms.ullAvailPhys; } julong os::physical_memory() { @@ -1579,16 +1580,17 @@ st->print("Memory:"); st->print(" %dk page", os::vm_page_size()>>10); - // FIXME: GlobalMemoryStatus() may return incorrect value if total memory - // is larger than 4GB - MEMORYSTATUS ms; - GlobalMemoryStatus(&ms); + // Use GlobalMemoryStatusEx() because GlobalMemoryStatus() may return incorrect + // value if total memory is larger than 4GB + MEMORYSTATUSEX ms; + ms.dwLength = sizeof(ms); + GlobalMemoryStatusEx(&ms); st->print(", physical %uk", os::physical_memory() >> 10); st->print("(%uk free)", os::available_memory() >> 10); - st->print(", swap %uk", ms.dwTotalPageFile >> 10); - st->print("(%uk free)", ms.dwAvailPageFile >> 10); + st->print(", swap %uk", ms.ullTotalPageFile >> 10); + st->print("(%uk free)", ms.ullAvailPageFile >> 10); st->cr(); } @@ -3135,11 +3137,13 @@ _processor_level = si.wProcessorLevel; _processor_count = si.dwNumberOfProcessors; - MEMORYSTATUS ms; + MEMORYSTATUSEX ms; + ms.dwLength = sizeof(ms); + // also returns dwAvailPhys (free physical memory 
bytes), dwTotalVirtual, dwAvailVirtual, // dwMemoryLoad (% of memory in use) - GlobalMemoryStatus(&ms); - _physical_memory = ms.dwTotalPhys; + GlobalMemoryStatusEx(&ms); + _physical_memory = ms.ullTotalPhys; OSVERSIONINFO oi; oi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); diff -r f753dffae23e -r 16314a31b961 src/os_cpu/solaris_sparc/vm/atomic_solaris_sparc.inline.hpp --- a/src/os_cpu/solaris_sparc/vm/atomic_solaris_sparc.inline.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/os_cpu/solaris_sparc/vm/atomic_solaris_sparc.inline.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -46,6 +46,8 @@ inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); } inline void Atomic::dec_ptr(volatile void* dest) { (void)add_ptr(-1, dest); } +inline jlong Atomic::load(volatile jlong* src) { return *src; } + #ifdef _GNU_SOURCE inline jint Atomic::add (jint add_value, volatile jint* dest) { diff -r f753dffae23e -r 16314a31b961 src/os_cpu/solaris_x86/vm/atomic_solaris_x86.inline.hpp --- a/src/os_cpu/solaris_x86/vm/atomic_solaris_x86.inline.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/os_cpu/solaris_x86/vm/atomic_solaris_x86.inline.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -99,6 +99,8 @@ return (void*)_Atomic_cmpxchg_long((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, (int) os::is_MP()); } +inline jlong Atomic::load(volatile jlong* src) { return *src; } + #else // !AMD64 inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) { @@ -131,6 +133,15 @@ inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) { return (void*)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value); } + +extern "C" void _Atomic_load_long(volatile jlong* src, volatile jlong* dst); + +inline jlong Atomic::load(volatile jlong* src) { + volatile jlong dest; + _Atomic_load_long(src, &dest); + return dest; +} + #endif // AMD64 #ifdef _GNU_SOURCE diff -r f753dffae23e -r 16314a31b961 
src/os_cpu/solaris_x86/vm/solaris_x86_32.il --- a/src/os_cpu/solaris_x86/vm/solaris_x86_32.il Thu Aug 13 17:47:43 2009 -0700 +++ b/src/os_cpu/solaris_x86/vm/solaris_x86_32.il Thu Aug 13 17:59:05 2009 -0700 @@ -97,6 +97,15 @@ popl %ebx .end + // Support for void Atomic::load(volatile jlong* src, volatile jlong* dest). + .inline _Atomic_load_long,2 + movl 0(%esp), %eax // src + fildll (%eax) + movl 4(%esp), %eax // dest + fistpll (%eax) + .end + + // Support for OrderAccess::acquire() .inline _OrderAccess_acquire,0 movl 0(%esp), %eax diff -r f753dffae23e -r 16314a31b961 src/share/vm/c1/c1_IR.cpp --- a/src/share/vm/c1/c1_IR.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/c1/c1_IR.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -208,6 +208,15 @@ return scope->caller_bci(); } +bool IRScopeDebugInfo::should_reexecute() { + ciMethod* cur_method = scope()->method(); + int cur_bci = bci(); + if (cur_method != NULL && cur_bci != SynchronizationEntryBCI) { + Bytecodes::Code code = cur_method->java_code_at_bci(cur_bci); + return Interpreter::bytecode_should_reexecute(code); + } else + return false; +} // Implementation of CodeEmitInfo @@ -253,7 +262,7 @@ void CodeEmitInfo::record_debug_info(DebugInformationRecorder* recorder, int pc_offset) { // record the safepoint before recording the debug info for enclosing scopes recorder->add_safepoint(pc_offset, _oop_map->deep_copy()); - _scope_debug_info->record_debug_info(recorder, pc_offset); + _scope_debug_info->record_debug_info(recorder, pc_offset, true/*topmost*/); recorder->end_safepoint(pc_offset); } diff -r f753dffae23e -r 16314a31b961 src/share/vm/c1/c1_IR.hpp --- a/src/share/vm/c1/c1_IR.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/c1/c1_IR.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -239,15 +239,20 @@ GrowableArray* monitors() { return _monitors; } IRScopeDebugInfo* caller() { return _caller; } - void record_debug_info(DebugInformationRecorder* recorder, int pc_offset) { + //Whether we should reexecute this bytecode for deopt 
+ bool should_reexecute(); + + void record_debug_info(DebugInformationRecorder* recorder, int pc_offset, bool topmost) { if (caller() != NULL) { // Order is significant: Must record caller first. - caller()->record_debug_info(recorder, pc_offset); + caller()->record_debug_info(recorder, pc_offset, false/*topmost*/); } DebugToken* locvals = recorder->create_scope_values(locals()); DebugToken* expvals = recorder->create_scope_values(expressions()); DebugToken* monvals = recorder->create_monitor_values(monitors()); - recorder->describe_scope(pc_offset, scope()->method(), bci(), locvals, expvals, monvals); + // reexecute allowed only for the topmost frame + bool reexecute = topmost ? should_reexecute() : false; + recorder->describe_scope(pc_offset, scope()->method(), bci(), reexecute, locvals, expvals, monvals); } }; diff -r f753dffae23e -r 16314a31b961 src/share/vm/c1/c1_LIRAssembler.cpp --- a/src/share/vm/c1/c1_LIRAssembler.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/c1/c1_LIRAssembler.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -379,7 +379,8 @@ ValueStack* s = nth_oldest(vstack, n, s_bci); if (s == NULL) break; IRScope* scope = s->scope(); - debug_info->describe_scope(pc_offset, scope->method(), s_bci); + //Always pass false for reexecute since these ScopeDescs are never used for deopt + debug_info->describe_scope(pc_offset, scope->method(), s_bci, false/*reexecute*/); } debug_info->end_non_safepoint(pc_offset); diff -r f753dffae23e -r 16314a31b961 src/share/vm/ci/ciObjectFactory.cpp --- a/src/share/vm/ci/ciObjectFactory.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/ci/ciObjectFactory.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -219,24 +219,27 @@ ASSERT_IN_VM; #ifdef ASSERT - oop last = NULL; - for (int j = 0; j< _ci_objects->length(); j++) { - oop o = _ci_objects->at(j)->get_oop(); - assert(last < o, "out of order"); - last = o; + if (CIObjectFactoryVerify) { + oop last = NULL; + for (int j = 0; j< _ci_objects->length(); j++) { + oop o = 
_ci_objects->at(j)->get_oop(); + assert(last < o, "out of order"); + last = o; + } } #endif // ASSERT int len = _ci_objects->length(); int index = find(key, _ci_objects); #ifdef ASSERT - for (int i=0; i<_ci_objects->length(); i++) { - if (_ci_objects->at(i)->get_oop() == key) { - assert(index == i, " bad lookup"); + if (CIObjectFactoryVerify) { + for (int i=0; i<_ci_objects->length(); i++) { + if (_ci_objects->at(i)->get_oop() == key) { + assert(index == i, " bad lookup"); + } } } #endif if (!is_found_at(index, key, _ci_objects)) { - // Check in the non-perm area before putting it in the list. NonPermObject* &bucket = find_non_perm(key); if (bucket != NULL) { @@ -539,11 +542,13 @@ objects->at_put(index, obj); } #ifdef ASSERT - oop last = NULL; - for (int j = 0; j< objects->length(); j++) { - oop o = objects->at(j)->get_oop(); - assert(last < o, "out of order"); - last = o; + if (CIObjectFactoryVerify) { + oop last = NULL; + for (int j = 0; j< objects->length(); j++) { + oop o = objects->at(j)->get_oop(); + assert(last < o, "out of order"); + last = o; + } } #endif // ASSERT } diff -r f753dffae23e -r 16314a31b961 src/share/vm/classfile/classFileParser.cpp --- a/src/share/vm/classfile/classFileParser.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/classfile/classFileParser.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -547,7 +547,6 @@ int length, Handle class_loader, Handle protection_domain, - PerfTraceTime* vmtimer, symbolHandle class_name, TRAPS) { ClassFileStream* cfs = stream(); @@ -575,13 +574,11 @@ guarantee_property(unresolved_klass->byte_at(0) != JVM_SIGNATURE_ARRAY, "Bad interface name in class file %s", CHECK_(nullHandle)); - vmtimer->suspend(); // do not count recursive loading twice // Call resolve_super so classcircularity is checked klassOop k = SystemDictionary::resolve_super_or_fail(class_name, unresolved_klass, class_loader, protection_domain, false, CHECK_(nullHandle)); interf = KlassHandle(THREAD, k); - vmtimer->resume(); if (LinkWellKnownClasses) 
// my super type is well known to me cp->klass_at_put(interface_index, interf()); // eagerly resolve @@ -2558,7 +2555,15 @@ ClassFileStream* cfs = stream(); // Timing - PerfTraceTime vmtimer(ClassLoader::perf_accumulated_time()); + assert(THREAD->is_Java_thread(), "must be a JavaThread"); + JavaThread* jt = (JavaThread*) THREAD; + + PerfClassTraceTime ctimer(ClassLoader::perf_class_parse_time(), + ClassLoader::perf_class_parse_selftime(), + NULL, + jt->get_thread_stat()->perf_recursion_counts_addr(), + jt->get_thread_stat()->perf_timers_addr(), + PerfClassTraceTime::PARSE_CLASS); _has_finalizer = _has_empty_finalizer = _has_vanilla_constructor = false; @@ -2738,7 +2743,7 @@ if (itfs_len == 0) { local_interfaces = objArrayHandle(THREAD, Universe::the_empty_system_obj_array()); } else { - local_interfaces = parse_interfaces(cp, itfs_len, class_loader, protection_domain, &vmtimer, _class_name, CHECK_(nullHandle)); + local_interfaces = parse_interfaces(cp, itfs_len, class_loader, protection_domain, _class_name, CHECK_(nullHandle)); } // Fields (offsets are filled in later) @@ -2782,6 +2787,7 @@ protection_domain, true, CHECK_(nullHandle)); + KlassHandle kh (THREAD, k); super_klass = instanceKlassHandle(THREAD, kh()); if (LinkWellKnownClasses) // my super class is well known to me diff -r f753dffae23e -r 16314a31b961 src/share/vm/classfile/classFileParser.hpp --- a/src/share/vm/classfile/classFileParser.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/classfile/classFileParser.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -61,7 +61,6 @@ int length, Handle class_loader, Handle protection_domain, - PerfTraceTime* vmtimer, symbolHandle class_name, TRAPS); diff -r f753dffae23e -r 16314a31b961 src/share/vm/classfile/classLoader.cpp --- a/src/share/vm/classfile/classLoader.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/classfile/classLoader.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -48,9 +48,26 @@ PerfCounter* ClassLoader::_perf_accumulated_time = NULL; PerfCounter* 
ClassLoader::_perf_classes_inited = NULL; PerfCounter* ClassLoader::_perf_class_init_time = NULL; +PerfCounter* ClassLoader::_perf_class_init_selftime = NULL; +PerfCounter* ClassLoader::_perf_classes_verified = NULL; PerfCounter* ClassLoader::_perf_class_verify_time = NULL; +PerfCounter* ClassLoader::_perf_class_verify_selftime = NULL; PerfCounter* ClassLoader::_perf_classes_linked = NULL; PerfCounter* ClassLoader::_perf_class_link_time = NULL; +PerfCounter* ClassLoader::_perf_class_link_selftime = NULL; +PerfCounter* ClassLoader::_perf_class_parse_time = NULL; +PerfCounter* ClassLoader::_perf_class_parse_selftime = NULL; +PerfCounter* ClassLoader::_perf_sys_class_lookup_time = NULL; +PerfCounter* ClassLoader::_perf_shared_classload_time = NULL; +PerfCounter* ClassLoader::_perf_sys_classload_time = NULL; +PerfCounter* ClassLoader::_perf_app_classload_time = NULL; +PerfCounter* ClassLoader::_perf_app_classload_selftime = NULL; +PerfCounter* ClassLoader::_perf_app_classload_count = NULL; +PerfCounter* ClassLoader::_perf_define_appclasses = NULL; +PerfCounter* ClassLoader::_perf_define_appclass_time = NULL; +PerfCounter* ClassLoader::_perf_define_appclass_selftime = NULL; +PerfCounter* ClassLoader::_perf_app_classfile_bytes_read = NULL; +PerfCounter* ClassLoader::_perf_sys_classfile_bytes_read = NULL; PerfCounter* ClassLoader::_sync_systemLoaderLockContentionRate = NULL; PerfCounter* ClassLoader::_sync_nonSystemLoaderLockContentionRate = NULL; PerfCounter* ClassLoader::_sync_JVMFindLoadedClassLockFreeCounter = NULL; @@ -152,6 +169,9 @@ hpi::close(file_handle); // construct ClassFileStream if (num_read == (size_t)st.st_size) { + if (UsePerfData) { + ClassLoader::perf_sys_classfile_bytes_read()->inc(num_read); + } return new ClassFileStream(buffer, st.st_size, _dir); // Resource allocated } } @@ -198,6 +218,9 @@ buffer = NEW_RESOURCE_ARRAY(u1, filesize); if (!(*ReadEntry)(_zip, entry, buffer, filename)) return NULL; } + if (UsePerfData) { + 
ClassLoader::perf_sys_classfile_bytes_read()->inc(filesize); + } // return result return new ClassFileStream(buffer, filesize, _zip_name); // Resource allocated } @@ -825,7 +848,9 @@ ClassFileStream* stream = NULL; int classpath_index = 0; { - PerfTraceTime vmtimer(perf_accumulated_time()); + PerfClassTraceTime vmtimer(perf_sys_class_lookup_time(), + ((JavaThread*) THREAD)->get_thread_stat()->perf_timers_addr(), + PerfClassTraceTime::CLASS_LOAD); ClassPathEntry* e = _first_entry; while (e != NULL) { stream = e->open_stream(name); @@ -890,11 +915,29 @@ // jvmstat performance counters NEWPERFTICKCOUNTER(_perf_accumulated_time, SUN_CLS, "time"); NEWPERFTICKCOUNTER(_perf_class_init_time, SUN_CLS, "classInitTime"); + NEWPERFTICKCOUNTER(_perf_class_init_selftime, SUN_CLS, "classInitTime.self"); NEWPERFTICKCOUNTER(_perf_class_verify_time, SUN_CLS, "classVerifyTime"); + NEWPERFTICKCOUNTER(_perf_class_verify_selftime, SUN_CLS, "classVerifyTime.self"); NEWPERFTICKCOUNTER(_perf_class_link_time, SUN_CLS, "classLinkedTime"); - + NEWPERFTICKCOUNTER(_perf_class_link_selftime, SUN_CLS, "classLinkedTime.self"); NEWPERFEVENTCOUNTER(_perf_classes_inited, SUN_CLS, "initializedClasses"); NEWPERFEVENTCOUNTER(_perf_classes_linked, SUN_CLS, "linkedClasses"); + NEWPERFEVENTCOUNTER(_perf_classes_verified, SUN_CLS, "verifiedClasses"); + + NEWPERFTICKCOUNTER(_perf_class_parse_time, SUN_CLS, "parseClassTime"); + NEWPERFTICKCOUNTER(_perf_class_parse_selftime, SUN_CLS, "parseClassTime.self"); + NEWPERFTICKCOUNTER(_perf_sys_class_lookup_time, SUN_CLS, "lookupSysClassTime"); + NEWPERFTICKCOUNTER(_perf_shared_classload_time, SUN_CLS, "sharedClassLoadTime"); + NEWPERFTICKCOUNTER(_perf_sys_classload_time, SUN_CLS, "sysClassLoadTime"); + NEWPERFTICKCOUNTER(_perf_app_classload_time, SUN_CLS, "appClassLoadTime"); + NEWPERFTICKCOUNTER(_perf_app_classload_selftime, SUN_CLS, "appClassLoadTime.self"); + NEWPERFEVENTCOUNTER(_perf_app_classload_count, SUN_CLS, "appClassLoadCount"); + 
NEWPERFTICKCOUNTER(_perf_define_appclasses, SUN_CLS, "defineAppClasses"); + NEWPERFTICKCOUNTER(_perf_define_appclass_time, SUN_CLS, "defineAppClassTime"); + NEWPERFTICKCOUNTER(_perf_define_appclass_selftime, SUN_CLS, "defineAppClassTime.self"); + NEWPERFBYTECOUNTER(_perf_app_classfile_bytes_read, SUN_CLS, "appClassBytes"); + NEWPERFBYTECOUNTER(_perf_sys_classfile_bytes_read, SUN_CLS, "sysClassBytes"); + // The following performance counters are added for measuring the impact // of the bug fix of 6365597. They are mainly focused on finding out diff -r f753dffae23e -r 16314a31b961 src/share/vm/classfile/classLoader.hpp --- a/src/share/vm/classfile/classLoader.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/classfile/classLoader.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -149,9 +149,26 @@ static PerfCounter* _perf_accumulated_time; static PerfCounter* _perf_classes_inited; static PerfCounter* _perf_class_init_time; + static PerfCounter* _perf_class_init_selftime; + static PerfCounter* _perf_classes_verified; static PerfCounter* _perf_class_verify_time; + static PerfCounter* _perf_class_verify_selftime; static PerfCounter* _perf_classes_linked; static PerfCounter* _perf_class_link_time; + static PerfCounter* _perf_class_link_selftime; + static PerfCounter* _perf_class_parse_time; + static PerfCounter* _perf_class_parse_selftime; + static PerfCounter* _perf_sys_class_lookup_time; + static PerfCounter* _perf_shared_classload_time; + static PerfCounter* _perf_sys_classload_time; + static PerfCounter* _perf_app_classload_time; + static PerfCounter* _perf_app_classload_selftime; + static PerfCounter* _perf_app_classload_count; + static PerfCounter* _perf_define_appclasses; + static PerfCounter* _perf_define_appclass_time; + static PerfCounter* _perf_define_appclass_selftime; + static PerfCounter* _perf_app_classfile_bytes_read; + static PerfCounter* _perf_sys_classfile_bytes_read; static PerfCounter* _sync_systemLoaderLockContentionRate; static PerfCounter* 
_sync_nonSystemLoaderLockContentionRate; @@ -196,12 +213,29 @@ static void print_bootclasspath(); // Timing - static PerfCounter* perf_accumulated_time() { return _perf_accumulated_time; } - static PerfCounter* perf_classes_inited() { return _perf_classes_inited; } - static PerfCounter* perf_class_init_time() { return _perf_class_init_time; } - static PerfCounter* perf_class_verify_time() { return _perf_class_verify_time; } - static PerfCounter* perf_classes_linked() { return _perf_classes_linked; } - static PerfCounter* perf_class_link_time() { return _perf_class_link_time; } + static PerfCounter* perf_accumulated_time() { return _perf_accumulated_time; } + static PerfCounter* perf_classes_inited() { return _perf_classes_inited; } + static PerfCounter* perf_class_init_time() { return _perf_class_init_time; } + static PerfCounter* perf_class_init_selftime() { return _perf_class_init_selftime; } + static PerfCounter* perf_classes_verified() { return _perf_classes_verified; } + static PerfCounter* perf_class_verify_time() { return _perf_class_verify_time; } + static PerfCounter* perf_class_verify_selftime() { return _perf_class_verify_selftime; } + static PerfCounter* perf_classes_linked() { return _perf_classes_linked; } + static PerfCounter* perf_class_link_time() { return _perf_class_link_time; } + static PerfCounter* perf_class_link_selftime() { return _perf_class_link_selftime; } + static PerfCounter* perf_class_parse_time() { return _perf_class_parse_time; } + static PerfCounter* perf_class_parse_selftime() { return _perf_class_parse_selftime; } + static PerfCounter* perf_sys_class_lookup_time() { return _perf_sys_class_lookup_time; } + static PerfCounter* perf_shared_classload_time() { return _perf_shared_classload_time; } + static PerfCounter* perf_sys_classload_time() { return _perf_sys_classload_time; } + static PerfCounter* perf_app_classload_time() { return _perf_app_classload_time; } + static PerfCounter* perf_app_classload_selftime() { return 
_perf_app_classload_selftime; } + static PerfCounter* perf_app_classload_count() { return _perf_app_classload_count; } + static PerfCounter* perf_define_appclasses() { return _perf_define_appclasses; } + static PerfCounter* perf_define_appclass_time() { return _perf_define_appclass_time; } + static PerfCounter* perf_define_appclass_selftime() { return _perf_define_appclass_selftime; } + static PerfCounter* perf_app_classfile_bytes_read() { return _perf_app_classfile_bytes_read; } + static PerfCounter* perf_sys_classfile_bytes_read() { return _perf_sys_classfile_bytes_read; } // Record how often system loader lock object is contended static PerfCounter* sync_systemLoaderLockContentionRate() { @@ -307,3 +341,118 @@ static int compile_the_world_counter() { return _compile_the_world_counter; } #endif //PRODUCT }; + +// PerfClassTraceTime is used to measure time for class loading related events. +// This class tracks cumulative time and exclusive time for specific event types. +// During the execution of one event, other event types (e.g. class loading and +// resolution) as well as recursive calls of the same event type could happen. +// Only one elapsed timer (cumulative) and one thread-local self timer (exclusive) +// (i.e. only one event type) are active at a time even multiple PerfClassTraceTime +// instances have been created as multiple events are happening. +class PerfClassTraceTime { + public: + enum { + CLASS_LOAD = 0, + PARSE_CLASS = 1, + CLASS_LINK = 2, + CLASS_VERIFY = 3, + CLASS_CLINIT = 4, + DEFINE_CLASS = 5, + EVENT_TYPE_COUNT = 6 + }; + protected: + // _t tracks time from initialization to destruction of this timer instance + // including time for all other event types, and recursive calls of this type. + // When a timer is called recursively, the elapsedTimer _t would not be used. 
+ elapsedTimer _t; + PerfLongCounter* _timep; + PerfLongCounter* _selftimep; + PerfLongCounter* _eventp; + // pointer to thread-local recursion counter and timer array + // The thread_local timers track cumulative time for specific event types + // exclusive of time for other event types, but including recursive calls + // of the same type. + int* _recursion_counters; + elapsedTimer* _timers; + int _event_type; + int _prev_active_event; + + public: + + inline PerfClassTraceTime(PerfLongCounter* timep, /* counter incremented with inclusive time */ + PerfLongCounter* selftimep, /* counter incremented with exclusive time */ + PerfLongCounter* eventp, /* event counter */ + int* recursion_counters, /* thread-local recursion counter array */ + elapsedTimer* timers, /* thread-local timer array */ + int type /* event type */ ) : + _timep(timep), _selftimep(selftimep), _eventp(eventp), _recursion_counters(recursion_counters), _timers(timers), _event_type(type) { + initialize(); + } + + inline PerfClassTraceTime(PerfLongCounter* timep, /* counter incremented with inclusive time */ + elapsedTimer* timers, /* thread-local timer array */ + int type /* event type */ ) : + _timep(timep), _selftimep(NULL), _eventp(NULL), _recursion_counters(NULL), _timers(timers), _event_type(type) { + initialize(); + } + + void initialize() { + if (!UsePerfData) return; + + if (_eventp != NULL) { + // increment the event counter + _eventp->inc(); + } + + // stop the current active thread-local timer to measure inclusive time + _prev_active_event = -1; + for (int i=0; i < EVENT_TYPE_COUNT; i++) { + if (_timers[i].is_active()) { + assert(_prev_active_event == -1, "should have only one active timer"); + _prev_active_event = i; + _timers[i].stop(); + } + } + + if (_recursion_counters == NULL || (_recursion_counters[_event_type])++ == 0) { + // start the inclusive timer if not recursively called + _t.start(); + } + + // start thread-local timer of the given event type + if 
(!_timers[_event_type].is_active()) { + _timers[_event_type].start(); + } + } + + inline void suspend() { _t.stop(); _timers[_event_type].stop(); } + inline void resume() { _t.start(); _timers[_event_type].start(); } + + ~PerfClassTraceTime() { + if (!UsePerfData) return; + + // stop the thread-local timer as the event completes + // and resume the thread-local timer of the event next on the stack + _timers[_event_type].stop(); + jlong selftime = _timers[_event_type].ticks(); + + if (_prev_active_event >= 0) { + _timers[_prev_active_event].start(); + } + + if (_recursion_counters != NULL && --(_recursion_counters[_event_type]) > 0) return; + + // increment the counters only on the leaf call + _t.stop(); + _timep->inc(_t.ticks()); + if (_selftimep != NULL) { + _selftimep->inc(selftime); + } + // add all class loading related event selftime to the accumulated time counter + ClassLoader::perf_accumulated_time()->inc(selftime); + + // reset the timer + _timers[_event_type].reset(); + } +}; + diff -r f753dffae23e -r 16314a31b961 src/share/vm/classfile/javaClasses.cpp --- a/src/share/vm/classfile/javaClasses.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/classfile/javaClasses.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -1229,10 +1229,13 @@ // Compiled java method case. 
if (decode_offset != 0) { + bool dummy_reexecute = false; DebugInfoReadStream stream(nm, decode_offset); decode_offset = stream.read_int(); method = (methodOop)nm->oop_at(stream.read_int()); - bci = stream.read_bci(); + //fill_in_stack_trace does not need the reexecute information which is designed + //for the deopt to reexecute + bci = stream.read_bci_and_reexecute(dummy_reexecute); } else { if (fr.is_first_frame()) break; address pc = fr.pc(); diff -r f753dffae23e -r 16314a31b961 src/share/vm/classfile/systemDictionary.cpp --- a/src/share/vm/classfile/systemDictionary.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/classfile/systemDictionary.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -1306,13 +1306,18 @@ instanceKlassHandle SystemDictionary::load_instance_class(symbolHandle class_name, Handle class_loader, TRAPS) { instanceKlassHandle nh = instanceKlassHandle(); // null Handle if (class_loader.is_null()) { + // Search the shared system dictionary for classes preloaded into the // shared spaces. instanceKlassHandle k; - k = load_shared_class(class_name, class_loader, THREAD); + { + PerfTraceTime vmtimer(ClassLoader::perf_shared_classload_time()); + k = load_shared_class(class_name, class_loader, THREAD); + } if (k.is_null()) { // Use VM class loader + PerfTraceTime vmtimer(ClassLoader::perf_sys_classload_time()); k = ClassLoader::load_classfile(class_name, CHECK_(nh)); } @@ -1334,6 +1339,16 @@ // Use user specified class loader to load class. Call loadClass operation on class_loader. 
ResourceMark rm(THREAD); + assert(THREAD->is_Java_thread(), "must be a JavaThread"); + JavaThread* jt = (JavaThread*) THREAD; + + PerfClassTraceTime vmtimer(ClassLoader::perf_app_classload_time(), + ClassLoader::perf_app_classload_selftime(), + ClassLoader::perf_app_classload_count(), + jt->get_thread_stat()->perf_recursion_counts_addr(), + jt->get_thread_stat()->perf_timers_addr(), + PerfClassTraceTime::CLASS_LOAD); + Handle s = java_lang_String::create_from_symbol(class_name, CHECK_(nh)); // Translate to external class name format, i.e., convert '/' chars to '.' Handle string = java_lang_String::externalize_classname(s, CHECK_(nh)); diff -r f753dffae23e -r 16314a31b961 src/share/vm/code/debugInfo.hpp --- a/src/share/vm/code/debugInfo.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/code/debugInfo.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -255,7 +255,8 @@ ScopeValue* read_object_value(); ScopeValue* get_cached_object(); // BCI encoding is mostly unsigned, but -1 is a distinguished value - int read_bci() { return read_int() + InvocationEntryBci; } + // Decoding based on encoding: bci = InvocationEntryBci + read_int()/2; reexecute = read_int()%2 == 1 ? true : false; + int read_bci_and_reexecute(bool& reexecute) { int i = read_int(); reexecute = (i & 1) ? true : false; return (i >> 1) + InvocationEntryBci; } }; // DebugInfoWriteStream specializes CompressedWriteStream for @@ -268,5 +269,6 @@ public: DebugInfoWriteStream(DebugInformationRecorder* recorder, int initial_size); void write_handle(jobject h); - void write_bci(int bci) { write_int(bci - InvocationEntryBci); } + //Encoding bci and reexecute into one word as (bci - InvocationEntryBci)*2 + reexecute + void write_bci_and_reexecute(int bci, bool reexecute) { write_int(((bci - InvocationEntryBci) << 1) + (reexecute ? 
1 : 0)); } }; diff -r f753dffae23e -r 16314a31b961 src/share/vm/code/debugInfoRec.cpp --- a/src/share/vm/code/debugInfoRec.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/code/debugInfoRec.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -280,6 +280,7 @@ void DebugInformationRecorder::describe_scope(int pc_offset, ciMethod* method, int bci, + bool reexecute, DebugToken* locals, DebugToken* expressions, DebugToken* monitors) { @@ -297,7 +298,7 @@ // serialize scope jobject method_enc = (method == NULL)? NULL: method->encoding(); stream()->write_int(oop_recorder()->find_index(method_enc)); - stream()->write_bci(bci); + stream()->write_bci_and_reexecute(bci, reexecute); assert(method == NULL || (method->is_native() && bci == 0) || (!method->is_native() && 0 <= bci && bci < method->code_size()) || diff -r f753dffae23e -r 16314a31b961 src/share/vm/code/debugInfoRec.hpp --- a/src/share/vm/code/debugInfoRec.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/code/debugInfoRec.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -87,6 +87,7 @@ void describe_scope(int pc_offset, ciMethod* method, int bci, + bool reexecute, DebugToken* locals = NULL, DebugToken* expressions = NULL, DebugToken* monitors = NULL); diff -r f753dffae23e -r 16314a31b961 src/share/vm/code/scopeDesc.cpp --- a/src/share/vm/code/scopeDesc.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/code/scopeDesc.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -46,6 +46,7 @@ _decode_offset = parent->_sender_decode_offset; _objects = parent->_objects; decode_body(); + assert(_reexecute == false, "reexecute not allowed"); } @@ -56,6 +57,7 @@ _sender_decode_offset = DebugInformationRecorder::serialized_null; _method = methodHandle(_code->method()); _bci = InvocationEntryBci; + _reexecute = false; _locals_decode_offset = DebugInformationRecorder::serialized_null; _expressions_decode_offset = DebugInformationRecorder::serialized_null; _monitors_decode_offset = DebugInformationRecorder::serialized_null; @@ -65,7 +67,8 @@ 
_sender_decode_offset = stream->read_int(); _method = methodHandle((methodOop) stream->read_oop()); - _bci = stream->read_bci(); + _bci = stream->read_bci_and_reexecute(_reexecute); + // decode offsets for body and sender _locals_decode_offset = stream->read_int(); _expressions_decode_offset = stream->read_int(); @@ -170,6 +173,7 @@ st->print("ScopeDesc[%d]@" PTR_FORMAT " ", _decode_offset, _code->instructions_begin()); st->print_cr(" offset: %d", _decode_offset); st->print_cr(" bci: %d", bci()); + st->print_cr(" reexecute: %s", should_reexecute() ? "true" : "false"); st->print_cr(" locals: %d", _locals_decode_offset); st->print_cr(" stack: %d", _expressions_decode_offset); st->print_cr(" monitor: %d", _monitors_decode_offset); diff -r f753dffae23e -r 16314a31b961 src/share/vm/code/scopeDesc.hpp --- a/src/share/vm/code/scopeDesc.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/code/scopeDesc.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -39,7 +39,8 @@ DebugInfoReadStream buffer(code, pc_desc->scope_decode_offset()); int ignore_sender = buffer.read_int(); _method = methodOop(buffer.read_oop()); - _bci = buffer.read_bci(); + bool dummy_reexecute; //only methodOop and bci are needed! 
+ _bci = buffer.read_bci_and_reexecute(dummy_reexecute); } methodOop method() { return _method; } @@ -60,8 +61,9 @@ ScopeDesc(const nmethod* code, int decode_offset); // JVM state - methodHandle method() const { return _method; } - int bci() const { return _bci; } + methodHandle method() const { return _method; } + int bci() const { return _bci; } + bool should_reexecute() const { return _reexecute; } GrowableArray* locals(); GrowableArray* expressions(); @@ -86,6 +88,7 @@ // JVM state methodHandle _method; int _bci; + bool _reexecute; // Decoding offsets int _decode_offset; diff -r f753dffae23e -r 16314a31b961 src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -25,11 +25,21 @@ #include "incls/_precompiled.incl" #include "incls/_concurrentG1Refine.cpp.incl" +// Possible sizes for the card counts cache: odd primes that roughly double in size. +// (See jvmtiTagMap.cpp). 
+int ConcurrentG1Refine::_cc_cache_sizes[] = { + 16381, 32771, 76831, 150001, 307261, + 614563, 1228891, 2457733, 4915219, 9830479, + 19660831, 39321619, 78643219, 157286461, -1 + }; + ConcurrentG1Refine::ConcurrentG1Refine() : - _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL), + _card_counts(NULL), _card_epochs(NULL), + _n_card_counts(0), _max_n_card_counts(0), + _cache_size_index(0), _expand_card_counts(false), _hot_cache(NULL), _def_use_cache(false), _use_cache(false), - _n_periods(0), _total_cards(0), _total_travs(0), + _n_periods(0), _threads(NULL), _n_threads(0) { if (G1ConcRefine) { @@ -57,32 +67,51 @@ } void ConcurrentG1Refine::init() { - if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - _n_card_counts = - (unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift); - _card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts); - for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0; - ModRefBarrierSet* bs = g1h->mr_bs(); + if (G1ConcRSLogCacheSize > 0) { + _g1h = G1CollectedHeap::heap(); + _max_n_card_counts = + (unsigned) (_g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift); + + size_t max_card_num = ((size_t)1 << (sizeof(unsigned)*BitsPerByte-1)) - 1; + guarantee(_max_n_card_counts < max_card_num, "card_num representation"); + + int desired = _max_n_card_counts / InitialCacheFraction; + for (_cache_size_index = 0; + _cc_cache_sizes[_cache_size_index] >= 0; _cache_size_index++) { + if (_cc_cache_sizes[_cache_size_index] >= desired) break; + } + _cache_size_index = MAX2(0, (_cache_size_index - 1)); + + int initial_size = _cc_cache_sizes[_cache_size_index]; + if (initial_size < 0) initial_size = _max_n_card_counts; + + // Make sure we don't go bigger than we will ever need + _n_card_counts = MIN2((unsigned) initial_size, _max_n_card_counts); + + _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts); + _card_epochs 
= NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts); + + Copy::fill_to_bytes(&_card_counts[0], + _n_card_counts * sizeof(CardCountCacheEntry)); + Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); + + ModRefBarrierSet* bs = _g1h->mr_bs(); guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition"); - CardTableModRefBS* ctbs = (CardTableModRefBS*)bs; - _ct_bot = ctbs->byte_for_const(g1h->reserved_region().start()); - if (G1ConcRSCountTraversals) { - _cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); - _cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256); - for (int i = 0; i < 256; i++) { - _cur_card_count_histo[i] = 0; - _cum_card_count_histo[i] = 0; - } - } - } - if (G1ConcRSLogCacheSize > 0) { + _ct_bs = (CardTableModRefBS*)bs; + _ct_bot = _ct_bs->byte_for_const(_g1h->reserved_region().start()); + _def_use_cache = true; _use_cache = true; _hot_cache_size = (1 << G1ConcRSLogCacheSize); _hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size); _n_hot = 0; _hot_cache_idx = 0; + + // For refining the cards in the hot cache in parallel + int n_workers = (ParallelGCThreads > 0 ? 
+ _g1h->workers()->total_workers() : 1); + _hot_cache_par_chunk_size = MAX2(1, _hot_cache_size / n_workers); + _hot_cache_par_claimed_idx = 0; } } @@ -95,15 +124,11 @@ } ConcurrentG1Refine::~ConcurrentG1Refine() { - if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) { + if (G1ConcRSLogCacheSize > 0) { assert(_card_counts != NULL, "Logic"); - FREE_C_HEAP_ARRAY(unsigned char, _card_counts); - assert(_cur_card_count_histo != NULL, "Logic"); - FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo); - assert(_cum_card_count_histo != NULL, "Logic"); - FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo); - } - if (G1ConcRSLogCacheSize > 0) { + FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts); + assert(_card_epochs != NULL, "Logic"); + FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs); assert(_hot_cache != NULL, "Logic"); FREE_C_HEAP_ARRAY(jbyte*, _hot_cache); } @@ -123,165 +148,232 @@ } } - -int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) { - size_t card_num = (card_ptr - _ct_bot); - guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds"); - unsigned char cnt = _card_counts[card_num]; - if (cnt < 255) _card_counts[card_num]++; - return cnt; - _total_travs++; +bool ConcurrentG1Refine::is_young_card(jbyte* card_ptr) { + HeapWord* start = _ct_bs->addr_for(card_ptr); + HeapRegion* r = _g1h->heap_region_containing(start); + if (r != NULL && r->is_young()) { + return true; + } + // This card is not associated with a heap region + // so can't be young. + return false; } -jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) { - int count = add_card_count(card_ptr); - // Count previously unvisited cards. - if (count == 0) _total_cards++; - // We'll assume a traversal unless we store it in the cache. 
+jbyte* ConcurrentG1Refine::add_card_count(jbyte* card_ptr, int* count, bool* defer) { + unsigned new_card_num = ptr_2_card_num(card_ptr); + unsigned bucket = hash(new_card_num); + assert(0 <= bucket && bucket < _n_card_counts, "Bounds"); + + CardCountCacheEntry* count_ptr = &_card_counts[bucket]; + CardEpochCacheEntry* epoch_ptr = &_card_epochs[bucket]; + + // We have to construct a new entry if we haven't updated the counts + // during the current period, or if the count was updated for a + // different card number. + unsigned int new_epoch = (unsigned int) _n_periods; + julong new_epoch_entry = make_epoch_entry(new_card_num, new_epoch); + + while (true) { + // Fetch the previous epoch value + julong prev_epoch_entry = epoch_ptr->_value; + julong cas_res; + + if (extract_epoch(prev_epoch_entry) != new_epoch) { + // This entry has not yet been updated during this period. + // Note: we update the epoch value atomically to ensure + // that there is only one winner that updates the cached + // card_ptr value even though all the refine threads share + // the same epoch value. + + cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, + (volatile jlong*)&epoch_ptr->_value, + (jlong) prev_epoch_entry); + + if (cas_res == prev_epoch_entry) { + // We have successfully won the race to update the + // epoch and card_num value. Make it look like the + // count and eviction count were previously cleared. + count_ptr->_count = 1; + count_ptr->_evict_count = 0; + *count = 0; + // We can defer the processing of card_ptr + *defer = true; + return card_ptr; + } + // We did not win the race to update the epoch field, so some other + // thread must have done it. The value that gets returned by CAS + // should be the new epoch value. 
+ assert(extract_epoch(cas_res) == new_epoch, "unexpected epoch"); + // We could 'continue' here or just re-read the previous epoch value + prev_epoch_entry = epoch_ptr->_value; + } + + // The epoch entry for card_ptr has been updated during this period. + unsigned old_card_num = extract_card_num(prev_epoch_entry); + + // The card count that will be returned to caller + *count = count_ptr->_count; + + // Are we updating the count for the same card? + if (new_card_num == old_card_num) { + // Same card - just update the count. We could have more than one + // thread racing to update count for the current card. It should be + // OK not to use a CAS as the only penalty should be some missed + // increments of the count which delays identifying the card as "hot". + + if (*count < max_jubyte) count_ptr->_count++; + // We can defer the processing of card_ptr + *defer = true; + return card_ptr; + } + + // Different card - evict old card info + if (count_ptr->_evict_count < max_jubyte) count_ptr->_evict_count++; + if (count_ptr->_evict_count > G1CardCountCacheExpandThreshold) { + // Trigger a resize the next time we clear + _expand_card_counts = true; + } + + cas_res = (julong) Atomic::cmpxchg((jlong) new_epoch_entry, + (volatile jlong*)&epoch_ptr->_value, + (jlong) prev_epoch_entry); + + if (cas_res == prev_epoch_entry) { + // We successfully updated the card num value in the epoch entry + count_ptr->_count = 0; // initialize counter for new card num + + // Even though the region containing the card at old_card_num was not + // in the young list when old_card_num was recorded in the epoch + // cache it could have been added to the free list and subsequently + // added to the young list in the intervening time. If the evicted + // card is in a young region just return the card_ptr and the evicted + // card will not be cleaned. See CR 6817995. 
+ + jbyte* old_card_ptr = card_num_2_ptr(old_card_num); + if (is_young_card(old_card_ptr)) { + *count = 0; + // We can defer the processing of card_ptr + *defer = true; + return card_ptr; + } + + // We do not want to defer processing of card_ptr in this case + // (we need to refine old_card_ptr and card_ptr) + *defer = false; + return old_card_ptr; + } + // Someone else beat us - try again. + } +} + +jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr, bool* defer) { + int count; + jbyte* cached_ptr = add_card_count(card_ptr, &count, defer); + assert(cached_ptr != NULL, "bad cached card ptr"); + assert(!is_young_card(cached_ptr), "shouldn't get a card in young region"); + + // The card pointer we obtained from card count cache is not hot + // so do not store it in the cache; return it for immediate + // refining. if (count < G1ConcRSHotCardLimit) { - _total_travs++; - return card_ptr; + return cached_ptr; } - // Otherwise, it's hot. + + // Otherwise, the pointer we got from the _card_counts is hot. jbyte* res = NULL; MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag); if (_n_hot == _hot_cache_size) { - _total_travs++; res = _hot_cache[_hot_cache_idx]; _n_hot--; } // Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx. - _hot_cache[_hot_cache_idx] = card_ptr; + _hot_cache[_hot_cache_idx] = cached_ptr; _hot_cache_idx++; if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0; _n_hot++; + + if (res != NULL) { + // Even though the region containing res was not in the young list + // when it was recorded in the hot cache it could have been added + // to the free list and subsequently added to the young list in + // the intervening time. If res is in a young region, return NULL + // so that res is not cleaned. See CR 6817995. 
+ + if (is_young_card(res)) { + res = NULL; + } + } + return res; } - void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) { assert(!use_cache(), "cache should be disabled"); - int start_ind = _hot_cache_idx-1; - for (int i = 0; i < _n_hot; i++) { - int ind = start_ind - i; - if (ind < 0) ind = ind + _hot_cache_size; - jbyte* entry = _hot_cache[ind]; - if (entry != NULL) { - g1rs->concurrentRefineOneCard(entry, worker_i); - } - } - _n_hot = 0; - _hot_cache_idx = 0; -} + int start_idx; + + while ((start_idx = _hot_cache_par_claimed_idx) < _n_hot) { // read once + int end_idx = start_idx + _hot_cache_par_chunk_size; -void ConcurrentG1Refine::clear_and_record_card_counts() { - if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return; - _n_periods++; - if (G1ConcRSCountTraversals) { - for (size_t i = 0; i < _n_card_counts; i++) { - unsigned char bucket = _card_counts[i]; - _cur_card_count_histo[bucket]++; - _card_counts[i] = 0; - } - gclog_or_tty->print_cr("Card counts:"); - for (int i = 0; i < 256; i++) { - if (_cur_card_count_histo[i] > 0) { - gclog_or_tty->print_cr(" %3d: %9d", i, _cur_card_count_histo[i]); - _cum_card_count_histo[i] += _cur_card_count_histo[i]; - _cur_card_count_histo[i] = 0; + if (start_idx == + Atomic::cmpxchg(end_idx, &_hot_cache_par_claimed_idx, start_idx)) { + // The current worker has successfully claimed the chunk [start_idx..end_idx) + end_idx = MIN2(end_idx, _n_hot); + for (int i = start_idx; i < end_idx; i++) { + jbyte* entry = _hot_cache[i]; + if (entry != NULL) { + g1rs->concurrentRefineOneCard(entry, worker_i); + } } } - } else { - assert(G1ConcRSLogCacheSize > 0, "Logic"); - Copy::fill_to_words((HeapWord*)(&_card_counts[0]), - _n_card_counts / HeapWordSize); } } -void -ConcurrentG1Refine:: -print_card_count_histo_range(unsigned* histo, int from, int to, - float& cum_card_pct, - float& cum_travs_pct) { - unsigned cards = 0; - unsigned travs = 0; - guarantee(to <= 256, "Precondition"); - for (int i = from; i < 
to-1; i++) { - cards += histo[i]; - travs += histo[i] * i; - } - if (to == 256) { - unsigned histo_card_sum = 0; - unsigned histo_trav_sum = 0; - for (int i = 1; i < 255; i++) { - histo_trav_sum += histo[i] * i; - } - cards += histo[255]; - // correct traversals for the last one. - unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum); - travs += travs_255; +void ConcurrentG1Refine::expand_card_count_cache() { + if (_n_card_counts < _max_n_card_counts) { + int new_idx = _cache_size_index+1; + int new_size = _cc_cache_sizes[new_idx]; + if (new_size < 0) new_size = _max_n_card_counts; + + // Make sure we don't go bigger than we will ever need + new_size = MIN2((unsigned) new_size, _max_n_card_counts); - } else { - cards += histo[to-1]; - travs += histo[to-1] * (to-1); - } - float fperiods = (float)_n_periods; - float f_tot_cards = (float)_total_cards/fperiods; - float f_tot_travs = (float)_total_travs/fperiods; - if (cards > 0) { - float fcards = (float)cards/fperiods; - float ftravs = (float)travs/fperiods; - if (to == 256) { - gclog_or_tty->print(" %4d- %10.2f%10.2f", from, fcards, ftravs); - } else { - gclog_or_tty->print(" %4d-%4d %10.2f%10.2f", from, to-1, fcards, ftravs); + // Expand the card count and card epoch tables + if (new_size > (int)_n_card_counts) { + // We can just free and allocate a new array as we're + // not interested in preserving the contents + assert(_card_counts != NULL, "Logic!"); + assert(_card_epochs != NULL, "Logic!"); + FREE_C_HEAP_ARRAY(CardCountCacheEntry, _card_counts); + FREE_C_HEAP_ARRAY(CardEpochCacheEntry, _card_epochs); + _n_card_counts = new_size; + _card_counts = NEW_C_HEAP_ARRAY(CardCountCacheEntry, _n_card_counts); + _card_epochs = NEW_C_HEAP_ARRAY(CardEpochCacheEntry, _n_card_counts); + _cache_size_index = new_idx; } - float pct_cards = fcards*100.0/f_tot_cards; - cum_card_pct += pct_cards; - float pct_travs = ftravs*100.0/f_tot_travs; - cum_travs_pct += pct_travs; - 
gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f", - pct_cards, cum_card_pct, - pct_travs, cum_travs_pct); } } -void ConcurrentG1Refine::print_final_card_counts() { - if (!G1ConcRSCountTraversals) return; +void ConcurrentG1Refine::clear_and_record_card_counts() { + if (G1ConcRSLogCacheSize == 0) return; - gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.", - _total_travs, _total_cards); - float fperiods = (float)_n_periods; - gclog_or_tty->print_cr(" This is an average of %8.2f traversals, %8.2f cards, " - "per collection.", (float)_total_travs/fperiods, - (float)_total_cards/fperiods); - gclog_or_tty->print_cr(" This is an average of %8.2f traversals/distinct " - "dirty card.\n", - _total_cards > 0 ? - (float)_total_travs/(float)_total_cards : 0.0); - +#ifndef PRODUCT + double start = os::elapsedTime(); +#endif - gclog_or_tty->print_cr("Histogram:\n\n%10s %10s%10s%10s%10s%10s%10s", - "range", "# cards", "# travs", "% cards", "(cum)", - "% travs", "(cum)"); - gclog_or_tty->print_cr("------------------------------------------------------------" - "-------------"); - float cum_cards_pct = 0.0; - float cum_travs_pct = 0.0; - for (int i = 1; i < 10; i++) { - print_card_count_histo_range(_cum_card_count_histo, i, i+1, - cum_cards_pct, cum_travs_pct); + if (_expand_card_counts) { + expand_card_count_cache(); + _expand_card_counts = false; + // Only need to clear the epochs. 
+ Copy::fill_to_bytes(&_card_epochs[0], _n_card_counts * sizeof(CardEpochCacheEntry)); } - for (int i = 10; i < 100; i += 10) { - print_card_count_histo_range(_cum_card_count_histo, i, i+10, - cum_cards_pct, cum_travs_pct); - } - print_card_count_histo_range(_cum_card_count_histo, 100, 150, - cum_cards_pct, cum_travs_pct); - print_card_count_histo_range(_cum_card_count_histo, 150, 200, - cum_cards_pct, cum_travs_pct); - print_card_count_histo_range(_cum_card_count_histo, 150, 255, - cum_cards_pct, cum_travs_pct); - print_card_count_histo_range(_cum_card_count_histo, 255, 256, - cum_cards_pct, cum_travs_pct); + + int this_epoch = (int) _n_periods; + assert((this_epoch+1) <= max_jint, "to many periods"); + // Update epoch + _n_periods++; + +#ifndef PRODUCT + double elapsed = os::elapsedTime() - start; + _g1h->g1_policy()->record_cc_clear_time(elapsed * 1000.0); +#endif } diff -r f753dffae23e -r 16314a31b961 src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -29,29 +29,117 @@ class ConcurrentG1Refine: public CHeapObj { ConcurrentG1RefineThread** _threads; int _n_threads; + // The cache for card refinement. - bool _use_cache; - bool _def_use_cache; - size_t _n_periods; - size_t _total_cards; - size_t _total_travs; + bool _use_cache; + bool _def_use_cache; + + size_t _n_periods; // Used as clearing epoch + + // An evicting cache of the number of times each card + // is accessed. Reduces, but does not eliminate, the amount + // of duplicated processing of dirty cards. + + enum SomePrivateConstants { + epoch_bits = 32, + card_num_shift = epoch_bits, + epoch_mask = AllBits, + card_num_mask = AllBits, + + // The initial cache size is approximately this fraction + // of a maximal cache (i.e. 
the size needed for all cards + // in the heap) + InitialCacheFraction = 512 + }; + + const static julong card_num_mask_in_place = + (julong) card_num_mask << card_num_shift; + + typedef struct { + julong _value; // | card_num | epoch | + } CardEpochCacheEntry; + + julong make_epoch_entry(unsigned int card_num, unsigned int epoch) { + assert(0 <= card_num && card_num < _max_n_card_counts, "Bounds"); + assert(0 <= epoch && epoch <= _n_periods, "must be"); + + return ((julong) card_num << card_num_shift) | epoch; + } + + unsigned int extract_epoch(julong v) { + return (v & epoch_mask); + } + + unsigned int extract_card_num(julong v) { + return (v & card_num_mask_in_place) >> card_num_shift; + } + + typedef struct { + unsigned char _count; + unsigned char _evict_count; + } CardCountCacheEntry; - unsigned char* _card_counts; + CardCountCacheEntry* _card_counts; + CardEpochCacheEntry* _card_epochs; + + // The current number of buckets in the card count cache unsigned _n_card_counts; + + // The max number of buckets required for the number of + // cards for the entire reserved heap + unsigned _max_n_card_counts; + + // Possible sizes of the cache: odd primes that roughly double in size. + // (See jvmtiTagMap.cpp). + static int _cc_cache_sizes[]; + + // The index in _cc_cache_sizes corresponding to the size of + // _card_counts. 
+ int _cache_size_index; + + bool _expand_card_counts; + const jbyte* _ct_bot; - unsigned* _cur_card_count_histo; - unsigned* _cum_card_count_histo; - jbyte** _hot_cache; - int _hot_cache_size; - int _n_hot; - int _hot_cache_idx; + + jbyte** _hot_cache; + int _hot_cache_size; + int _n_hot; + int _hot_cache_idx; + + int _hot_cache_par_chunk_size; + volatile int _hot_cache_par_claimed_idx; + + // Needed to workaround 6817995 + CardTableModRefBS* _ct_bs; + G1CollectedHeap* _g1h; + + // Expands the array that holds the card counts to the next size up + void expand_card_count_cache(); + + // hash a given key (index of card_ptr) with the specified size + static unsigned int hash(size_t key, int size) { + return (unsigned int) key % size; + } + + // hash a given key (index of card_ptr) + unsigned int hash(size_t key) { + return hash(key, _n_card_counts); + } + + unsigned ptr_2_card_num(jbyte* card_ptr) { + return (unsigned) (card_ptr - _ct_bot); + } + + jbyte* card_num_2_ptr(unsigned card_num) { + return (jbyte*) (_ct_bot + card_num); + } // Returns the count of this card after incrementing it. - int add_card_count(jbyte* card_ptr); + jbyte* add_card_count(jbyte* card_ptr, int* count, bool* defer); - void print_card_count_histo_range(unsigned* histo, int from, int to, - float& cum_card_pct, - float& cum_travs_pct); + // Returns true if this card is in a young region + bool is_young_card(jbyte* card_ptr); + public: ConcurrentG1Refine(); ~ConcurrentG1Refine(); @@ -65,11 +153,16 @@ // If this is the first entry for the slot, writes into the cache and // returns NULL. If it causes an eviction, returns the evicted pointer. // Otherwise, its a cache hit, and returns NULL. - jbyte* cache_insert(jbyte* card_ptr); + jbyte* cache_insert(jbyte* card_ptr, bool* defer); // Process the cached entries. 
void clean_up_cache(int worker_i, G1RemSet* g1rs); + // Set up for parallel processing of the cards in the hot cache + void clear_hot_cache_claimed_index() { + _hot_cache_par_claimed_idx = 0; + } + // Discard entries in the hot cache. void clear_hot_cache() { _hot_cache_idx = 0; _n_hot = 0; @@ -84,7 +177,6 @@ } void clear_and_record_card_counts(); - void print_final_card_counts(); static size_t thread_num(); }; diff -r f753dffae23e -r 16314a31b961 src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -104,17 +104,17 @@ double start_vtime_sec; // only used when G1SmoothConcRefine is on int prev_buffer_num; // only used when G1SmoothConcRefine is on // This thread activation threshold - int threshold = DCQBarrierProcessCompletedThreshold * _worker_id; + int threshold = G1UpdateBufferQueueProcessingThreshold * _worker_id; // Next thread activation threshold - int next_threshold = threshold + DCQBarrierProcessCompletedThreshold; - int deactivation_threshold = MAX2(threshold - DCQBarrierProcessCompletedThreshold / 2, 0); + int next_threshold = threshold + G1UpdateBufferQueueProcessingThreshold; + int deactivation_threshold = MAX2(threshold - G1UpdateBufferQueueProcessingThreshold / 2, 0); if (G1SmoothConcRefine) { lower_limit = 0; start_vtime_sec = os::elapsedVTime(); prev_buffer_num = (int) dcqs.completed_buffers_num(); } else { - lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now. + lower_limit = G1UpdateBufferQueueProcessingThreshold / 4; // For now. 
} while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) { double end_vtime_sec; diff -r f753dffae23e -r 16314a31b961 src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -80,8 +80,8 @@ int max_completed_queue, Mutex* lock, PtrQueueSet* fl_owner) { PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner); - set_buffer_size(DCQBarrierQueueBufferSize); - set_process_completed_threshold(DCQBarrierProcessCompletedThreshold); + set_buffer_size(G1UpdateBufferSize); + set_process_completed_threshold(G1UpdateBufferQueueProcessingThreshold); _shared_dirty_card_queue.set_lock(lock); _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon); diff -r f753dffae23e -r 16314a31b961 src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -1591,7 +1591,7 @@ JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, DirtyCardQ_FL_lock, - G1DirtyCardQueueMax, + G1UpdateBufferQueueMaxLength, Shared_DirtyCardQ_lock); if (G1DeferredRSUpdate) { @@ -1637,6 +1637,9 @@ void G1CollectedHeap::iterate_dirty_card_closure(bool concurrent, int worker_i) { + // Clean cards in the hot card cache + concurrent_g1_refine()->clean_up_cache(worker_i, g1_rem_set()); + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); int n_completed_buffers = 0; while (dcqs.apply_closure_to_completed_buffer(worker_i, 0, true)) { @@ -1645,9 +1648,6 @@ g1_policy()->record_update_rs_processed_buffers(worker_i, (double) n_completed_buffers); dcqs.clear_n_completed_buffers(); - // Finish up the queue... 
- if (worker_i == 0) concurrent_g1_refine()->clean_up_cache(worker_i, - g1_rem_set()); assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!"); } @@ -2414,8 +2414,6 @@ } void G1CollectedHeap::print_tracing_info() const { - concurrent_g1_refine()->print_final_card_counts(); - // We'll overload this to mean "trace GC pause statistics." if (TraceGen0Time || TraceGen1Time) { // The "G1CollectorPolicy" is keeping track of these stats, so delegate @@ -2845,6 +2843,11 @@ if (PrintHeapAtGC) { Universe::print_heap_after_gc(); } + if (G1SummarizeRSetStats && + (G1SummarizeRSetStatsPeriod > 0) && + (total_collections() % G1SummarizeRSetStatsPeriod == 0)) { + g1_rem_set()->print_summary_info(); + } } void G1CollectedHeap::set_gc_alloc_region(int purpose, HeapRegion* r) { @@ -4106,6 +4109,8 @@ g1_rem_set()->prepare_for_oops_into_collection_set_do(); concurrent_g1_refine()->set_use_cache(false); + concurrent_g1_refine()->clear_hot_cache_claimed_index(); + int n_workers = (ParallelGCThreads > 0 ? 
workers()->total_workers() : 1); set_par_threads(n_workers); G1ParTask g1_par_task(this, n_workers, _task_queues); @@ -4138,6 +4143,7 @@ } g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + concurrent_g1_refine()->clear_hot_cache(); concurrent_g1_refine()->set_use_cache(true); finalize_for_evac_failure(); diff -r f753dffae23e -r 16314a31b961 src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -94,7 +94,14 @@ _summary(new Summary()), _abandoned_summary(new AbandonedSummary()), +#ifndef PRODUCT _cur_clear_ct_time_ms(0.0), + _min_clear_cc_time_ms(-1.0), + _max_clear_cc_time_ms(-1.0), + _cur_clear_cc_time_ms(0.0), + _cum_clear_cc_time_ms(0.0), + _num_cc_clears(0L), +#endif _region_num_young(0), _region_num_tenured(0), @@ -1648,6 +1655,15 @@ print_stats(1, "Object Copying", obj_copy_time); } } +#ifndef PRODUCT + print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms); + print_stats(1, "Cum Clear CC", _cum_clear_cc_time_ms); + print_stats(1, "Min Clear CC", _min_clear_cc_time_ms); + print_stats(1, "Max Clear CC", _max_clear_cc_time_ms); + if (_num_cc_clears > 0) { + print_stats(1, "Avg Clear CC", _cum_clear_cc_time_ms / ((double)_num_cc_clears)); + } +#endif print_stats(1, "Other", other_time_ms); for (int i = 0; i < _aux_num; ++i) { if (_cur_aux_times_set[i]) { diff -r f753dffae23e -r 16314a31b961 src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -112,7 +112,6 @@ return 8*M; } - double _cur_collection_start_sec; size_t _cur_collection_pause_used_at_start_bytes; size_t _cur_collection_pause_used_regions_at_start; @@ -122,6 +121,15 @@ double _cur_clear_ct_time_ms; bool 
_satb_drain_time_set; +#ifndef PRODUCT + // Card Table Count Cache stats + double _min_clear_cc_time_ms; // min + double _max_clear_cc_time_ms; // max + double _cur_clear_cc_time_ms; // clearing time during current pause + double _cum_clear_cc_time_ms; // cummulative clearing time + jlong _num_cc_clears; // number of times the card count cache has been cleared +#endif + double _cur_CH_strong_roots_end_sec; double _cur_CH_strong_roots_dur_ms; double _cur_G1_strong_roots_end_sec; @@ -931,6 +939,18 @@ _cur_aux_times_ms[i] += ms; } +#ifndef PRODUCT + void record_cc_clear_time(double ms) { + if (_min_clear_cc_time_ms < 0.0 || ms <= _min_clear_cc_time_ms) + _min_clear_cc_time_ms = ms; + if (_max_clear_cc_time_ms < 0.0 || ms >= _max_clear_cc_time_ms) + _max_clear_cc_time_ms = ms; + _cur_clear_cc_time_ms = ms; + _cum_clear_cc_time_ms += ms; + _num_cc_clears++; + } +#endif + // Record the fact that "bytes" bytes allocated in a region. void record_before_bytes(size_t bytes); void record_after_bytes(size_t bytes); diff -r f753dffae23e -r 16314a31b961 src/share/vm/gc_implementation/g1/g1RemSet.cpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -676,6 +676,55 @@ static IntHistogram out_of_histo(50, 50); +void HRInto_G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i) { + // Construct the region representing the card. + HeapWord* start = _ct_bs->addr_for(card_ptr); + // And find the region containing it. 
+ HeapRegion* r = _g1->heap_region_containing(start); + assert(r != NULL, "unexpected null"); + + HeapWord* end = _ct_bs->addr_for(card_ptr + 1); + MemRegion dirtyRegion(start, end); + +#if CARD_REPEAT_HISTO + init_ct_freq_table(_g1->g1_reserved_obj_bytes()); + ct_freq_note_card(_ct_bs->index_for(start)); +#endif + + UpdateRSOopClosure update_rs_oop_cl(this, worker_i); + update_rs_oop_cl.set_from(r); + FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl); + + // Undirty the card. + *card_ptr = CardTableModRefBS::clean_card_val(); + // We must complete this write before we do any of the reads below. + OrderAccess::storeload(); + // And process it, being careful of unallocated portions of TLAB's. + HeapWord* stop_point = + r->oops_on_card_seq_iterate_careful(dirtyRegion, + &filter_then_update_rs_oop_cl); + // If stop_point is non-null, then we encountered an unallocated region + // (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the + // card and re-enqueue: if we put off the card until a GC pause, then the + // unallocated portion will be filled in. Alternatively, we might try + // the full complexity of the technique used in "regular" precleaning. + if (stop_point != NULL) { + // The card might have gotten re-dirtied and re-enqueued while we + // worked. (In fact, it's pretty likely.) + if (*card_ptr != CardTableModRefBS::dirty_card_val()) { + *card_ptr = CardTableModRefBS::dirty_card_val(); + MutexLockerEx x(Shared_DirtyCardQ_lock, + Mutex::_no_safepoint_check_flag); + DirtyCardQueue* sdcq = + JavaThread::dirty_card_queue_set().shared_dirty_card_queue(); + sdcq->enqueue(card_ptr); + } + } else { + out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region()); + _conc_refine_cards++; + } +} + void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) { // If the card is no longer dirty, nothing to do. 
if (*card_ptr != CardTableModRefBS::dirty_card_val()) return; @@ -716,61 +765,63 @@ return; } - // Should we defer it? - if (_cg1r->use_cache()) { - card_ptr = _cg1r->cache_insert(card_ptr); - // If it was not an eviction, nothing to do. - if (card_ptr == NULL) return; + // Should we defer processing the card? + // + // Previously the result from the insert_cache call would be + // either card_ptr (implying that card_ptr was currently "cold"), + // null (meaning we had inserted the card ptr into the "hot" + // cache, which had some headroom), or a "hot" card ptr + // extracted from the "hot" cache. + // + // Now that the _card_counts cache in the ConcurrentG1Refine + // instance is an evicting hash table, the result we get back + // could be from evicting the card ptr in an already occupied + // bucket (in which case we have replaced the card ptr in the + // bucket with card_ptr and "defer" is set to false). To avoid + // having a data structure (updates to which would need a lock) + // to hold these unprocessed dirty cards, we need to immediately + // process card_ptr. The actions needed to be taken on return + // from cache_insert are summarized in the following table: + // + // res defer action + // -------------------------------------------------------------- + // null false card evicted from _card_counts & replaced with + // card_ptr; evicted ptr added to hot cache. + // No need to process res; immediately process card_ptr + // + // null true card not evicted from _card_counts; card_ptr added + // to hot cache. + // Nothing to do. + // + // non-null false card evicted from _card_counts & replaced with + // card_ptr; evicted ptr is currently "cold" or + // caused an eviction from the hot cache. + // Immediately process res; process card_ptr. + // + // non-null true card not evicted from _card_counts; card_ptr is + // currently cold, or caused an eviction from hot + // cache. + // Immediately process res; no need to process card_ptr. 
- // OK, we have to reset the card start, region, etc. - start = _ct_bs->addr_for(card_ptr); - r = _g1->heap_region_containing(start); - if (r == NULL) { - guarantee(_g1->is_in_permanent(start), "Or else where?"); - return; // Not in the G1 heap (might be in perm, for example.) + jbyte* res = card_ptr; + bool defer = false; + if (_cg1r->use_cache()) { + jbyte* res = _cg1r->cache_insert(card_ptr, &defer); + if (res != NULL && (res != card_ptr || defer)) { + start = _ct_bs->addr_for(res); + r = _g1->heap_region_containing(start); + if (r == NULL) { + assert(_g1->is_in_permanent(start), "Or else where?"); + } else { + guarantee(!r->is_young(), "It was evicted in the current minor cycle."); + // Process card pointer we get back from the hot card cache + concurrentRefineOneCard_impl(res, worker_i); + } } - guarantee(!r->is_young(), "It was evicted in the current minor cycle."); } - HeapWord* end = _ct_bs->addr_for(card_ptr + 1); - MemRegion dirtyRegion(start, end); - -#if CARD_REPEAT_HISTO - init_ct_freq_table(_g1->g1_reserved_obj_bytes()); - ct_freq_note_card(_ct_bs->index_for(start)); -#endif - - UpdateRSOopClosure update_rs_oop_cl(this, worker_i); - update_rs_oop_cl.set_from(r); - FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r, &update_rs_oop_cl); - - // Undirty the card. - *card_ptr = CardTableModRefBS::clean_card_val(); - // We must complete this write before we do any of the reads below. - OrderAccess::storeload(); - // And process it, being careful of unallocated portions of TLAB's. - HeapWord* stop_point = - r->oops_on_card_seq_iterate_careful(dirtyRegion, - &filter_then_update_rs_oop_cl); - // If stop_point is non-null, then we encountered an unallocated region - // (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the - // card and re-enqueue: if we put off the card until a GC pause, then the - // unallocated portion will be filled in. Alternatively, we might try - // the full complexity of the technique used in "regular" precleaning. 
- if (stop_point != NULL) { - // The card might have gotten re-dirtied and re-enqueued while we - // worked. (In fact, it's pretty likely.) - if (*card_ptr != CardTableModRefBS::dirty_card_val()) { - *card_ptr = CardTableModRefBS::dirty_card_val(); - MutexLockerEx x(Shared_DirtyCardQ_lock, - Mutex::_no_safepoint_check_flag); - DirtyCardQueue* sdcq = - JavaThread::dirty_card_queue_set().shared_dirty_card_queue(); - sdcq->enqueue(card_ptr); - } - } else { - out_of_histo.add_entry(filter_then_update_rs_oop_cl.out_of_region()); - _conc_refine_cards++; + if (!defer) { + concurrentRefineOneCard_impl(card_ptr, worker_i); } } diff -r f753dffae23e -r 16314a31b961 src/share/vm/gc_implementation/g1/g1RemSet.hpp --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -157,6 +157,10 @@ } } + // The routine that performs the actual work of refining a dirty + // card. + void concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i); + protected: template void write_ref_nv(HeapRegion* from, T* p); template void par_write_ref_nv(HeapRegion* from, T* p, int tid); diff -r f753dffae23e -r 16314a31b961 src/share/vm/gc_implementation/g1/g1_globals.hpp --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -34,7 +34,7 @@ product(intx, G1ConfidencePercent, 50, \ "Confidence level for MMU/pause predictions") \ \ - develop(intx, G1MarkingOverheadPercent, 0, \ + develop(intx, G1MarkingOverheadPercent, 0, \ "Overhead of concurrent marking") \ \ develop(bool, G1AccountConcurrentOverhead, false, \ @@ -47,7 +47,7 @@ develop(bool, G1Gen, true, \ "If true, it will enable the generational G1") \ \ - develop(intx, G1GCPercent, 10, \ + develop(intx, G1GCPercent, 10, \ "The desired percent time spent on GC") \ \ develop(intx, G1PolicyVerbose, 0, \ @@ -74,6 +74,12 @@ 
diagnostic(bool, G1SummarizeRSetStats, false, \ "Summarize remembered set processing info") \ \ + diagnostic(intx, G1SummarizeRSetStatsPeriod, 0, \ + "The period (in number of GCs) at which we will generate " \ + "update buffer processing info " \ + "(0 means do not periodically generate this info); " \ + "it also requires -XX:+G1SummarizeRSetStats") \ + \ diagnostic(bool, G1SummarizeZFStats, false, \ "Summarize zero-filling info") \ \ @@ -167,17 +173,20 @@ develop(bool, G1DisablePostBarrier, false, \ "Disable generation of post-barrier (i.e., RS barrier) ") \ \ - product(intx, G1DirtyCardQueueMax, 30, \ - "Maximum number of completed RS buffers before mutator threads " \ - "start processing them.") \ + product(intx, G1UpdateBufferSize, 256, \ + "Size of an update buffer") \ + \ + product(intx, G1UpdateBufferQueueProcessingThreshold, 5, \ + "Number of enqueued update buffers that will " \ + "trigger concurrent processing") \ + \ + product(intx, G1UpdateBufferQueueMaxLength, 30, \ + "Maximum number of enqueued update buffers before mutator " \ + "threads start processing new ones instead of enqueueing them") \ \ develop(intx, G1ConcRSLogCacheSize, 10, \ "Log base 2 of the length of conc RS hot-card cache.") \ \ - develop(bool, G1ConcRSCountTraversals, false, \ - "If true, gather data about the number of times CR traverses " \ - "cards ") \ - \ develop(intx, G1ConcRSHotCardLimit, 4, \ "The threshold that defines (>=) a hot card.") \ \ @@ -251,6 +260,10 @@ \ product(uintx, G1ParallelRSetThreads, 0, \ "If non-0 is the number of parallel rem set update threads, " \ - "otherwise the value is determined ergonomically.") + "otherwise the value is determined ergonomically.") \ + \ + develop(intx, G1CardCountCacheExpandThreshold, 16, \ + "Expand the card count cache if the number of collisions for " \ + "a particular entry exceeds this value.") G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, 
DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG) diff -r f753dffae23e -r 16314a31b961 src/share/vm/gc_implementation/includeDB_gc_g1 --- a/src/share/vm/gc_implementation/includeDB_gc_g1 Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 Thu Aug 13 17:59:05 2009 -0700 @@ -45,11 +45,14 @@ concurrentG1Refine.cpp concurrentG1RefineThread.hpp concurrentG1Refine.cpp copy.hpp concurrentG1Refine.cpp g1CollectedHeap.inline.hpp +concurrentG1Refine.cpp g1CollectorPolicy.hpp concurrentG1Refine.cpp g1RemSet.hpp concurrentG1Refine.cpp space.inline.hpp +concurrentG1Refine.cpp heapRegionSeq.inline.hpp concurrentG1Refine.hpp globalDefinitions.hpp concurrentG1Refine.hpp allocation.hpp +concurrentG1Refine.hpp cardTableModRefBS.hpp concurrentG1Refine.hpp thread.hpp concurrentG1RefineThread.cpp concurrentG1Refine.hpp diff -r f753dffae23e -r 16314a31b961 src/share/vm/includeDB_core --- a/src/share/vm/includeDB_core Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/includeDB_core Thu Aug 13 17:59:05 2009 -0700 @@ -872,6 +872,7 @@ classFileParser.cpp symbolOop.hpp classFileParser.cpp symbolTable.hpp classFileParser.cpp systemDictionary.hpp +classFileParser.cpp threadService.hpp classFileParser.cpp timer.hpp classFileParser.cpp universe.inline.hpp classFileParser.cpp verificationType.hpp @@ -924,6 +925,7 @@ classLoader.cpp symbolOop.hpp classLoader.cpp systemDictionary.hpp classLoader.cpp threadCritical.hpp +classLoader.cpp threadService.hpp classLoader.cpp timer.hpp classLoader.cpp universe.inline.hpp classLoader.cpp vmSymbols.hpp @@ -4019,6 +4021,7 @@ systemDictionary.cpp resolutionErrors.hpp systemDictionary.cpp signature.hpp systemDictionary.cpp systemDictionary.hpp +systemDictionary.cpp threadService.hpp systemDictionary.cpp typeArrayKlass.hpp systemDictionary.cpp vmSymbols.hpp diff -r f753dffae23e -r 16314a31b961 src/share/vm/interpreter/abstractInterpreter.hpp --- 
a/src/share/vm/interpreter/abstractInterpreter.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/interpreter/abstractInterpreter.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -122,11 +122,15 @@ static int size_top_interpreter_activation(methodOop method); // Deoptimization support - static address continuation_for(methodOop method, - address bcp, - int callee_parameters, - bool is_top_frame, - bool& use_next_mdp); + // Compute the entry address for continuation after + static address deopt_continue_after_entry(methodOop method, + address bcp, + int callee_parameters, + bool is_top_frame); + // Compute the entry address for reexecution + static address deopt_reexecute_entry(methodOop method, address bcp); + // Deoptimization should reexecute this bytecode + static bool bytecode_should_reexecute(Bytecodes::Code code); // share implementation of size_activation and layout_activation: static int size_activation(methodOop method, diff -r f753dffae23e -r 16314a31b961 src/share/vm/interpreter/interpreter.cpp --- a/src/share/vm/interpreter/interpreter.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/interpreter/interpreter.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -284,76 +284,19 @@ //------------------------------------------------------------------------------------------------------------------------ // Deoptimization support -// If deoptimization happens, this method returns the point where to continue in -// interpreter. For calls (invokexxxx, newxxxx) the continuation is at next -// bci and the top of stack is in eax/edx/FPU tos. -// For putfield/getfield, put/getstatic, the continuation is at the same -// bci and the TOS is on stack. 
- -// Note: deopt_entry(type, 0) means reexecute bytecode -// deopt_entry(type, length) means continue at next bytecode - -address AbstractInterpreter::continuation_for(methodOop method, address bcp, int callee_parameters, bool is_top_frame, bool& use_next_mdp) { +// If deoptimization happens, this function returns the point of next bytecode to continue execution +address AbstractInterpreter::deopt_continue_after_entry(methodOop method, address bcp, int callee_parameters, bool is_top_frame) { assert(method->contains(bcp), "just checkin'"); Bytecodes::Code code = Bytecodes::java_code_at(bcp); + assert(!Interpreter::bytecode_should_reexecute(code), "should not reexecute"); int bci = method->bci_from(bcp); int length = -1; // initial value for debugging // compute continuation length length = Bytecodes::length_at(bcp); // compute result type BasicType type = T_ILLEGAL; - // when continuing after a compiler safepoint, re-execute the bytecode - // (an invoke is continued after the safepoint) - use_next_mdp = true; + switch (code) { - case Bytecodes::_lookupswitch: - case Bytecodes::_tableswitch: - case Bytecodes::_fast_binaryswitch: - case Bytecodes::_fast_linearswitch: - // recompute condtional expression folded into _if - case Bytecodes::_lcmp : - case Bytecodes::_fcmpl : - case Bytecodes::_fcmpg : - case Bytecodes::_dcmpl : - case Bytecodes::_dcmpg : - case Bytecodes::_ifnull : - case Bytecodes::_ifnonnull : - case Bytecodes::_goto : - case Bytecodes::_goto_w : - case Bytecodes::_ifeq : - case Bytecodes::_ifne : - case Bytecodes::_iflt : - case Bytecodes::_ifge : - case Bytecodes::_ifgt : - case Bytecodes::_ifle : - case Bytecodes::_if_icmpeq : - case Bytecodes::_if_icmpne : - case Bytecodes::_if_icmplt : - case Bytecodes::_if_icmpge : - case Bytecodes::_if_icmpgt : - case Bytecodes::_if_icmple : - case Bytecodes::_if_acmpeq : - case Bytecodes::_if_acmpne : - // special cases - case Bytecodes::_getfield : - case Bytecodes::_putfield : - case Bytecodes::_getstatic : - 
case Bytecodes::_putstatic : - case Bytecodes::_aastore : - // reexecute the operation and TOS value is on stack - assert(is_top_frame, "must be top frame"); - use_next_mdp = false; - return Interpreter::deopt_entry(vtos, 0); - break; - -#ifdef COMPILER1 - case Bytecodes::_athrow : - assert(is_top_frame, "must be top frame"); - use_next_mdp = false; - return Interpreter::rethrow_exception_entry(); - break; -#endif /* COMPILER1 */ - case Bytecodes::_invokevirtual : case Bytecodes::_invokespecial : case Bytecodes::_invokestatic : @@ -392,6 +335,70 @@ : Interpreter::return_entry(as_TosState(type), length); } +// If deoptimization happens, this function returns the point where the interpreter reexecutes +// the bytecode. +// Note: Bytecodes::_athrow is a special case in that it does not return +// Interpreter::deopt_entry(vtos, 0) like others +address AbstractInterpreter::deopt_reexecute_entry(methodOop method, address bcp) { + assert(method->contains(bcp), "just checkin'"); + Bytecodes::Code code = Bytecodes::java_code_at(bcp); +#ifdef COMPILER1 + if(code == Bytecodes::_athrow ) { + return Interpreter::rethrow_exception_entry(); + } +#endif /* COMPILER1 */ + return Interpreter::deopt_entry(vtos, 0); +} + +// If deoptimization happens, the interpreter should reexecute these bytecodes. +// This function mainly helps the compilers to set up the reexecute bit. 
+bool AbstractInterpreter::bytecode_should_reexecute(Bytecodes::Code code) { + switch (code) { + case Bytecodes::_lookupswitch: + case Bytecodes::_tableswitch: + case Bytecodes::_fast_binaryswitch: + case Bytecodes::_fast_linearswitch: + // recompute condtional expression folded into _if + case Bytecodes::_lcmp : + case Bytecodes::_fcmpl : + case Bytecodes::_fcmpg : + case Bytecodes::_dcmpl : + case Bytecodes::_dcmpg : + case Bytecodes::_ifnull : + case Bytecodes::_ifnonnull : + case Bytecodes::_goto : + case Bytecodes::_goto_w : + case Bytecodes::_ifeq : + case Bytecodes::_ifne : + case Bytecodes::_iflt : + case Bytecodes::_ifge : + case Bytecodes::_ifgt : + case Bytecodes::_ifle : + case Bytecodes::_if_icmpeq : + case Bytecodes::_if_icmpne : + case Bytecodes::_if_icmplt : + case Bytecodes::_if_icmpge : + case Bytecodes::_if_icmpgt : + case Bytecodes::_if_icmple : + case Bytecodes::_if_acmpeq : + case Bytecodes::_if_acmpne : + // special cases + case Bytecodes::_getfield : + case Bytecodes::_putfield : + case Bytecodes::_getstatic : + case Bytecodes::_putstatic : + case Bytecodes::_aastore : +#ifdef COMPILER1 + //special case of reexecution + case Bytecodes::_athrow : +#endif + return true; + + default: + return false; + } +} + void AbstractInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { // Quick & dirty stack overflow checking: bang the stack & handle trap. // Note that we do the banging after the frame is setup, since the exception diff -r f753dffae23e -r 16314a31b961 src/share/vm/interpreter/templateInterpreter.cpp --- a/src/share/vm/interpreter/templateInterpreter.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/interpreter/templateInterpreter.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -605,28 +605,41 @@ } } -// If deoptimization happens, this method returns the point where to continue in -// interpreter. For calls (invokexxxx, newxxxx) the continuation is at next -// bci and the top of stack is in eax/edx/FPU tos. 
-// For putfield/getfield, put/getstatic, the continuation is at the same -// bci and the TOS is on stack. +//------------------------------------------------------------------------------------------------------------------------ +// Deoptimization support -// Note: deopt_entry(type, 0) means reexecute bytecode -// deopt_entry(type, length) means continue at next bytecode +// If deoptimization happens, this function returns the point of next bytecode to continue execution +address TemplateInterpreter::deopt_continue_after_entry(methodOop method, address bcp, int callee_parameters, bool is_top_frame) { + return AbstractInterpreter::deopt_continue_after_entry(method, bcp, callee_parameters, is_top_frame); +} -address TemplateInterpreter::continuation_for(methodOop method, address bcp, int callee_parameters, bool is_top_frame, bool& use_next_mdp) { +// If deoptimization happens, this function returns the point where the interpreter reexecutes +// the bytecode. +// Note: Bytecodes::_athrow (C1 only) and Bytecodes::_return are the special cases +// that do not return "Interpreter::deopt_entry(vtos, 0)" +address TemplateInterpreter::deopt_reexecute_entry(methodOop method, address bcp) { assert(method->contains(bcp), "just checkin'"); Bytecodes::Code code = Bytecodes::java_code_at(bcp); if (code == Bytecodes::_return) { - // This is used for deopt during registration of finalizers - // during Object.. We simply need to resume execution at - // the standard return vtos bytecode to pop the frame normally. - // reexecuting the real bytecode would cause double registration - // of the finalizable object. - assert(is_top_frame, "must be on top"); - return _normal_table.entry(Bytecodes::_return).entry(vtos); + // This is used for deopt during registration of finalizers + // during Object.. We simply need to resume execution at + // the standard return vtos bytecode to pop the frame normally. 
+ // reexecuting the real bytecode would cause double registration + // of the finalizable object. + return _normal_table.entry(Bytecodes::_return).entry(vtos); } else { - return AbstractInterpreter::continuation_for(method, bcp, callee_parameters, is_top_frame, use_next_mdp); + return AbstractInterpreter::deopt_reexecute_entry(method, bcp); + } +} + +// If deoptimization happens, the interpreter should reexecute this bytecode. +// This function mainly helps the compilers to set up the reexecute bit. +bool TemplateInterpreter::bytecode_should_reexecute(Bytecodes::Code code) { + if (code == Bytecodes::_return) { + //Yes, we consider Bytecodes::_return as a special case of reexecution + return true; + } else { + return AbstractInterpreter::bytecode_should_reexecute(code); } } diff -r f753dffae23e -r 16314a31b961 src/share/vm/interpreter/templateInterpreter.hpp --- a/src/share/vm/interpreter/templateInterpreter.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/interpreter/templateInterpreter.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -171,11 +171,15 @@ static void ignore_safepoints(); // ignores safepoints // Deoptimization support - static address continuation_for(methodOop method, - address bcp, - int callee_parameters, - bool is_top_frame, - bool& use_next_mdp); + // Compute the entry address for continuation after + static address deopt_continue_after_entry(methodOop method, + address bcp, + int callee_parameters, + bool is_top_frame); + // Deoptimization should reexecute this bytecode + static bool bytecode_should_reexecute(Bytecodes::Code code); + // Compute the address for reexecution + static address deopt_reexecute_entry(methodOop method, address bcp); #include "incls/_templateInterpreter_pd.hpp.incl" diff -r f753dffae23e -r 16314a31b961 src/share/vm/memory/cardTableModRefBS.cpp --- a/src/share/vm/memory/cardTableModRefBS.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/memory/cardTableModRefBS.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -253,8 +253,16 @@ } 
#endif // The guard page is always committed and should not be committed over. - HeapWord* const new_end_for_commit = MIN2(new_end_aligned, - _guard_region.start()); + // "guarded" is used for assertion checking below and recalls the fact + // that the would-be end of the new committed region would have + // penetrated the guard page. + HeapWord* new_end_for_commit = new_end_aligned; + + DEBUG_ONLY(bool guarded = false;) + if (new_end_for_commit > _guard_region.start()) { + new_end_for_commit = _guard_region.start(); + DEBUG_ONLY(guarded = true;) + } if (new_end_for_commit > cur_committed.end()) { // Must commit new pages. @@ -302,7 +310,7 @@ // not the aligned up expanded region. // jbyte* const end = byte_after(new_region.last()); jbyte* const end = (jbyte*) new_end_for_commit; - assert((end >= byte_after(new_region.last())) || collided, + assert((end >= byte_after(new_region.last())) || collided || guarded, "Expect to be beyond new region unless impacting another region"); // do nothing if we resized downward. #ifdef ASSERT diff -r f753dffae23e -r 16314a31b961 src/share/vm/memory/genCollectedHeap.cpp --- a/src/share/vm/memory/genCollectedHeap.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/memory/genCollectedHeap.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -482,6 +482,10 @@ for (int i = starting_level; i <= max_level; i++) { if (_gens[i]->should_collect(full, size, is_tlab)) { if (i == n_gens() - 1) { // a major collection is to happen + if (!complete) { + // The full_collections increment was missed above. + increment_total_full_collections(); + } pre_full_gc_dump(); // do any pre full gc dumps } // Timer for individual generations. 
Last argument is false: no CR diff -r f753dffae23e -r 16314a31b961 src/share/vm/memory/serialize.cpp --- a/src/share/vm/memory/serialize.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/memory/serialize.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -51,7 +51,7 @@ soc->do_tag(arrayOopDesc::base_offset_in_bytes(T_BYTE)); soc->do_tag(sizeof(constantPoolOopDesc)); soc->do_tag(sizeof(constantPoolCacheOopDesc)); - soc->do_tag(objArrayOopDesc::base_offset_in_bytes(T_BYTE)); + soc->do_tag(objArrayOopDesc::base_offset_in_bytes()); soc->do_tag(typeArrayOopDesc::base_offset_in_bytes(T_BYTE)); soc->do_tag(sizeof(symbolOopDesc)); soc->do_tag(sizeof(klassOopDesc)); diff -r f753dffae23e -r 16314a31b961 src/share/vm/oops/arrayKlass.cpp --- a/src/share/vm/oops/arrayKlass.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/oops/arrayKlass.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -140,6 +140,7 @@ THROW_0(vmSymbols::java_lang_NegativeArraySizeException()); } if (length > arrayOopDesc::max_array_length(T_ARRAY)) { + report_java_out_of_memory("Requested array size exceeds VM limit"); THROW_OOP_0(Universe::out_of_memory_error_array_size()); } int size = objArrayOopDesc::object_size(length); diff -r f753dffae23e -r 16314a31b961 src/share/vm/oops/instanceKlass.cpp --- a/src/share/vm/oops/instanceKlass.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/oops/instanceKlass.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -158,9 +158,6 @@ // timer handles recursion assert(THREAD->is_Java_thread(), "non-JavaThread in link_class_impl"); JavaThread* jt = (JavaThread*)THREAD; - PerfTraceTimedEvent vmtimer(ClassLoader::perf_class_link_time(), - ClassLoader::perf_classes_linked(), - jt->get_thread_stat()->class_link_recursion_count_addr()); // link super class before linking this class instanceKlassHandle super(THREAD, this_oop->super()); @@ -194,6 +191,15 @@ return true; } + // trace only the link time for this klass that includes + // the verification time + PerfClassTraceTime 
vmtimer(ClassLoader::perf_class_link_time(), + ClassLoader::perf_class_link_selftime(), + ClassLoader::perf_classes_linked(), + jt->get_thread_stat()->perf_recursion_counts_addr(), + jt->get_thread_stat()->perf_timers_addr(), + PerfClassTraceTime::CLASS_LINK); + // verification & rewriting { ObjectLocker ol(this_oop, THREAD); @@ -203,12 +209,14 @@ if (!this_oop->is_linked()) { if (!this_oop->is_rewritten()) { { - assert(THREAD->is_Java_thread(), "non-JavaThread in link_class_impl"); - JavaThread* jt = (JavaThread*)THREAD; // Timer includes any side effects of class verification (resolution, // etc), but not recursive entry into verify_code(). - PerfTraceTime timer(ClassLoader::perf_class_verify_time(), - jt->get_thread_stat()->class_verify_recursion_count_addr()); + PerfClassTraceTime timer(ClassLoader::perf_class_verify_time(), + ClassLoader::perf_class_verify_selftime(), + ClassLoader::perf_classes_verified(), + jt->get_thread_stat()->perf_recursion_counts_addr(), + jt->get_thread_stat()->perf_timers_addr(), + PerfClassTraceTime::CLASS_VERIFY); bool verify_ok = verify_code(this_oop, throw_verifyerror, THREAD); if (!verify_ok) { return false; @@ -350,9 +358,12 @@ JavaThread* jt = (JavaThread*)THREAD; // Timer includes any side effects of class initialization (resolution, // etc), but not recursive entry into call_class_initializer(). 
- PerfTraceTimedEvent timer(ClassLoader::perf_class_init_time(), - ClassLoader::perf_classes_inited(), - jt->get_thread_stat()->class_init_recursion_count_addr()); + PerfClassTraceTime timer(ClassLoader::perf_class_init_time(), + ClassLoader::perf_class_init_selftime(), + ClassLoader::perf_classes_inited(), + jt->get_thread_stat()->perf_recursion_counts_addr(), + jt->get_thread_stat()->perf_timers_addr(), + PerfClassTraceTime::CLASS_CLINIT); this_oop->call_class_initializer(THREAD); } @@ -497,6 +508,7 @@ objArrayOop instanceKlass::allocate_objArray(int n, int length, TRAPS) { if (length < 0) THROW_0(vmSymbols::java_lang_NegativeArraySizeException()); if (length > arrayOopDesc::max_array_length(T_OBJECT)) { + report_java_out_of_memory("Requested array size exceeds VM limit"); THROW_OOP_0(Universe::out_of_memory_error_array_size()); } int size = objArrayOopDesc::object_size(length); diff -r f753dffae23e -r 16314a31b961 src/share/vm/oops/objArrayKlass.cpp --- a/src/share/vm/oops/objArrayKlass.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/oops/objArrayKlass.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -39,6 +39,7 @@ assert(a->is_parsable(), "Can't publish unless parsable"); return a; } else { + report_java_out_of_memory("Requested array size exceeds VM limit"); THROW_OOP_0(Universe::out_of_memory_error_array_size()); } } else { diff -r f753dffae23e -r 16314a31b961 src/share/vm/oops/objArrayOop.hpp --- a/src/share/vm/oops/objArrayOop.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/oops/objArrayOop.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -38,6 +38,11 @@ } public: + // Returns the offset of the first element. + static int base_offset_in_bytes() { + return arrayOopDesc::base_offset_in_bytes(T_OBJECT); + } + // base is the address following the header. 
HeapWord* base() const { return (HeapWord*) arrayOopDesc::base(T_OBJECT); } diff -r f753dffae23e -r 16314a31b961 src/share/vm/oops/typeArrayKlass.cpp --- a/src/share/vm/oops/typeArrayKlass.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/oops/typeArrayKlass.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -80,6 +80,7 @@ assert(t->is_parsable(), "Don't publish unless parsable"); return t; } else { + report_java_out_of_memory("Requested array size exceeds VM limit"); THROW_OOP_0(Universe::out_of_memory_error_array_size()); } } else { diff -r f753dffae23e -r 16314a31b961 src/share/vm/opto/block.cpp --- a/src/share/vm/opto/block.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/opto/block.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -910,7 +910,16 @@ !(b->head()->is_Loop() && n->is_Phi()) && // See (+++) comment in reg_split.cpp !(n->jvms() != NULL && n->jvms()->is_monitor_use(k)) ) { - assert( b->find_node(def) < j, "uses must follow definitions" ); + bool is_loop = false; + if (n->is_Phi()) { + for( uint l = 1; l < def->req(); l++ ) { + if (n == def->in(l)) { + is_loop = true; + break; // Some kind of loop + } + } + } + assert( is_loop || b->find_node(def) < j, "uses must follow definitions" ); } if( def->is_SafePointScalarObject() ) { assert(_bbs[def->_idx] == b, "SafePointScalarObject Node should be at the same block as its SafePoint node"); diff -r f753dffae23e -r 16314a31b961 src/share/vm/opto/bytecodeInfo.cpp --- a/src/share/vm/opto/bytecodeInfo.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/opto/bytecodeInfo.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -37,6 +37,7 @@ // Keep a private copy of the caller_jvms: _caller_jvms = new (C) JVMState(caller_jvms->method(), caller_tree->caller_jvms()); _caller_jvms->set_bci(caller_jvms->bci()); + assert(!caller_jvms->should_reexecute(), "there should be no reexecute bytecode with inlining"); } assert(_caller_jvms->same_calls_as(caller_jvms), "consistent JVMS"); assert((caller_tree == NULL ? 
0 : caller_tree->inline_depth() + 1) == inline_depth(), "correct (redundant) depth parameter"); diff -r f753dffae23e -r 16314a31b961 src/share/vm/opto/callnode.cpp --- a/src/share/vm/opto/callnode.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/opto/callnode.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -223,6 +223,7 @@ JVMState::JVMState(ciMethod* method, JVMState* caller) { assert(method != NULL, "must be valid call site"); _method = method; + _reexecute = Reexecute_Undefined; debug_only(_bci = -99); // random garbage value debug_only(_map = (SafePointNode*)-1); _caller = caller; @@ -237,6 +238,7 @@ JVMState::JVMState(int stack_size) { _method = NULL; _bci = InvocationEntryBci; + _reexecute = Reexecute_Undefined; debug_only(_map = (SafePointNode*)-1); _caller = NULL; _depth = 1; @@ -269,6 +271,7 @@ if (p->_method != q->_method) return false; if (p->_method == NULL) return true; // bci is irrelevant if (p->_bci != q->_bci) return false; + if (p->_reexecute != q->_reexecute) return false; p = p->caller(); q = q->caller(); if (p == q) return true; @@ -490,6 +493,7 @@ if (!printed) _method->print_short_name(st); st->print(" @ bci:%d",_bci); + st->print(" reexecute:%s", _reexecute==Reexecute_True?"true":"false"); } else { st->print(" runtime stub"); } @@ -509,8 +513,8 @@ } _map->dump(2); } - st->print("JVMS depth=%d loc=%d stk=%d mon=%d scalar=%d end=%d mondepth=%d sp=%d bci=%d method=", - depth(), locoff(), stkoff(), monoff(), scloff(), endoff(), monitor_depth(), sp(), bci()); + st->print("JVMS depth=%d loc=%d stk=%d mon=%d scalar=%d end=%d mondepth=%d sp=%d bci=%d reexecute=%s method=", + depth(), locoff(), stkoff(), monoff(), scloff(), endoff(), monitor_depth(), sp(), bci(), should_reexecute()?"true":"false"); if (_method == NULL) { st->print_cr("(none)"); } else { @@ -537,6 +541,7 @@ JVMState* JVMState::clone_shallow(Compile* C) const { JVMState* n = has_method() ? 
new (C) JVMState(_method, _caller) : new (C) JVMState(0); n->set_bci(_bci); + n->_reexecute = _reexecute; n->set_locoff(_locoff); n->set_stkoff(_stkoff); n->set_monoff(_monoff); diff -r f753dffae23e -r 16314a31b961 src/share/vm/opto/callnode.hpp --- a/src/share/vm/opto/callnode.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/opto/callnode.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -178,6 +178,13 @@ // This provides a way to map the optimized program back into the interpreter, // or to let the GC mark the stack. class JVMState : public ResourceObj { +public: + typedef enum { + Reexecute_Undefined = -1, // not defined -- will be translated into false later + Reexecute_False = 0, // false -- do not reexecute + Reexecute_True = 1 // true -- reexecute the bytecode + } ReexecuteState; //Reexecute State + private: JVMState* _caller; // List pointer for forming scope chains uint _depth; // One mroe than caller depth, or one. @@ -188,10 +195,12 @@ uint _endoff; // Offset to end of input edge mapping uint _sp; // Jave Expression Stack Pointer for this state int _bci; // Byte Code Index of this JVM point + ReexecuteState _reexecute; // Whether this bytecode need to be re-executed ciMethod* _method; // Method Pointer SafePointNode* _map; // Map node associated with this scope public: friend class Compile; + friend class PreserveReexecuteState; // Because JVMState objects live over the entire lifetime of the // Compile object, they are allocated into the comp_arena, which @@ -222,16 +231,18 @@ bool is_mon(uint i) const { return i >= _monoff && i < _scloff; } bool is_scl(uint i) const { return i >= _scloff && i < _endoff; } - uint sp() const { return _sp; } - int bci() const { return _bci; } - bool has_method() const { return _method != NULL; } - ciMethod* method() const { assert(has_method(), ""); return _method; } - JVMState* caller() const { return _caller; } - SafePointNode* map() const { return _map; } - uint depth() const { return _depth; } - uint debug_start() const; // 
returns locoff of root caller - uint debug_end() const; // returns endoff of self - uint debug_size() const { + uint sp() const { return _sp; } + int bci() const { return _bci; } + bool should_reexecute() const { return _reexecute==Reexecute_True; } + bool is_reexecute_undefined() const { return _reexecute==Reexecute_Undefined; } + bool has_method() const { return _method != NULL; } + ciMethod* method() const { assert(has_method(), ""); return _method; } + JVMState* caller() const { return _caller; } + SafePointNode* map() const { return _map; } + uint depth() const { return _depth; } + uint debug_start() const; // returns locoff of root caller + uint debug_end() const; // returns endoff of self + uint debug_size() const { return loc_size() + sp() + mon_size() + scl_size(); } uint debug_depth() const; // returns sum of debug_size values at all depths @@ -267,7 +278,9 @@ } void set_map(SafePointNode *map) { _map = map; } void set_sp(uint sp) { _sp = sp; } - void set_bci(int bci) { _bci = bci; } + // _reexecute is initialized to "undefined" for a new bci + void set_bci(int bci) {if(_bci != bci)_reexecute=Reexecute_Undefined; _bci = bci; } + void set_should_reexecute(bool reexec) {_reexecute = reexec ? Reexecute_True : Reexecute_False;} // Miscellaneous utility functions JVMState* clone_deep(Compile* C) const; // recursively clones caller chain diff -r f753dffae23e -r 16314a31b961 src/share/vm/opto/cfgnode.cpp --- a/src/share/vm/opto/cfgnode.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/opto/cfgnode.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -1792,15 +1792,12 @@ if (UseCompressedOops && can_reshape && progress == NULL) { bool may_push = true; bool has_decodeN = false; - Node* in_decodeN = NULL; for (uint i=1; iis_DecodeN() && ii->bottom_type() == bottom_type()) { - // Note: in_decodeN is used only to define the type of new phi. - // Find a non dead path otherwise phi type will be wrong. + // Do optimization if a non dead path exist. 
if (ii->in(1)->bottom_type() != Type::TOP) { has_decodeN = true; - in_decodeN = ii->in(1); } } else if (!ii->is_Phi()) { may_push = false; @@ -1809,7 +1806,9 @@ if (has_decodeN && may_push) { PhaseIterGVN *igvn = phase->is_IterGVN(); - PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN); + // Make narrow type for new phi. + const Type* narrow_t = TypeNarrowOop::make(this->bottom_type()->is_ptr()); + PhiNode* new_phi = new (phase->C, r->req()) PhiNode(r, narrow_t); uint orig_cnt = req(); for (uint i=1; ias_Phi() == this) { new_ii = new_phi; } else { - new_ii = new (phase->C, 2) EncodePNode(ii, in_decodeN->bottom_type()); + new_ii = new (phase->C, 2) EncodePNode(ii, narrow_t); igvn->register_new_node_with_optimizer(new_ii); } } diff -r f753dffae23e -r 16314a31b961 src/share/vm/opto/graphKit.cpp --- a/src/share/vm/opto/graphKit.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/opto/graphKit.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -620,6 +620,16 @@ assert(kit->stopped(), "cutout code must stop, throw, return, etc."); } +//---------------------------PreserveReexecuteState---------------------------- +PreserveReexecuteState::PreserveReexecuteState(GraphKit* kit) { + _kit = kit; + _sp = kit->sp(); + _reexecute = kit->jvms()->_reexecute; +} +PreserveReexecuteState::~PreserveReexecuteState() { + _kit->jvms()->_reexecute = _reexecute; + _kit->set_sp(_sp); +} //------------------------------clone_map-------------------------------------- // Implementation of PreserveJVMState @@ -738,6 +748,18 @@ #endif //ASSERT +// Helper function for enforcing certain bytecodes to reexecute if +// deoptimization happens +static bool should_reexecute_implied_by_bytecode(JVMState *jvms) { + ciMethod* cur_method = jvms->method(); + int cur_bci = jvms->bci(); + if (cur_method != NULL && cur_bci != InvocationEntryBci) { + Bytecodes::Code code = cur_method->java_code_at_bci(cur_bci); + return Interpreter::bytecode_should_reexecute(code); + } else + return false; +} + // Helper function for 
adding JVMState and debug information to node void GraphKit::add_safepoint_edges(SafePointNode* call, bool must_throw) { // Add the safepoint edges to the call (or other safepoint). @@ -781,6 +803,13 @@ JVMState* out_jvms = youngest_jvms->clone_deep(C); call->set_jvms(out_jvms); // Start jvms list for call node + // For a known set of bytecodes, the interpreter should reexecute them if + // deoptimization happens. We set the reexecute state for them here + if (out_jvms->is_reexecute_undefined() && //don't change if already specified + should_reexecute_implied_by_bytecode(out_jvms)) { + out_jvms->set_should_reexecute(true); //NOTE: youngest_jvms not changed + } + // Presize the call: debug_only(uint non_debug_edges = call->req()); call->add_req_batch(top(), youngest_jvms->debug_depth()); diff -r f753dffae23e -r 16314a31b961 src/share/vm/opto/graphKit.hpp --- a/src/share/vm/opto/graphKit.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/opto/graphKit.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -763,3 +763,16 @@ BuildCutout(GraphKit* kit, Node* p, float prob, float cnt = COUNT_UNKNOWN); ~BuildCutout(); }; + +// Helper class to preserve the original _reexecute bit and _sp and restore +// them back +class PreserveReexecuteState: public StackObj { + protected: + GraphKit* _kit; + uint _sp; + JVMState::ReexecuteState _reexecute; + + public: + PreserveReexecuteState(GraphKit* kit); + ~PreserveReexecuteState(); +}; diff -r f753dffae23e -r 16314a31b961 src/share/vm/opto/library_call.cpp --- a/src/share/vm/opto/library_call.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/opto/library_call.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -2064,7 +2064,7 @@ // See if it is a narrow oop array. 
if (adr_type->isa_aryptr()) { - if (adr_type->offset() >= objArrayOopDesc::base_offset_in_bytes(type)) { + if (adr_type->offset() >= objArrayOopDesc::base_offset_in_bytes()) { const TypeOopPtr *elem_type = adr_type->is_aryptr()->elem()->isa_oopptr(); if (elem_type != NULL) { sharpened_klass = elem_type->klass(); @@ -3169,78 +3169,85 @@ Node* end = is_copyOfRange? argument(2): argument(1); Node* array_type_mirror = is_copyOfRange? argument(3): argument(2); - _sp += nargs; // set original stack for use by uncommon_trap - array_type_mirror = do_null_check(array_type_mirror, T_OBJECT); - original = do_null_check(original, T_OBJECT); - _sp -= nargs; - - // Check if a null path was taken unconditionally. - if (stopped()) return true; - - Node* orig_length = load_array_length(original); - - Node* klass_node = load_klass_from_mirror(array_type_mirror, false, nargs, - NULL, 0); - _sp += nargs; // set original stack for use by uncommon_trap - klass_node = do_null_check(klass_node, T_OBJECT); - _sp -= nargs; - - RegionNode* bailout = new (C, 1) RegionNode(1); - record_for_igvn(bailout); - - // Despite the generic type of Arrays.copyOf, the mirror might be int, int[], etc. - // Bail out if that is so. - Node* not_objArray = generate_non_objArray_guard(klass_node, bailout); - if (not_objArray != NULL) { - // Improve the klass node's type from the new optimistic assumption: - ciKlass* ak = ciArrayKlass::make(env()->Object_klass()); - const Type* akls = TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/); - Node* cast = new (C, 2) CastPPNode(klass_node, akls); - cast->init_req(0, control()); - klass_node = _gvn.transform(cast); - } - - // Bail out if either start or end is negative. - generate_negative_guard(start, bailout, &start); - generate_negative_guard(end, bailout, &end); - - Node* length = end; - if (_gvn.type(start) != TypeInt::ZERO) { - length = _gvn.transform( new (C, 3) SubINode(end, start) ); - } - - // Bail out if length is negative. 
- // ...Not needed, since the new_array will throw the right exception. - //generate_negative_guard(length, bailout, &length); - - if (bailout->req() > 1) { - PreserveJVMState pjvms(this); - set_control( _gvn.transform(bailout) ); - _sp += nargs; // push the arguments back on the stack - uncommon_trap(Deoptimization::Reason_intrinsic, - Deoptimization::Action_maybe_recompile); - } - - if (!stopped()) { - // How many elements will we copy from the original? - // The answer is MinI(orig_length - start, length). - Node* orig_tail = _gvn.transform( new(C, 3) SubINode(orig_length, start) ); - Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length); - - const bool raw_mem_only = true; - Node* newcopy = new_array(klass_node, length, nargs, raw_mem_only); - - // Generate a direct call to the right arraycopy function(s). - // We know the copy is disjoint but we might not know if the - // oop stores need checking. - // Extreme case: Arrays.copyOf((Integer[])x, 10, String[].class). - // This will fail a store-check if x contains any non-nulls. - bool disjoint_bases = true; - bool length_never_negative = true; - generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT, - original, start, newcopy, intcon(0), moved, - disjoint_bases, length_never_negative); - + Node* newcopy; + + //set the original stack and the reexecute bit for the interpreter to reexecute + //the bytecode that invokes Arrays.copyOf if deoptimization happens + { PreserveReexecuteState preexecs(this); + _sp += nargs; + jvms()->set_should_reexecute(true); + + array_type_mirror = do_null_check(array_type_mirror, T_OBJECT); + original = do_null_check(original, T_OBJECT); + + // Check if a null path was taken unconditionally. 
+ if (stopped()) return true; + + Node* orig_length = load_array_length(original); + + Node* klass_node = load_klass_from_mirror(array_type_mirror, false, 0, + NULL, 0); + klass_node = do_null_check(klass_node, T_OBJECT); + + RegionNode* bailout = new (C, 1) RegionNode(1); + record_for_igvn(bailout); + + // Despite the generic type of Arrays.copyOf, the mirror might be int, int[], etc. + // Bail out if that is so. + Node* not_objArray = generate_non_objArray_guard(klass_node, bailout); + if (not_objArray != NULL) { + // Improve the klass node's type from the new optimistic assumption: + ciKlass* ak = ciArrayKlass::make(env()->Object_klass()); + const Type* akls = TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/); + Node* cast = new (C, 2) CastPPNode(klass_node, akls); + cast->init_req(0, control()); + klass_node = _gvn.transform(cast); + } + + // Bail out if either start or end is negative. + generate_negative_guard(start, bailout, &start); + generate_negative_guard(end, bailout, &end); + + Node* length = end; + if (_gvn.type(start) != TypeInt::ZERO) { + length = _gvn.transform( new (C, 3) SubINode(end, start) ); + } + + // Bail out if length is negative. + // ...Not needed, since the new_array will throw the right exception. + //generate_negative_guard(length, bailout, &length); + + if (bailout->req() > 1) { + PreserveJVMState pjvms(this); + set_control( _gvn.transform(bailout) ); + uncommon_trap(Deoptimization::Reason_intrinsic, + Deoptimization::Action_maybe_recompile); + } + + if (!stopped()) { + + // How many elements will we copy from the original? + // The answer is MinI(orig_length - start, length). + Node* orig_tail = _gvn.transform( new(C, 3) SubINode(orig_length, start) ); + Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length); + + const bool raw_mem_only = true; + newcopy = new_array(klass_node, length, 0, raw_mem_only); + + // Generate a direct call to the right arraycopy function(s). 
+ // We know the copy is disjoint but we might not know if the + // oop stores need checking. + // Extreme case: Arrays.copyOf((Integer[])x, 10, String[].class). + // This will fail a store-check if x contains any non-nulls. + bool disjoint_bases = true; + bool length_never_negative = true; + generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT, + original, start, newcopy, intcon(0), moved, + disjoint_bases, length_never_negative); + } + } //original reexecute and sp are set back here + + if(!stopped()) { push(newcopy); } @@ -3992,146 +3999,159 @@ // bool LibraryCallKit::inline_native_clone(bool is_virtual) { int nargs = 1; - Node* obj = null_check_receiver(callee()); - if (stopped()) return true; - Node* obj_klass = load_object_klass(obj); - const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr(); - const TypeOopPtr* toop = ((tklass != NULL) + PhiNode* result_val; + + //set the original stack and the reexecute bit for the interpreter to reexecute + //the bytecode that invokes Object.clone if deoptimization happens + { PreserveReexecuteState preexecs(this); + jvms()->set_should_reexecute(true); + + //null_check_receiver will adjust _sp (push and pop) + Node* obj = null_check_receiver(callee()); + if (stopped()) return true; + + _sp += nargs; + + Node* obj_klass = load_object_klass(obj); + const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr(); + const TypeOopPtr* toop = ((tklass != NULL) ? tklass->as_instance_type() : TypeInstPtr::NOTNULL); - // Conservatively insert a memory barrier on all memory slices. - // Do not let writes into the original float below the clone. 
- insert_mem_bar(Op_MemBarCPUOrder); - - // paths into result_reg: - enum { - _slow_path = 1, // out-of-line call to clone method (virtual or not) - _objArray_path, // plain array allocation, plus arrayof_oop_arraycopy - _array_path, // plain array allocation, plus arrayof_long_arraycopy - _instance_path, // plain instance allocation, plus arrayof_long_arraycopy - PATH_LIMIT - }; - RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT); - PhiNode* result_val = new(C, PATH_LIMIT) PhiNode(result_reg, - TypeInstPtr::NOTNULL); - PhiNode* result_i_o = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO); - PhiNode* result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY, - TypePtr::BOTTOM); - record_for_igvn(result_reg); - - const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; - int raw_adr_idx = Compile::AliasIdxRaw; - const bool raw_mem_only = true; - - Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL); - if (array_ctl != NULL) { - // It's an array. - PreserveJVMState pjvms(this); - set_control(array_ctl); - Node* obj_length = load_array_length(obj); - Node* obj_size = NULL; - Node* alloc_obj = new_array(obj_klass, obj_length, nargs, - raw_mem_only, &obj_size); - - if (!use_ReduceInitialCardMarks()) { - // If it is an oop array, it requires very special treatment, - // because card marking is required on each card of the array. - Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL); - if (is_obja != NULL) { - PreserveJVMState pjvms2(this); - set_control(is_obja); - // Generate a direct call to the right arraycopy function(s). 
- bool disjoint_bases = true; - bool length_never_negative = true; - generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT, - obj, intcon(0), alloc_obj, intcon(0), - obj_length, - disjoint_bases, length_never_negative); - result_reg->init_req(_objArray_path, control()); - result_val->init_req(_objArray_path, alloc_obj); - result_i_o ->set_req(_objArray_path, i_o()); - result_mem ->set_req(_objArray_path, reset_memory()); + // Conservatively insert a memory barrier on all memory slices. + // Do not let writes into the original float below the clone. + insert_mem_bar(Op_MemBarCPUOrder); + + // paths into result_reg: + enum { + _slow_path = 1, // out-of-line call to clone method (virtual or not) + _objArray_path, // plain array allocation, plus arrayof_oop_arraycopy + _array_path, // plain array allocation, plus arrayof_long_arraycopy + _instance_path, // plain instance allocation, plus arrayof_long_arraycopy + PATH_LIMIT + }; + RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT); + result_val = new(C, PATH_LIMIT) PhiNode(result_reg, + TypeInstPtr::NOTNULL); + PhiNode* result_i_o = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO); + PhiNode* result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY, + TypePtr::BOTTOM); + record_for_igvn(result_reg); + + const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; + int raw_adr_idx = Compile::AliasIdxRaw; + const bool raw_mem_only = true; + + + Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL); + if (array_ctl != NULL) { + // It's an array. + PreserveJVMState pjvms(this); + set_control(array_ctl); + Node* obj_length = load_array_length(obj); + Node* obj_size = NULL; + Node* alloc_obj = new_array(obj_klass, obj_length, 0, + raw_mem_only, &obj_size); + + if (!use_ReduceInitialCardMarks()) { + // If it is an oop array, it requires very special treatment, + // because card marking is required on each card of the array. 
+ Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL); + if (is_obja != NULL) { + PreserveJVMState pjvms2(this); + set_control(is_obja); + // Generate a direct call to the right arraycopy function(s). + bool disjoint_bases = true; + bool length_never_negative = true; + generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT, + obj, intcon(0), alloc_obj, intcon(0), + obj_length, + disjoint_bases, length_never_negative); + result_reg->init_req(_objArray_path, control()); + result_val->init_req(_objArray_path, alloc_obj); + result_i_o ->set_req(_objArray_path, i_o()); + result_mem ->set_req(_objArray_path, reset_memory()); + } + } + // We can dispense with card marks if we know the allocation + // comes out of eden (TLAB)... In fact, ReduceInitialCardMarks + // causes the non-eden paths to simulate a fresh allocation, + // insofar that no further card marks are required to initialize + // the object. + + // Otherwise, there are no card marks to worry about. + + if (!stopped()) { + copy_to_clone(obj, alloc_obj, obj_size, true, false); + + // Present the results of the copy. + result_reg->init_req(_array_path, control()); + result_val->init_req(_array_path, alloc_obj); + result_i_o ->set_req(_array_path, i_o()); + result_mem ->set_req(_array_path, reset_memory()); } } - // We can dispense with card marks if we know the allocation - // comes out of eden (TLAB)... In fact, ReduceInitialCardMarks - // causes the non-eden paths to simulate a fresh allocation, - // insofar that no further card marks are required to initialize - // the object. - - // Otherwise, there are no card marks to worry about. + + // We only go to the instance fast case code if we pass a number of guards. + // The paths which do not pass are accumulated in the slow_region. + RegionNode* slow_region = new (C, 1) RegionNode(1); + record_for_igvn(slow_region); + if (!stopped()) { + // It's an instance (we did array above). Make the slow-path tests. 
+ // If this is a virtual call, we generate a funny guard. We grab + // the vtable entry corresponding to clone() from the target object. + // If the target method which we are calling happens to be the + // Object clone() method, we pass the guard. We do not need this + // guard for non-virtual calls; the caller is known to be the native + // Object clone(). + if (is_virtual) { + generate_virtual_guard(obj_klass, slow_region); + } + + // The object must be cloneable and must not have a finalizer. + // Both of these conditions may be checked in a single test. + // We could optimize the cloneable test further, but we don't care. + generate_access_flags_guard(obj_klass, + // Test both conditions: + JVM_ACC_IS_CLONEABLE | JVM_ACC_HAS_FINALIZER, + // Must be cloneable but not finalizer: + JVM_ACC_IS_CLONEABLE, + slow_region); + } if (!stopped()) { - copy_to_clone(obj, alloc_obj, obj_size, true, false); - - // Present the results of the copy. - result_reg->init_req(_array_path, control()); - result_val->init_req(_array_path, alloc_obj); - result_i_o ->set_req(_array_path, i_o()); - result_mem ->set_req(_array_path, reset_memory()); - } - } - - // We only go to the instance fast case code if we pass a number of guards. - // The paths which do not pass are accumulated in the slow_region. - RegionNode* slow_region = new (C, 1) RegionNode(1); - record_for_igvn(slow_region); - if (!stopped()) { - // It's an instance (we did array above). Make the slow-path tests. - // If this is a virtual call, we generate a funny guard. We grab - // the vtable entry corresponding to clone() from the target object. - // If the target method which we are calling happens to be the - // Object clone() method, we pass the guard. We do not need this - // guard for non-virtual calls; the caller is known to be the native - // Object clone(). - if (is_virtual) { - generate_virtual_guard(obj_klass, slow_region); + // It's an instance, and it passed the slow-path tests. 
+ PreserveJVMState pjvms(this); + Node* obj_size = NULL; + Node* alloc_obj = new_instance(obj_klass, NULL, raw_mem_only, &obj_size); + + copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks()); + + // Present the results of the slow call. + result_reg->init_req(_instance_path, control()); + result_val->init_req(_instance_path, alloc_obj); + result_i_o ->set_req(_instance_path, i_o()); + result_mem ->set_req(_instance_path, reset_memory()); } - // The object must be cloneable and must not have a finalizer. - // Both of these conditions may be checked in a single test. - // We could optimize the cloneable test further, but we don't care. - generate_access_flags_guard(obj_klass, - // Test both conditions: - JVM_ACC_IS_CLONEABLE | JVM_ACC_HAS_FINALIZER, - // Must be cloneable but not finalizer: - JVM_ACC_IS_CLONEABLE, - slow_region); - } - - if (!stopped()) { - // It's an instance, and it passed the slow-path tests. - PreserveJVMState pjvms(this); - Node* obj_size = NULL; - Node* alloc_obj = new_instance(obj_klass, NULL, raw_mem_only, &obj_size); - - copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks()); - - // Present the results of the slow call. - result_reg->init_req(_instance_path, control()); - result_val->init_req(_instance_path, alloc_obj); - result_i_o ->set_req(_instance_path, i_o()); - result_mem ->set_req(_instance_path, reset_memory()); - } - - // Generate code for the slow case. We make a call to clone(). 
- set_control(_gvn.transform(slow_region)); - if (!stopped()) { - PreserveJVMState pjvms(this); - CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_clone, is_virtual); - Node* slow_result = set_results_for_java_call(slow_call); - // this->control() comes from set_results_for_java_call - result_reg->init_req(_slow_path, control()); - result_val->init_req(_slow_path, slow_result); - result_i_o ->set_req(_slow_path, i_o()); - result_mem ->set_req(_slow_path, reset_memory()); - } - - // Return the combined state. - set_control( _gvn.transform(result_reg) ); - set_i_o( _gvn.transform(result_i_o) ); - set_all_memory( _gvn.transform(result_mem) ); + // Generate code for the slow case. We make a call to clone(). + set_control(_gvn.transform(slow_region)); + if (!stopped()) { + PreserveJVMState pjvms(this); + CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_clone, is_virtual); + Node* slow_result = set_results_for_java_call(slow_call); + // this->control() comes from set_results_for_java_call + result_reg->init_req(_slow_path, control()); + result_val->init_req(_slow_path, slow_result); + result_i_o ->set_req(_slow_path, i_o()); + result_mem ->set_req(_slow_path, reset_memory()); + } + + // Return the combined state. + set_control( _gvn.transform(result_reg) ); + set_i_o( _gvn.transform(result_i_o) ); + set_all_memory( _gvn.transform(result_mem) ); + } //original reexecute and sp are set back here push(_gvn.transform(result_val)); diff -r f753dffae23e -r 16314a31b961 src/share/vm/opto/mulnode.cpp --- a/src/share/vm/opto/mulnode.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/opto/mulnode.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -608,16 +608,14 @@ } // Are we masking a long that was converted from an int with a mask - // that fits in 32-bits? Commute them and use an AndINode. - if (op == Op_ConvI2L && (mask & CONST64(0xFFFFFFFF00000000)) == 0) { - // If we are doing an UI2L conversion (i.e. 
the mask is - // 0x00000000FFFFFFFF) we cannot convert the AndL to an AndI - // because the AndI would be optimized away later in Identity. - if (mask != CONST64(0x00000000FFFFFFFF)) { - Node* andi = new (phase->C, 3) AndINode(in1->in(1), phase->intcon(mask)); - andi = phase->transform(andi); - return new (phase->C, 2) ConvI2LNode(andi); - } + // that fits in 32-bits? Commute them and use an AndINode. Don't + // convert masks which would cause a sign extension of the integer + // value. This check includes UI2L masks (0x00000000FFFFFFFF) which + // would be optimized away later in Identity. + if (op == Op_ConvI2L && (mask & CONST64(0xFFFFFFFF80000000)) == 0) { + Node* andi = new (phase->C, 3) AndINode(in1->in(1), phase->intcon(mask)); + andi = phase->transform(andi); + return new (phase->C, 2) ConvI2LNode(andi); } // Masking off sign bits? Dont make them! diff -r f753dffae23e -r 16314a31b961 src/share/vm/opto/output.cpp --- a/src/share/vm/opto/output.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/opto/output.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -911,8 +911,9 @@ ciMethod* scope_method = method ? method : _method; // Describe the scope here assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI"); + assert(!jvms->should_reexecute() || depth==max_depth, "reexecute allowed only for the youngest"); // Now we can describe the scope. - debug_info()->describe_scope(safepoint_pc_offset,scope_method,jvms->bci(),locvals,expvals,monvals); + debug_info()->describe_scope(safepoint_pc_offset,scope_method,jvms->bci(),jvms->should_reexecute(),locvals,expvals,monvals); } // End jvms loop // Mark the end of the scope set. @@ -994,7 +995,8 @@ for (int depth = 1; depth <= max_depth; depth++) { JVMState* jvms = youngest_jvms->of_depth(depth); ciMethod* method = jvms->has_method() ? 
jvms->method() : NULL; - debug_info->describe_scope(pc_offset, method, jvms->bci()); + assert(!jvms->should_reexecute() || depth==max_depth, "reexecute allowed only for the youngest"); + debug_info->describe_scope(pc_offset, method, jvms->bci(), jvms->should_reexecute()); } // Mark the end of the scope set. diff -r f753dffae23e -r 16314a31b961 src/share/vm/prims/jvm.cpp --- a/src/share/vm/prims/jvm.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/prims/jvm.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -638,11 +638,54 @@ if (PrintJVMWarnings) warning("JVM_ResolveClass not implemented"); JVM_END -// Common implementation for JVM_FindClassFromBootLoader and -// JVM_FindClassFromLoader -static jclass jvm_find_class_from_class_loader(JNIEnv* env, const char* name, - jboolean init, jobject loader, - jboolean throwError, TRAPS) { + +// Returns a class loaded by the bootstrap class loader; or null +// if not found. ClassNotFoundException is not thrown. +// +// Rationale behind JVM_FindClassFromBootLoader +// a> JVM_FindClassFromClassLoader was never exported in the export tables. +// b> because of (a) java.dll has a direct dependecy on the unexported +// private symbol "_JVM_FindClassFromClassLoader@20". +// c> the launcher cannot use the private symbol as it dynamically opens +// the entry point, so if something changes, the launcher will fail +// unexpectedly at runtime, it is safest for the launcher to dlopen a +// stable exported interface. +// d> re-exporting JVM_FindClassFromClassLoader as public, will cause its +// signature to change from _JVM_FindClassFromClassLoader@20 to +// JVM_FindClassFromClassLoader and will not be backward compatible +// with older JDKs. +// Thus a public/stable exported entry point is the right solution, +// public here means public in linker semantics, and is exported only +// to the JDK, and is not intended to be a public API. 
+ +JVM_ENTRY(jclass, JVM_FindClassFromBootLoader(JNIEnv* env, + const char* name)) + JVMWrapper2("JVM_FindClassFromBootLoader %s", name); + + // Java libraries should ensure that name is never null... + if (name == NULL || (int)strlen(name) > symbolOopDesc::max_length()) { + // It's impossible to create this class; the name cannot fit + // into the constant pool. + return NULL; + } + + symbolHandle h_name = oopFactory::new_symbol_handle(name, CHECK_NULL); + klassOop k = SystemDictionary::resolve_or_null(h_name, CHECK_NULL); + if (k == NULL) { + return NULL; + } + + if (TraceClassResolution) { + trace_class_resolution(k); + } + return (jclass) JNIHandles::make_local(env, Klass::cast(k)->java_mirror()); +JVM_END + +JVM_ENTRY(jclass, JVM_FindClassFromClassLoader(JNIEnv* env, const char* name, + jboolean init, jobject loader, + jboolean throwError)) + JVMWrapper3("JVM_FindClassFromClassLoader %s throw %s", name, + throwError ? "error" : "exception"); // Java libraries should ensure that name is never null... if (name == NULL || (int)strlen(name) > symbolOopDesc::max_length()) { // It's impossible to create this class; the name cannot fit @@ -662,40 +705,6 @@ trace_class_resolution(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(result))); } return result; -} - -// Rationale behind JVM_FindClassFromBootLoader -// a> JVM_FindClassFromClassLoader was never exported in the export tables. -// b> because of (a) java.dll has a direct dependecy on the unexported -// private symbol "_JVM_FindClassFromClassLoader@20". -// c> the launcher cannot use the private symbol as it dynamically opens -// the entry point, so if something changes, the launcher will fail -// unexpectedly at runtime, it is safest for the launcher to dlopen a -// stable exported interface. 
-// d> re-exporting JVM_FindClassFromClassLoader as public, will cause its -// signature to change from _JVM_FindClassFromClassLoader@20 to -// JVM_FindClassFromClassLoader and will not be backward compatible -// with older JDKs. -// Thus a public/stable exported entry point is the right solution, -// public here means public in linker semantics, and is exported only -// to the JDK, and is not intended to be a public API. - -JVM_ENTRY(jclass, JVM_FindClassFromBootLoader(JNIEnv* env, - const char* name, - jboolean throwError)) - JVMWrapper3("JVM_FindClassFromBootLoader %s throw %s", name, - throwError ? "error" : "exception"); - return jvm_find_class_from_class_loader(env, name, JNI_FALSE, - (jobject)NULL, throwError, THREAD); -JVM_END - -JVM_ENTRY(jclass, JVM_FindClassFromClassLoader(JNIEnv* env, const char* name, - jboolean init, jobject loader, - jboolean throwError)) - JVMWrapper3("JVM_FindClassFromClassLoader %s throw %s", name, - throwError ? "error" : "exception"); - return jvm_find_class_from_class_loader(env, name, init, loader, - throwError, THREAD); JVM_END @@ -756,6 +765,20 @@ static jclass jvm_define_class_common(JNIEnv *env, const char *name, jobject loader, const jbyte *buf, jsize len, jobject pd, const char *source, TRAPS) { if (source == NULL) source = "__JVM_DefineClass__"; + assert(THREAD->is_Java_thread(), "must be a JavaThread"); + JavaThread* jt = (JavaThread*) THREAD; + + PerfClassTraceTime vmtimer(ClassLoader::perf_define_appclass_time(), + ClassLoader::perf_define_appclass_selftime(), + ClassLoader::perf_define_appclasses(), + jt->get_thread_stat()->perf_recursion_counts_addr(), + jt->get_thread_stat()->perf_timers_addr(), + PerfClassTraceTime::DEFINE_CLASS); + + if (UsePerfData) { + ClassLoader::perf_app_classfile_bytes_read()->inc(len); + } + // Since exceptions can be thrown, class initialization can take place // if name is NULL no check for class name in .class stream has to be made. 
symbolHandle class_name; @@ -3905,6 +3928,7 @@ // The Java level wrapper will perform the necessary security check allowing // us to pass the NULL as the initiating class loader. klassOop klass = SystemDictionary::resolve_or_fail(name, loader, protection_domain, throwError != 0, CHECK_NULL); + KlassHandle klass_handle(THREAD, klass); // Check if we should initialize the class if (init && klass_handle->oop_is_instance()) { diff -r f753dffae23e -r 16314a31b961 src/share/vm/prims/jvm.h --- a/src/share/vm/prims/jvm.h Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/prims/jvm.h Thu Aug 13 17:59:05 2009 -0700 @@ -390,15 +390,10 @@ jobject loader, jboolean throwError); /* - * Find a class from a boot class loader. Throw ClassNotFoundException - * or NoClassDefFoundError depending on the value of the last - * argument. This is the same as FindClassFromClassLoader but provided - * as a convenience method exported correctly on all platforms for - * JSR 277 launcher class loading. + * Find a class from a boot class loader. Returns NULL if class not found. */ JNIEXPORT jclass JNICALL -JVM_FindClassFromBootLoader(JNIEnv *env, const char *name, - jboolean throwError); +JVM_FindClassFromBootLoader(JNIEnv *env, const char *name); /* * Find a class from a given class. diff -r f753dffae23e -r 16314a31b961 src/share/vm/prims/jvmtiExport.cpp --- a/src/share/vm/prims/jvmtiExport.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/prims/jvmtiExport.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -1,5 +1,5 @@ /* - * Copyright 2003-2007 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2003-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -2427,6 +2427,11 @@ return; } + if (ForceFullGCJVMTIEpilogues) { + // force 'Full GC' was done semantics for JVMTI GC epilogues + _full = true; + } + // GarbageCollectionStart event posted from VM thread - okay because // JVMTI is clear that the "world is stopped" and callback shouldn't // try to call into the VM. diff -r f753dffae23e -r 16314a31b961 src/share/vm/runtime/arguments.cpp --- a/src/share/vm/runtime/arguments.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/runtime/arguments.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -1054,7 +1054,15 @@ // Unless explicitly requested otherwise, size young gen // for "short" pauses ~ 4M*ParallelGCThreads - if (FLAG_IS_DEFAULT(MaxNewSize)) { // MaxNewSize not set at command-line + + // If either MaxNewSize or NewRatio is set on the command line, + // assume the user is trying to set the size of the young gen. + + if (FLAG_IS_DEFAULT(MaxNewSize) && FLAG_IS_DEFAULT(NewRatio)) { + + // Set MaxNewSize to our calculated preferred_max_new_size unless + // NewSize was set on the command line and it is larger than + // preferred_max_new_size. if (!FLAG_IS_DEFAULT(NewSize)) { // NewSize explicitly set at command-line FLAG_SET_ERGO(uintx, MaxNewSize, MAX2(NewSize, preferred_max_new_size)); } else { @@ -1063,15 +1071,32 @@ if(PrintGCDetails && Verbose) { // Too early to use gclog_or_tty tty->print_cr("Ergo set MaxNewSize: " SIZE_FORMAT, MaxNewSize); - } - } - // Unless explicitly requested otherwise, prefer a large - // Old to Young gen size so as to shift the collection load - // to the old generation concurrent collector - if (FLAG_IS_DEFAULT(NewRatio)) { + } + + // Unless explicitly requested otherwise, prefer a large + // Old to Young gen size so as to shift the collection load + // to the old generation concurrent collector + + // If this is only guarded by FLAG_IS_DEFAULT(NewRatio) + // then NewSize and OldSize may be calculated. 
That would + // generally lead to some differences with ParNewGC for which + // there was no obvious reason. Also limit to the case where + // MaxNewSize has not been set. + FLAG_SET_ERGO(intx, NewRatio, MAX2(NewRatio, new_ratio)); - size_t min_new = align_size_up(ScaleForWordSize(min_new_default), os::vm_page_size()); + // Code along this path potentially sets NewSize and OldSize + + // Calculate the desired minimum size of the young gen but if + // NewSize has been set on the command line, use it here since + // it should be the final value. + size_t min_new; + if (FLAG_IS_DEFAULT(NewSize)) { + min_new = align_size_up(ScaleForWordSize(min_new_default), + os::vm_page_size()); + } else { + min_new = NewSize; + } size_t prev_initial_size = initial_heap_size(); if (prev_initial_size != 0 && prev_initial_size < min_new+OldSize) { set_initial_heap_size(min_new+OldSize); @@ -1083,9 +1108,11 @@ initial_heap_size()/M, prev_initial_size/M); } } + // MaxHeapSize is aligned down in collectorPolicy - size_t max_heap = align_size_down(MaxHeapSize, - CardTableRS::ct_max_alignment_constraint()); + size_t max_heap = + align_size_down(MaxHeapSize, + CardTableRS::ct_max_alignment_constraint()); if(PrintGCDetails && Verbose) { // Too early to use gclog_or_tty @@ -1150,8 +1177,9 @@ // CMSParPromoteBlocksToClaim is a collector-specific flag, so // we'll let it to take precedence. jio_fprintf(defaultStream::error_stream(), - "Both OldPLABSize and CMSParPromoteBlocksToClaim options are specified " - "for the CMS collector. CMSParPromoteBlocksToClaim will take precedence.\n"); + "Both OldPLABSize and CMSParPromoteBlocksToClaim" + " options are specified for the CMS collector." 
+ " CMSParPromoteBlocksToClaim will take precedence.\n"); } } } diff -r f753dffae23e -r 16314a31b961 src/share/vm/runtime/atomic.hpp --- a/src/share/vm/runtime/atomic.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/runtime/atomic.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -39,6 +39,8 @@ static void store_ptr(intptr_t store_value, volatile intptr_t* dest); static void store_ptr(void* store_value, volatile void* dest); + static jlong load(volatile jlong* src); + // Atomically add to a location, return updated value static jint add (jint add_value, volatile jint* dest); static intptr_t add_ptr(intptr_t add_value, volatile intptr_t* dest); diff -r f753dffae23e -r 16314a31b961 src/share/vm/runtime/globals.hpp --- a/src/share/vm/runtime/globals.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/runtime/globals.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -1082,6 +1082,9 @@ product(ccstr, TraceJVMTI, NULL, \ "Trace flags for JVMTI functions and events") \ \ + product(bool, ForceFullGCJVMTIEpilogues, false, \ + "Force 'Full GC' was done semantics for JVMTI GC epilogues") \ + \ /* This option can change an EMCP method into an obsolete method. */ \ /* This can affect tests that except specific methods to be EMCP. */ \ /* This option should be used with caution. 
*/ \ @@ -2924,12 +2927,6 @@ "how many entries we'll try to leave on the stack during " \ "parallel GC") \ \ - product(intx, DCQBarrierQueueBufferSize, 256, \ - "Number of elements in a dirty card queue buffer") \ - \ - product(intx, DCQBarrierProcessCompletedThreshold, 5, \ - "Number of completed dirty card buffers to trigger processing.") \ - \ /* stack parameters */ \ product_pd(intx, StackYellowPages, \ "Number of yellow zone (recoverable overflows) pages") \ @@ -3037,6 +3034,9 @@ "Wait for this many CI accesses to occur in all compiles before " \ "beginning to throw OutOfMemoryErrors in each compile") \ \ + notproduct(bool, CIObjectFactoryVerify, false, \ + "enable potentially expensive verification in ciObjectFactory") \ + \ /* Priorities */ \ product_pd(bool, UseThreadPriorities, "Use native thread priorities") \ \ @@ -3287,7 +3287,7 @@ product(uintx, SharedReadWriteSize, 12*M, \ "Size of read-write space in permanent generation (in bytes)") \ \ - product(uintx, SharedReadOnlySize, 8*M, \ + product(uintx, SharedReadOnlySize, 10*M, \ "Size of read-only space in permanent generation (in bytes)") \ \ product(uintx, SharedMiscDataSize, 4*M, \ @@ -3312,7 +3312,7 @@ product(bool, AnonymousClasses, false, \ "support sun.misc.Unsafe.defineAnonymousClass") \ \ - product(bool, EnableMethodHandles, false, \ + experimental(bool, EnableMethodHandles, false, \ "support method handles (true by default under JSR 292)") \ \ diagnostic(intx, MethodHandlePushLimit, 3, \ @@ -3327,7 +3327,7 @@ diagnostic(bool, OptimizeMethodHandles, true, \ "when constructing method handles, try to improve them") \ \ - product(bool, EnableInvokeDynamic, false, \ + experimental(bool, EnableInvokeDynamic, false, \ "recognize the invokedynamic instruction") \ \ develop(bool, TraceInvokeDynamic, false, \ diff -r f753dffae23e -r 16314a31b961 src/share/vm/runtime/perfData.hpp --- a/src/share/vm/runtime/perfData.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/runtime/perfData.hpp Thu Aug 13 
17:59:05 2009 -0700 @@ -868,6 +868,10 @@ {counter = PerfDataManager::create_counter(counter_ns, counter_name, \ PerfData::U_Events,CHECK);} +#define NEWPERFBYTECOUNTER(counter, counter_ns, counter_name) \ + {counter = PerfDataManager::create_counter(counter_ns, counter_name, \ + PerfData::U_Bytes,CHECK);} + // Utility Classes /* diff -r f753dffae23e -r 16314a31b961 src/share/vm/runtime/vframe.hpp --- a/src/share/vm/runtime/vframe.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/runtime/vframe.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -402,7 +402,12 @@ DebugInfoReadStream buffer(nm(), decode_offset); _sender_decode_offset = buffer.read_int(); _method = methodOop(buffer.read_oop()); - _bci = buffer.read_bci(); + // Deoptimization needs reexecute bit to determine whether to reexecute the bytecode + // only at the time when it "unpack_frames", and the reexecute bit info could always + // be obtained from the scopeDesc in the compiledVFrame. As a result, we don't keep + // the reexecute bit here. 
+ bool dummy_reexecute; + _bci = buffer.read_bci_and_reexecute(dummy_reexecute); assert(_method->is_method(), "checking type of decoded method"); } diff -r f753dffae23e -r 16314a31b961 src/share/vm/runtime/vframeArray.cpp --- a/src/share/vm/runtime/vframeArray.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/runtime/vframeArray.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -44,6 +44,7 @@ _method = vf->method(); _bci = vf->raw_bci(); + _reexecute = vf->should_reexecute(); int index; @@ -148,16 +149,20 @@ // C++ interpreter doesn't need a pc since it will figure out what to do when it // begins execution address pc; - bool use_next_mdp; // true if we should use the mdp associated with the next bci - // rather than the one associated with bcp + bool use_next_mdp = false; // true if we should use the mdp associated with the next bci + // rather than the one associated with bcp if (raw_bci() == SynchronizationEntryBCI) { // We are deoptimizing while hanging in prologue code for synchronized method bcp = method()->bcp_from(0); // first byte code pc = Interpreter::deopt_entry(vtos, 0); // step = 0 since we don't skip current bytecode - use_next_mdp = false; + } else if (should_reexecute()) { //reexecute this bytecode + assert(is_top_frame, "reexecute allowed only for the top frame"); + bcp = method()->bcp_from(bci()); + pc = Interpreter::deopt_reexecute_entry(method(), bcp); } else { bcp = method()->bcp_from(bci()); - pc = Interpreter::continuation_for(method(), bcp, callee_parameters, is_top_frame, use_next_mdp); + pc = Interpreter::deopt_continue_after_entry(method(), bcp, callee_parameters, is_top_frame); + use_next_mdp = true; } assert(Bytecodes::is_defined(*bcp), "must be a valid bytecode"); diff -r f753dffae23e -r 16314a31b961 src/share/vm/runtime/vframeArray.hpp --- a/src/share/vm/runtime/vframeArray.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/runtime/vframeArray.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -41,7 +41,8 @@ private: frame _frame; // the interpreter 
frame we will unpack into - int _bci; // raw bci for this vframe + int _bci; // raw bci for this vframe + bool _reexecute; // whether sould we reexecute this bytecode methodOop _method; // the method for this vframe MonitorChunk* _monitors; // active monitors for this vframe StackValueCollection* _locals; @@ -54,6 +55,7 @@ int bci(void) const; int raw_bci(void) const { return _bci; } + bool should_reexecute(void) const { return _reexecute; } methodOop method(void) const { return _method; } diff -r f753dffae23e -r 16314a31b961 src/share/vm/runtime/vframe_hp.cpp --- a/src/share/vm/runtime/vframe_hp.cpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/runtime/vframe_hp.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -276,6 +276,15 @@ return scope()->bci(); } +bool compiledVFrame::should_reexecute() const { + if (scope() == NULL) { + // native nmethods have no scope the method/bci is implied + nmethod* nm = code(); + assert(nm->is_native_method(), "must be native"); + return false; + } + return scope()->should_reexecute(); +} vframe* compiledVFrame::sender() const { const frame f = fr(); diff -r f753dffae23e -r 16314a31b961 src/share/vm/runtime/vframe_hp.hpp --- a/src/share/vm/runtime/vframe_hp.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/runtime/vframe_hp.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -25,11 +25,12 @@ class compiledVFrame: public javaVFrame { public: // JVM state - methodOop method() const; - int bci() const; - StackValueCollection* locals() const; - StackValueCollection* expressions() const; - GrowableArray* monitors() const; + methodOop method() const; + int bci() const; + bool should_reexecute() const; + StackValueCollection* locals() const; + StackValueCollection* expressions() const; + GrowableArray* monitors() const; void set_locals(StackValueCollection* values) const; diff -r f753dffae23e -r 16314a31b961 src/share/vm/services/threadService.cpp --- a/src/share/vm/services/threadService.cpp Thu Aug 13 17:47:43 2009 -0700 +++ 
b/src/share/vm/services/threadService.cpp Thu Aug 13 17:59:05 2009 -0700 @@ -688,10 +688,9 @@ _contended_enter_count = 0; _monitor_wait_count = 0; _sleep_count = 0; - _class_init_recursion_count = 0; - _class_verify_recursion_count = 0; _count_pending_reset = false; _timer_pending_reset = false; + memset((void*) _perf_recursion_counts, 0, sizeof(_perf_recursion_counts)); } ThreadSnapshot::ThreadSnapshot(JavaThread* thread) { diff -r f753dffae23e -r 16314a31b961 src/share/vm/services/threadService.hpp --- a/src/share/vm/services/threadService.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/services/threadService.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -120,9 +120,8 @@ bool _timer_pending_reset; // Keep accurate times for potentially recursive class operations - int _class_init_recursion_count; - int _class_verify_recursion_count; - int _class_link_recursion_count; + int _perf_recursion_counts[6]; + elapsedTimer _perf_timers[6]; // utility functions void check_and_reset_count() { @@ -165,9 +164,8 @@ void reset_count_stat() { _count_pending_reset = true; } void reset_time_stat() { _timer_pending_reset = true; } - int* class_init_recursion_count_addr() { return &_class_init_recursion_count; } - int* class_verify_recursion_count_addr() { return &_class_verify_recursion_count; } - int* class_link_recursion_count_addr() { return &_class_link_recursion_count; } + int* perf_recursion_counts_addr() { return _perf_recursion_counts; } + elapsedTimer* perf_timers_addr() { return _perf_timers; } }; // Thread snapshot to represent the thread state and statistics diff -r f753dffae23e -r 16314a31b961 src/share/vm/utilities/taskqueue.hpp --- a/src/share/vm/utilities/taskqueue.hpp Thu Aug 13 17:47:43 2009 -0700 +++ b/src/share/vm/utilities/taskqueue.hpp Thu Aug 13 17:59:05 2009 -0700 @@ -22,94 +22,90 @@ * */ -#ifdef LP64 -typedef juint TAG_TYPE; -// for a taskqueue size of 4M -#define LOG_TASKQ_SIZE 22 -#else -typedef jushort TAG_TYPE; -// for a taskqueue size of 16K -#define 
LOG_TASKQ_SIZE 14 -#endif - class TaskQueueSuper: public CHeapObj { protected: - // The first free element after the last one pushed (mod _n). + // Internal type for indexing the queue; also used for the tag. + typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t; + + // The first free element after the last one pushed (mod N). volatile uint _bottom; - // log2 of the size of the queue. - enum SomeProtectedConstants { - Log_n = LOG_TASKQ_SIZE + enum { + N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K + MOD_N_MASK = N - 1 // To compute x mod N efficiently. }; -#undef LOG_TASKQ_SIZE + + class Age { + public: + Age(size_t data = 0) { _data = data; } + Age(const Age& age) { _data = age._data; } + Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } - // Size of the queue. - uint n() { return (1 << Log_n); } - // For computing "x mod n" efficiently. - uint n_mod_mask() { return n() - 1; } + Age get() const volatile { return _data; } + void set(Age age) volatile { _data = age._data; } + + idx_t top() const volatile { return _fields._top; } + idx_t tag() const volatile { return _fields._tag; } - struct Age { - TAG_TYPE _top; - TAG_TYPE _tag; + // Increment top; if it wraps, increment tag also. 
+ void increment() { + _fields._top = increment_index(_fields._top); + if (_fields._top == 0) ++_fields._tag; + } - TAG_TYPE tag() const { return _tag; } - TAG_TYPE top() const { return _top; } + Age cmpxchg(const Age new_age, const Age old_age) volatile { + return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data, + (volatile intptr_t *)&_data, + (intptr_t)old_age._data); + } + + bool operator ==(const Age& other) const { return _data == other._data; } - Age() { _tag = 0; _top = 0; } - - friend bool operator ==(const Age& a1, const Age& a2) { - return a1.tag() == a2.tag() && a1.top() == a2.top(); - } + private: + struct fields { + idx_t _top; + idx_t _tag; + }; + union { + size_t _data; + fields _fields; + }; }; - Age _age; - // These make sure we do single atomic reads and writes. - Age get_age() { - uint res = *(volatile uint*)(&_age); - return *(Age*)(&res); + + volatile Age _age; + + // These both operate mod N. + static uint increment_index(uint ind) { + return (ind + 1) & MOD_N_MASK; } - void set_age(Age a) { - *(volatile uint*)(&_age) = *(uint*)(&a); + static uint decrement_index(uint ind) { + return (ind - 1) & MOD_N_MASK; } - TAG_TYPE get_top() { - return get_age().top(); - } - - // These both operate mod _n. - uint increment_index(uint ind) { - return (ind + 1) & n_mod_mask(); - } - uint decrement_index(uint ind) { - return (ind - 1) & n_mod_mask(); - } - - // Returns a number in the range [0.._n). If the result is "n-1", it - // should be interpreted as 0. + // Returns a number in the range [0..N). If the result is "N-1", it should be + // interpreted as 0. uint dirty_size(uint bot, uint top) { - return ((int)bot - (int)top) & n_mod_mask(); + return (bot - top) & MOD_N_MASK; } // Returns the size corresponding to the given "bot" and "top". uint size(uint bot, uint top) { uint sz = dirty_size(bot, top); - // Has the queue "wrapped", so that bottom is less than top? - // There's a complicated special case here. 
A pair of threads could - // perform pop_local and pop_global operations concurrently, starting - // from a state in which _bottom == _top+1. The pop_local could - // succeed in decrementing _bottom, and the pop_global in incrementing - // _top (in which case the pop_global will be awarded the contested - // queue element.) The resulting state must be interpreted as an empty - // queue. (We only need to worry about one such event: only the queue - // owner performs pop_local's, and several concurrent threads - // attempting to perform the pop_global will all perform the same CAS, - // and only one can succeed. Any stealing thread that reads after - // either the increment or decrement will see an empty queue, and will - // not join the competitors. The "sz == -1 || sz == _n-1" state will - // not be modified by concurrent queues, so the owner thread can reset - // the state to _bottom == top so subsequent pushes will be performed - // normally. - if (sz == (n()-1)) return 0; - else return sz; + // Has the queue "wrapped", so that bottom is less than top? There's a + // complicated special case here. A pair of threads could perform pop_local + // and pop_global operations concurrently, starting from a state in which + // _bottom == _top+1. The pop_local could succeed in decrementing _bottom, + // and the pop_global in incrementing _top (in which case the pop_global + // will be awarded the contested queue element.) The resulting state must + // be interpreted as an empty queue. (We only need to worry about one such + // event: only the queue owner performs pop_local's, and several concurrent + // threads attempting to perform the pop_global will all perform the same + // CAS, and only one can succeed.) Any stealing thread that reads after + // either the increment or decrement will see an empty queue, and will not + // join the competitors. 
The "sz == -1 || sz == N-1" state will not be + // modified by concurrent queues, so the owner thread can reset the state to + // _bottom == top so subsequent pushes will be performed normally. + return (sz == N - 1) ? 0 : sz; } public: @@ -122,22 +118,21 @@ // The "careful" version admits the possibility of pop_local/pop_global // races. uint size() { - return size(_bottom, get_top()); + return size(_bottom, _age.top()); } uint dirty_size() { - return dirty_size(_bottom, get_top()); + return dirty_size(_bottom, _age.top()); } void set_empty() { _bottom = 0; - _age = Age(); + _age.set(0); } // Maximum number of elements allowed in the queue. This is two less // than the actual queue size, for somewhat complicated reasons. - uint max_elems() { return n() - 2; } - + uint max_elems() { return N - 2; } }; template class GenericTaskQueue: public TaskQueueSuper { @@ -179,12 +174,12 @@ template GenericTaskQueue::GenericTaskQueue():TaskQueueSuper() { - assert(sizeof(Age) == sizeof(int), "Depends on this."); + assert(sizeof(Age) == sizeof(size_t), "Depends on this."); } template void GenericTaskQueue::initialize() { - _elems = NEW_C_HEAP_ARRAY(E, n()); + _elems = NEW_C_HEAP_ARRAY(E, N); guarantee(_elems != NULL, "Allocation failed."); } @@ -208,14 +203,14 @@ template bool GenericTaskQueue::push_slow(E t, uint dirty_n_elems) { - if (dirty_n_elems == n() - 1) { + if (dirty_n_elems == N - 1) { // Actually means 0, so do the push. uint localBot = _bottom; _elems[localBot] = t; _bottom = increment_index(localBot); return true; - } else - return false; + } + return false; } template @@ -230,53 +225,45 @@ // then have the owner thread do a pop followed by another push. Without // the incrementing of "tag", the pop_global's CAS could succeed, // allowing it to believe it has claimed the stale element.) 
- Age newAge; - newAge._top = localBot; - newAge._tag = oldAge.tag() + 1; + Age newAge((idx_t)localBot, oldAge.tag() + 1); // Perhaps a competing pop_global has already incremented "top", in which // case it wins the element. if (localBot == oldAge.top()) { - Age tempAge; // No competing pop_global has yet incremented "top"; we'll try to // install new_age, thus claiming the element. - assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit."); - *(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); + Age tempAge = _age.cmpxchg(newAge, oldAge); if (tempAge == oldAge) { // We win. - assert(dirty_size(localBot, get_top()) != n() - 1, - "Shouldn't be possible..."); + assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); return true; } } - // We fail; a completing pop_global gets the element. But the queue is - // empty (and top is greater than bottom.) Fix this representation of - // the empty queue to become the canonical one. - set_age(newAge); - assert(dirty_size(localBot, get_top()) != n() - 1, - "Shouldn't be possible..."); + // We lose; a completing pop_global gets the element. But the queue is empty + // and top is greater than bottom. Fix this representation of the empty queue + // to become the canonical one. 
+ _age.set(newAge); + assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); return false; } template bool GenericTaskQueue::pop_global(E& t) { - Age newAge; - Age oldAge = get_age(); + Age oldAge = _age.get(); uint localBot = _bottom; uint n_elems = size(localBot, oldAge.top()); if (n_elems == 0) { return false; } + t = _elems[oldAge.top()]; - newAge = oldAge; - newAge._top = increment_index(newAge.top()); - if ( newAge._top == 0 ) newAge._tag++; - Age resAge; - *(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge); + Age newAge(oldAge); + newAge.increment(); + Age resAge = _age.cmpxchg(newAge, oldAge); + // Note that using "_bottom" here might fail, since a pop_local might // have decremented it. - assert(dirty_size(localBot, newAge._top) != n() - 1, - "Shouldn't be possible..."); - return (resAge == oldAge); + assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity"); + return resAge == oldAge; } template @@ -459,7 +446,7 @@ return offer_termination(NULL); } - // As above, but it also terminates of the should_exit_termination() + // As above, but it also terminates if the should_exit_termination() // method of the terminator parameter returns true. If terminator is // NULL, then it is ignored. bool offer_termination(TerminatorTerminator* terminator); @@ -492,11 +479,10 @@ } #else uint localBot = _bottom; - assert((localBot >= 0) && (localBot < n()), "_bottom out of range."); - TAG_TYPE top = get_top(); + assert((localBot >= 0) && (localBot < N), "_bottom out of range."); + idx_t top = _age.top(); uint dirty_n_elems = dirty_size(localBot, top); - assert((dirty_n_elems >= 0) && (dirty_n_elems < n()), - "n_elems out of range."); + assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range."); if (dirty_n_elems < max_elems()) { _elems[localBot] = t; _bottom = increment_index(localBot); @@ -517,12 +503,12 @@ return true; #else uint localBot = _bottom; - // This value cannot be n-1. 
That can only occur as a result of + // This value cannot be N-1. That can only occur as a result of // the assignment to bottom in this method. If it does, this method // resets the size( to 0 before the next call (which is sequential, // since this is pop_local.) - uint dirty_n_elems = dirty_size(localBot, get_top()); - assert(dirty_n_elems != n() - 1, "Shouldn't be possible..."); + uint dirty_n_elems = dirty_size(localBot, _age.top()); + assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); if (dirty_n_elems == 0) return false; localBot = decrement_index(localBot); _bottom = localBot; @@ -534,15 +520,14 @@ // If there's still at least one element in the queue, based on the // "_bottom" and "age" we've read, then there can be no interference with // a "pop_global" operation, and we're done. - TAG_TYPE tp = get_top(); // XXX + idx_t tp = _age.top(); // XXX if (size(localBot, tp) > 0) { - assert(dirty_size(localBot, tp) != n() - 1, - "Shouldn't be possible..."); + assert(dirty_size(localBot, tp) != N - 1, "sanity"); return true; } else { // Otherwise, the queue contained exactly one element; we take the slow // path. 
- return pop_local_slow(localBot, get_age()); + return pop_local_slow(localBot, _age.get()); } #endif } diff -r f753dffae23e -r 16314a31b961 test/compiler/6826736/Test.java --- a/test/compiler/6826736/Test.java Thu Aug 13 17:47:43 2009 -0700 +++ b/test/compiler/6826736/Test.java Thu Aug 13 17:59:05 2009 -0700 @@ -27,7 +27,7 @@ * @bug 6826736 * @summary CMS: core dump with -XX:+UseCompressedOops * - * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xbatch -XX:+ScavengeALot -XX:+UseCompressedOops -XX:HeapBaseMinAddress=32g -XX:CompileThreshold=100 -XX:CompileOnly=Test.test -XX:-BlockLayoutRotateLoops -XX:LoopUnrollLimit=0 Test + * @run main/othervm/timeout=600 -XX:+IgnoreUnrecognizedVMOptions -Xbatch -XX:+ScavengeALot -XX:+UseCompressedOops -XX:HeapBaseMinAddress=32g -XX:CompileThreshold=100 -XX:CompileOnly=Test.test -XX:-BlockLayoutRotateLoops -XX:LoopUnrollLimit=0 Test */ public class Test { diff -r f753dffae23e -r 16314a31b961 test/compiler/6833129/Test.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/compiler/6833129/Test.java Thu Aug 13 17:59:05 2009 -0700 @@ -0,0 +1,62 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +/** + * @test + * @bug 6833129 + * @summary Object.clone() and Arrays.copyOf ignore coping with -XX:+DeoptimizeALot + * @run main/othervm -Xbatch -XX:+DeoptimizeALot Test + */ + +public class Test{ + public static void init(int src[]) { + for (int i =0; i { + protected static final int DEFAULT_INITIAL_CAPACITY = 16; + private static final int MAXIMUM_CAPACITY = 1 << 30; + private static final float DEFAULT_LOAD_FACTOR = 0.75f; + + protected Entry[] table; + + private int size; + protected int threshold; + private final float loadFactor; + private final ReferenceQueue queue = new ReferenceQueue(); + + public WeakPool() + { + this.loadFactor = DEFAULT_LOAD_FACTOR; + threshold = DEFAULT_INITIAL_CAPACITY; + table = new Entry[DEFAULT_INITIAL_CAPACITY]; + } + + /** + * Check for equality of non-null reference x and possibly-null y. By + * default uses Object.equals. + */ + private boolean eq(Object x, Object y) + { + return x == y || x.equals(y); + } + + /** + * Return index for hash code h. + */ + private int indexFor(int h, int length) + { + return h & length - 1; + } + + /** + * Expunge stale entries from the table. 
+ */ + private void expungeStaleEntries() + { + Object r; + while ((r = queue.poll()) != null) + { + Entry e = (Entry) r; + int h = e.hash; + int i = indexFor(h, table.length); + + // System.out.println("EXPUNGING " + h); + Entry prev = table[i]; + Entry p = prev; + while (p != null) + { + Entry next = p.next; + if (p == e) + { + if (prev == e) + { + table[i] = next; + } + else + { + prev.next = next; + } + e.next = null; // Help GC + size--; + break; + } + prev = p; + p = next; + } + } + } + + /** + * Return the table after first expunging stale entries + */ + private Entry[] getTable() + { + expungeStaleEntries(); + return table; + } + + /** + * Returns the number of key-value mappings in this map. + * This result is a snapshot, and may not reflect unprocessed + * entries that will be removed before next attempted access + * because they are no longer referenced. + */ + public int size() + { + if (size == 0) + { + return 0; + } + expungeStaleEntries(); + return size; + } + + /** + * Returns true if this map contains no key-value mappings. + * This result is a snapshot, and may not reflect unprocessed + * entries that will be removed before next attempted access + * because they are no longer referenced. + */ + public boolean isEmpty() + { + return size() == 0; + } + + /** + * Returns the value stored in the pool that equals the requested key + * or null if the map contains no mapping for + * this key (or the key is null) + * + * @param key the key whose equals value is to be returned. + * @return the object that is equal the specified key, or + * null if key is null or no object in the pool equals the key. 
+ */ + public V get(V key) + { + if (key == null) + { + return null; + } + int h = key.hashCode(); + Entry[] tab = getTable(); + int index = indexFor(h, tab.length); + Entry e = tab[index]; + while (e != null) + { + V candidate = e.get(); + if (e.hash == h && eq(key, candidate)) + { + return candidate; + } + e = e.next; + } + return null; + } + + /** + * Returns the entry associated with the specified key in the HashMap. + * Returns null if the HashMap contains no mapping for this key. + */ + Entry getEntry(Object key) + { + int h = key.hashCode(); + Entry[] tab = getTable(); + int index = indexFor(h, tab.length); + Entry e = tab[index]; + while (e != null && !(e.hash == h && eq(key, e.get()))) + { + e = e.next; + } + return e; + } + + /** + * Places the object into the pool. If the object is null, nothing happens. + * If an equal object already exists, it is not replaced. + * + * @param key the object to put into the pool. key may be null. + * @return the object in the pool that is equal to the key, or the newly placed key if no such object existed when put was called + */ + public V put(V key) + { + if (key == null) + { + return null; + } + int h = key.hashCode(); + Entry[] tab = getTable(); + int i = indexFor(h, tab.length); + + for (Entry e = tab[i]; e != null; e = e.next) + { + V candidate = e.get(); + if (h == e.hash && eq(key, candidate)) + { + return candidate; + } + } + + tab[i] = new Entry(key, queue, h, tab[i]); + + if (++size >= threshold) + { + resize(tab.length * 2); + } + + // System.out.println("Added " + key + " to pool"); + return key; + } + + /** + * Rehashes the contents of this map into a new array with a + * larger capacity. This method is called automatically when the + * number of keys in this map reaches its threshold. + *

+ * If current capacity is MAXIMUM_CAPACITY, this method does not + * resize the map, but sets threshold to Integer.MAX_VALUE. + * This has the effect of preventing future calls. + * + * @param newCapacity the new capacity, MUST be a power of two; + * must be greater than current capacity unless current + * capacity is MAXIMUM_CAPACITY (in which case value + * is irrelevant). + */ + void resize(int newCapacity) + { + Entry[] oldTable = getTable(); + int oldCapacity = oldTable.length; + if (oldCapacity == MAXIMUM_CAPACITY) + { + threshold = Integer.MAX_VALUE; + return; + } + + Entry[] newTable = new Entry[newCapacity]; + transfer(oldTable, newTable); + table = newTable; + + /* + * If ignoring null elements and processing ref queue caused massive + * shrinkage, then restore old table. This should be rare, but avoids + * unbounded expansion of garbage-filled tables. + */ + if (size >= threshold / 2) + { + threshold = (int) (newCapacity * loadFactor); + } + else + { + expungeStaleEntries(); + transfer(newTable, oldTable); + table = oldTable; + } + } + + /** + * Transfer all entries from src to dest tables + */ + private void transfer(Entry[] src, Entry[] dest) + { + for (int j = 0; j < src.length; ++j) + { + Entry e = src[j]; + src[j] = null; + while (e != null) + { + Entry next = e.next; + Object key = e.get(); + if (key == null) + { + e.next = null; // Help GC + size--; + } + else + { + int i = indexFor(e.hash, dest.length); + e.next = dest[i]; + dest[i] = e; + } + e = next; + } + } + } + + /** + * Removes the object in the pool that equals the key. + * + * @param key + * @return previous value associated with specified key, or null + * if there was no mapping for key or the key is null. 
+ */ + public V removeFromPool(V key) + { + if (key == null) + { + return null; + } + int h = key.hashCode(); + Entry[] tab = getTable(); + int i = indexFor(h, tab.length); + Entry prev = tab[i]; + Entry e = prev; + + while (e != null) + { + Entry next = e.next; + V candidate = e.get(); + if (h == e.hash && eq(key, candidate)) + { + size--; + if (prev == e) + { + tab[i] = next; + } + else + { + prev.next = next; + } + return candidate; + } + prev = e; + e = next; + } + + return null; + } + + /** + * Removes all mappings from this map. + */ + public void clear() + { + // clear out ref queue. We don't need to expunge entries + // since table is getting cleared. + while (queue.poll() != null) + { + // nop + } + + table = new Entry[DEFAULT_INITIAL_CAPACITY]; + threshold = DEFAULT_INITIAL_CAPACITY; + size = 0; + + // Allocation of array may have caused GC, which may have caused + // additional entries to go stale. Removing these entries from the + // reference queue will make them eligible for reclamation. + while (queue.poll() != null) + { + // nop + } + } + + /** + * The entries in this hash table extend WeakReference, using its main ref + * field as the key. + */ + protected static class Entry + extends WeakReference + { + private final int hash; + private Entry next; + + /** + * Create new entry. 
+ */ + Entry(final V key, final ReferenceQueue queue, final int hash, final Entry next) + { + super(key, queue); + this.hash = hash; + this.next = next; + } + + public V getKey() + { + return super.get(); + } + + public boolean equals(Object o) + { + if (!(o instanceof WeakPool.Entry)) + { + return false; + } + WeakPool.Entry that = (WeakPool.Entry) o; + V k1 = this.getKey(); + V k2 = that.getKey(); + return (k1==k2 || k1.equals(k2)); + } + + public int hashCode() + { + return this.hash; + } + + public String toString() + { + return String.valueOf(this.getKey()); + } + } +} + +final class MultiSynonymKey { + private List keys; + + public MultiSynonymKey() { + keys = new ArrayList(); + } + + public MultiSynonymKey(MyList... arg) { + keys = Arrays.asList(arg); + } + + public List getKeys() { + return keys; + } + + public int hashCode() { + return this.getKeys().hashCode(); + } + + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + + if (!(obj instanceof MultiSynonymKey)) { + return false; + } + + MultiSynonymKey that = (MultiSynonymKey) obj; + return this.getKeys().equals(that.getKeys()); + } + + public String toString() { + return this.getClass().getName() + this.getKeys().toString(); + } +} + +public class Test extends Thread { + static public Test test; + static private byte[] arg1; + static private byte[] arg2; + static public WeakPool wp; + public volatile MultiSynonymKey ml1; + public volatile MultiSynonymKey ml2; + private volatile MultiSynonymKey ml3; + + public void run() { + int count=0; + while (true) { + try { + Thread.sleep(10); + } catch (Exception e) {} + synchronized (wp) { + ml2 = new MultiSynonymKey(new DoubletonList(new String(arg1), new String(arg2))); + wp.put(ml2); + ml3 = new MultiSynonymKey(new DoubletonList(new String(arg1), new String(arg2))); + } + try { + Thread.sleep(10); + } catch (Exception e) {} + synchronized (wp) { + ml1 = new MultiSynonymKey(new SingletonList(new String(arg1))); + wp.put(ml1); + ml3 = new 
MultiSynonymKey(new SingletonList(new String(arg1))); + } + if (count++==100) + System.exit(95); + } + } + + public static void main(String[] args) throws Exception { + wp = new WeakPool(); + test = new Test(); + + test.arg1 = args[0].getBytes(); + test.arg2 = args[1].getBytes(); + + test.ml1 = new MultiSynonymKey(new SingletonList(new String(test.arg1))); + test.ml2 = new MultiSynonymKey(new DoubletonList(new String(test.arg1), new String(test.arg2))); + test.ml3 = new MultiSynonymKey(new DoubletonList(new String(test.arg1), new String(test.arg2))); + + wp.put(test.ml1); + wp.put(test.ml2); + + test.setDaemon(true); + test.start(); + + int counter = 0; + while (true) { + synchronized (wp) { + MultiSynonymKey foo = test.ml3; + + if (wp.put(foo) == foo) { + // System.out.println("foo " + counter); + // System.out.println(foo); + } + } + counter++; + } + } + + private boolean eq(Object x, Object y) { + return x == y || x.equals(y); + } +}